From d75bb1d518cadde5f656c9b53c2ce080cfaecabe Mon Sep 17 00:00:00 2001 From: Jack Li Date: Tue, 13 May 2025 16:40:16 -0400 Subject: [PATCH 1/3] refactor: Remove access parameters in client --- polaris/benchmark/_base.py | 4 +--- polaris/dataset/_base.py | 2 -- polaris/dataset/_dataset.py | 4 +--- polaris/dataset/_dataset_v2.py | 5 ++--- polaris/evaluate/_results.py | 4 +--- polaris/hub/client.py | 29 ++++++----------------------- polaris/model/__init__.py | 9 ++++----- polaris/utils/types.py | 5 ----- 8 files changed, 15 insertions(+), 47 deletions(-) diff --git a/polaris/benchmark/_base.py b/polaris/benchmark/_base.py index 67d73ba8..3069eede 100644 --- a/polaris/benchmark/_base.py +++ b/polaris/benchmark/_base.py @@ -29,7 +29,6 @@ from polaris.utils.dict2html import dict2html from polaris.utils.errors import InvalidBenchmarkError from polaris.utils.types import ( - AccessType, HubOwner, IncomingPredictionsType, TargetType, @@ -171,7 +170,6 @@ def upload_to_hub( self, settings: PolarisHubSettings | None = None, cache_auth_token: bool = True, - access: AccessType = "private", owner: HubOwner | str | None = None, parent_artifact_id: str | None = None, **kwargs: dict, @@ -188,7 +186,7 @@ def upload_to_hub( **kwargs, ) as client: return client.upload_benchmark( - self, access=access, owner=owner, parent_artifact_id=parent_artifact_id + self, owner=owner, parent_artifact_id=parent_artifact_id ) def to_json(self, destination: str) -> str: diff --git a/polaris/dataset/_base.py b/polaris/dataset/_base.py index 8ecccf98..e9ddadd1 100644 --- a/polaris/dataset/_base.py +++ b/polaris/dataset/_base.py @@ -29,7 +29,6 @@ from polaris.utils.dict2html import dict2html from polaris.utils.errors import InvalidDatasetError from polaris.utils.types import ( - AccessType, ChecksumStrategy, DatasetIndex, HttpUrlString, @@ -304,7 +303,6 @@ def get_data( @abc.abstractmethod def upload_to_hub( self, - access: AccessType = "private", owner: HubOwner | str | None = None, parent_artifact_id: str | None = None, ): diff --git a/polaris/dataset/_dataset.py b/polaris/dataset/_dataset.py index df2325c1..30383749 100644 --- a/polaris/dataset/_dataset.py +++ b/polaris/dataset/_dataset.py @@ -18,7 +18,6 @@ from polaris.mixins._checksum import ChecksumMixin from polaris.utils.errors import InvalidDatasetError from polaris.utils.types import ( - AccessType, ChecksumStrategy, HubOwner, ZarrConflictResolution, @@ -220,7 +219,6 @@ def get_data( def upload_to_hub( self, - access: AccessType = "private", owner: HubOwner | str | None = None, parent_artifact_id: str | None = None, ): @@ -231,7 +229,7 @@ def upload_to_hub( from polaris.hub.client import PolarisHubClient with PolarisHubClient() as client: - client.upload_dataset(self, owner=owner, access=access, parent_artifact_id=parent_artifact_id) + client.upload_dataset(self, owner=owner, parent_artifact_id=parent_artifact_id) @classmethod def from_json(cls, path: str): diff --git a/polaris/dataset/_dataset_v2.py b/polaris/dataset/_dataset_v2.py index 40ba3ba5..8a757bdd 100644 --- a/polaris/dataset/_dataset_v2.py +++ b/polaris/dataset/_dataset_v2.py @@ -15,7 +15,7 @@ from polaris.dataset._base import BaseDataset from polaris.dataset.zarr._manifest import calculate_file_md5, generate_zarr_manifest from polaris.utils.errors import InvalidDatasetError -from polaris.utils.types import AccessType, ChecksumStrategy, HubOwner, ZarrConflictResolution +from polaris.utils.types import ChecksumStrategy, HubOwner, ZarrConflictResolution logger = logging.getLogger(__name__) @@ -194,7 +194,6 @@ def get_data(self, row: int, col: str, adapters: dict[str, Adapter] | None = Non def upload_to_hub( self, - access: AccessType = "private", owner: HubOwner | str | None = None, parent_artifact_id: str | None = None, ): @@ -205,7 +204,7 @@ def upload_to_hub( from polaris.hub.client import PolarisHubClient with PolarisHubClient() as client: - client.upload_dataset(self, owner=owner, access=access, parent_artifact_id=parent_artifact_id) + client.upload_dataset(self, owner=owner, parent_artifact_id=parent_artifact_id) @classmethod def from_json(cls, path: str): diff --git a/polaris/evaluate/_results.py b/polaris/evaluate/_results.py index d8c68243..21b0b074 100644 --- a/polaris/evaluate/_results.py +++ b/polaris/evaluate/_results.py @@ -16,7 +16,6 @@ from polaris.utils.errors import InvalidResultError from polaris.utils.misc import slugify from polaris.utils.types import ( - AccessType, HubOwner, SlugCompatibleStringType, ) @@ -173,7 +172,6 @@ def set_benchmark_artifact_id(self): def upload_to_hub( self, - access: AccessType = "private", owner: HubOwner | str | None = None, **kwargs: dict, ): @@ -184,7 +182,7 @@ def upload_to_hub( from polaris.hub.client import PolarisHubClient with PolarisHubClient(**kwargs) as client: - return client.upload_results(self, access=access, owner=owner) + return client.upload_results(self, owner=owner) class BenchmarkResultsV1(EvaluationResultV1, BaseBenchmarkResults): diff --git a/polaris/hub/client.py b/polaris/hub/client.py index 30627312..ab45c69a 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -39,7 +39,6 @@ PolarisUnauthorizedError, ) from polaris.utils.types import ( - AccessType, ChecksumStrategy, HubOwner, SupportedLicenseType, @@ -497,7 +496,6 @@ def _get_v2_benchmark(self, owner: str | HubOwner, slug: str) -> BenchmarkV2Spec def upload_results( self, results: BenchmarkResultsV1 | BenchmarkResultsV2, - access: AccessType = "private", owner: HubOwner | str | None = None, ): """Upload the results to the Polaris Hub. @@ -515,7 +513,6 @@ def upload_results( Args: results: The results to upload. - access: Grant public or private access to result owner: Which Hub user or organization owns the artifact. Takes precedence over `results.owner`. """ with track_progress(description="Uploading results", total=1) as (progress, task): @@ -525,7 +522,7 @@ def upload_results( # Make a request to the Hub response = self._base_request_to_hub( - url="/v2/result", method="POST", json={"access": access, **result_json} + url="/v2/result", method="POST", json={**result_json} ) # Inform the user about where to find their newly created artifact. @@ -538,7 +535,6 @@ def upload_results( def upload_dataset( self, dataset: DatasetV1 | DatasetV2, - access: AccessType = "private", timeout: TimeoutTypes = (10, 200), owner: HubOwner | str | None = None, if_exists: ZarrConflictResolution = "replace", @@ -558,7 +554,6 @@ def upload_dataset( Args: dataset: The dataset to upload. - access: Grant public or private access to result timeout: Request timeout values. User can modify the value when uploading large dataset as needed. This can be a single value with the timeout in seconds for all IO operations, or a more granular tuple with (connect_timeout, write_timeout). The type of the the timout parameter comes from `httpx`. @@ -580,15 +575,14 @@ def upload_dataset( ) if isinstance(dataset, DatasetV1): - self._upload_v1_dataset(dataset, timeout, access, owner, if_exists, parent_artifact_id) + self._upload_v1_dataset(dataset, timeout, owner, if_exists, parent_artifact_id) elif isinstance(dataset, DatasetV2): - self._upload_v2_dataset(dataset, timeout, access, owner, if_exists, parent_artifact_id) + self._upload_v2_dataset(dataset, timeout, owner, if_exists, parent_artifact_id) def _upload_v1_dataset( self, dataset: DatasetV1, timeout: TimeoutTypes, - access: AccessType, owner: HubOwner | str | None, if_exists: ZarrConflictResolution, parent_artifact_id: str | None, @@ -632,7 +626,6 @@ def _upload_v1_dataset( "md5Sum": parquet_md5, }, "zarrContent": [md5sum.model_dump() for md5sum in dataset._zarr_md5sum_manifest], - "access": access, "parentArtifactId": parent_artifact_id, **dataset_json, }, @@ -677,7 +670,6 @@ def _upload_v2_dataset( self, dataset: DatasetV2, timeout: TimeoutTypes, - access: AccessType, owner: HubOwner | str | None, if_exists: ZarrConflictResolution, parent_artifact_id: str | None, @@ -700,7 +692,6 @@ def _upload_v2_dataset( "zarrManifestFileContent": { "md5Sum": dataset.zarr_manifest_md5sum, }, - "access": access, "parentArtifactId": parent_artifact_id, **dataset_json, }, @@ -748,7 +739,6 @@ def _upload_v2_dataset( def upload_benchmark( self, benchmark: BenchmarkV1Specification | BenchmarkV2Specification, - access: AccessType = "private", owner: HubOwner | str | None = None, parent_artifact_id: str | None = None, ): @@ -770,20 +760,18 @@ def upload_benchmark( Args: benchmark: The benchmark to upload. - access: Grant public or private access to result owner: Which Hub user or organization owns the artifact. Takes precedence over `benchmark.owner`. parent_artifact_id: The `owner/slug` of the parent benchmark, if uploading a new version of a benchmark. """ match benchmark: case BenchmarkV1Specification(): - self._upload_v1_benchmark(benchmark, access, owner, parent_artifact_id) + self._upload_v1_benchmark(benchmark, owner, parent_artifact_id) case BenchmarkV2Specification(): - self._upload_v2_benchmark(benchmark, access, owner, parent_artifact_id) + self._upload_v2_benchmark(benchmark, owner, parent_artifact_id) def _upload_v1_benchmark( self, benchmark: BenchmarkV1Specification, - access: AccessType = "private", owner: HubOwner | str | None = None, parent_artifact_id: str | None = None, ): @@ -796,7 +784,6 @@ def _upload_v1_benchmark( benchmark.owner = HubOwner.normalize(owner or benchmark.owner) benchmark_json = benchmark.model_dump(exclude={"dataset"}, exclude_none=True, by_alias=True) benchmark_json["datasetArtifactId"] = benchmark.dataset.artifact_id - benchmark_json["access"] = access url = f"/v1/benchmark/{benchmark.artifact_id}" response = self._base_request_to_hub( @@ -811,7 +798,6 @@ def _upload_v1_benchmark( def _upload_v2_benchmark( self, benchmark: BenchmarkV2Specification, - access: AccessType = "private", owner: HubOwner | str | None = None, parent_artifact_id: str | None = None, ): @@ -834,7 +820,6 @@ def _upload_v2_benchmark( url=url, method="PUT", json={ - "access": access, "datasetArtifactId": benchmark.dataset.artifact_id, "parentArtifactId": parent_artifact_id, **benchmark_json, @@ -940,7 +925,6 @@ def get_model(self, artifact_id: str) -> Model: def upload_model( self, model: Model, - access: AccessType = "private", owner: HubOwner | str | None = None, parent_artifact_id: str | None = None, ): @@ -958,7 +942,6 @@ def upload_model( Args: model: The model to upload. - access: Grant public or private access to result owner: Which Hub user or organization owns the artifact. Takes precedence over `model.owner`. parent_artifact_id: The `owner/slug` of the parent model, if uploading a new version of a model. """ @@ -972,7 +955,7 @@ def upload_model( response = self._base_request_to_hub( url=url, method="PUT", - json={"access": access, "parentArtifactId": parent_artifact_id, **model_json}, + json={"parentArtifactId": parent_artifact_id, **model_json}, ) # NOTE: When we merge in the competition model feature, we will need to update the slug with the inserted model slug to make sure we write to the correct storage location. diff --git a/polaris/model/__init__.py b/polaris/model/__init__.py index a47d1dfa..fc65478c 100644 --- a/polaris/model/__init__.py +++ b/polaris/model/__init__.py @@ -1,6 +1,6 @@ from polaris._artifact import BaseArtifactModel from polaris.utils.types import HttpUrlString -from polaris.utils.types import AccessType, HubOwner +from polaris.utils.types import HubOwner from pydantic import Field @@ -35,8 +35,8 @@ class Model(BaseArtifactModel): artifact_changelog: A description of the changes made in this model version. Methods: - upload_to_hub(access: AccessType = "private", owner: HubOwner | str | None = None): - Uploads the model artifact to the Polaris Hub, associating it with a specified owner and access level. + upload_to_hub(owner: HubOwner | str | None = None): + Uploads the model artifact to the Polaris Hub, associating it with a specified owner. For additional metadata attributes, see the base class. """ @@ -53,7 +53,6 @@ class Model(BaseArtifactModel): def upload_to_hub( self, - access: AccessType = "private", owner: HubOwner | str | None = None, parent_artifact_id: str | None = None, ): @@ -63,4 +62,4 @@ def upload_to_hub( from polaris.hub.client import PolarisHubClient with PolarisHubClient() as client: - client.upload_model(self, owner=owner, access=access, parent_artifact_id=parent_artifact_id) + client.upload_model(self, owner=owner, parent_artifact_id=parent_artifact_id) diff --git a/polaris/utils/types.py b/polaris/utils/types.py index e7964c2e..2bd83182 100644 --- a/polaris/utils/types.py +++ b/polaris/utils/types.py @@ -104,11 +104,6 @@ This can be used to sort the metric score, indicate the optmization direction of endpoint. """ -AccessType: TypeAlias = Literal["public", "private"] -""" -Type to specify access to a dataset, benchmark or result in the Hub. -""" - TimeoutTypes = tuple[int, int] | Literal["timeout", "never"] """ Timeout types for specifying maximum wait times. From 0bf619bba0fb28c0a0c6aa784e676c9a56c527a2 Mon Sep 17 00:00:00 2001 From: Jack Li Date: Tue, 13 May 2025 17:09:20 -0400 Subject: [PATCH 2/3] reformatting files --- polaris/benchmark/_base.py | 4 +--- polaris/hub/client.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/polaris/benchmark/_base.py b/polaris/benchmark/_base.py index 3069eede..31e61527 100644 --- a/polaris/benchmark/_base.py +++ b/polaris/benchmark/_base.py @@ -185,9 +185,7 @@ def upload_to_hub( cache_auth_token=cache_auth_token, **kwargs, ) as client: - return client.upload_benchmark( - self, owner=owner, parent_artifact_id=parent_artifact_id - ) + return client.upload_benchmark(self, owner=owner, parent_artifact_id=parent_artifact_id) def to_json(self, destination: str) -> str: """Save the benchmark to a destination directory as a JSON file. diff --git a/polaris/hub/client.py b/polaris/hub/client.py index ab45c69a..dde15c0d 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -521,9 +521,7 @@ def upload_results( result_json = results.model_dump(by_alias=True, exclude_none=True) # Make a request to the Hub - response = self._base_request_to_hub( - url="/v2/result", method="POST", json={**result_json} - ) + response = self._base_request_to_hub(url="/v2/result", method="POST", json={**result_json}) # Inform the user about where to find their newly created artifact. result_url = urljoin(self.settings.hub_url, response.headers.get("Content-Location")) From 1e66acb52e14a20a7489a3cc106f22acbae01f14 Mon Sep 17 00:00:00 2001 From: Jack Li <73399568+j279li@users.noreply.github.com> Date: Thu, 15 May 2025 09:18:44 -0400 Subject: [PATCH 3/3] Update polaris/hub/client.py Co-authored-by: Andrew Quirke --- polaris/hub/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polaris/hub/client.py b/polaris/hub/client.py index dde15c0d..ed8fd660 100644 --- a/polaris/hub/client.py +++ b/polaris/hub/client.py @@ -521,7 +521,7 @@ def upload_results( result_json = results.model_dump(by_alias=True, exclude_none=True) # Make a request to the Hub - response = self._base_request_to_hub(url="/v2/result", method="POST", json={**result_json}) + response = self._base_request_to_hub(url="/v2/result", method="POST", json=result_json) # Inform the user about where to find their newly created artifact. result_url = urljoin(self.settings.hub_url, response.headers.get("Content-Location"))