From 84cccf4a6e2472ab351e3fbacd29d260b35ecc9c Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Tue, 17 Dec 2024 17:26:18 -0800 Subject: [PATCH 1/5] cleanup docs, add post_conversion --- README.md | 1 - src/unidas.py | 51 +++++++++++++++++++++++++--------------- test/test_unidas.py | 57 +++++++++++++++++++++------------------------ 3 files changed, 58 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index ba57d03..3d58d8e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ [![coverage](https://codecov.io/gh/dasdae/unidas/branch/main/graph/badge.svg)](https://codecov.io/gh/dasdae/unidas) [![PyPI Version](https://img.shields.io/pypi/v/unidas.svg)](https://pypi.python.org/pypi/unidas) -[![supported versions](https://img.shields.io/pypi/pyversions/unidas.svg?label=python_versions)](https://pypi.python.org/pypi/unidas) [![Licence](https://img.shields.io/badge/license-MIT-blue)](https://opensource.org/license/mit) A DAS compatibility package. diff --git a/src/unidas.py b/src/unidas.py index 22d356f..f486fd6 100644 --- a/src/unidas.py +++ b/src/unidas.py @@ -1,7 +1,5 @@ """ -Core functionality for unidas. - -Currently, the base representation is a dictionary of the following form. +Unidas: A DAS Compatibility Package. """ from __future__ import annotations @@ -35,6 +33,7 @@ "xdas": "https://github.com/xdas-dev/xdas", } +# Datetime precision. This can change between python versions. DT_PRECISION = datetime.datetime.resolution.total_seconds() # A generic type variable. @@ -42,7 +41,6 @@ # ------------------------ Utility functions - def optional_import(package_name: str) -> ModuleType: """ Import a module and return the module object if installed, else raise error. @@ -82,7 +80,7 @@ def optional_import(package_name: str) -> ModuleType: def converts_to(target: str): """ - A decorator which marks a method as conversion function. + Marks a method on a `Converter` as a conversion function. Parameters ---------- @@ -99,8 +97,12 @@ def decorator(func): return decorator -def get_object_key(object_class): - """Get the tuple which defines the objects unique id.""" +def get_class_key(object_class)->str: + """ + Get a string which defines the class's identifier. + + The general format is "{package_name}.{class_name}". + """ module_name = object_class.__module__.split(".")[0] class_name = object_class.__name__ return f"{module_name}.{class_name}" @@ -114,7 +116,7 @@ def extract_attrs(obj, attrs_names): def time_to_float(obj): - """Converts a datetime or numpy datetime object to float.""" + """Converts a datetime or numpy datetime object to a float (timestamp).""" if isinstance(obj, np.datetime64) or isinstance(obj, np.timedelta64): obj = obj.astype("timedelta64") / np.timedelta64(1, "s") elif hasattr(obj, "timestamp"): @@ -137,13 +139,13 @@ def time_to_datetime(obj): def to_stripped_utc(time: datetime.datetime): - """Convert a datetime to UTC then strip timezone info""" + """Convert a datetime to UTC then strip timezone info.""" out = time.astimezone(zoneinfo.ZoneInfo("UTC")).replace(tzinfo=None) return out @runtime_checkable -class ArrayLike(Protocol, Sized): +class ArrayLike(Protocol): """ Simple definition of an array for now. """ @@ -183,7 +185,7 @@ def to_xdas_coord(self): @dataclass class EvenlySampledCoordinate(Coordinate): """ - A coordinate which is evenly sampled and contiguous. + A coordinate which is evenly sampled, sorted, and contiguous. Parameters ---------- @@ -248,6 +250,8 @@ class ArrayCoordinate(Coordinate): """ A coordinate which is not evenly sampled and contiguous. + The coordinate is represented by a generic array. + Parameters ---------- data @@ -345,7 +349,7 @@ class Converter: conversion methods with the `converts_to` decorator. """ - name: str = None # should be "{module}.{class_name}" see get_object_key. + name: str = None # should be "{module}.{class_name}" see get_class_key. _registry: ClassVar[dict[str, Converter]] = {} _graph: ClassVar[dict[str, list[str]]] = defaultdict(list) _converters: ClassVar[dict[str, callable]] = {} @@ -378,7 +382,8 @@ def post_conversion(self, input_obj: T, output_obj: T) -> T: Some conversions are lossy. This optional method allows subclasses to modify the output of `convert` before it gets returned. This might - be useful to re-attach lost metadata for example. + be useful to re-attach lost metadata for example. It doesn't work with + the `convert` function (in that case it needs to be applied manually). Parameters ---------- @@ -425,6 +430,8 @@ def get_shortest_path(cls, start, target): path.append(current) current = visited[current] return tuple(path[::-1]) + # TODO: Maybe add a check for DASBase here so that is tried + # before other potential conversion paths. for neighbor in graph[current]: if neighbor not in visited: visited[neighbor] = current @@ -711,12 +718,18 @@ def _decorator(obj, *args, **kwargs): # Convert the incoming object to target. This should do nothing # if it is already the correct format. cls = obj if inspect.isclass(obj) else type(obj) - key = get_object_key(cls) + key = get_class_key(cls) + conversion_class: Converter = Converter._registry[key] input_obj = convert(obj, to) - out = func(input_obj, *args, **kwargs) - output_obj = convert(out, key) - - return output_obj + func_out = func(input_obj, *args, **kwargs) + # Sometimes a function can return a different type than its input + # e.g., a dataframe. In this case just return output. + if not isinstance(func_out, cls): + return func_out + output_obj = convert(func_out, key) + # Apply class specific logic to compensate for lossy conversion. + out = conversion_class.post_conversion(input_obj, output_obj) + return out # Following the convention of pydantic, we attach the raw function # in case it needs to be accessed later. Also ensures to keep the @@ -748,7 +761,7 @@ def convert(obj, to: str): The input object converted to the specified format. """ obj_class = obj if inspect.isclass(obj) else type(obj) - key = get_object_key(obj_class) + key = get_class_key(obj_class) # No conversion needed, simply return object. if key == to: return obj diff --git a/test/test_unidas.py b/test/test_unidas.py index 6c0e8a6..5b02b55 100644 --- a/test/test_unidas.py +++ b/test/test_unidas.py @@ -12,6 +12,8 @@ from unidas import BaseDAS, adapter, convert, optional_import from xdas.core.dataarray import DataArray +from test.conftest import dascore_patch + try: from lightguide.blast import Blast @@ -73,34 +75,6 @@ def test_version(self): # --------- Tests for unidas conversions. -class TestFormatConversionCombinations: - """Tests for combinations of different formats.""" - - # Note: we could also parametrize the base structure fixtures to make - # all of these one test, but then it can get confusing to debug so - # I am making one test for each format that then tests converting to - # all other formats. - def test_convert_blast(self, lightguide_blast, format_name): - """Test that the base blast can be converted to all formats.""" - out = convert(lightguide_blast, to=format_name) - assert isinstance(out, NAME_CLASS_MAP[format_name]) - - def test_convert_patch(self, dascore_patch, format_name): - """Test that the base patch can be converted to all formats.""" - out = convert(dascore_patch, to=format_name) - assert isinstance(out, NAME_CLASS_MAP[format_name]) - - def test_convert_data_array(self, xdas_dataarray, format_name): - """Test that the base data array can be converted to all formats.""" - out = convert(xdas_dataarray, to=format_name) - assert isinstance(out, NAME_CLASS_MAP[format_name]) - - def test_convert_section(self, daspy_section, format_name): - """Test that the base section can be converted to all formats.""" - out = convert(daspy_section, to=format_name) - assert isinstance(out, NAME_CLASS_MAP[format_name]) - - class TestDASCorePatch: """Test suite for converting DASCore Patches.""" @@ -128,6 +102,11 @@ def test_to_xdas_time_coord(self, dascore_patch): time_coord2 = out.coords["time"].values assert np.all(time_coord1 == time_coord2) + def test_convert_patch_to_other(self, dascore_patch, format_name): + """Test that the base patch can be converted to all formats.""" + out = convert(dascore_patch, to=format_name) + assert isinstance(out, NAME_CLASS_MAP[format_name]) + class TestDASPySection: """Test suite for converting DASPy sections.""" @@ -145,10 +124,16 @@ def test_from_base_das(self, daspy_base_das, daspy_section): """Ensure the default section can round-trip.""" out = convert(daspy_base_das, "daspy.Section") # TODO these objects aren't equal but their strings are. - # Need to fix this. + # We need to fix this. + # assert out == daspy_section assert str(out) == str(daspy_section) assert np.all(out.data == daspy_section.data) + def test_convert_section(self, daspy_section, format_name): + """Test that the base section can be converted to all formats.""" + out = convert(daspy_section, to=format_name) + assert isinstance(out, NAME_CLASS_MAP[format_name]) + class TestXdasDataArray: """Tests for converting xdas DataArrays.""" @@ -162,12 +147,17 @@ def test_to_base_das(self, xdas_base_das): """Ensure the example data_array can be converted to BaseDAS.""" assert isinstance(xdas_base_das, BaseDAS) + def test_convert_data_array_to_other(self, xdas_dataarray, format_name): + """Test that the base data array can be converted to all formats.""" + out = convert(xdas_dataarray, to=format_name) + assert isinstance(out, NAME_CLASS_MAP[format_name]) + def test_from_base_das(self, xdas_base_das, xdas_dataarray): """Ensure xdas DataArray can round trip.""" out = convert(xdas_base_das, "xdas.DataArray") assert np.all(out.data == xdas_dataarray.data) # TODO the str rep of coords are equal but not coords themselves. - # Need to look into this. + # We need to look into this. assert str(out.coords) == str(xdas_dataarray.coords) attr1, attr2 = out.attrs, xdas_dataarray.attrs assert attr1 == attr2 or (not attr1 and not attr2) @@ -197,7 +187,7 @@ def test_from_base_das(self, lightguide_base_das, lightguide_blast): out = convert(lightguide_base_das, "lightguide.Blast") # TODO here the objects also do not compare equal. Need to figure out # why. For now just do weaker checks. - # assert np.all(out.data == lightguide_blast.data) + # assert out == lightguide_blast assert out.start_time == lightguide_blast.start_time assert np.all(out.data == lightguide_blast.data) assert out.unit == lightguide_blast.unit @@ -205,6 +195,11 @@ def test_from_base_das(self, lightguide_base_das, lightguide_blast): assert out.start_channel == lightguide_blast.start_channel assert out.sampling_rate == lightguide_blast.sampling_rate + def test_convert_blast_to_other(self, lightguide_blast, format_name): + """Test that the base blast can be converted to all formats.""" + out = convert(lightguide_blast, to=format_name) + assert isinstance(out, NAME_CLASS_MAP[format_name]) + class TestConvert: """Generic tests for the convert function.""" From a478fa51dac0bd43c54c9ac50aca1c00dd915621 Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Tue, 17 Dec 2024 17:27:40 -0800 Subject: [PATCH 2/5] lint --- src/unidas.py | 5 +++-- test/test_unidas.py | 2 -- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/unidas.py b/src/unidas.py index f486fd6..0308fdd 100644 --- a/src/unidas.py +++ b/src/unidas.py @@ -17,7 +17,7 @@ import inspect import zoneinfo from collections import defaultdict, deque -from collections.abc import Mapping, Sequence, Sized +from collections.abc import Mapping, Sequence from dataclasses import dataclass from functools import cache, wraps from types import ModuleType @@ -41,6 +41,7 @@ # ------------------------ Utility functions + def optional_import(package_name: str) -> ModuleType: """ Import a module and return the module object if installed, else raise error. @@ -97,7 +98,7 @@ def decorator(func): return decorator -def get_class_key(object_class)->str: +def get_class_key(object_class) -> str: """ Get a string which defines the class's identifier. diff --git a/test/test_unidas.py b/test/test_unidas.py index 5b02b55..2aaeb8e 100644 --- a/test/test_unidas.py +++ b/test/test_unidas.py @@ -12,8 +12,6 @@ from unidas import BaseDAS, adapter, convert, optional_import from xdas.core.dataarray import DataArray -from test.conftest import dascore_patch - try: from lightguide.blast import Blast From c6d6e6033d6f81674ba89bf2ac8c6a3e9509807f Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Tue, 17 Dec 2024 20:35:55 -0800 Subject: [PATCH 3/5] fix return types --- src/unidas.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/unidas.py b/src/unidas.py index 0308fdd..cfd3698 100644 --- a/src/unidas.py +++ b/src/unidas.py @@ -723,9 +723,10 @@ def _decorator(obj, *args, **kwargs): conversion_class: Converter = Converter._registry[key] input_obj = convert(obj, to) func_out = func(input_obj, *args, **kwargs) + cls_out = obj if inspect.isclass(func_out) else type(func_out) # Sometimes a function can return a different type than its input # e.g., a dataframe. In this case just return output. - if not isinstance(func_out, cls): + if get_class_key(cls_out) != to: return func_out output_obj = convert(func_out, key) # Apply class specific logic to compensate for lossy conversion. From 3d821d801d8980425574c1d07d0fdafa2225b6f0 Mon Sep 17 00:00:00 2001 From: Derrick Chambers Date: Tue, 17 Dec 2024 20:41:17 -0800 Subject: [PATCH 4/5] add test for different output types --- test/test_unidas.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/test_unidas.py b/test/test_unidas.py index 2aaeb8e..59193cc 100644 --- a/test/test_unidas.py +++ b/test/test_unidas.py @@ -7,6 +7,7 @@ import dascore as dc import daspy import numpy as np +import pandas as pd import pytest import unidas from unidas import BaseDAS, adapter, convert, optional_import @@ -248,6 +249,17 @@ def my_patch_func(patch): assert new2.raw_function is my_patch_func.raw_function assert new.raw_function is my_patch_func.raw_function + def test_different_return_type(self, daspy_section): + """Ensure wrapped functions that return different types still work.""" + + @adapter("dascore.Patch") + def dummy_func(patch): + """Dummy function that returns dataframe.""" + return dc.spool(patch).get_contents() + + out = dummy_func(daspy_section) + assert isinstance(out, pd.DataFrame) + class TestIntegrations: """Tests for integrating different data structures.""" From 914a80b64182adab79d55b8c184b53caa0ce8d7e Mon Sep 17 00:00:00 2001 From: derrick chambers Date: Wed, 18 Dec 2024 06:51:27 -0800 Subject: [PATCH 5/5] bump version --- pyproject.toml | 2 +- src/unidas.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ae4cdd7..6144de9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ packages = ["src/unidas.py"] [project] name = "unidas" -version = "0.0.0" # Make sure to bump dascore.__version__ as well! +version = "0.0.1" # Make sure to bump dascore.__version__ as well! authors = [ { name="Derrick Chambers", email="chambers.ja.derrick@gmail.com" }, diff --git a/src/unidas.py b/src/unidas.py index cfd3698..f845ebd 100644 --- a/src/unidas.py +++ b/src/unidas.py @@ -6,7 +6,7 @@ # Unidas version indicator. When incrementing, be sure to update # pyproject.toml as well. -__version__ = "0.0.0" +__version__ = "0.0.1" # Explicitly defines unidas' public API. # https://peps.python.org/pep-0008/#public-and-internal-interfaces