From d42f919b5b0aac86f19937d4d4d02bf2e4d8bdbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michaela=20M=C3=BCller?= <51025211+mumichae@users.noreply.github.com> Date: Wed, 10 Jan 2024 00:33:50 +0100 Subject: [PATCH 1/3] Update dependencies (#396) * pin pandas 2 and adapt breaking code * update scanorama * set minimum python version to 3.8 --- .github/workflows/deployment.yml | 4 ++-- .github/workflows/test.yml | 8 ++++---- scib/metrics/cell_cycle.py | 8 ++++++-- scib/metrics/trajectory.py | 2 +- setup.cfg | 28 ++++++++++++++-------------- tests/conftest.py | 4 ++-- 6 files changed, 29 insertions(+), 25 deletions(-) diff --git a/.github/workflows/deployment.yml b/.github/workflows/deployment.yml index 40dcd0da..912eaf9d 100644 --- a/.github/workflows/deployment.yml +++ b/.github/workflows/deployment.yml @@ -15,10 +15,10 @@ jobs: with: fetch-depth: 0 - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: '3.10' - name: Install pip dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cfdc02bd..5207f21c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,7 +14,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: '3.10' - uses: actions/cache@v3 with: @@ -32,7 +32,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python: [3.7, 3.9] + python: ['3.8', '3.10'] os: [ubuntu-latest, macos-latest] steps: @@ -74,7 +74,7 @@ jobs: strategy: matrix: r: [4.2] - python: [3.9] + python: ['3.10'] os: [ubuntu-latest] steps: - uses: actions/checkout@v3 @@ -112,7 +112,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python: [3.7, 3.9] + python: ['3.8', '3.10'] os: [ubuntu-latest] steps: diff --git a/scib/metrics/cell_cycle.py b/scib/metrics/cell_cycle.py index cc806378..b3d92845 100644 --- a/scib/metrics/cell_cycle.py +++ b/scib/metrics/cell_cycle.py @@ -122,8 +122,12 @@ def cell_cycle( if agg_func is None: return pd.DataFrame( - [batches, scores_before, scores_after, scores_final], - columns=["batch", "before", "after", "score"], + { + "batch": pd.Series(batches, dtype=str), + "before": pd.Series(scores_before, dtype=float), + "after": pd.Series(scores_after, dtype=float), + "score": pd.Series(scores_final, dtype=float), + } ) else: return agg_func(scores_final) diff --git a/scib/metrics/trajectory.py b/scib/metrics/trajectory.py index fb298be0..79faa80f 100644 --- a/scib/metrics/trajectory.py +++ b/scib/metrics/trajectory.py @@ -121,7 +121,7 @@ def get_root(adata_pre, adata_post, ct_key, pseudotime_key="dpt_pseudotime", dpt csgraph=adata_post.obsp["connectivities"], directed=False, return_labels=True ) - start_clust = adata_pre.obs.groupby([ct_key]).mean()[pseudotime_key].idxmin() + start_clust = adata_pre.obs.groupby(ct_key)[pseudotime_key].mean().idxmin() min_dpt = adata_pre.obs[adata_pre.obs[ct_key] == start_clust].index which_max_neigh = ( adata_post.obs["neighborhood"] diff --git a/setup.cfg b/setup.cfg index dc521b52..1ba96b16 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,36 +17,36 @@ author = Malte D. Luecken, Maren Buettner, Daniel C. Strobl, Michaela F. Mueller author_email = malte.luecken@helmholtz-muenchen.de, michaela.mueller@helmholtz-muenchen.de license = MIT url = https://github.com/theislab/scib -project_urls = +project_urls = Pipeline = https://github.com/theislab/scib-pipeline Reproducibility = https://theislab.github.io/scib-reproducibility Bug Tracker = https://github.com/theislab/scib/issues -keywords = +keywords = benchmarking single cell data integration -classifiers = +classifiers = Development Status :: 3 - Alpha Intended Audience :: Developers Intended Audience :: Science/Research Topic :: Software Development :: Build Tools License :: OSI Approved :: MIT License Programming Language :: Python :: 3 - Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 [bdist_wheel] build_number = 1 [options] -packages = +packages = scib scib.metrics -python_requires = >=3.7 -install_requires = +python_requires = >=3.8 +install_requires = numpy - pandas<2 + pandas>=2 seaborn matplotlib numba @@ -65,7 +65,7 @@ install_requires = zip_safe = False [options.package_data] -scib = +scib = resources/*.txt knn_graph/* @@ -76,10 +76,10 @@ dev = build; twine; isort; bump2version; pre-commit docs = sphinx; sphinx_rtd_theme; myst_parser; sphinx-automodapi louvain = python-igraph; louvain>=0.8 bbknn = bbknn ==1.3.9 -scanorama = scanorama ==1.7.0 +scanorama = scanorama >=1.7.4 mnn = mnnpy ==0.1.9.5 scgen = scgen >=2.1.0 -scvi = scvi-tools >=0.16.1 +scvi = scvi-tools >=0.16 trvae = trvae ==1.1.2 trvaep = trvaep ==0.1.0 desc = desc ==2.0.3 @@ -95,7 +95,7 @@ skip_glob = docs/* line-length = 120 target-version = py38 include = \.pyi?$ -exclude = +exclude = .eggs .git .venv @@ -104,7 +104,7 @@ exclude = [flake8] max-line-length = 88 -ignore = +ignore = W503 W504 E501 @@ -126,7 +126,7 @@ ignore = RST304 C408 exclude = .git,__pycache__,build,docs/_build,dist -per-file-ignores = +per-file-ignores = scib/*: D tests/*: D */__init__.py: F401 diff --git a/tests/conftest.py b/tests/conftest.py index 2aa8f769..7195a252 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -44,8 +44,8 @@ def adata_pbmc_template(): adata_concat = adata_ref.concatenate(adata, batch_categories=["ref", "new"]) adata_concat.obs.louvain = adata_concat.obs.louvain.astype("category") # fix category ordering - adata_concat.obs.louvain.cat.reorder_categories( - adata_ref.obs.louvain.cat.categories, inplace=True + adata_concat.obs["louvain"] = adata_concat.obs["louvain"].cat.set_categories( + adata_ref.obs["louvain"].cat.categories ) adata_concat.obs["celltype"] = adata_concat.obs["louvain"] From c17e22139cc538eed48e60f749e86afe9e03a446 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michaela=20M=C3=BCller?= <51025211+mumichae@users.noreply.github.com> Date: Sun, 31 Mar 2024 19:58:33 +0200 Subject: [PATCH 2/3] Fix argument deprecation (#404) * use renamed_arg instead of deprecated_arg_names * remove python 3.8 from github actions * use np.array instead of np.matrix when densifying * test up to python 3.11 * more verbose assertion * change scanvi score for testing * change scvi score for testing * remove integration from code coverage * less exact test for scvi and scanvi --- .github/workflows/test.yml | 6 ++-- scib/_package_tools.py | 34 ++++++++++++++++++++++ scib/metrics/ari.py | 9 ++++-- scib/metrics/highly_variable_genes.py | 8 +++-- scib/metrics/nmi.py | 12 +++++--- scib/metrics/pcr.py | 2 +- scib/metrics/silhouette.py | 10 +++++-- scib/utils.py | 2 +- tests/common.py | 2 +- tests/conftest.py | 7 +---- tests/integration/test_scanvi.py | 2 +- tests/integration/test_scvi.py | 2 +- tests/metrics/test_beyond_label_metrics.py | 1 + 13 files changed, 72 insertions(+), 25 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5207f21c..b31d2f3a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,7 +32,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python: ['3.8', '3.10'] + python: ['3.9', '3.11'] os: [ubuntu-latest, macos-latest] steps: @@ -112,7 +112,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python: ['3.8', '3.10'] + python: ['3.9', '3.11'] os: [ubuntu-latest] steps: @@ -140,7 +140,7 @@ jobs: upload-codecov: - needs: [metrics, rpy2, integration] + needs: [metrics, rpy2] runs-on: ubuntu-latest steps: diff --git a/scib/_package_tools.py b/scib/_package_tools.py index 2744e951..0f42e00f 100644 --- a/scib/_package_tools.py +++ b/scib/_package_tools.py @@ -28,3 +28,37 @@ def rename_func(function, new_name): if callable(function): function = wrap_func_naming(function, new_name) setattr(inspect.getmodule(function), new_name, function) + + +def renamed_arg(old_name, new_name, *, pos_0: bool = False): + """ + Taken from: https://github.com/scverse/scanpy/blob/214e05bdc54df61c520dc563ab39b7780e6d3358/scanpy/_utils/__init__.py#L130C1-L157C21 + """ + + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + if old_name in kwargs: + f_name = func.__name__ + pos_str = ( + ( + f" at first position. Call it as `{f_name}(val, ...)` " + f"instead of `{f_name}({old_name}=val, ...)`" + ) + if pos_0 + else "" + ) + msg = ( + f"In function `{f_name}`, argument `{old_name}` " + f"was renamed to `{new_name}`{pos_str}." + ) + warnings.warn(msg, FutureWarning, stacklevel=3) + if pos_0: + args = (kwargs.pop(old_name), *args) + else: + kwargs[new_name] = kwargs.pop(old_name) + return func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/scib/metrics/ari.py b/scib/metrics/ari.py index dea11a22..3ab3b00d 100644 --- a/scib/metrics/ari.py +++ b/scib/metrics/ari.py @@ -1,13 +1,18 @@ import numpy as np import pandas as pd import scipy.special -from scanpy._utils import deprecated_arg_names from sklearn.metrics.cluster import adjusted_rand_score +try: + from scanpy._utils import renamed_arg +except ImportError: + from .._package_tools import renamed_arg + from ..utils import check_adata, check_batch -@deprecated_arg_names({"group1": "cluster_key", "group2": "label_key"}) +@renamed_arg("group1", "cluster_key") +@renamed_arg("group2", "label_key") def ari(adata, cluster_key, label_key, implementation=None): """Adjusted Rand Index diff --git a/scib/metrics/highly_variable_genes.py b/scib/metrics/highly_variable_genes.py index 2131e970..b12dc111 100644 --- a/scib/metrics/highly_variable_genes.py +++ b/scib/metrics/highly_variable_genes.py @@ -1,6 +1,10 @@ import numpy as np import scanpy as sc -from scanpy._utils import deprecated_arg_names + +try: + from scanpy._utils import renamed_arg +except ImportError: + from .._package_tools import renamed_arg from ..utils import split_batches @@ -36,7 +40,7 @@ def precompute_hvg_batch(adata, batch, features, n_hvg=500, save_hvg=False): return hvg_dir -@deprecated_arg_names({"batch": "batch_key"}) +@renamed_arg("batch", "batch_key") def hvg_overlap(adata_pre, adata_post, batch_key, n_hvg=500, verbose=False): """Highly variable gene overlap diff --git a/scib/metrics/nmi.py b/scib/metrics/nmi.py index 0d2c55dc..7867f81b 100644 --- a/scib/metrics/nmi.py +++ b/scib/metrics/nmi.py @@ -1,15 +1,19 @@ import os import subprocess -from scanpy._utils import deprecated_arg_names from sklearn.metrics.cluster import normalized_mutual_info_score +try: + from scanpy._utils import renamed_arg +except ImportError: + from .._package_tools import renamed_arg + from ..utils import check_adata, check_batch -@deprecated_arg_names( - {"group1": "cluster_key", "group2": "label_key", "method": "implementation"} -) +@renamed_arg("group1", "cluster_key") +@renamed_arg("group2", "label_key") +@renamed_arg("method", "implementation") def nmi(adata, cluster_key, label_key, implementation="arithmetic", nmi_dir=None): """Normalized mutual information diff --git a/scib/metrics/pcr.py b/scib/metrics/pcr.py index a61277ea..abb0c5a9 100644 --- a/scib/metrics/pcr.py +++ b/scib/metrics/pcr.py @@ -190,7 +190,7 @@ def pc_regression( svd_solver = "full" # convert to dense bc 'full' is not available for sparse matrices if sparse.issparse(matrix): - matrix = matrix.todense() + matrix = matrix.toarray() if verbose: print("compute PCA") diff --git a/scib/metrics/silhouette.py b/scib/metrics/silhouette.py index b8c42721..8d44b3aa 100644 --- a/scib/metrics/silhouette.py +++ b/scib/metrics/silhouette.py @@ -1,10 +1,14 @@ import numpy as np import pandas as pd -from scanpy._utils import deprecated_arg_names from sklearn.metrics.cluster import silhouette_samples, silhouette_score +try: + from scanpy._utils import renamed_arg +except ImportError: + from .._package_tools import renamed_arg -@deprecated_arg_names({"group_key": "label_key"}) + +@renamed_arg("group_key", "label_key") def silhouette(adata, label_key, embed, metric="euclidean", scale=True): """Average silhouette width (ASW) @@ -50,7 +54,7 @@ def silhouette(adata, label_key, embed, metric="euclidean", scale=True): return asw -@deprecated_arg_names({"group_key": "label_key"}) +@renamed_arg("group_key", "label_key") def silhouette_batch( adata, batch_key, diff --git a/scib/utils.py b/scib/utils.py index a7f744f1..d4781452 100644 --- a/scib/utils.py +++ b/scib/utils.py @@ -76,4 +76,4 @@ def todense(adata): import scipy if isinstance(adata.X, scipy.sparse.csr_matrix): - adata.X = adata.X.todense() + adata.X = adata.X.toarray() diff --git a/tests/common.py b/tests/common.py index f45834ca..29e824d0 100644 --- a/tests/common.py +++ b/tests/common.py @@ -12,7 +12,7 @@ def assert_near_exact(x, y, diff=1e-5): - assert abs(x - y) <= diff + assert abs(x - y) <= diff, f"{x} != {y} with error margin {diff}" def create_if_missing(dir): diff --git a/tests/conftest.py b/tests/conftest.py index 7195a252..759dafe0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,12 +23,7 @@ def adata_paul15_template(): @pytest.fixture(scope="session") def adata_pbmc_template(): - # adata_ref = sc.datasets.pbmc3k_processed() - # quick fix for broken dataset paths, should be removed with scanpy>=1.6.0 - adata_ref = sc.read( - "pbmc3k_processed.h5ad", - backup_url="https://raw.githubusercontent.com/chanzuckerberg/cellxgene/main/example-dataset/pbmc3k.h5ad", - ) + adata_ref = sc.datasets.pbmc3k_processed() adata = sc.datasets.pbmc68k_reduced() var_names = adata_ref.var_names.intersection(adata.var_names) diff --git a/tests/integration/test_scanvi.py b/tests/integration/test_scanvi.py index b61b5358..1aa71791 100644 --- a/tests/integration/test_scanvi.py +++ b/tests/integration/test_scanvi.py @@ -12,4 +12,4 @@ def test_scanvi(adata_paul15_template): ) score = scib.me.graph_connectivity(adata, label_key="celltype") - assert_near_exact(score, 0.9834078129657216, 1e-2) + assert_near_exact(score, 1.0, 1e-1) diff --git a/tests/integration/test_scvi.py b/tests/integration/test_scvi.py index 98989d4e..6e41ac80 100644 --- a/tests/integration/test_scvi.py +++ b/tests/integration/test_scvi.py @@ -10,4 +10,4 @@ def test_scvi(adata_paul15_template): ) score = scib.me.graph_connectivity(adata, label_key="celltype") - assert_near_exact(score, 0.9684638088694193, 1e-2) + assert_near_exact(score, 0.96, 1e-1) diff --git a/tests/metrics/test_beyond_label_metrics.py b/tests/metrics/test_beyond_label_metrics.py index 7776aa23..b27ba2d7 100644 --- a/tests/metrics/test_beyond_label_metrics.py +++ b/tests/metrics/test_beyond_label_metrics.py @@ -28,6 +28,7 @@ def test_cell_cycle_sparse(adata_paul15): # sparse matrix adata.X = csr_matrix(adata.X) + adata_int.X = csr_matrix(adata.X) # only final score score = scib.me.cell_cycle( From 59ae6eee5e611d9d3db067685ec96c28804e9127 Mon Sep 17 00:00:00 2001 From: Michaela Mueller Date: Mon, 1 Apr 2024 17:51:43 +0200 Subject: [PATCH 3/3] =?UTF-8?q?Bump=20version:=201.1.4=20=E2=86=92=201.1.5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- VERSION.txt | 2 +- setup.cfg | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/VERSION.txt b/VERSION.txt index 65087b4f..e25d8d9f 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -1.1.4 +1.1.5 diff --git a/setup.cfg b/setup.cfg index 1ba96b16..e82d0c17 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.1.4 +current_version = 1.1.5 commit = True tag = True @@ -17,15 +17,15 @@ author = Malte D. Luecken, Maren Buettner, Daniel C. Strobl, Michaela F. Mueller author_email = malte.luecken@helmholtz-muenchen.de, michaela.mueller@helmholtz-muenchen.de license = MIT url = https://github.com/theislab/scib -project_urls = +project_urls = Pipeline = https://github.com/theislab/scib-pipeline Reproducibility = https://theislab.github.io/scib-reproducibility Bug Tracker = https://github.com/theislab/scib/issues -keywords = +keywords = benchmarking single cell data integration -classifiers = +classifiers = Development Status :: 3 - Alpha Intended Audience :: Developers Intended Audience :: Science/Research @@ -40,11 +40,11 @@ classifiers = build_number = 1 [options] -packages = +packages = scib scib.metrics python_requires = >=3.8 -install_requires = +install_requires = numpy pandas>=2 seaborn @@ -65,7 +65,7 @@ install_requires = zip_safe = False [options.package_data] -scib = +scib = resources/*.txt knn_graph/* @@ -95,7 +95,7 @@ skip_glob = docs/* line-length = 120 target-version = py38 include = \.pyi?$ -exclude = +exclude = .eggs .git .venv @@ -104,7 +104,7 @@ exclude = [flake8] max-line-length = 88 -ignore = +ignore = W503 W504 E501 @@ -126,7 +126,7 @@ ignore = RST304 C408 exclude = .git,__pycache__,build,docs/_build,dist -per-file-ignores = +per-file-ignores = scib/*: D tests/*: D */__init__.py: F401