From f489cdd382ba0d0b3268023d675fe5a28b690cc5 Mon Sep 17 00:00:00 2001 From: Jannis Born Date: Sat, 3 May 2025 13:06:27 +0200 Subject: [PATCH 1/3] fix: Article IDs correctly extracted and tested --- .github/workflows/test.yml | 29 ++++++++++++++++++++++++ README.md | 11 +++++++++ pymed_paperscraper/__init__.py | 2 +- pymed_paperscraper/article.py | 2 +- pymed_paperscraper/tests/test_article.py | 12 ++++++++++ 5 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 pymed_paperscraper/tests/test_article.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..308252a --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,29 @@ +--- +name: Source +on: [push, release] + +jobs: + test-source-install: + runs-on: ubuntu-latest + strategy: + max-parallel: 3 + matrix: + python-version: + - "3.10" + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Install package from source + run: pip install -e . + - name: Test package from source + run: | + python -c "import pymed_paperscraper" + python -m pytest pymed_paperscraper \ No newline at end of file diff --git a/README.md b/README.md index 3586dc5..f11bcdc 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,10 @@ pubmed = PubMed(tool="MyTool", email="my@email.address") results = pubmed.query("Some query", max_results=500) ``` +## Bugfixes compared to archived [`pymed`](https://github.com/gijswobben/pymed): +- Article IDs are correctly extracted [`pymed#22`](https://github.com/gijswobben/pymed/issues/22) +- Automatic retries if API is unresponsive/overloaded. Support for `max_tries` in `PubMed` class. + ## Notes on the API The original documentation of the PubMed API can be found here: [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/tools/developers/). PubMed Central kindly requests you to: @@ -35,3 +39,10 @@ The original documentation of the PubMed API can be found here: [PubMed Central] > - Include two parameters that help to identify your service or application to our servers > * _tool_ should be the name of the application, as a string value with no internal spaces, and > * _email_ should be the e-mail address of the maintainer of the tool, and should be a valid e-mail address. + +## Citation +If you use `pymed_paperscraper` in your work, please cite: +```bib +(Citation follows) +``` + diff --git a/pymed_paperscraper/__init__.py b/pymed_paperscraper/__init__.py index fb55515..3eb4764 100644 --- a/pymed_paperscraper/__init__.py +++ b/pymed_paperscraper/__init__.py @@ -1,4 +1,4 @@ from .api import PubMed __all__ = ["PubMed"] -__version__ = "1.0.3" +__version__ = "1.0.4" diff --git a/pymed_paperscraper/article.py b/pymed_paperscraper/article.py index 7047a11..91c62ca 100644 --- a/pymed_paperscraper/article.py +++ b/pymed_paperscraper/article.py @@ -43,7 +43,7 @@ def __init__( self.__setattr__(field, kwargs.get(field, None)) def _extractPubMedId(self: object, xml_element: TypeVar("Element")) -> str: - path = ".//ArticleId[@IdType='pubmed']" + path = ".//PubmedData/ArticleIdList/ArticleId[@IdType='pubmed']" return getContent(element=xml_element, path=path) def _extractTitle(self: object, xml_element: TypeVar("Element")) -> str: diff --git a/pymed_paperscraper/tests/test_article.py b/pymed_paperscraper/tests/test_article.py new file mode 100644 index 0000000..d330b7e --- /dev/null +++ b/pymed_paperscraper/tests/test_article.py @@ -0,0 +1,12 @@ +from pymed_paperscraper import PubMed + + +def test_unique_id(): + pubmed = PubMed(tool="MyTool", email="my@email.address") + query = '((Haliaeetus leucocephalus[Title/Abstract])) AND ((prey[Title/Abstract]) OR (diet[Title/Abstract]))' + results = pubmed.query(query, max_results=30) + + for r in results: + ids = r.pubmed_id.strip().split("\n") + print('org',r.pubmed_id, 'IDS', ids) + assert len(ids) == 1 \ No newline at end of file From 1fa03a543f1f0b7019e2cfca34ea4292f2453105 Mon Sep 17 00:00:00 2001 From: Jannis Born Date: Sat, 3 May 2025 13:07:47 +0200 Subject: [PATCH 2/3] ci: add pytest --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 308252a..c762dbc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,6 +20,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + pip install pytest pip install -r requirements.txt - name: Install package from source run: pip install -e . From fbc5a82fb78de4046b3267aedef075c62beaa686 Mon Sep 17 00:00:00 2001 From: Jannis Born Date: Mon, 5 May 2025 16:52:34 +0200 Subject: [PATCH 3/3] test: cleanup --- pymed_paperscraper/tests/test_article.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymed_paperscraper/tests/test_article.py b/pymed_paperscraper/tests/test_article.py index d330b7e..91a0e16 100644 --- a/pymed_paperscraper/tests/test_article.py +++ b/pymed_paperscraper/tests/test_article.py @@ -8,5 +8,4 @@ def test_unique_id(): for r in results: ids = r.pubmed_id.strip().split("\n") - print('org',r.pubmed_id, 'IDS', ids) assert len(ids) == 1 \ No newline at end of file