From 20878a3f1a2ce97e38664e6c07cc2ddbe009a47b Mon Sep 17 00:00:00 2001 From: eecavanna Date: Wed, 5 Feb 2025 13:23:35 -0800 Subject: [PATCH 01/11] WIP: Implement spell checker GHA workflow (it overlooks some files) --- .github/workflows/assemble-website.yml | 7 ++- .github/workflows/check-spelling.yml | 51 +++++++++++++++++++ .../spellcheck_allow_list.txt | 36 +++++++++++++ 3 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/check-spelling.yml create mode 100644 .github/workflows/supporting_files/spellcheck_allow_list.txt diff --git a/.github/workflows/assemble-website.yml b/.github/workflows/assemble-website.yml index da6d4ef6..a08a2fd9 100644 --- a/.github/workflows/assemble-website.yml +++ b/.github/workflows/assemble-website.yml @@ -67,9 +67,14 @@ jobs: # artifact; e.g., spell checkers and link checkers. name: github-pages - # Use existing workflow(s) to check the file tree for broken links. + # Use existing workflows to check the file tree for broken links and spelling errors. check-links: name: Check links uses: ./.github/workflows/check-links.yml needs: - assemble + check-spelling: + name: Check spelling + uses: ./.github/workflows/check-spelling.yml + needs: + - assemble diff --git a/.github/workflows/check-spelling.yml b/.github/workflows/check-spelling.yml new file mode 100644 index 00000000..fd0a4230 --- /dev/null +++ b/.github/workflows/check-spelling.yml @@ -0,0 +1,51 @@ +# This GitHub Actions workflow checks spelling in HTML files and reports any misspelled words it finds. +# Reference: https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions +name: Check spelling + +on: + # Allow this workflow to be called by other workflows.
+ # Reference: https://docs.github.com/en/actions/using-workflows/reusing-workflows + workflow_call: { } + +jobs: + check-spelling: + name: Check spelling + runs-on: ubuntu-latest + steps: + # Check out the commit so that we have access to the "allow list" residing in the repository. + - name: Check out commit + uses: actions/checkout@v4 + - name: Get website file tree + uses: actions/download-artifact@v4 # docs: https://github.com/actions/download-artifact + with: + name: github-pages + path: _downloads + - name: Un-tar the archive + run: | + pwd + mkdir -p _build/html + tar -xvf _downloads/artifact.tar -C _build/html + ls -lR _build/html + - name: Create directory for report + run: mkdir -p _spellcheck_reports + - name: Run Spellchecker CLI + id: spellchecker-cli + uses: tbroadley/spellchecker-cli-action@v1.1.1 # docs: https://github.com/marketplace/actions/spellchecker-cli?version=v1.1.1 + continue-on-error: true + with: + # Note: The Spellchecker CLI uses Globby to resolve these patterns. + # Reference: https://github.com/tbroadley/spellchecker-cli?tab=readme-ov-file#globs + files: _build/html/**/*.html + dictionaries: .github/workflows/supporting_files/spellcheck_allow_list.txt + plugins: spell indefinite-article repeated-words syntax-mentions syntax-urls + reports: _spellcheck_reports/spellcheck_report.json + - name: Dump spell check report + run: cat _spellcheck_reports/spellcheck_report.json + # Use "Spellchecker CLI Summary" to report the spelling errors on GitHub. + # Reference: https://github.com/austenstone/spellchecker-cli-action-summary + - name: Report spelling errors + uses: austenstone/spellchecker-cli-action-summary@v1.0 + with: + file-json: _spellcheck_reports/spellcheck_report.json + # TODO: Be more specific about which files have changed. 
+ files-changed: _build/html/**/*.html diff --git a/.github/workflows/supporting_files/spellcheck_allow_list.txt b/.github/workflows/supporting_files/spellcheck_allow_list.txt new file mode 100644 index 00000000..7078e871 --- /dev/null +++ b/.github/workflows/supporting_files/spellcheck_allow_list.txt @@ -0,0 +1,36 @@ +Bioinformatics +bioinformatics +Biosample +biosample +Changesheets +changesheets +Community-centric +community-centric +Diátaxis +Globus +IMG +JSON +json +Lipidomics +lipidomics +Metabolomics +metabolomics +MetaG +Metagenome +metagenome +Metagenomic +metagenomic +Metaproteomic +MetaT +Metatranscriptome +metatranscriptome +Metatranscriptomic +metatranscriptomic +Microbiome +microbiome +Mgt +MkDocs +multi-omics +NMDC +NMDC's +nmdc-runtime \ No newline at end of file From 110efae27d3decefb8840aa98215b1a78b48b1b2 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Thu, 20 Feb 2025 00:09:20 -0800 Subject: [PATCH 02/11] Switch from Node-based `spellchecker-cli` to Python-based `spellcheck` --- .github/workflows/check-spelling.yml | 33 ++++++------------- .../workflows/supporting_files/spellcheck.yml | 9 +++++ 2 files changed, 19 insertions(+), 23 deletions(-) create mode 100644 .github/workflows/supporting_files/spellcheck.yml diff --git a/.github/workflows/check-spelling.yml b/.github/workflows/check-spelling.yml index fd0a4230..3a4d8795 100644 --- a/.github/workflows/check-spelling.yml +++ b/.github/workflows/check-spelling.yml @@ -12,7 +12,7 @@ jobs: name: Check spelling runs-on: ubuntu-latest steps: - # Check out the commit so that we have access to the "allow list" residing in the repository. + # Check out the commit so that we have access to the spellcheck configuration file residing in the repository. 
- name: Check out commit uses: actions/checkout@v4 - name: Get website file tree @@ -26,26 +26,13 @@ jobs: mkdir -p _build/html tar -xvf _downloads/artifact.tar -C _build/html ls -lR _build/html - - name: Create directory for report - run: mkdir -p _spellcheck_reports - - name: Run Spellchecker CLI - id: spellchecker-cli - uses: tbroadley/spellchecker-cli-action@v1.1.1 # docs: https://github.com/marketplace/actions/spellchecker-cli?version=v1.1.1 - continue-on-error: true + # Use `GitHub Spellcheck Action`, from the GHA Marketplace, to check spelling. + # Reference: https://github.com/marketplace/actions/github-spellcheck-action + - name: Dump Spellcheck configuration file + run: cat .github/workflows/supporting_files/spellcheck.yml + - name: Run Spellcheck + uses: rojopolis/spellcheck-github-actions@0.47.0 with: - # Note: The Spellchecker CLI uses Globby to resolve these patterns. - # Reference: https://github.com/tbroadley/spellchecker-cli?tab=readme-ov-file#globs - files: _build/html/**/*.html - dictionaries: .github/workflows/supporting_files/spellcheck_allow_list.txt - plugins: spell indefinite-article repeated-words syntax-mentions syntax-urls - reports: _spellcheck_reports/spellcheck_report.json - - name: Dump spell check report - run: cat _spellcheck_reports/spellcheck_report.json - # Use "Spellchecker CLI Summary" to report the spelling errors on GitHub. - # Reference: https://github.com/austenstone/spellchecker-cli-action-summary - - name: Report spelling errors - uses: austenstone/spellchecker-cli-action-summary@v1.0 - with: - file-json: _spellcheck_reports/spellcheck_report.json - # TODO: Be more specific about which files have changed. 
- files-changed: _build/html/**/*.html + config_path: .github/workflows/supporting_files/spellcheck.yml + source_files: _build/html/**/*.html + task_name: HTML diff --git a/.github/workflows/supporting_files/spellcheck.yml b/.github/workflows/supporting_files/spellcheck.yml new file mode 100644 index 00000000..f4e4576d --- /dev/null +++ b/.github/workflows/supporting_files/spellcheck.yml @@ -0,0 +1,9 @@ +matrix: +- name: HTML + aspell: { lang: en } + pipeline: + # Docs: https://github.com/facelessuser/pyspelling/blob/master/pyspelling/filters/html.py + - pyspelling.filters.html: + comments: false + ignores: [ code, pre ] + default_encoding: utf-8 From a3ffac858028e665f5fecdf93aac29ac891ea350 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Thu, 20 Feb 2025 00:18:43 -0800 Subject: [PATCH 03/11] Move target file list from GHA workflow to spellcheck config file --- .github/workflows/check-spelling.yml | 1 - .github/workflows/supporting_files/spellcheck.yml | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-spelling.yml b/.github/workflows/check-spelling.yml index 3a4d8795..4a351b1e 100644 --- a/.github/workflows/check-spelling.yml +++ b/.github/workflows/check-spelling.yml @@ -34,5 +34,4 @@ jobs: uses: rojopolis/spellcheck-github-actions@0.47.0 with: config_path: .github/workflows/supporting_files/spellcheck.yml - source_files: _build/html/**/*.html task_name: HTML diff --git a/.github/workflows/supporting_files/spellcheck.yml b/.github/workflows/supporting_files/spellcheck.yml index f4e4576d..3961cbec 100644 --- a/.github/workflows/supporting_files/spellcheck.yml +++ b/.github/workflows/supporting_files/spellcheck.yml @@ -1,5 +1,6 @@ matrix: - name: HTML + sources: [ "_build/html/**/*.html" ] aspell: { lang: en } pipeline: # Docs: https://github.com/facelessuser/pyspelling/blob/master/pyspelling/filters/html.py From 641146fc75751594a0630a49baa84c5cb9d0957e Mon Sep 17 00:00:00 2001 From: eecavanna Date: Thu, 20 Feb 2025 00:29:14 
-0800 Subject: [PATCH 04/11] Configure spellcheck to use custom dictionary --- .github/workflows/supporting_files/spellcheck.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/supporting_files/spellcheck.yml b/.github/workflows/supporting_files/spellcheck.yml index 3961cbec..f56deaaa 100644 --- a/.github/workflows/supporting_files/spellcheck.yml +++ b/.github/workflows/supporting_files/spellcheck.yml @@ -1,6 +1,11 @@ matrix: - name: HTML - sources: [ "_build/html/**/*.html" ] + sources: + - _build/html/**/*.html + dictionary: + wordlists: + - .github/workflows/supporting_files/spellcheck_allow_list.txt + encoding: utf-8 aspell: { lang: en } pipeline: # Docs: https://github.com/facelessuser/pyspelling/blob/master/pyspelling/filters/html.py From c1729964a4fd9993e68d3aa71c308c851ef966be Mon Sep 17 00:00:00 2001 From: eecavanna Date: Thu, 20 Feb 2025 00:58:27 -0800 Subject: [PATCH 05/11] Configure spellcheck to avoid spellchecking URLs --- .github/workflows/supporting_files/spellcheck.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/supporting_files/spellcheck.yml b/.github/workflows/supporting_files/spellcheck.yml index f56deaaa..cf80e2a0 100644 --- a/.github/workflows/supporting_files/spellcheck.yml +++ b/.github/workflows/supporting_files/spellcheck.yml @@ -8,8 +8,10 @@ matrix: encoding: utf-8 aspell: { lang: en } pipeline: - # Docs: https://github.com/facelessuser/pyspelling/blob/master/pyspelling/filters/html.py + # Docs: https://facelessuser.github.io/pyspelling/filters/html/ - pyspelling.filters.html: comments: false ignores: [ code, pre ] + # Docs: https://facelessuser.github.io/pyspelling/filters/url/ + - pyspelling.filters.url: {} default_encoding: utf-8 From 85461b82ed8a135f5175e1e71fd0c686019d0173 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Thu, 20 Feb 2025 17:16:34 -0800 Subject: [PATCH 06/11] Update spellcheck "allow list" based upon latest spellcheck result --- 
.../spellcheck_allow_list.txt | 46 ++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/.github/workflows/supporting_files/spellcheck_allow_list.txt b/.github/workflows/supporting_files/spellcheck_allow_list.txt index 7078e871..7a4d98ed 100644 --- a/.github/workflows/supporting_files/spellcheck_allow_list.txt +++ b/.github/workflows/supporting_files/spellcheck_allow_list.txt @@ -1,16 +1,37 @@ +APK +APIs Bioinformatics bioinformatics +Biomes +biomes Biosample biosample +Biosamples +biosamples Changesheets changesheets Community-centric community-centric +Dagit +DataHarmonizer Diátaxis +DOI +ENVO +EnvO +ETL +FASTA +FASTQ Globus +GSC IMG JSON json +KEGG +LANL +LANL's +LBNL +LBNL's +LinkML Lipidomics lipidomics Metabolomics @@ -18,19 +39,42 @@ metabolomics MetaG Metagenome metagenome +Metagenomes +metagenomes Metagenomic metagenomic +Metagenomics +metagenomics Metaproteomic +Metaproteomics MetaT Metatranscriptome metatranscriptome Metatranscriptomic metatranscriptomic +Metatranscriptomics +metatranscriptomics Microbiome microbiome +Microbiomes +microbiomes +MIxS Mgt MkDocs multi-omics +NCBI NMDC NMDC's -nmdc-runtime \ No newline at end of file +NOM +nmdc-runtime +nmdc-schema +nmdc-server +Omics +omics +PNNL +PNNL's +Proteomics +proteomics +RESTful +Sankey +TestFlight \ No newline at end of file From eb7dbe36fd794dd1aa9721a578a55ac049a3bd15 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Thu, 20 Feb 2025 17:22:02 -0800 Subject: [PATCH 07/11] Update spellcheck "allow list" based upon spellcheck result TODO: Consider applying this "allow list" in a case-insensitive way. 
--- .../supporting_files/spellcheck_allow_list.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/supporting_files/spellcheck_allow_list.txt b/.github/workflows/supporting_files/spellcheck_allow_list.txt index 7a4d98ed..43158c06 100644 --- a/.github/workflows/supporting_files/spellcheck_allow_list.txt +++ b/.github/workflows/supporting_files/spellcheck_allow_list.txt @@ -1,5 +1,7 @@ +acyclic APK APIs +AST Bioinformatics bioinformatics Biomes @@ -13,7 +15,10 @@ changesheets Community-centric community-centric Dagit +Dagster +Dagster's DataHarmonizer +DataObject Diátaxis DOI ENVO @@ -63,6 +68,7 @@ Mgt MkDocs multi-omics NCBI +NERSC NMDC NMDC's NOM @@ -73,8 +79,12 @@ Omics omics PNNL PNNL's +programmatically Proteomics proteomics RESTful +Runtime +Runtime's Sankey -TestFlight \ No newline at end of file +TestFlight +YAML \ No newline at end of file From 3d5793d70fcbd50bf9d95101496740d7304e87c1 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Thu, 20 Feb 2025 17:45:43 -0800 Subject: [PATCH 08/11] Update spellcheck "allow list" based upon latest spellcheck result --- .../spellcheck_allow_list.txt | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/.github/workflows/supporting_files/spellcheck_allow_list.txt b/.github/workflows/supporting_files/spellcheck_allow_list.txt index 43158c06..60008a07 100644 --- a/.github/workflows/supporting_files/spellcheck_allow_list.txt +++ b/.github/workflows/supporting_files/spellcheck_allow_list.txt @@ -1,7 +1,12 @@ acyclic +ADR APK APIs +API's +Archaeal +archaeal AST +BBTools Bioinformatics bioinformatics Biomes @@ -14,24 +19,42 @@ Changesheets changesheets Community-centric community-centric +Contig +contig +Contigs +contigs +CSV Dagit Dagster Dagster's DataHarmonizer DataObject +DataObjects Diátaxis DOI +EMSL +EMSL's ENVO EnvO ETL +FastAPI FASTA FASTQ Globus +GOTTCHA +GPL GSC +Heatmap +heatmap +Heatmaps +heatmaps IMG +JGI JSON json +Jupyter KEGG +Kubernetes LANL LANL's 
LBNL @@ -67,6 +90,7 @@ MIxS Mgt MkDocs multi-omics +namespace NCBI NERSC NMDC @@ -77,14 +101,30 @@ nmdc-schema nmdc-server Omics omics +OmicsProcessing +Ontologies +ontologies +Ontologists +ontologists PNNL PNNL's programmatically Proteomics proteomics +PyPI +repo RESTful Runtime Runtime's Sankey +SQLAlchemy +Spectrometry +spectrometry TestFlight +TODO +TOML +TSV +Vue +Vuetify +WDL YAML \ No newline at end of file From f9e463bd7307f404230e8384a257f23afa36381d Mon Sep 17 00:00:00 2001 From: eecavanna Date: Sat, 22 Feb 2025 21:45:01 -0800 Subject: [PATCH 09/11] Configure spellchecker to ignore code cells in Jupyter notebooks --- .github/workflows/supporting_files/spellcheck.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/supporting_files/spellcheck.yml b/.github/workflows/supporting_files/spellcheck.yml index cf80e2a0..487e339c 100644 --- a/.github/workflows/supporting_files/spellcheck.yml +++ b/.github/workflows/supporting_files/spellcheck.yml @@ -11,7 +11,12 @@ matrix: # Docs: https://facelessuser.github.io/pyspelling/filters/html/ - pyspelling.filters.html: comments: false - ignores: [ code, pre ] + # CSS selectors supported: https://facelessuser.github.io/soupsieve/ + ignores: + - code + - pre + # Ignore code cells in Jupyter notebooks; i.e., `.jp-CodeCell` elements within `.jupyter-wrapper` elements. 
+ - .jupyter-wrapper .jp-CodeCell # Docs: https://facelessuser.github.io/pyspelling/filters/url/ - pyspelling.filters.url: {} default_encoding: utf-8 From 966e1e905a24148dc666f366a69f41395ade901f Mon Sep 17 00:00:00 2001 From: eecavanna Date: Sat, 22 Feb 2025 21:56:51 -0800 Subject: [PATCH 10/11] Update spellcheck "allow list" based upon spellcheck result --- .../workflows/supporting_files/spellcheck_allow_list.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/supporting_files/spellcheck_allow_list.txt b/.github/workflows/supporting_files/spellcheck_allow_list.txt index 60008a07..6853ea98 100644 --- a/.github/workflows/supporting_files/spellcheck_allow_list.txt +++ b/.github/workflows/supporting_files/spellcheck_allow_list.txt @@ -36,14 +36,17 @@ EMSL EMSL's ENVO EnvO +ESS ETL FastAPI FASTA FASTQ +funder Globus GOTTCHA GPL GSC +GSC's Heatmap heatmap Heatmaps @@ -53,6 +56,7 @@ JGI JSON json Jupyter +KBase KEGG Kubernetes LANL @@ -120,6 +124,9 @@ Sankey SQLAlchemy Spectrometry spectrometry +Submitter +submitter +submitter's TestFlight TODO TOML @@ -127,4 +134,6 @@ TSV Vue Vuetify WDL +XLS +XLSX YAML \ No newline at end of file From bef3ccf9b41d3bf48e65dd25a88a22f708a3560b Mon Sep 17 00:00:00 2001 From: eecavanna Date: Wed, 26 Feb 2025 21:21:06 -0800 Subject: [PATCH 11/11] Remove lowercase "json" from allow list --- .github/workflows/supporting_files/spellcheck_allow_list.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/supporting_files/spellcheck_allow_list.txt b/.github/workflows/supporting_files/spellcheck_allow_list.txt index 6853ea98..d35511ba 100644 --- a/.github/workflows/supporting_files/spellcheck_allow_list.txt +++ b/.github/workflows/supporting_files/spellcheck_allow_list.txt @@ -54,7 +54,6 @@ heatmaps IMG JGI JSON -json Jupyter KBase KEGG