From f105d6b4ced505ff6d86bd7dfdd407bc57f9cc2e Mon Sep 17 00:00:00 2001 From: clides Date: Sun, 6 Jul 2025 12:39:56 -0400 Subject: [PATCH 01/15] test --- tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools b/tools index 9e692ae38..ecea6a3b0 160000 --- a/tools +++ b/tools @@ -1 +1 @@ -Subproject commit 9e692ae38c085776431da9f1633fcea7fc814440 +Subproject commit ecea6a3b0cff037e4b2f9406af1ee1cafbbc849e From 0ba668360db35b579f70ba39b42c594240338c3e Mon Sep 17 00:00:00 2001 From: clides Date: Mon, 7 Jul 2025 10:31:17 -0400 Subject: [PATCH 02/15] add rag24-doc-segmented for splade-v3 cached and splade-v3 onnx --- ...gmented-test-umbrela.splade-v3.cached.yaml | 56 +++++++++++++++++++ ...segmented-test-umbrela.splade-v3.onnx.yaml | 55 ++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml create mode 100644 src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml new file mode 100644 index 000000000..af999bee2 --- /dev/null +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml @@ -0,0 +1,56 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: collections/msmarco/msmarco-v2.1-doc-segmented-splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized -storeDocvectors +index_stats: # TODO (issue #2870): need to update this section once the corpus is uploaded + documents: 8841823 + documents (non-empty): 8841823 + total terms: 46922883529 + +metrics: + - metric: nDCG@20 + command: bin/trec_eval + params: -c -m ndcg_cut.20 + 
separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@100 + command: bin/trec_eval + params: -c -m ndcg_cut.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "RAG 24: Test queries" + id: rag24.test + path: topics.rag24.test.splade-v3.tsv.gz + qrel: qrels.rag24.test-umbrela-all.txt + +# TODO (issue #2870): need to update this section once the index is uploaded and the encoded query is uploaded +models: + - name: splade-v3-cached + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 1000 + results: + nDCG@20: + - 0.2981 + nDCG@100: + - 0.1782 + R@100: + - 0.0742 diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml new file mode 100644 index 000000000..50604c31f --- /dev/null +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml @@ -0,0 +1,55 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: collections/msmarco/msmarco-v2.1-doc-segmented-splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized -storeDocvectors +index_stats: # TODO (issue #2870): need to update this section once the corpus is uploaded + documents: 8841823 + documents (non-empty): 8841823 + total terms: 46922883529 + +metrics: + - metric: nDCG@20 + command: bin/trec_eval + params: -c -m ndcg_cut.20 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@100 + command: bin/trec_eval + params: -c -m ndcg_cut.100 + separator: "\t" + 
parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "RAG 24: Test queries" + id: rag24.test + path: topics.rag24.test.txt + qrel: qrels.rag24.test-umbrela-all.txt + +models: # TODO (issue #2870): update this section once the index is uploaded + - name: splade-v3-onnx + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3 + results: + nDCG@20: + - 0.2981 + nDCG@100: + - 0.1782 + R@100: + - 0.0742 From cf0ee32145de3e0b8e210a552419e9c04450edc7 Mon Sep 17 00:00:00 2001 From: clides Date: Mon, 7 Jul 2025 10:35:57 -0400 Subject: [PATCH 03/15] update corpus path --- .../rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml | 2 +- .../rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml index af999bee2..67a3349ae 100644 --- a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml @@ -1,6 +1,6 @@ --- corpus: msmarco-v2.1-doc-segmented-splade-v3 -corpus_path: collections/msmarco/msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ collection_class: JsonVectorCollection diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml index 50604c31f..abfefabf9 100644 --- 
a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: msmarco-v2.1-doc-segmented-splade-v3 -corpus_path: collections/msmarco/msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ collection_class: JsonVectorCollection From bb5381996526e8dade711e2d05eacb538ba0b87d Mon Sep 17 00:00:00 2001 From: clides Date: Mon, 7 Jul 2025 10:43:08 -0400 Subject: [PATCH 04/15] update git submodule --- tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools b/tools index ecea6a3b0..9e692ae38 160000 --- a/tools +++ b/tools @@ -1 +1 @@ -Subproject commit ecea6a3b0cff037e4b2f9406af1ee1cafbbc849e +Subproject commit 9e692ae38c085776431da9f1633fcea7fc814440 From e7f9536ba2cfeb575cdd10a5887a08997327d57d Mon Sep 17 00:00:00 2001 From: clides Date: Tue, 8 Jul 2025 21:48:48 -0400 Subject: [PATCH 05/15] added msmarco v2.1 doc segmented splade-v3 bindings --- .../java/io/anserini/index/IndexInfo.java | 14 +++++ ...c-segmented-test.splade-v3.cached.template | 62 ++++++++++++++++++ ...doc-segmented-test.splade-v3.onnx.template | 63 +++++++++++++++++++ ...gmented-test-umbrela.splade-v3.cached.yaml | 17 +++-- ...segmented-test-umbrela.splade-v3.onnx.yaml | 18 +++--- .../io/anserini/index/PrebuiltIndexTest.java | 2 +- 6 files changed, 157 insertions(+), 19 deletions(-) create mode 100644 src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template create mode 100644 src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template diff --git a/src/main/java/io/anserini/index/IndexInfo.java b/src/main/java/io/anserini/index/IndexInfo.java index 3246eb640..be1921db6 100644 --- a/src/main/java/io/anserini/index/IndexInfo.java +++ 
b/src/main/java/io/anserini/index/IndexInfo.java @@ -376,6 +376,20 @@ public enum IndexInfo { VectorQueryGenerator.class.getSimpleName(), "msmarco-v2.1-doc-segmented"), + MSMARCO_V21_DOC_SEGMENTED_SPLADE_V3("msmarco-v2.1-doc-segmented-splade-v3", + "Lucene impact index of the MS MARCO V2.1 segmented document corpus encoded by SPLADE v3.", + "lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3.20250707.4039c3.tar.gz", + "lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3.20250707.4039c3.README.md", + "MS MARCO V2.1 Segmented Doc", + "SPLADE v3", + new String[] { + "https://huggingface.co/datasets/castorini/prebuilt-indexes-msmarco-v2.1-doc-segmented/resolve/main/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3.20250707.4039c3.tar.gz" }, + "75f677301833b4f3bf2c2c286be8879f", + IndexType.SPARSE_IMPACT, + SpladeV3Encoder.class.getSimpleName(), + BagOfWordsQueryGenerator.class.getSimpleName(), + "msmarco-v2.1-doc-segmented"), + // BEIR: flat BEIR_V1_0_0_TREC_COVID_FLAT("beir-v1.0.0-trec-covid.flat", "Lucene inverted 'flat' index of BEIR (v1.0.0): TREC-COVID.", diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template new file mode 100644 index 000000000..3c52d7bb9 --- /dev/null +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template @@ -0,0 +1,62 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. 
+ +The model itself can be download [here](https://huggingface.co/naver/splade-v3). +See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details: + +> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. +These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. +See the following paper for more details: + +> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +## Indexing + +Sample indexing command: + +``` +${index_cmds} +``` + +The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the pre-encoded tokens. 
+For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. + +After indexing has completed, you should be able to perform retrieval as follows: + +``` +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +``` +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template new file mode 100644 index 000000000..584f20ef6 --- /dev/null +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template @@ -0,0 +1,63 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for on-the-fly query encoding) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. + +The model itself can be download [here](https://huggingface.co/naver/splade-v3). +See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details: + +> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_. 
+ +In these experiments, we are using ONNX to perform query encoding on the fly. + +Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. +These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. +See the following paper for more details: + +> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +## Indexing + +Typical indexing command: + +``` +${index_cmds} +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +``` +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml index 67a3349ae..e693a7584 100644 --- a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml @@ -6,11 +6,11 @@ index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 24 -index_options: -impact -pretokenized -storeDocvectors -index_stats: # TODO (issue #2870): need to update this section once the corpus is uploaded - documents: 8841823 - documents (non-empty): 8841823 - total terms: 46922883529 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 metrics: - metric: nDCG@20 @@ -42,15 +42,14 @@ topics: path: topics.rag24.test.splade-v3.tsv.gz qrel: qrels.rag24.test-umbrela-all.txt -# TODO (issue #2870): need to update this section once the index is uploaded and the encoded query is uploaded models: - name: splade-v3-cached display: SPLADE-v3 params: -impact -pretokenized -removeQuery -hits 1000 results: nDCG@20: - - 0.2981 + - 0.5167 nDCG@100: - - 0.1782 + - 0.4587 R@100: - - 0.0742 + - 0.2437 diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml index abfefabf9..c7561826e 100644 --- 
a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml @@ -6,11 +6,11 @@ index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 24 -index_options: -impact -pretokenized -storeDocvectors -index_stats: # TODO (issue #2870): need to update this section once the corpus is uploaded - documents: 8841823 - documents (non-empty): 8841823 - total terms: 46922883529 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 metrics: - metric: nDCG@20 @@ -42,14 +42,14 @@ topics: path: topics.rag24.test.txt qrel: qrels.rag24.test-umbrela-all.txt -models: # TODO (issue #2870): update this section once the index is uploaded +models: - name: splade-v3-onnx display: SPLADE-v3 params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3 results: nDCG@20: - - 0.2981 + - 0.5167 nDCG@100: - - 0.1782 + - 0.4587 R@100: - - 0.0742 + - 0.2437 diff --git a/src/test/java/io/anserini/index/PrebuiltIndexTest.java b/src/test/java/io/anserini/index/PrebuiltIndexTest.java index a0b80008a..78bec61c5 100644 --- a/src/test/java/io/anserini/index/PrebuiltIndexTest.java +++ b/src/test/java/io/anserini/index/PrebuiltIndexTest.java @@ -61,6 +61,6 @@ public void testUrls() { // test number of prebuilt-indexes @Test public void testNumPrebuiltIndexes() { - assertEquals(211, IndexInfo.values().length); + assertEquals(212, IndexInfo.values().length); } } From 47e7197983f022d8618e18fa766aaa21359acf66 Mon Sep 17 00:00:00 2001 From: clides Date: Tue, 8 Jul 2025 22:02:48 -0400 Subject: [PATCH 06/15] fix file naming --- ...=> rag24-doc-segmented-test-umbrela.splade-v3.cached.template} | 0 ...e => rag24-doc-segmented-test-umbrela.splade-v3.onnx.template} | 0 2 files changed, 0 
insertions(+), 0 deletions(-) rename src/main/resources/docgen/templates/{rag24-doc-segmented-test.splade-v3.cached.template => rag24-doc-segmented-test-umbrela.splade-v3.cached.template} (100%) rename src/main/resources/docgen/templates/{rag24-doc-segmented-test.splade-v3.onnx.template => rag24-doc-segmented-test-umbrela.splade-v3.onnx.template} (100%) diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template similarity index 100% rename from src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template rename to src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template similarity index 100% rename from src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template rename to src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template From 29f6a9478d97e8137f918a1af2acd9e5f1af2319 Mon Sep 17 00:00:00 2001 From: clides Date: Tue, 8 Jul 2025 22:16:21 -0400 Subject: [PATCH 07/15] added build files --- ...segmented-test-umbrela.splade-v3.cached.md | 82 ++++++++++++++++++ ...c-segmented-test-umbrela.splade-v3.onnx.md | 83 +++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md create mode 100644 docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md new file mode 100644 index 000000000..e44250cae --- /dev/null +++ 
b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md @@ -0,0 +1,82 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. + +The model itself can be download [here](https://huggingface.co/naver/splade-v3). +See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details: + +> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. +These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. +See the following paper for more details: + +> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression rag24-doc-segmented-test-umbrela.splade-v3.cached +``` + +## Indexing + +Sample indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the pre-encoded tokens. +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.rag24.test.splade-v3.tsv.gz \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt \ + -impact -pretokenized -removeQuery -hits 1000 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m ndcg_cut.20 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +bin/trec_eval -c -m ndcg_cut.100 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **nDCG@20** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| RAG 24: Test queries | 0.5167 | +| **nDCG@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.4587 | +| **R@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.2437 | diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md new file mode 100644 index 000000000..6addf4921 --- /dev/null +++ b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md @@ -0,0 +1,83 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for 
on-the-fly query encoding) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. + +The model itself can be download [here](https://huggingface.co/naver/splade-v3). +See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details: + +> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_. + +In these experiments, we are using ONNX to perform query encoding on the fly. + +Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. +These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. +See the following paper for more details: + +> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression rag24-doc-segmented-test-umbrela.splade-v3.onnx +``` + +## Indexing + +Typical indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.rag24.test.txt \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt \ + -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m ndcg_cut.20 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +bin/trec_eval -c -m ndcg_cut.100 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **nDCG@20** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| RAG 24: Test queries | 0.5167 | +| **nDCG@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.4587 | +| **R@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.2437 | From 4cd19bb52f5e5557c8fa86407593b0481851c2ef Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 14:51:55 -0400 Subject: [PATCH 08/15] added nist and msmarco v2.1 templates --- ....1-doc-segmented.splade-v3.cached.template | 57 +++++++++++++++ ...v2.1-doc-segmented.splade-v3.onnx.template | 57 +++++++++++++++ ...mented-test-nist.splade-v3.cached.template | 62 ++++++++++++++++ ...egmented-test-nist.splade-v3.onnx.template | 63 ++++++++++++++++ ...o-v2.1-doc-segmented.splade-v3.cached.yaml | 72 
+++++++++++++++++++ ...rco-v2.1-doc-segmented.splade-v3.onnx.yaml | 72 +++++++++++++++++++ ...-segmented-test-nist.splade-v3.cached.yaml | 55 ++++++++++++++ ...oc-segmented-test-nist.splade-v3.onnx.yaml | 55 ++++++++++++++ 8 files changed, 493 insertions(+) create mode 100644 src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.onnx.template create mode 100644 src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.cached.template create mode 100644 src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template create mode 100644 src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.cached.yaml create mode 100644 src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.onnx.yaml create mode 100644 src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.cached.yaml create mode 100644 src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml diff --git a/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.cached.template b/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.cached.template new file mode 100644 index 000000000..e0c4d7092 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.cached.template @@ -0,0 +1,57 @@ +# Anserini Regressions: MS MARCO V2.1 Document Ranking + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries) + +This page describes regression experiments for document ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the dev queries, which is integrated into Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. 
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
+```
+
+## Indexing
+
+Typical indexing command:
+
+```
+${index_cmds}
+```
+
+The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus.
+
+For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md).
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+These evaluation resources are from the original V2 corpus, but have been "projected" over to the V2.1 corpus.
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
diff --git a/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.onnx.template b/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.onnx.template
new file mode 100644
index 000000000..479aafd95
--- /dev/null
+++ b/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.onnx.template
@@ -0,0 +1,57 @@
+# Anserini Regressions: MS MARCO V2.1 Document Ranking
+
+**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments for document ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the dev queries, which is integrated into Anserini's regression testing framework.
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using ONNX to perform query encoding on the fly.
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +## Indexing + +Typical indexing command: + +``` +${index_cmds} +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md). + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +These evaluation resources are from the original V2 corpus, but have been "projected" over to the V2.1 corpus. + +After indexing has completed, you should be able to perform retrieval as follows: + +``` +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +``` +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.cached.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.cached.template new file mode 100644 index 000000000..3c52d7bb9 --- /dev/null +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.cached.template @@ -0,0 +1,62 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework. 
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set.
+These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments.
+See the following paper for more details:
+
+> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025.
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +## Indexing + +Sample indexing command: + +``` +${index_cmds} +``` + +The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the pre-encoded tokens. +For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. + +After indexing has completed, you should be able to perform retrieval as follows: + +``` +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +``` +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template new file mode 100644 index 000000000..584f20ef6 --- /dev/null +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template @@ -0,0 +1,63 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for on-the-fly query encoding) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC 
parlance), which is integrated into Anserini's regression testing framework.
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using ONNX to perform query encoding on the fly.
+
+Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set.
+These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments.
+See the following paper for more details:
+
+> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025.
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
+```
+
+## Indexing
+
+Typical indexing command:
+
+```
+${index_cmds}
+```
+
+The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus.
+ +For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. + +After indexing has completed, you should be able to perform retrieval as follows: + +``` +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +``` +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} diff --git a/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.cached.yaml b/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.cached.yaml new file mode 100644 index 000000000..e72136e3b --- /dev/null +++ b/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.cached.yaml @@ -0,0 +1,72 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 + +metrics: + - metric: MAP@100 + command: bin/trec_eval + params: -c -M 100 -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: true + - metric: MRR@100 + command: bin/trec_eval + params: -c -M 100 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: true + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: 
R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "[MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" + id: dev + path: topics.msmarco-v2-doc.dev.tsv.gz + qrel: qrels.msmarco-v2.1-doc.dev.txt + - name: "[MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" + id: dev2 + path: topics.msmarco-v2-doc.dev2.tsv.gz + qrel: qrels.msmarco-v2.1-doc.dev2.txt + +models: + - name: splade-v3-cached + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + results: + MAP@100: + - 0.2846 + - 0.2836 + MRR@100: + - 0.2874 + - 0.2869 + R@100: + - 0.8446 + - 0.8462 + R@1000: + - 0.9390 + - 0.9407 diff --git a/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.onnx.yaml b/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.onnx.yaml new file mode 100644 index 000000000..38fdc9c5f --- /dev/null +++ b/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.onnx.yaml @@ -0,0 +1,72 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 + +metrics: + - metric: MAP@100 + command: bin/trec_eval + params: -c -M 100 -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: true + - metric: MRR@100 + command: bin/trec_eval + params: -c -M 100 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: true + - metric: R@100 
+ command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "[MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" + id: dev + path: topics.msmarco-v2-doc.dev.txt + qrel: qrels.msmarco-v2.1-doc.dev.txt + - name: "[MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" + id: dev2 + path: topics.msmarco-v2-doc.dev2.txt + qrel: qrels.msmarco-v2.1-doc.dev2.txt + +models: + - name: splade-v3-onnx + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -encoder SpladeV3 + results: + MAP@100: + - 0.2846 + - 0.2836 + MRR@100: + - 0.2874 + - 0.2869 + R@100: + - 0.8446 + - 0.8462 + R@1000: + - 0.9390 + - 0.9407 diff --git a/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.cached.yaml b/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.cached.yaml new file mode 100644 index 000000000..1460739fc --- /dev/null +++ b/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.cached.yaml @@ -0,0 +1,55 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 + +metrics: + - metric: nDCG@20 + command: bin/trec_eval + params: -c -m ndcg_cut.20 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - 
metric: nDCG@100 + command: bin/trec_eval + params: -c -m ndcg_cut.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "RAG 24: Test queries" + id: rag24.test + path: topics.rag24.test.splade-v3.tsv.gz + qrel: qrels.rag24.test.txt + +models: + - name: splade-v3-cached + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 1000 + results: + nDCG@20: + - 0.4642 + nDCG@100: + - 0.4349 + R@100: + - 0.3198 diff --git a/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml b/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml new file mode 100644 index 000000000..fd1452333 --- /dev/null +++ b/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml @@ -0,0 +1,55 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 + +metrics: + - metric: nDCG@20 + command: bin/trec_eval + params: -c -m ndcg_cut.20 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@100 + command: bin/trec_eval + params: -c -m ndcg_cut.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "RAG 24: Test queries" + id: rag24.test + path: 
topics.rag24.test.txt + qrel: qrels.rag24.test.txt + +models: + - name: splade-v3-onnx + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3 + results: + nDCG@20: + - 0.4642 + nDCG@100: + - 0.4349 + R@100: + - 0.3198 From bae7b9b30d07d133062e7bb5c0e6a6f5e199cc2b Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 14:54:00 -0400 Subject: [PATCH 09/15] updated git submodule --- tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools b/tools index 9e692ae38..0c2805c4d 160000 --- a/tools +++ b/tools @@ -1 +1 @@ -Subproject commit 9e692ae38c085776431da9f1633fcea7fc814440 +Subproject commit 0c2805c4d00b1e77a776c47f4aef6faef54b6398 From 8cbee30a739e050985efc8bca0ea73c6bbb05172 Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 15:17:46 -0400 Subject: [PATCH 10/15] added regression build files --- ...rco-v2.1-doc-segmented.splade-v3.cached.md | 92 +++++++++++++++++++ ...marco-v2.1-doc-segmented.splade-v3.onnx.md | 92 +++++++++++++++++++ ...oc-segmented-test-nist.splade-v3.cached.md | 82 +++++++++++++++++ ...-doc-segmented-test-nist.splade-v3.onnx.md | 83 +++++++++++++++++ 4 files changed, 349 insertions(+) create mode 100644 docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.cached.md create mode 100644 docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.onnx.md create mode 100644 docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.cached.md create mode 100644 docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md diff --git a/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.cached.md b/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.cached.md new file mode 100644 index 000000000..4d5fc63d7 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.cached.md @@ -0,0 +1,92 @@ +# Anserini Regressions: MS MARCO V2.1 Document Ranking + +**Model**: 
[SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries)
+
+This page describes regression experiments for document ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the dev queries, which is integrated into Anserini's regression testing framework.
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2.1-doc-segmented.splade-v3.cached +``` + +## Indexing + +Typical indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +These evaluation resources are from the original V2 corpus, but have been "projected" over to the V2.1 corpus. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.msmarco-v2-doc.dev.tsv.gz \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev.txt \ + -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.msmarco-v2-doc.dev2.tsv.gz \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev2.txt \ + -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev2.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev2.txt +bin/trec_eval 
-c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev2.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **MAP@100** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2846 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2836 | +| **MRR@100** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2874 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2869 | +| **R@100** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.8446 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.8462 | +| **R@1000** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.9390 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.9407 | diff --git a/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.onnx.md b/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.onnx.md new file mode 100644 index 000000000..5c2fa5343 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.onnx.md @@ -0,0 +1,92 @@ +# Anserini Regressions: MS MARCO V2.1 Document Ranking + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for on-the-fly query encoding) + +This page describes regression experiments for document ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the dev queries, which is integrated into 
Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. + +The model itself can be download [here](https://huggingface.co/naver/splade-v3). +See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details: + +> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_. + +In these experiments, we are using ONNX to perform query encoding on the fly. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2.1-doc-segmented.splade-v3.onnx +``` + +## Indexing + +Typical indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). 
+ +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +These evaluation resources are from the original V2 corpus, but have been "projected" over to the V2.1 corpus. + +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.msmarco-v2-doc.dev.txt \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev.txt \ + -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -encoder SpladeV3 & +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev2.txt \ + -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -encoder SpladeV3 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -m recall.100 
tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev2.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev2.txt +bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev2.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **MAP@100** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2846 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2836 | +| **MRR@100** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2874 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2869 | +| **R@100** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.8446 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.8462 | +| **R@1000** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.9390 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.9407 | diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.cached.md b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.cached.md new file mode 100644 index 000000000..d3d52f9d6 --- /dev/null +++ b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.cached.md 
@@ -0,0 +1,82 @@
+# Anserini Regressions: TREC 2024 RAG Track Test Topics
+
+**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries)
+
+This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework.
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set.
+These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments.
+See the following paper for more details:
+
+> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025.
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression rag24-doc-segmented-test-nist.splade-v3.cached +``` + +## Indexing + +Sample indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the pre-encoded tokens. +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.rag24.test.splade-v3.tsv.gz \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt \ + -impact -pretokenized -removeQuery -hits 1000 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m ndcg_cut.20 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +bin/trec_eval -c -m ndcg_cut.100 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **nDCG@20** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| RAG 24: Test queries | 0.4642 | +| **nDCG@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.4349 | +| **R@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.3198 | diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md new file mode 100644 index 000000000..1a87feb24 --- /dev/null +++ b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md @@ -0,0 +1,83 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for on-the-fly query encoding) + +This page describes 
regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework.
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using ONNX to perform query encoding on the fly.
+
+Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set.
+These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments.
+See the following paper for more details:
+
+> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025.
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression rag24-doc-segmented-test-nist.splade-v3.onnx +``` + +## Indexing + +Typical indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.rag24.test.txt \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt \ + -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m ndcg_cut.20 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +bin/trec_eval -c -m ndcg_cut.100 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **nDCG@20** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| RAG 24: Test queries | 0.4642 | +| **nDCG@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.4349 | +| **R@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.3198 | From 51395a96ef3e3edda75efc5ca0292ca4e51755fc Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 16:23:17 -0400 Subject: [PATCH 11/15] update tools --- tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools b/tools index 0c2805c4d..da31c91e5 160000 --- a/tools +++ b/tools @@ -1 +1 @@ -Subproject commit 0c2805c4d00b1e77a776c47f4aef6faef54b6398 +Subproject commit da31c91e59af2678317060e6cdffccc40b22cee0 From 1502a49b4709bc8402c1aef2c6616c035edd7a26 Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 16:45:54 -0400 
Subject: [PATCH 12/15] updated tools --- tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools b/tools index da31c91e5..3b506ecb3 160000 --- a/tools +++ b/tools @@ -1 +1 @@ -Subproject commit da31c91e59af2678317060e6cdffccc40b22cee0 +Subproject commit 3b506ecb3e8a19fd596936761d76282f2abeba03 From 45bf6bbf906536a11b083f820f943f1d34f7a5f2 Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 16:50:55 -0400 Subject: [PATCH 13/15] fix template --- ...24-doc-segmented-test-umbrela.splade-v3.cached.template | 5 ++--- ...ag24-doc-segmented-test-umbrela.splade-v3.onnx.template | 7 +++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template index 3c52d7bb9..f9723e2f9 100644 --- a/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template @@ -12,9 +12,8 @@ See the [official SPLADE repo](https://github.com/naver/splade) and the followin In these experiments, we are using cached queries (i.e., cached results of query encoding). -Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. -These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. -See the following paper for more details: +Evaluation uses (automatically generated) UMBRELA qrels over all 301 topics from the TREC 2024 RAG Track test set. +UMBRELA is described in the following paper: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. 
_Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template index 584f20ef6..42e8b8886 100644 --- a/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template @@ -12,14 +12,13 @@ See the [official SPLADE repo](https://github.com/naver/splade) and the followin In these experiments, we are using ONNX to perform query encoding on the fly. -Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. -These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. -See the following paper for more details: +Evaluation uses (automatically generated) UMBRELA qrels over all 301 topics from the TREC 2024 RAG Track test set. +UMBRELA is described in the following paper: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. The exact configurations for these regressions are stored in [this YAML file](${yaml}). -Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: From d0dd0380679ad06784c0cd960b158780fa9b690d Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 16:54:09 -0400 Subject: [PATCH 14/15] fix template --- .../rag24-doc-segmented-test-nist.splade-v3.onnx.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template index 584f20ef6..38763a90d 100644 --- a/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template @@ -19,7 +19,7 @@ See the following paper for more details: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. The exact configurations for these regressions are stored in [this YAML file](${yaml}). -Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: From e66560bb9f8cafdd39ba17bb87d49957a8d6a0ab Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 17:01:50 -0400 Subject: [PATCH 15/15] update build files --- ...essions-rag24-doc-segmented-test-nist.splade-v3.onnx.md | 2 +- ...ns-rag24-doc-segmented-test-umbrela.splade-v3.cached.md | 5 ++--- ...ions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md | 7 +++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md index 1a87feb24..be6866bcc 100644 --- a/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md +++ b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md @@ -19,7 +19,7 @@ See the following paper for more details: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md index e44250cae..d26dbc29b 100644 --- a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md +++ b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md @@ -12,9 +12,8 @@ See the [official SPLADE repo](https://github.com/naver/splade) and the followin In these experiments, we are using cached queries (i.e., cached results of query encoding). -Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. -These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. -See the following paper for more details: +Evaluation uses (automatically generated) UMBRELA qrels over all 301 topics from the TREC 2024 RAG Track test set. +UMBRELA is described in the following paper: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. 
diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md index 6addf4921..f51d13c54 100644 --- a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md +++ b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md @@ -12,14 +12,13 @@ See the [official SPLADE repo](https://github.com/naver/splade) and the followin In these experiments, we are using ONNX to perform query encoding on the fly. -Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. -These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. -See the following paper for more details: +Evaluation uses (automatically generated) UMBRELA qrels over all 301 topics from the TREC 2024 RAG Track test set. +UMBRELA is described in the following paper: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: