From f105d6b4ced505ff6d86bd7dfdd407bc57f9cc2e Mon Sep 17 00:00:00 2001 From: clides Date: Sun, 6 Jul 2025 12:39:56 -0400 Subject: [PATCH 01/15] test --- tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools b/tools index 9e692ae38..ecea6a3b0 160000 --- a/tools +++ b/tools @@ -1 +1 @@ -Subproject commit 9e692ae38c085776431da9f1633fcea7fc814440 +Subproject commit ecea6a3b0cff037e4b2f9406af1ee1cafbbc849e From 0ba668360db35b579f70ba39b42c594240338c3e Mon Sep 17 00:00:00 2001 From: clides Date: Mon, 7 Jul 2025 10:31:17 -0400 Subject: [PATCH 02/15] add rag24-doc-segmented for splade-v3 cached and splade-v3 onnx --- ...gmented-test-umbrela.splade-v3.cached.yaml | 56 +++++++++++++++++++ ...segmented-test-umbrela.splade-v3.onnx.yaml | 55 ++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml create mode 100644 src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml new file mode 100644 index 000000000..af999bee2 --- /dev/null +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml @@ -0,0 +1,56 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: collections/msmarco/msmarco-v2.1-doc-segmented-splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized -storeDocvectors +index_stats: # TODO (issue #2870): need to update this section once the corpus is uploaded + documents: 8841823 + documents (non-empty): 8841823 + total terms: 46922883529 + +metrics: + - metric: nDCG@20 + command: bin/trec_eval + params: -c -m ndcg_cut.20 + 
separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@100 + command: bin/trec_eval + params: -c -m ndcg_cut.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "RAG 24: Test queries" + id: rag24.test + path: topics.rag24.test.splade-v3.tsv.gz + qrel: qrels.rag24.test-umbrela-all.txt + +# TODO (issue #2870): need to update this section once the index is uploaded and the encoded query is uploaded +models: + - name: splade-v3-cached + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 1000 + results: + nDCG@20: + - 0.2981 + nDCG@100: + - 0.1782 + R@100: + - 0.0742 diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml new file mode 100644 index 000000000..50604c31f --- /dev/null +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml @@ -0,0 +1,55 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: collections/msmarco/msmarco-v2.1-doc-segmented-splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized -storeDocvectors +index_stats: # TODO (issue #2870): need to update this section once the corpus is uploaded + documents: 8841823 + documents (non-empty): 8841823 + total terms: 46922883529 + +metrics: + - metric: nDCG@20 + command: bin/trec_eval + params: -c -m ndcg_cut.20 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@100 + command: bin/trec_eval + params: -c -m ndcg_cut.100 + separator: "\t" + 
parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "RAG 24: Test queries" + id: rag24.test + path: topics.rag24.test.txt + qrel: qrels.rag24.test-umbrela-all.txt + +models: # TODO (issue #2870): update this section once the index is uploaded + - name: splade-v3-onnx + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3 + results: + nDCG@20: + - 0.2981 + nDCG@100: + - 0.1782 + R@100: + - 0.0742 From cf0ee32145de3e0b8e210a552419e9c04450edc7 Mon Sep 17 00:00:00 2001 From: clides Date: Mon, 7 Jul 2025 10:35:57 -0400 Subject: [PATCH 03/15] update corpus path --- .../rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml | 2 +- .../rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml index af999bee2..67a3349ae 100644 --- a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml @@ -1,6 +1,6 @@ --- corpus: msmarco-v2.1-doc-segmented-splade-v3 -corpus_path: collections/msmarco/msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ collection_class: JsonVectorCollection diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml index 50604c31f..abfefabf9 100644 --- 
a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml @@ -1,6 +1,6 @@ --- corpus: msmarco-v2.1-doc-segmented-splade-v3 -corpus_path: collections/msmarco/msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ collection_class: JsonVectorCollection From bb5381996526e8dade711e2d05eacb538ba0b87d Mon Sep 17 00:00:00 2001 From: clides Date: Mon, 7 Jul 2025 10:43:08 -0400 Subject: [PATCH 04/15] update git submodule --- tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools b/tools index ecea6a3b0..9e692ae38 160000 --- a/tools +++ b/tools @@ -1 +1 @@ -Subproject commit ecea6a3b0cff037e4b2f9406af1ee1cafbbc849e +Subproject commit 9e692ae38c085776431da9f1633fcea7fc814440 From e7f9536ba2cfeb575cdd10a5887a08997327d57d Mon Sep 17 00:00:00 2001 From: clides Date: Tue, 8 Jul 2025 21:48:48 -0400 Subject: [PATCH 05/15] added msmarco v2.1 doc segmented splade-v3 bindings --- .../java/io/anserini/index/IndexInfo.java | 14 +++++ ...c-segmented-test.splade-v3.cached.template | 62 ++++++++++++++++++ ...doc-segmented-test.splade-v3.onnx.template | 63 +++++++++++++++++++ ...gmented-test-umbrela.splade-v3.cached.yaml | 17 +++-- ...segmented-test-umbrela.splade-v3.onnx.yaml | 18 +++--- .../io/anserini/index/PrebuiltIndexTest.java | 2 +- 6 files changed, 157 insertions(+), 19 deletions(-) create mode 100644 src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template create mode 100644 src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template diff --git a/src/main/java/io/anserini/index/IndexInfo.java b/src/main/java/io/anserini/index/IndexInfo.java index 3246eb640..be1921db6 100644 --- a/src/main/java/io/anserini/index/IndexInfo.java +++ 
b/src/main/java/io/anserini/index/IndexInfo.java @@ -376,6 +376,20 @@ public enum IndexInfo { VectorQueryGenerator.class.getSimpleName(), "msmarco-v2.1-doc-segmented"), + MSMARCO_V21_DOC_SEGMENTED_SPLADE_V3("msmarco-v2.1-doc-segmented-splade-v3", + "Lucene impact index of the MS MARCO V2.1 segmented document corpus encoded by SPLADE v3.", + "lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3.20250707.4039c3.tar.gz", + "lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3.20250707.4039c3.README.md", + "MS MARCO V2.1 Segmented Doc", + "SPLADE v3", + new String[] { + "https://huggingface.co/datasets/castorini/prebuilt-indexes-msmarco-v2.1-doc-segmented/resolve/main/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3.20250707.4039c3.tar.gz" }, + "75f677301833b4f3bf2c2c286be8879f", + IndexType.SPARSE_IMPACT, + SpladeV3Encoder.class.getSimpleName(), + BagOfWordsQueryGenerator.class.getSimpleName(), + "msmarco-v2.1-doc-segmented"), + // BEIR: flat BEIR_V1_0_0_TREC_COVID_FLAT("beir-v1.0.0-trec-covid.flat", "Lucene inverted 'flat' index of BEIR (v1.0.0): TREC-COVID.", diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template new file mode 100644 index 000000000..3c52d7bb9 --- /dev/null +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template @@ -0,0 +1,62 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. 
+ +The model itself can be download [here](https://huggingface.co/naver/splade-v3). +See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details: + +> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. +These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. +See the following paper for more details: + +> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +## Indexing + +Sample indexing command: + +``` +${index_cmds} +``` + +The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the pre-encoded tokens. 
+For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. + +After indexing has completed, you should be able to perform retrieval as follows: + +``` +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +``` +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template new file mode 100644 index 000000000..584f20ef6 --- /dev/null +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template @@ -0,0 +1,63 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for on-the-fly query encoding) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. + +The model itself can be download [here](https://huggingface.co/naver/splade-v3). +See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details: + +> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_. 
+ +In these experiments, we are using ONNX to perform query encoding on the fly. + +Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. +These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. +See the following paper for more details: + +> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. + +The exact configurations for these regressions are stored in [this YAML file](${yaml}). +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +## Indexing + +Typical indexing command: + +``` +${index_cmds} +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +``` +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml index 67a3349ae..e693a7584 100644 --- a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml @@ -6,11 +6,11 @@ index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 24 -index_options: -impact -pretokenized -storeDocvectors -index_stats: # TODO (issue #2870): need to update this section once the corpus is uploaded - documents: 8841823 - documents (non-empty): 8841823 - total terms: 46922883529 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 metrics: - metric: nDCG@20 @@ -42,15 +42,14 @@ topics: path: topics.rag24.test.splade-v3.tsv.gz qrel: qrels.rag24.test-umbrela-all.txt -# TODO (issue #2870): need to update this section once the index is uploaded and the encoded query is uploaded models: - name: splade-v3-cached display: SPLADE-v3 params: -impact -pretokenized -removeQuery -hits 1000 results: nDCG@20: - - 0.2981 + - 0.5167 nDCG@100: - - 0.1782 + - 0.4587 R@100: - - 0.0742 + - 0.2437 diff --git a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml index abfefabf9..c7561826e 100644 --- 
a/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml +++ b/src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml @@ -6,11 +6,11 @@ index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 24 -index_options: -impact -pretokenized -storeDocvectors -index_stats: # TODO (issue #2870): need to update this section once the corpus is uploaded - documents: 8841823 - documents (non-empty): 8841823 - total terms: 46922883529 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 metrics: - metric: nDCG@20 @@ -42,14 +42,14 @@ topics: path: topics.rag24.test.txt qrel: qrels.rag24.test-umbrela-all.txt -models: # TODO (issue #2870): update this section once the index is uploaded +models: - name: splade-v3-onnx display: SPLADE-v3 params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3 results: nDCG@20: - - 0.2981 + - 0.5167 nDCG@100: - - 0.1782 + - 0.4587 R@100: - - 0.0742 + - 0.2437 diff --git a/src/test/java/io/anserini/index/PrebuiltIndexTest.java b/src/test/java/io/anserini/index/PrebuiltIndexTest.java index a0b80008a..78bec61c5 100644 --- a/src/test/java/io/anserini/index/PrebuiltIndexTest.java +++ b/src/test/java/io/anserini/index/PrebuiltIndexTest.java @@ -61,6 +61,6 @@ public void testUrls() { // test number of prebuilt-indexes @Test public void testNumPrebuiltIndexes() { - assertEquals(211, IndexInfo.values().length); + assertEquals(212, IndexInfo.values().length); } } From 47e7197983f022d8618e18fa766aaa21359acf66 Mon Sep 17 00:00:00 2001 From: clides Date: Tue, 8 Jul 2025 22:02:48 -0400 Subject: [PATCH 06/15] fix file naming --- ...=> rag24-doc-segmented-test-umbrela.splade-v3.cached.template} | 0 ...e => rag24-doc-segmented-test-umbrela.splade-v3.onnx.template} | 0 2 files changed, 0 
insertions(+), 0 deletions(-) rename src/main/resources/docgen/templates/{rag24-doc-segmented-test.splade-v3.cached.template => rag24-doc-segmented-test-umbrela.splade-v3.cached.template} (100%) rename src/main/resources/docgen/templates/{rag24-doc-segmented-test.splade-v3.onnx.template => rag24-doc-segmented-test-umbrela.splade-v3.onnx.template} (100%) diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template similarity index 100% rename from src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.cached.template rename to src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template similarity index 100% rename from src/main/resources/docgen/templates/rag24-doc-segmented-test.splade-v3.onnx.template rename to src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template From 29f6a9478d97e8137f918a1af2acd9e5f1af2319 Mon Sep 17 00:00:00 2001 From: clides Date: Tue, 8 Jul 2025 22:16:21 -0400 Subject: [PATCH 07/15] added build files --- ...segmented-test-umbrela.splade-v3.cached.md | 82 ++++++++++++++++++ ...c-segmented-test-umbrela.splade-v3.onnx.md | 83 +++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md create mode 100644 docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md new file mode 100644 index 000000000..e44250cae --- /dev/null +++ 
b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md @@ -0,0 +1,82 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. + +The model itself can be download [here](https://huggingface.co/naver/splade-v3). +See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details: + +> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_. + +In these experiments, we are using cached queries (i.e., cached results of query encoding). + +Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. +These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. +See the following paper for more details: + +> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.cached.yaml). 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression rag24-doc-segmented-test-umbrela.splade-v3.cached +``` + +## Indexing + +Sample indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the pre-encoded tokens. +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.rag24.test.splade-v3.tsv.gz \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt \ + -impact -pretokenized -removeQuery -hits 1000 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m ndcg_cut.20 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +bin/trec_eval -c -m ndcg_cut.100 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **nDCG@20** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| RAG 24: Test queries | 0.5167 | +| **nDCG@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.4587 | +| **R@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.2437 | diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md new file mode 100644 index 000000000..6addf4921 --- /dev/null +++ b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md @@ -0,0 +1,83 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for 
on-the-fly query encoding) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. + +The model itself can be download [here](https://huggingface.co/naver/splade-v3). +See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details: + +> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_. + +In these experiments, we are using ONNX to perform query encoding on the fly. + +Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. +These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. +See the following paper for more details: + +> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression rag24-doc-segmented-test-umbrela.splade-v3.onnx +``` + +## Indexing + +Typical indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.rag24.test.txt \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt \ + -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m ndcg_cut.20 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +bin/trec_eval -c -m ndcg_cut.100 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.rag24.test-umbrela-all.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **nDCG@20** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| RAG 24: Test queries | 0.5167 | +| **nDCG@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.4587 | +| **R@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.2437 | From 4cd19bb52f5e5557c8fa86407593b0481851c2ef Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 14:51:55 -0400 Subject: [PATCH 08/15] added nist and msmarco v2.1 templates --- ....1-doc-segmented.splade-v3.cached.template | 57 +++++++++++++++ ...v2.1-doc-segmented.splade-v3.onnx.template | 57 +++++++++++++++ ...mented-test-nist.splade-v3.cached.template | 62 ++++++++++++++++ ...egmented-test-nist.splade-v3.onnx.template | 63 ++++++++++++++++ ...o-v2.1-doc-segmented.splade-v3.cached.yaml | 72 
+++++++++++++++++++ ...rco-v2.1-doc-segmented.splade-v3.onnx.yaml | 72 +++++++++++++++++++ ...-segmented-test-nist.splade-v3.cached.yaml | 55 ++++++++++++++ ...oc-segmented-test-nist.splade-v3.onnx.yaml | 55 ++++++++++++++ 8 files changed, 493 insertions(+) create mode 100644 src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.cached.template create mode 100644 src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.onnx.template create mode 100644 src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.cached.template create mode 100644 src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template create mode 100644 src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.cached.yaml create mode 100644 src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.onnx.yaml create mode 100644 src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.cached.yaml create mode 100644 src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml diff --git a/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.cached.template b/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.cached.template new file mode 100644 index 000000000..e0c4d7092 --- /dev/null +++ b/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.cached.template @@ -0,0 +1,57 @@ +# Anserini Regressions: MS MARCO V2.1 Document Ranking + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries) + +This page describes regression experiments for document ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the dev queries, which is integrated into Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. 
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
+```
+
+## Indexing
+
+Typical indexing command:
+
+```
+${index_cmds}
+```
+
+The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus.
+
+For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md).
+
+## Retrieval
+
+Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule.
+These evaluation resources are from the original V2 corpus, but have been "projected" over to the V2.1 corpus.
+
+After indexing has completed, you should be able to perform retrieval as follows:
+
+```
+${ranking_cmds}
+```
+
+Evaluation can be performed using `trec_eval`:
+
+```
+${eval_cmds}
+```
+
+## Effectiveness
+
+With the above commands, you should be able to reproduce the following results:
+
+${effectiveness}
diff --git a/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.onnx.template b/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.onnx.template
new file mode 100644
index 000000000..479aafd95
--- /dev/null
+++ b/src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.onnx.template
@@ -0,0 +1,57 @@
+# Anserini Regressions: MS MARCO V2.1 Document Ranking
+
+**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for on-the-fly query encoding)
+
+This page describes regression experiments for document ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the dev queries, which is integrated into Anserini's regression testing framework.
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using ONNX to perform query encoding on the fly.
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +## Indexing + +Typical indexing command: + +``` +${index_cmds} +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md). + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +These evaluation resources are from the original V2 corpus, but have been "projected" over to the V2.1 corpus. + +After indexing has completed, you should be able to perform retrieval as follows: + +``` +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +``` +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.cached.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.cached.template new file mode 100644 index 000000000..3c52d7bb9 --- /dev/null +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.cached.template @@ -0,0 +1,62 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework. 
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set.
+These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments.
+See the following paper for more details:
+
+> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025.
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression ${test_name} +``` + +## Indexing + +Sample indexing command: + +``` +${index_cmds} +``` + +The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the pre-encoded tokens. +For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. + +After indexing has completed, you should be able to perform retrieval as follows: + +``` +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +``` +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template new file mode 100644 index 000000000..584f20ef6 --- /dev/null +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template @@ -0,0 +1,63 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for on-the-fly query encoding) + +This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC 
parlance), which is integrated into Anserini's regression testing framework.
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using ONNX to perform query encoding on the fly.
+
+Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set.
+These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments.
+See the following paper for more details:
+
+> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025.
+
+The exact configurations for these regressions are stored in [this YAML file](${yaml}).
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead.
+
+From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
+
+```
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
+```
+
+## Indexing
+
+Typical indexing command:
+
+```
+${index_cmds}
+```
+
+The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus.
+ +For additional details, see explanation of [common indexing options](${root_path}/docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. + +After indexing has completed, you should be able to perform retrieval as follows: + +``` +${ranking_cmds} +``` + +Evaluation can be performed using `trec_eval`: + +``` +${eval_cmds} +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +${effectiveness} diff --git a/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.cached.yaml b/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.cached.yaml new file mode 100644 index 000000000..e72136e3b --- /dev/null +++ b/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.cached.yaml @@ -0,0 +1,72 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 + +metrics: + - metric: MAP@100 + command: bin/trec_eval + params: -c -M 100 -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: true + - metric: MRR@100 + command: bin/trec_eval + params: -c -M 100 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: true + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: 
R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "[MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" + id: dev + path: topics.msmarco-v2-doc.dev.tsv.gz + qrel: qrels.msmarco-v2.1-doc.dev.txt + - name: "[MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" + id: dev2 + path: topics.msmarco-v2-doc.dev2.tsv.gz + qrel: qrels.msmarco-v2.1-doc.dev2.txt + +models: + - name: splade-v3-cached + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 + results: + MAP@100: + - 0.2846 + - 0.2836 + MRR@100: + - 0.2874 + - 0.2869 + R@100: + - 0.8446 + - 0.8462 + R@1000: + - 0.9390 + - 0.9407 diff --git a/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.onnx.yaml b/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.onnx.yaml new file mode 100644 index 000000000..38fdc9c5f --- /dev/null +++ b/src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.onnx.yaml @@ -0,0 +1,72 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 + +metrics: + - metric: MAP@100 + command: bin/trec_eval + params: -c -M 100 -m map + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: true + - metric: MRR@100 + command: bin/trec_eval + params: -c -M 100 -m recip_rank + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: true + - metric: R@100 
+ command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@1000 + command: bin/trec_eval + params: -c -m recall.1000 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "[MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" + id: dev + path: topics.msmarco-v2-doc.dev.txt + qrel: qrels.msmarco-v2.1-doc.dev.txt + - name: "[MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html)" + id: dev2 + path: topics.msmarco-v2-doc.dev2.txt + qrel: qrels.msmarco-v2.1-doc.dev2.txt + +models: + - name: splade-v3-onnx + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -encoder SpladeV3 + results: + MAP@100: + - 0.2846 + - 0.2836 + MRR@100: + - 0.2874 + - 0.2869 + R@100: + - 0.8446 + - 0.8462 + R@1000: + - 0.9390 + - 0.9407 diff --git a/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.cached.yaml b/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.cached.yaml new file mode 100644 index 000000000..1460739fc --- /dev/null +++ b/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.cached.yaml @@ -0,0 +1,55 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 + +metrics: + - metric: nDCG@20 + command: bin/trec_eval + params: -c -m ndcg_cut.20 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - 
metric: nDCG@100 + command: bin/trec_eval + params: -c -m ndcg_cut.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "RAG 24: Test queries" + id: rag24.test + path: topics.rag24.test.splade-v3.tsv.gz + qrel: qrels.rag24.test.txt + +models: + - name: splade-v3-cached + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 1000 + results: + nDCG@20: + - 0.4642 + nDCG@100: + - 0.4349 + R@100: + - 0.3198 diff --git a/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml b/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml new file mode 100644 index 000000000..fd1452333 --- /dev/null +++ b/src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml @@ -0,0 +1,55 @@ +--- +corpus: msmarco-v2.1-doc-segmented-splade-v3 +corpus_path: /mnt/collections/msmarco/msmarco_v2.1_doc_segmented_splade-v3 + +index_path: indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ +collection_class: JsonVectorCollection +generator_class: DefaultLuceneDocumentGenerator +index_threads: 24 +index_options: -impact -pretokenized +index_stats: + documents: 113520750 + documents (non-empty): 113520750 + total terms: 866904601378 + +metrics: + - metric: nDCG@20 + command: bin/trec_eval + params: -c -m ndcg_cut.20 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: nDCG@100 + command: bin/trec_eval + params: -c -m ndcg_cut.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + - metric: R@100 + command: bin/trec_eval + params: -c -m recall.100 + separator: "\t" + parse_index: 2 + metric_precision: 4 + can_combine: false + +topic_reader: TsvString +topics: + - name: "RAG 24: Test queries" + id: rag24.test + path: 
topics.rag24.test.txt + qrel: qrels.rag24.test.txt + +models: + - name: splade-v3-onnx + display: SPLADE-v3 + params: -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3 + results: + nDCG@20: + - 0.4642 + nDCG@100: + - 0.4349 + R@100: + - 0.3198 From bae7b9b30d07d133062e7bb5c0e6a6f5e199cc2b Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 14:54:00 -0400 Subject: [PATCH 09/15] updated git submodule --- tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools b/tools index 9e692ae38..0c2805c4d 160000 --- a/tools +++ b/tools @@ -1 +1 @@ -Subproject commit 9e692ae38c085776431da9f1633fcea7fc814440 +Subproject commit 0c2805c4d00b1e77a776c47f4aef6faef54b6398 From 8cbee30a739e050985efc8bca0ea73c6bbb05172 Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 15:17:46 -0400 Subject: [PATCH 10/15] added regression build files --- ...rco-v2.1-doc-segmented.splade-v3.cached.md | 92 +++++++++++++++++++ ...marco-v2.1-doc-segmented.splade-v3.onnx.md | 92 +++++++++++++++++++ ...oc-segmented-test-nist.splade-v3.cached.md | 82 +++++++++++++++++ ...-doc-segmented-test-nist.splade-v3.onnx.md | 83 +++++++++++++++++ 4 files changed, 349 insertions(+) create mode 100644 docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.cached.md create mode 100644 docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.onnx.md create mode 100644 docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.cached.md create mode 100644 docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md diff --git a/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.cached.md b/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.cached.md new file mode 100644 index 000000000..4d5fc63d7 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.cached.md @@ -0,0 +1,92 @@ +# Anserini Regressions: MS MARCO V2.1 Document Ranking + +**Model**: 
[SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries)
+
+This page describes regression experiments for document ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the dev queries, which is integrated into Anserini's regression testing framework.
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2.1-doc-segmented.splade-v3.cached +``` + +## Indexing + +Typical indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). + +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +These evaluation resources are from the original V2 corpus, but have been "projected" over to the V2.1 corpus. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.msmarco-v2-doc.dev.tsv.gz \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev.txt \ + -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.msmarco-v2-doc.dev2.tsv.gz \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev2.txt \ + -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev2.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev2.txt +bin/trec_eval 
-c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.msmarco-v2-doc.dev2.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **MAP@100** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2846 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2836 | +| **MRR@100** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2874 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2869 | +| **R@100** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.8446 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.8462 | +| **R@1000** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.9390 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.9407 | diff --git a/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.onnx.md b/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.onnx.md new file mode 100644 index 000000000..5c2fa5343 --- /dev/null +++ b/docs/regressions/regressions-msmarco-v2.1-doc-segmented.splade-v3.onnx.md @@ -0,0 +1,92 @@ +# Anserini Regressions: MS MARCO V2.1 Document Ranking + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for on-the-fly query encoding) + +This page describes regression experiments for document ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the dev queries, which is integrated into 
Anserini's regression testing framework. +This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track. + +The model itself can be download [here](https://huggingface.co/naver/splade-v3). +See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details: + +> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_. + +In these experiments, we are using ONNX to perform query encoding on the fly. + +The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/msmarco-v2.1-doc-segmented.splade-v3.onnx.yaml). +Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/msmarco-v2.1-doc-segmented.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2.1-doc-segmented.splade-v3.onnx +``` + +## Indexing + +Typical indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). 
+ +## Retrieval + +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. +These evaluation resources are from the original V2 corpus, but have been "projected" over to the V2.1 corpus. + +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.msmarco-v2-doc.dev.txt \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev.txt \ + -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -encoder SpladeV3 & +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.msmarco-v2-doc.dev2.txt \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev2.txt \ + -impact -pretokenized -removeQuery -hits 10000 -selectMaxPassage -selectMaxPassage.delimiter "#" -selectMaxPassage.hits 1000 -encoder SpladeV3 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev.txt +bin/trec_eval -c -m recall.100 
tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev2.txt +bin/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev2.txt +bin/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank tools/topics-and-qrels/qrels.msmarco-v2.1-doc.dev2.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.msmarco-v2-doc.dev2.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **MAP@100** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2846 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2836 | +| **MRR@100** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2874 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.2869 | +| **R@100** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.8446 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.8462 | +| **R@1000** | **SPLADE-v3**| +| [MS MARCO V2 Doc: Dev](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.9390 | +| [MS MARCO V2 Doc: Dev2](https://microsoft.github.io/msmarco/TREC-Deep-Learning.html) | 0.9407 | diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.cached.md b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.cached.md new file mode 100644 index 000000000..d3d52f9d6 --- /dev/null +++ b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.cached.md 
@@ -0,0 +1,82 @@
+# Anserini Regressions: TREC 2024 RAG Track Test Topics
+
+**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using cached queries)
+
+This page describes regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework.
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using cached queries (i.e., cached results of query encoding).
+
+Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set.
+These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments.
+See the following paper for more details:
+
+> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025.
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.cached.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.cached.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. + +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression rag24-doc-segmented-test-nist.splade-v3.cached +``` + +## Indexing + +Sample indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the pre-encoded tokens. +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.rag24.test.splade-v3.tsv.gz \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt \ + -impact -pretokenized -removeQuery -hits 1000 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m ndcg_cut.20 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +bin/trec_eval -c -m ndcg_cut.100 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-cached.topics.rag24.test.splade-v3.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **nDCG@20** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| RAG 24: Test queries | 0.4642 | +| **nDCG@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.4349 | +| **R@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.3198 | diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md new file mode 100644 index 000000000..1a87feb24 --- /dev/null +++ b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md @@ -0,0 +1,83 @@ +# Anserini Regressions: TREC 2024 RAG Track Test Topics + +**Model**: [SPLADE-v3](https://arxiv.org/abs/2403.06789) (using ONNX for on-the-fly query encoding) + +This page describes 
regression experiments for ranking _on the segmented version_ of the MS MARCO V2.1 document corpus using the test topics (= queries in TREC parlance), which is integrated into Anserini's regression testing framework.
+This corpus was derived from the MS MARCO V2 _segmented_ document corpus and prepared for the TREC 2024 RAG Track.
+
+The model itself can be downloaded [here](https://huggingface.co/naver/splade-v3).
+See the [official SPLADE repo](https://github.com/naver/splade) and the following paper for more details:
+
+> Carlos Lassance, Hervé Déjean, Thibault Formal, and Stéphane Clinchant. [SPLADE-v3: New baselines for SPLADE.](https://arxiv.org/abs/2403.06789) _arXiv:2403.06789_.
+
+In these experiments, we are using ONNX to perform query encoding on the fly.
+
+Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set.
+These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments.
+See the following paper for more details:
+
+> Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025.
+
+The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml).
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead.
+ +From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: + +``` +python src/main/python/run_regression.py --index --verify --search --regression rag24-doc-segmented-test-nist.splade-v3.onnx +``` + +## Indexing + +Typical indexing command: + +``` +bin/run.sh io.anserini.index.IndexCollection \ + -threads 24 \ + -collection JsonVectorCollection \ + -input /path/to/msmarco-v2.1-doc-segmented-splade-v3 \ + -generator DefaultLuceneDocumentGenerator \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -impact -pretokenized \ + >& logs/log.msmarco-v2.1-doc-segmented-splade-v3 & +``` + +The setting of `-input` should be a directory containing the compressed `jsonl` files that comprise the corpus. + +For additional details, see explanation of [common indexing options](../../docs/common-indexing-options.md). + +## Retrieval + +Here, we are using 89 test topics from the TREC 2024 RAG Track with manual relevance judgments from NIST assessors. +Topics and qrels are stored [here](https://github.com/castorini/anserini-tools/tree/master/topics-and-qrels), which is linked to the Anserini repo as a submodule. 
+ +After indexing has completed, you should be able to perform retrieval as follows: + +``` +bin/run.sh io.anserini.search.SearchCollection \ + -index indexes/lucene-inverted.msmarco-v2.1-doc-segmented.splade-v3/ \ + -topics tools/topics-and-qrels/topics.rag24.test.txt \ + -topicReader TsvString \ + -output runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt \ + -impact -pretokenized -removeQuery -hits 1000 -encoder SpladeV3 & +``` + +Evaluation can be performed using `trec_eval`: + +``` +bin/trec_eval -c -m ndcg_cut.20 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +bin/trec_eval -c -m ndcg_cut.100 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +bin/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.rag24.test.txt runs/run.msmarco-v2.1-doc-segmented-splade-v3.splade-v3-onnx.topics.rag24.test.txt +``` + +## Effectiveness + +With the above commands, you should be able to reproduce the following results: + +| **nDCG@20** | **SPLADE-v3**| +|:-------------------------------------------------------------------------------------------------------------|-----------| +| RAG 24: Test queries | 0.4642 | +| **nDCG@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.4349 | +| **R@100** | **SPLADE-v3**| +| RAG 24: Test queries | 0.3198 | From 51395a96ef3e3edda75efc5ca0292ca4e51755fc Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 16:23:17 -0400 Subject: [PATCH 11/15] update tools --- tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools b/tools index 0c2805c4d..da31c91e5 160000 --- a/tools +++ b/tools @@ -1 +1 @@ -Subproject commit 0c2805c4d00b1e77a776c47f4aef6faef54b6398 +Subproject commit da31c91e59af2678317060e6cdffccc40b22cee0 From 1502a49b4709bc8402c1aef2c6616c035edd7a26 Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 16:45:54 -0400 
Subject: [PATCH 12/15] updated tools --- tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools b/tools index da31c91e5..3b506ecb3 160000 --- a/tools +++ b/tools @@ -1 +1 @@ -Subproject commit da31c91e59af2678317060e6cdffccc40b22cee0 +Subproject commit 3b506ecb3e8a19fd596936761d76282f2abeba03 From 45bf6bbf906536a11b083f820f943f1d34f7a5f2 Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 16:50:55 -0400 Subject: [PATCH 13/15] fix template --- ...24-doc-segmented-test-umbrela.splade-v3.cached.template | 5 ++--- ...ag24-doc-segmented-test-umbrela.splade-v3.onnx.template | 7 +++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template index 3c52d7bb9..f9723e2f9 100644 --- a/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.cached.template @@ -12,9 +12,8 @@ See the [official SPLADE repo](https://github.com/naver/splade) and the followin In these experiments, we are using cached queries (i.e., cached results of query encoding). -Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. -These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. -See the following paper for more details: +Evaluation uses (automatically generated) UMBRELA qrels over all 301 topics from the TREC 2024 RAG Track test set. +UMBRELA is described in the following paper: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. 
_Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template index 584f20ef6..42e8b8886 100644 --- a/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template @@ -12,14 +12,13 @@ See the [official SPLADE repo](https://github.com/naver/splade) and the followin In these experiments, we are using ONNX to perform query encoding on the fly. -Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. -These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. -See the following paper for more details: +Evaluation uses (automatically generated) UMBRELA qrels over all 301 topics from the TREC 2024 RAG Track test set. +UMBRELA is described in the following paper: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. The exact configurations for these regressions are stored in [this YAML file](${yaml}). -Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
+Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: From d0dd0380679ad06784c0cd960b158780fa9b690d Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 16:54:09 -0400 Subject: [PATCH 14/15] fix template --- .../rag24-doc-segmented-test-nist.splade-v3.onnx.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template index 584f20ef6..38763a90d 100644 --- a/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template +++ b/src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template @@ -19,7 +19,7 @@ See the following paper for more details: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. The exact configurations for these regressions are stored in [this YAML file](${yaml}). -Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. +Note that this page is automatically generated from [this template](${template}) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. 
From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: From e66560bb9f8cafdd39ba17bb87d49957a8d6a0ab Mon Sep 17 00:00:00 2001 From: clides Date: Wed, 9 Jul 2025 17:01:50 -0400 Subject: [PATCH 15/15] update build files --- ...essions-rag24-doc-segmented-test-nist.splade-v3.onnx.md | 2 +- ...ns-rag24-doc-segmented-test-umbrela.splade-v3.cached.md | 5 ++--- ...ions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md | 7 +++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md index 1a87feb24..be6866bcc 100644 --- a/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md +++ b/docs/regressions/regressions-rag24-doc-segmented-test-nist.splade-v3.onnx.md @@ -19,7 +19,7 @@ See the following paper for more details: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-nist.splade-v3.onnx.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-nist.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md index e44250cae..d26dbc29b 100644 --- a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md +++ b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.cached.md @@ -12,9 +12,8 @@ See the [official SPLADE repo](https://github.com/naver/splade) and the followin In these experiments, we are using cached queries (i.e., cached results of query encoding). -Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. -These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. -See the following paper for more details: +Evaluation uses (automatically generated) UMBRELA qrels over all 301 topics from the TREC 2024 RAG Track test set. +UMBRELA is described in the following paper: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. 
diff --git a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md index 6addf4921..f51d13c54 100644 --- a/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md +++ b/docs/regressions/regressions-rag24-doc-segmented-test-umbrela.splade-v3.onnx.md @@ -12,14 +12,13 @@ See the [official SPLADE repo](https://github.com/naver/splade) and the followin In these experiments, we are using ONNX to perform query encoding on the fly. -Evaluation uses qrels over 89 topics from the TREC 2024 RAG Track test set. -These qrels represent manual relevance judgments from NIST assessors, contrasted with automatically generated UMBRELA judgments. -See the following paper for more details: +Evaluation uses (automatically generated) UMBRELA qrels over all 301 topics from the TREC 2024 RAG Track test set. +UMBRELA is described in the following paper: > Shivani Upadhyay, Ronak Pradeep, Nandan Thakur, Daniel Campos, Nick Craswell, Ian Soboroff, and Jimmy Lin. A Large-Scale Study of Relevance Assessments with Large Language Models Using UMBRELA. _Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR 2025)_, 2025. The exact configurations for these regressions are stored in [this YAML file](../../src/main/resources/regression/rag24-doc-segmented-test-umbrela.splade-v3.onnx.yaml). -Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead. 
+Note that this page is automatically generated from [this template](../../src/main/resources/docgen/templates/rag24-doc-segmented-test-umbrela.splade-v3.onnx.template) as part of Anserini's regression pipeline, so do not modify this page directly; modify the template instead and then run `bin/build.sh` to rebuild the documentation. From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end: