From 2a568fde3fb798074a46a4f571f196ff0a53e5cb Mon Sep 17 00:00:00 2001 From: LHT129 Date: Thu, 3 Jul 2025 19:16:11 +0800 Subject: [PATCH] refactor cmake Signed-off-by: LHT129 --- .circleci/fresh_ci_cache.commit | 2 +- .github/workflows/lint.yml | 2 +- Makefile | 4 +- src/CMakeLists.txt | 37 +- src/algorithm/CMakeLists.txt | 17 + src/algorithm/brute_force.cpp | 6 +- src/algorithm/brute_force.h | 6 +- src/algorithm/brute_force_parameter.cpp | 2 +- src/algorithm/hgraph.cpp | 10 +- src/algorithm/hgraph.h | 3 +- src/algorithm/hnswlib/CMakeLists.txt | 10 + src/algorithm/hnswlib/block_manager.h | 2 +- src/algorithm/hnswlib/hnswalg.h | 2 +- src/algorithm/hnswlib/hnswalg_static.h | 2 +- src/algorithm/hnswlib/visited_list_pool.h | 2 +- src/algorithm/inner_index_interface.cpp | 2 +- src/algorithm/inner_index_interface_test.cpp | 2 +- src/algorithm/ivf.cpp | 10 +- src/algorithm/ivf.h | 2 +- src/algorithm/ivf_parameter.cpp | 2 +- src/algorithm/ivf_parameter.h | 3 +- src/algorithm/ivf_partition/CMakeLists.txt | 12 + .../ivf_partition/gno_imi_parameter.cpp | 2 +- .../ivf_partition/gno_imi_parameter.h | 3 +- .../ivf_partition/gno_imi_partition.cpp | 5 +- .../ivf_partition/gno_imi_partition_test.cpp | 2 +- .../ivf_partition/ivf_nearest_partition.cpp | 4 +- .../ivf_nearest_partition_test.cpp | 2 +- .../ivf_partition/ivf_partition_strategy.cpp | 34 ++ .../ivf_partition/ivf_partition_strategy.h | 12 +- .../ivf_partition_strategy_parameter.cpp | 2 +- .../ivf_partition_strategy_parameter.h | 3 +- src/algorithm/pyramid.cpp | 2 +- src/algorithm/pyramid.h | 4 +- src/algorithm/sparse_index.cpp | 4 +- src/algorithm/sparse_index.h | 2 +- src/attr/CMakeLists.txt | 10 + src/attr/attr_type_schema_test.cpp | 2 +- src/{ => attr}/attr_value_map.h | 2 +- src/{ => attr}/attr_value_map_test.cpp | 4 +- src/{ => attr}/attribute.cpp | 0 .../executor/comparison_executor_test.cpp | 2 +- .../executor/integer_list_executor_test.cpp | 2 +- src/attr/executor/logical_executor_test.cpp | 2 +- .../executor/string_list_executor_test.cpp | 2 +- src/data_cell/CMakeLists.txt | 7 + .../attribute_bucket_inverted_datacell.h | 2 +- ...ttribute_bucket_inverted_datacell_test.cpp | 2 +- src/data_cell/attribute_inverted_datacell.h | 2 +- .../attribute_inverted_datacell_test.cpp | 2 +- src/data_cell/bucket_datacell.h | 1 + src/data_cell/bucket_datacell_parameter.cpp | 2 +- src/data_cell/bucket_datacell_test.cpp | 4 +- src/data_cell/bucket_interface.cpp | 2 +- .../compressed_graph_datacell_parameter.h | 3 +- .../compressed_graph_datacell_test.cpp | 4 +- .../extra_info_datacell_parameter.cpp | 2 +- src/data_cell/extra_info_datacell_test.cpp | 4 +- src/data_cell/extra_info_interface.cpp | 2 +- src/data_cell/extra_info_interface_test.cpp | 4 +- src/data_cell/flatten_datacell_parameter.cpp | 2 +- src/data_cell/flatten_datacell_test.cpp | 4 +- src/data_cell/graph_datacell_parameter.cpp | 2 +- src/data_cell/graph_datacell_test.cpp | 4 +- src/data_cell/graph_interface_test.cpp | 4 +- .../sparse_graph_datacell_parameter.h | 1 + src/data_cell/sparse_graph_datacell_test.cpp | 4 +- .../sparse_vector_datacell_parameter.h | 2 +- src/data_cell/sparse_vector_datacell_test.cpp | 2 +- src/dataset_impl.h | 60 ++- src/dataset_impl_test.cpp | 2 +- src/default_thread_pool.cpp | 2 + src/default_thread_pool.h | 1 - src/engine.cpp | 2 +- src/impl/CMakeLists.txt | 20 + src/impl/allocator/CMakeLists.txt | 11 + src/{ => impl/allocator}/allocator_wrapper.h | 0 .../allocator}/default_allocator.cpp | 13 + src/{ => impl/allocator}/default_allocator.h | 13 +- .../allocator}/default_allocator_test.cpp | 0 src/{ => impl/allocator}/safe_allocator.h | 6 +- .../allocator}/safe_allocator_test.cpp | 0 src/impl/basic_optimizer.cpp | 2 + src/impl/basic_optimizer.h | 11 +- src/impl/basic_searcher.cpp | 3 +- src/impl/basic_searcher.h | 11 +- src/impl/basic_searcher_test.cpp | 2 +- src/impl/bitset/CMakeLists.txt | 14 + src/impl/bitset/fast_bitset.h | 2 +- src/impl/bitset/fast_bitset_test.cpp | 1 + src/impl/bitset/sparse_bitset_test.cpp | 2 +- src/impl/conjugate_graph.h | 6 +- src/impl/conjugate_graph_test.cpp | 2 +- src/impl/elias_fano_encoder.cpp | 1 - src/impl/elias_fano_encoder.h | 3 +- src/impl/elias_fano_encoder_test.cpp | 2 +- src/impl/filter/CMakeLists.txt | 2 +- src/impl/filter/extrainfo_wrapper_filter.h | 2 - src/impl/heap/CMakeLists.txt | 11 + src/{utils => impl/heap}/distance_heap.cpp | 9 + src/{utils => impl/heap}/distance_heap.h | 11 +- .../heap}/distance_heap_test.cpp | 2 +- .../heap/memmove_heap.cpp} | 47 +-- src/impl/heap/memmove_heap.h | 57 +++ src/impl/heap/standard_heap.cpp | 44 +++ src/{utils => impl/heap}/standard_heap.h | 23 +- src/impl/kmeans_cluster.cpp | 5 +- src/impl/kmeans_cluster_test.cpp | 2 +- src/impl/odescent_graph_builder.cpp | 1 + src/impl/odescent_graph_builder.h | 7 +- src/impl/odescent_graph_builder_test.cpp | 2 +- src/impl/odescent_graph_parameter.cpp | 2 +- src/impl/pruning_strategy.cpp | 2 +- src/impl/pruning_strategy.h | 4 +- src/impl/reorder.h | 3 +- src/impl/transform/CMakeLists.txt | 6 +- .../transform/fht_kac_rotate_transformer.h | 3 +- .../fht_kac_rotate_transformer_test.cpp | 2 +- src/impl/transform/pca_transformer.cpp | 7 +- src/impl/transform/pca_transformer.h | 5 - src/impl/transform/pca_transformer_test.cpp | 2 +- .../random_orthogonal_transformer.cpp | 2 +- .../random_orthogonal_transformer_test.cpp | 2 +- src/index/diskann_zparameters.cpp | 2 +- src/index/hnsw.cpp | 8 +- src/index/hnsw.h | 3 +- src/index/hnsw_zparameters.cpp | 2 +- src/index/hnsw_zparameters.h | 4 +- src/index/index_common_param.cpp | 2 +- src/index/index_common_param.h | 1 - src/index/iterator_filter_test.cpp | 2 +- src/index_feature_list.cpp | 2 +- src/io/CMakeLists.txt | 4 +- src/io/async_io.cpp | 114 ++++++ src/io/async_io.h | 117 +----- src/io/async_io_test.cpp | 2 +- src/io/basic_io.h | 2 +- src/io/buffer_io.cpp | 91 +++++ src/io/buffer_io.h | 78 +--- src/io/buffer_io_test.cpp | 2 +- src/io/io_context.h | 2 +- src/io/memory_block_io.cpp | 156 ++++++++ src/io/memory_block_io.h | 160 +------- src/io/memory_block_io_test.cpp | 2 +- src/io/memory_io_test.cpp | 2 +- src/io/mmap_io_test.cpp | 2 +- src/label_table.h | 2 +- src/parameter.h | 1 - src/quantization/CMakeLists.txt | 5 + src/quantization/fp32_quantizer_test.cpp | 4 +- .../pq_fastscan_quantizer.cpp | 369 ++++++++++++++++++ .../pq_fastscan_quantizer.h | 357 +---------------- .../pq_fastscan_quantizer_test.cpp | 2 +- .../product_quantizer.cpp | 79 ++++ .../product_quantization/product_quantizer.h | 77 +--- .../product_quantizer_test.cpp | 2 +- src/quantization/quantizer.h | 2 +- src/quantization/quantizer_parameter.cpp | 2 +- .../rabitq_quantizer_test.cpp | 5 +- .../bf16_quantizer_test.cpp | 4 +- .../fp16_quantizer_test.cpp | 2 +- .../sq4_quantizer_test.cpp | 4 +- .../sq4_uniform_quantizer_test.cpp | 2 +- .../sq8_quantizer_test.cpp | 4 +- .../sq8_uniform_quantizer_test.cpp | 2 +- src/resource.cpp | 2 +- src/storage/CMakeLists.txt | 2 +- src/storage/footer.h | 3 +- src/storage/stream_reader.cpp | 2 +- src/typing.h | 2 +- src/utils/CMakeLists.txt | 13 + src/utils/linear_congruential_generator.cpp | 33 ++ src/utils/linear_congruential_generator.h | 20 +- src/utils/resource_object_pool.h | 2 +- src/utils/util_functions.cpp | 6 +- src/utils/util_functions.h | 4 +- src/utils/visited_list.cpp | 41 ++ src/utils/visited_list.h | 28 +- src/utils/visited_list_test.cpp | 2 +- tests/test_fc_visitor.cpp | 2 +- 180 files changed, 1502 insertions(+), 1117 deletions(-) create mode 100644 src/algorithm/CMakeLists.txt create mode 100644 src/algorithm/hnswlib/CMakeLists.txt create mode 100644 src/algorithm/ivf_partition/CMakeLists.txt create mode 100644 src/algorithm/ivf_partition/ivf_partition_strategy.cpp create mode 100644 src/attr/CMakeLists.txt rename src/{ => attr}/attr_value_map.h (99%) rename src/{ => attr}/attr_value_map_test.cpp (96%) rename src/{ => attr}/attribute.cpp (100%) create mode 100644 src/data_cell/CMakeLists.txt create mode 100644 src/impl/CMakeLists.txt create mode 100644 src/impl/allocator/CMakeLists.txt rename src/{ => impl/allocator}/allocator_wrapper.h (100%) rename src/{ => impl/allocator}/default_allocator.cpp (86%) rename src/{ => impl/allocator}/default_allocator.h (81%) rename src/{ => impl/allocator}/default_allocator_test.cpp (100%) rename src/{ => impl/allocator}/safe_allocator.h (90%) rename src/{ => impl/allocator}/safe_allocator_test.cpp (100%) create mode 100644 src/impl/bitset/CMakeLists.txt create mode 100644 src/impl/heap/CMakeLists.txt rename src/{utils => impl/heap}/distance_heap.cpp (83%) rename src/{utils => impl/heap}/distance_heap.h (89%) rename src/{utils => impl/heap}/distance_heap_test.cpp (98%) rename src/{utils/memmove_heap.h => impl/heap/memmove_heap.cpp} (78%) create mode 100644 src/impl/heap/memmove_heap.h create mode 100644 src/impl/heap/standard_heap.cpp rename src/{utils => impl/heap}/standard_heap.h (68%) create mode 100644 src/io/buffer_io.cpp create mode 100644 src/io/memory_block_io.cpp create mode 100644 src/quantization/product_quantization/pq_fastscan_quantizer.cpp create mode 100644 src/quantization/product_quantization/product_quantizer.cpp create mode 100644 src/utils/CMakeLists.txt create mode 100644 src/utils/linear_congruential_generator.cpp create mode 100644 src/utils/visited_list.cpp diff --git a/.circleci/fresh_ci_cache.commit b/.circleci/fresh_ci_cache.commit index bca884aa8..5399d9346 100644 --- a/.circleci/fresh_ci_cache.commit +++ b/.circleci/fresh_ci_cache.commit @@ -1 +1 @@ -39aebf3c03c5e26e8fb158b3ba54ba79a3f9513d +9f035bade6ddca04b17aff63de4a409f3d0f9de1 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 483846bb6..edf6841a1 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -29,4 +29,4 @@ jobs: save: ${{ github.event_name != 'pull_request' }} key: build-lint-${{ hashFiles('./CMakeLists.txt') }}-${{ hashFiles('./.circleci/fresh_ci_cache.commit') }} - name: Run lint - run: export CMAKE_GENERATOR="Ninja" && make debug && make lint + run: export CMAKE_GENERATOR="Ninja" && make release && make lint diff --git a/Makefile b/Makefile index a15e08a54..6fea1cf42 100644 --- a/Makefile +++ b/Makefile @@ -86,11 +86,11 @@ cov: ## Build unit tests with code coverage enabled. .PHONEY: lint lint: ## Check coding styles defined in `.clang-tidy`. - @./scripts/linters/run-clang-tidy.py -p build/ -use-color -source-filter '^.*vsag\/src.*(?(heap->Size()), allocator_); + create_fast_dataset(static_cast(heap->Size()), allocator_); for (auto j = static_cast(heap->Size() - 1); j >= 0; --j) { dists[j] = heap->Top().first; ids[j] = this->label_table_->GetLabelById(heap->Top().second); @@ -168,7 +168,7 @@ BruteForce::RangeSearch(const vsag::DatasetPtr& query, } auto [dataset_results, dists, ids] = - CreateFastDataset(static_cast(heap->Size()), allocator_); + create_fast_dataset(static_cast(heap->Size()), allocator_); for (auto j = static_cast(heap->Size() - 1); j >= 0; --j) { dists[j] = heap->Top().first; ids[j] = this->label_table_->GetLabelById(heap->Top().second); diff --git a/src/algorithm/brute_force.h b/src/algorithm/brute_force.h index f1ef4fad9..d9340b888 100644 --- a/src/algorithm/brute_force.h +++ b/src/algorithm/brute_force.h @@ -17,12 +17,10 @@ #include "algorithm/inner_index_interface.h" #include "brute_force_parameter.h" -#include "common.h" -#include "data_cell/flatten_datacell.h" -#include "impl/filter/filter_headers.h" -#include "index_feature_list.h" +#include "data_cell/flatten_interface.h" #include "label_table.h" #include "typing.h" +#include "vsag/filter.h" namespace vsag { diff --git a/src/algorithm/brute_force_parameter.cpp b/src/algorithm/brute_force_parameter.cpp index 79812ec2e..7d5584475 100644 --- a/src/algorithm/brute_force_parameter.cpp +++ b/src/algorithm/brute_force_parameter.cpp @@ -15,7 +15,7 @@ #include "brute_force_parameter.h" -#include +#include #include "vsag/constants.h" diff --git a/src/algorithm/hgraph.cpp b/src/algorithm/hgraph.cpp index 4eb0306b5..4a1c7ce2e 100644 --- a/src/algorithm/hgraph.cpp +++ b/src/algorithm/hgraph.cpp @@ -16,7 +16,7 @@ #include "hgraph.h" #include -#include +#include #include #include @@ -25,6 +25,7 @@ #include "data_cell/graph_datacell_parameter.h" #include "data_cell/sparse_graph_datacell.h" #include "dataset_impl.h" +#include "impl/heap/standard_heap.h" #include "impl/odescent_graph_builder.h" #include "impl/pruning_strategy.h" #include "impl/reorder.h" @@ -34,7 +35,6 @@ #include "storage/serialization.h" #include "storage/stream_reader.h" #include "typing.h" -#include "utils/standard_heap.h" #include "utils/util_functions.h" #include "vsag/options.h" @@ -358,7 +358,7 @@ HGraph::KnnSearch(const DatasetPtr& query, return DatasetImpl::MakeEmptyDataset(); } auto count = static_cast(search_result->Size()); - auto [dataset_results, dists, ids] = CreateFastDataset(count, search_allocator); + auto [dataset_results, dists, ids] = create_fast_dataset(count, search_allocator); char* extra_infos = nullptr; if (extra_info_size_ > 0) { extra_infos = (char*)search_allocator->Allocate(extra_info_size_ * search_result->Size()); @@ -473,7 +473,7 @@ HGraph::KnnSearch(const DatasetPtr& query, return DatasetImpl::MakeEmptyDataset(); } auto count = static_cast(search_result->Size()); - auto [dataset_results, dists, ids] = CreateFastDataset(count, search_allocator); + auto [dataset_results, dists, ids] = create_fast_dataset(count, search_allocator); char* extra_infos = nullptr; if (extra_info_size_ > 0) { extra_infos = (char*)search_allocator->Allocate(extra_info_size_ * search_result->Size()); @@ -633,7 +633,7 @@ HGraph::RangeSearch(const DatasetPtr& query, } auto count = static_cast(search_result->Size()); - auto [dataset_results, dists, ids] = CreateFastDataset(count, allocator_); + auto [dataset_results, dists, ids] = create_fast_dataset(count, allocator_); char* extra_infos = nullptr; if (extra_info_size_ > 0) { extra_infos = (char*)allocator_->Allocate(extra_info_size_ * search_result->Size()); diff --git a/src/algorithm/hgraph.h b/src/algorithm/hgraph.h index 1d6836e06..d8b9113f5 100644 --- a/src/algorithm/hgraph.h +++ b/src/algorithm/hgraph.h @@ -30,14 +30,15 @@ #include "data_cell/sparse_graph_datacell_parameter.h" #include "default_thread_pool.h" #include "hgraph_parameter.h" +#include "impl/basic_optimizer.h" #include "impl/basic_searcher.h" +#include "impl/heap/distance_heap.h" #include "index/index_common_param.h" #include "index/iterator_filter.h" #include "index_feature_list.h" #include "inner_index_interface.h" #include "lock_strategy.h" #include "typing.h" -#include "utils/distance_heap.h" #include "utils/visited_list.h" #include "vsag/index.h" #include "vsag/index_features.h" diff --git a/src/algorithm/hnswlib/CMakeLists.txt b/src/algorithm/hnswlib/CMakeLists.txt new file mode 100644 index 000000000..cfdb407d5 --- /dev/null +++ b/src/algorithm/hnswlib/CMakeLists.txt @@ -0,0 +1,10 @@ + +set (HNSWLIB_SRCS + block_manager.cpp + algorithm_interface.cpp + hnswalg.cpp +) + +add_library (hnswlib OBJECT ${HNSWLIB_SRCS}) +target_link_libraries (hnswlib PUBLIC coverage_config) +add_dependencies (hnswlib spdlog fmt::fmt) diff --git a/src/algorithm/hnswlib/block_manager.h b/src/algorithm/hnswlib/block_manager.h index e459407c1..4c066d501 100644 --- a/src/algorithm/hnswlib/block_manager.h +++ b/src/algorithm/hnswlib/block_manager.h @@ -20,7 +20,7 @@ #include #include -#include "../../default_allocator.h" +#include "impl/allocator/default_allocator.h" #include "storage/stream_reader.h" #include "storage/stream_writer.h" diff --git a/src/algorithm/hnswlib/hnswalg.h b/src/algorithm/hnswlib/hnswalg.h index 3727e15fa..c66c88d0c 100644 --- a/src/algorithm/hnswlib/hnswalg.h +++ b/src/algorithm/hnswlib/hnswalg.h @@ -36,7 +36,7 @@ #include "block_manager.h" #include "data_cell/flatten_interface.h" #include "data_cell/graph_interface.h" -#include "default_allocator.h" +#include "impl/allocator/default_allocator.h" #include "index/iterator_filter.h" #include "prefetch.h" #include "simd/simd.h" diff --git a/src/algorithm/hnswlib/hnswalg_static.h b/src/algorithm/hnswlib/hnswalg_static.h index a0ea122bc..b596526f9 100644 --- a/src/algorithm/hnswlib/hnswalg_static.h +++ b/src/algorithm/hnswlib/hnswalg_static.h @@ -30,8 +30,8 @@ #include #include //#include -#include "../../default_allocator.h" #include "hnswlib.h" +#include "impl/allocator/default_allocator.h" #include "storage/stream_reader.h" #include "visited_list_pool.h" diff --git a/src/algorithm/hnswlib/visited_list_pool.h b/src/algorithm/hnswlib/visited_list_pool.h index 5092eb76c..934abaf6f 100644 --- a/src/algorithm/hnswlib/visited_list_pool.h +++ b/src/algorithm/hnswlib/visited_list_pool.h @@ -21,7 +21,7 @@ #include #include -#include "../../default_allocator.h" +#include "impl/allocator/default_allocator.h" #include "storage/stream_writer.h" namespace vsag { diff --git a/src/algorithm/inner_index_interface.cpp b/src/algorithm/inner_index_interface.cpp index bb04a5f5d..437b7e4be 100644 --- a/src/algorithm/inner_index_interface.cpp +++ b/src/algorithm/inner_index_interface.cpp @@ -15,7 +15,7 @@ #include "inner_index_interface.h" -#include +#include #include "brute_force.h" #include "empty_index_binary_set.h" diff --git a/src/algorithm/inner_index_interface_test.cpp b/src/algorithm/inner_index_interface_test.cpp index c425e5534..7b30815b3 100644 --- a/src/algorithm/inner_index_interface_test.cpp +++ b/src/algorithm/inner_index_interface_test.cpp @@ -21,7 +21,7 @@ #include "brute_force.h" #include "hgraph.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/algorithm/ivf.cpp b/src/algorithm/ivf.cpp index 5c7635475..70199f866 100644 --- a/src/algorithm/ivf.cpp +++ b/src/algorithm/ivf.cpp @@ -21,6 +21,7 @@ #include "attr/executor/executor.h" #include "attr/expression_visitor.h" #include "impl/basic_searcher.h" +#include "impl/heap/standard_heap.h" #include "impl/reorder.h" #include "index/index_impl.h" #include "inner_string_params.h" @@ -29,7 +30,6 @@ #include "storage/serialization.h" #include "storage/stream_reader.h" #include "storage/stream_writer.h" -#include "utils/standard_heap.h" #include "utils/util_functions.h" namespace vsag { @@ -362,7 +362,7 @@ IVF::KnnSearch(const DatasetPtr& query, return reorder(k, search_result, query->GetFloat32Vectors()); } auto count = static_cast(search_result->Size()); - auto [dataset_results, dists, labels] = CreateFastDataset(count, allocator_); + auto [dataset_results, dists, labels] = create_fast_dataset(count, allocator_); for (int64_t j = count - 1; j >= 0; --j) { dists[j] = search_result->Top().first; labels[j] = label_table_->GetLabelById(search_result->Top().second); @@ -391,7 +391,7 @@ IVF::RangeSearch(const DatasetPtr& query, return reorder(k, search_result, query->GetFloat32Vectors()); } auto count = static_cast(search_result->Size()); - auto [dataset_results, dists, labels] = CreateFastDataset(count, allocator_); + auto [dataset_results, dists, labels] = create_fast_dataset(count, allocator_); for (int64_t j = count - 1; j >= 0; --j) { dists[j] = search_result->Top().first; labels[j] = label_table_->GetLabelById(search_result->Top().second); @@ -550,7 +550,7 @@ IVF::create_search_param(const std::string& parameters, const FilterPtr& filter) DatasetPtr IVF::reorder(int64_t topk, DistHeapPtr& input, const float* query) const { - auto [dataset_results, dists, labels] = CreateFastDataset(topk, allocator_); + auto [dataset_results, dists, labels] = create_fast_dataset(topk, allocator_); auto reorder_heap = Reorder::ReorderByFlatten(input, reorder_codes_, query, allocator_, topk); for (int64_t j = topk - 1; j >= 0; --j) { dists[j] = reorder_heap->Top().first; @@ -773,7 +773,7 @@ IVF::SearchWithRequest(const SearchRequest& request) const { return reorder(request.topk_, search_result, query->GetFloat32Vectors()); } auto count = static_cast(search_result->Size()); - auto [dataset_results, dists, labels] = CreateFastDataset(count, allocator_); + auto [dataset_results, dists, labels] = create_fast_dataset(count, allocator_); for (int64_t j = count - 1; j >= 0; --j) { dists[j] = search_result->Top().first; labels[j] = label_table_->GetLabelById(search_result->Top().second); diff --git a/src/algorithm/ivf.h b/src/algorithm/ivf.h index 7c59f8bd0..fbaabbc8f 100644 --- a/src/algorithm/ivf.h +++ b/src/algorithm/ivf.h @@ -19,6 +19,7 @@ #include "data_cell/bucket_datacell.h" #include "data_cell/flatten_interface.h" #include "impl/basic_searcher.h" +#include "impl/heap/distance_heap.h" #include "index/index_common_param.h" #include "inner_index_interface.h" #include "ivf_parameter.h" @@ -26,7 +27,6 @@ #include "storage/stream_reader.h" #include "storage/stream_writer.h" #include "typing.h" -#include "utils/distance_heap.h" #include "vsag/index.h" namespace vsag { diff --git a/src/algorithm/ivf_parameter.cpp b/src/algorithm/ivf_parameter.cpp index be911748d..0bf2c9b28 100644 --- a/src/algorithm/ivf_parameter.cpp +++ b/src/algorithm/ivf_parameter.cpp @@ -15,7 +15,7 @@ #include "ivf_parameter.h" -#include +#include #include "inner_string_params.h" #include "vsag/constants.h" diff --git a/src/algorithm/ivf_parameter.h b/src/algorithm/ivf_parameter.h index bb8242aa8..e34131e65 100644 --- a/src/algorithm/ivf_parameter.h +++ b/src/algorithm/ivf_parameter.h @@ -14,11 +14,12 @@ // limitations under the License. #pragma once +#include + #include "algorithm/ivf_partition/ivf_nearest_partition.h" #include "algorithm/ivf_partition/ivf_partition_strategy_parameter.h" #include "data_cell/bucket_datacell_parameter.h" #include "data_cell/flatten_datacell_parameter.h" -#include "fmt/format-inl.h" #include "inner_string_params.h" #include "parameter.h" #include "typing.h" diff --git a/src/algorithm/ivf_partition/CMakeLists.txt b/src/algorithm/ivf_partition/CMakeLists.txt new file mode 100644 index 000000000..2c26580d8 --- /dev/null +++ b/src/algorithm/ivf_partition/CMakeLists.txt @@ -0,0 +1,12 @@ + +set (IVF_PARTITION_SRCS + gno_imi_parameter.cpp + ivf_nearest_partition.cpp + gno_imi_partition.cpp + ivf_partition_strategy_parameter.cpp + ivf_partition_strategy.cpp +) + +add_library (ivf_partition OBJECT ${IVF_PARTITION_SRCS}) +target_link_libraries (ivf_partition PUBLIC coverage_config) +maybe_add_dependencies (ivf_partition spdlog fmt::fmt mkl openblas) diff --git a/src/algorithm/ivf_partition/gno_imi_parameter.cpp b/src/algorithm/ivf_partition/gno_imi_parameter.cpp index 8e891ccd2..914fcf35b 100644 --- a/src/algorithm/ivf_partition/gno_imi_parameter.cpp +++ b/src/algorithm/ivf_partition/gno_imi_parameter.cpp @@ -15,7 +15,7 @@ #include "gno_imi_parameter.h" -#include +#include #include diff --git a/src/algorithm/ivf_partition/gno_imi_parameter.h b/src/algorithm/ivf_partition/gno_imi_parameter.h index c33850cdc..8a2b2d455 100644 --- a/src/algorithm/ivf_partition/gno_imi_parameter.h +++ b/src/algorithm/ivf_partition/gno_imi_parameter.h @@ -14,8 +14,9 @@ // limitations under the License. #pragma once +#include + #include "data_cell/bucket_datacell_parameter.h" -#include "fmt/format-inl.h" #include "inner_string_params.h" #include "parameter.h" #include "typing.h" diff --git a/src/algorithm/ivf_partition/gno_imi_partition.cpp b/src/algorithm/ivf_partition/gno_imi_partition.cpp index 48bdf8a45..c23293474 100644 --- a/src/algorithm/ivf_partition/gno_imi_partition.cpp +++ b/src/algorithm/ivf_partition/gno_imi_partition.cpp @@ -15,14 +15,15 @@ #include "gno_imi_partition.h" -#include +#include +#include #include #include +#include "impl/allocator/safe_allocator.h" #include "impl/kmeans_cluster.h" #include "inner_string_params.h" -#include "safe_allocator.h" #include "utils/util_functions.h" namespace vsag { diff --git a/src/algorithm/ivf_partition/gno_imi_partition_test.cpp b/src/algorithm/ivf_partition/gno_imi_partition_test.cpp index 60ba92eeb..0a8d4f8b7 100644 --- a/src/algorithm/ivf_partition/gno_imi_partition_test.cpp +++ b/src/algorithm/ivf_partition/gno_imi_partition_test.cpp @@ -19,8 +19,8 @@ #include "algorithm/ivf_parameter.h" #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "impl/basic_searcher.h" -#include "safe_allocator.h" using namespace vsag; diff --git a/src/algorithm/ivf_partition/ivf_nearest_partition.cpp b/src/algorithm/ivf_partition/ivf_nearest_partition.cpp index 0dd5fa699..6c82b1504 100644 --- a/src/algorithm/ivf_partition/ivf_nearest_partition.cpp +++ b/src/algorithm/ivf_partition/ivf_nearest_partition.cpp @@ -15,12 +15,12 @@ #include "ivf_nearest_partition.h" -#include +#include #include "algorithm/hgraph.h" +#include "impl/allocator/safe_allocator.h" #include "impl/kmeans_cluster.h" #include "inner_string_params.h" -#include "safe_allocator.h" #include "utils/util_functions.h" namespace vsag { diff --git a/src/algorithm/ivf_partition/ivf_nearest_partition_test.cpp b/src/algorithm/ivf_partition/ivf_nearest_partition_test.cpp index bbab89331..979d51be6 100644 --- a/src/algorithm/ivf_partition/ivf_nearest_partition_test.cpp +++ b/src/algorithm/ivf_partition/ivf_nearest_partition_test.cpp @@ -18,7 +18,7 @@ #include #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "safe_thread_pool.h" using namespace vsag; diff --git a/src/algorithm/ivf_partition/ivf_partition_strategy.cpp b/src/algorithm/ivf_partition/ivf_partition_strategy.cpp new file mode 100644 index 000000000..ea951d9f6 --- /dev/null +++ b/src/algorithm/ivf_partition/ivf_partition_strategy.cpp @@ -0,0 +1,34 @@ + +// Copyright 2024-present the vsag project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ivf_partition_strategy.h" + +#include + +namespace vsag { + +void +IVFPartitionStrategy::GetResidual( + size_t n, const float* x, float* residuals, float* centroids, BucketIdType* assign) { + // TODO: Directly implement c = a - b. + memcpy(residuals, x, sizeof(float) * n * dim_); + for (size_t i = 0; i < n; ++i) { + BucketIdType bucket_id = assign[i]; + cblas_saxpy( + static_cast(dim_), -1.0, centroids + bucket_id * dim_, 1, residuals + i * dim_, 1); + } +} + +} // namespace vsag diff --git a/src/algorithm/ivf_partition/ivf_partition_strategy.h b/src/algorithm/ivf_partition/ivf_partition_strategy.h index d06ed0ab2..54c21f04d 100644 --- a/src/algorithm/ivf_partition/ivf_partition_strategy.h +++ b/src/algorithm/ivf_partition/ivf_partition_strategy.h @@ -15,8 +15,6 @@ #pragma once -#include - #include #include @@ -82,15 +80,7 @@ class IVFPartitionStrategy { } virtual void - GetResidual( - size_t n, const float* x, float* residuals, float* centroids, BucketIdType* assign) { - // TODO: Directly implement c = a - b. - memcpy(residuals, x, sizeof(float) * n * dim_); - for (size_t i = 0; i < n; ++i) { - BucketIdType bucket_id = assign[i]; - cblas_saxpy(dim_, -1.0, centroids + bucket_id * dim_, 1, residuals + i * dim_, 1); - } - } + GetResidual(size_t n, const float* x, float* residuals, float* centroids, BucketIdType* assign); public: bool is_trained_{false}; diff --git a/src/algorithm/ivf_partition/ivf_partition_strategy_parameter.cpp b/src/algorithm/ivf_partition/ivf_partition_strategy_parameter.cpp index 025fff389..660bc1828 100644 --- a/src/algorithm/ivf_partition/ivf_partition_strategy_parameter.cpp +++ b/src/algorithm/ivf_partition/ivf_partition_strategy_parameter.cpp @@ -15,7 +15,7 @@ #include "ivf_partition_strategy_parameter.h" -#include +#include #include diff --git a/src/algorithm/ivf_partition/ivf_partition_strategy_parameter.h b/src/algorithm/ivf_partition/ivf_partition_strategy_parameter.h index ce2700a20..37f42457b 100644 --- a/src/algorithm/ivf_partition/ivf_partition_strategy_parameter.h +++ b/src/algorithm/ivf_partition/ivf_partition_strategy_parameter.h @@ -14,8 +14,9 @@ // limitations under the License. #pragma once +#include + #include "data_cell/bucket_datacell_parameter.h" -#include "fmt/format-inl.h" #include "gno_imi_parameter.h" #include "inner_string_params.h" #include "parameter.h" diff --git a/src/algorithm/pyramid.cpp b/src/algorithm/pyramid.cpp index 3e40cf843..c581b0762 100644 --- a/src/algorithm/pyramid.cpp +++ b/src/algorithm/pyramid.cpp @@ -17,12 +17,12 @@ #include "data_cell/flatten_interface.h" #include "empty_index_binary_set.h" +#include "impl/heap/standard_heap.h" #include "impl/odescent_graph_builder.h" #include "impl/pruning_strategy.h" #include "io/memory_io_parameter.h" #include "storage/serialization.h" #include "utils/slow_task_timer.h" -#include "utils/standard_heap.h" namespace vsag { diff --git a/src/algorithm/pyramid.h b/src/algorithm/pyramid.h index c87b9446a..bd8482957 100644 --- a/src/algorithm/pyramid.h +++ b/src/algorithm/pyramid.h @@ -18,8 +18,10 @@ #include #include "data_cell/graph_interface.h" +#include "impl/allocator/safe_allocator.h" #include "impl/basic_searcher.h" #include "impl/filter/filter_headers.h" +#include "impl/heap/distance_heap.h" #include "impl/odescent_graph_builder.h" #include "index_feature_list.h" #include "inner_index_interface.h" @@ -27,8 +29,6 @@ #include "logger.h" #include "pyramid_zparameters.h" #include "quantization/fp32_quantizer_parameter.h" -#include "safe_allocator.h" -#include "utils/distance_heap.h" namespace vsag { diff --git a/src/algorithm/sparse_index.cpp b/src/algorithm/sparse_index.cpp index efb4cf678..0eca50271 100644 --- a/src/algorithm/sparse_index.cpp +++ b/src/algorithm/sparse_index.cpp @@ -15,7 +15,7 @@ #include "sparse_index.h" -#include "utils/standard_heap.h" +#include "impl/heap/standard_heap.h" #include "utils/util_functions.h" namespace vsag { @@ -172,7 +172,7 @@ SparseIndex::RangeSearch(const DatasetPtr& query, DatasetPtr SparseIndex::collect_results(const DistHeapPtr& results) const { auto [result, dists, ids] = - CreateFastDataset(static_cast(results->Size()), allocator_); + create_fast_dataset(static_cast(results->Size()), allocator_); if (results->Empty()) { result->Dim(0)->NumElements(1); return result; diff --git a/src/algorithm/sparse_index.h b/src/algorithm/sparse_index.h index 3b38c5534..b8e78a8f6 100644 --- a/src/algorithm/sparse_index.h +++ b/src/algorithm/sparse_index.h @@ -15,9 +15,9 @@ #pragma once +#include "impl/heap/distance_heap.h" #include "inner_index_interface.h" #include "sparse_index_parameters.h" -#include "utils/distance_heap.h" namespace vsag { diff --git a/src/attr/CMakeLists.txt b/src/attr/CMakeLists.txt new file mode 100644 index 000000000..34ab9f07a --- /dev/null +++ b/src/attr/CMakeLists.txt @@ -0,0 +1,10 @@ + +file (GLOB ATTR_SRCS "executor/*.cpp") +list (FILTER ATTR_SRCS EXCLUDE REGEX "_test.cpp") + +list (APPEND ATTR_SRCS expression_visitor.cpp attribute.cpp attr_type_schema.cpp) + +add_library (attr OBJECT ${ATTR_SRCS}) +target_link_libraries (attr PUBLIC coverage_config) + +add_dependencies (attr spdlog fmt::fmt antlr4) diff --git a/src/attr/attr_type_schema_test.cpp b/src/attr/attr_type_schema_test.cpp index 36608bcee..b26ecf7a6 100644 --- a/src/attr/attr_type_schema_test.cpp +++ b/src/attr/attr_type_schema_test.cpp @@ -17,7 +17,7 @@ #include -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/attr_value_map.h b/src/attr/attr_value_map.h similarity index 99% rename from src/attr_value_map.h rename to src/attr/attr_value_map.h index 3cb655dd5..bc300003e 100644 --- a/src/attr_value_map.h +++ b/src/attr/attr_value_map.h @@ -16,8 +16,8 @@ #pragma once #include +#include "impl/allocator/safe_allocator.h" #include "impl/bitset/computable_bitset.h" -#include "safe_allocator.h" #include "storage/stream_reader.h" #include "storage/stream_writer.h" #include "typing.h" diff --git a/src/attr_value_map_test.cpp b/src/attr/attr_value_map_test.cpp similarity index 96% rename from src/attr_value_map_test.cpp rename to src/attr/attr_value_map_test.cpp index b115ea655..979502191 100644 --- a/src/attr_value_map_test.cpp +++ b/src/attr/attr_value_map_test.cpp @@ -13,12 +13,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "attr_value_map.h" +#include "attr/attr_value_map.h" #include #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/attribute.cpp b/src/attr/attribute.cpp similarity index 100% rename from src/attribute.cpp rename to src/attr/attribute.cpp diff --git a/src/attr/executor/comparison_executor_test.cpp b/src/attr/executor/comparison_executor_test.cpp index 1c8a8869c..d87e65f4c 100644 --- a/src/attr/executor/comparison_executor_test.cpp +++ b/src/attr/executor/comparison_executor_test.cpp @@ -20,7 +20,7 @@ #include "attr/expression_visitor.h" #include "data_cell/attribute_inverted_interface.h" #include "executor_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/attr/executor/integer_list_executor_test.cpp b/src/attr/executor/integer_list_executor_test.cpp index a8ab7b521..1e5cf9341 100644 --- a/src/attr/executor/integer_list_executor_test.cpp +++ b/src/attr/executor/integer_list_executor_test.cpp @@ -19,7 +19,7 @@ #include "attr/expression_visitor.h" #include "executor_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/attr/executor/logical_executor_test.cpp b/src/attr/executor/logical_executor_test.cpp index 05a8a9a83..0596e718b 100644 --- a/src/attr/executor/logical_executor_test.cpp +++ b/src/attr/executor/logical_executor_test.cpp @@ -19,7 +19,7 @@ #include "attr/expression_visitor.h" #include "executor_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/attr/executor/string_list_executor_test.cpp b/src/attr/executor/string_list_executor_test.cpp index 63466efdc..a33ec70cf 100644 --- a/src/attr/executor/string_list_executor_test.cpp +++ b/src/attr/executor/string_list_executor_test.cpp @@ -19,7 +19,7 @@ #include "attr/expression_visitor.h" #include "executor_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; template diff --git a/src/data_cell/CMakeLists.txt b/src/data_cell/CMakeLists.txt new file mode 100644 index 000000000..cd64a03a5 --- /dev/null +++ b/src/data_cell/CMakeLists.txt @@ -0,0 +1,7 @@ + +file (GLOB DATACELL_SRCS "*.cpp") +list (FILTER DATACELL_SRCS EXCLUDE REGEX "_test.cpp") + +add_library (datacell OBJECT ${DATACELL_SRCS}) +target_link_libraries(datacell PRIVATE transform quantizer io attr coverage_config) +add_dependencies (datacell spdlog fmt::fmt) diff --git a/src/data_cell/attribute_bucket_inverted_datacell.h b/src/data_cell/attribute_bucket_inverted_datacell.h index 8c46a2e38..29497e4c8 100644 --- a/src/data_cell/attribute_bucket_inverted_datacell.h +++ b/src/data_cell/attribute_bucket_inverted_datacell.h @@ -18,7 +18,7 @@ #include #include -#include "attr_value_map.h" +#include "attr/attr_value_map.h" #include "attribute_inverted_interface.h" #include "vsag_exception.h" diff --git a/src/data_cell/attribute_bucket_inverted_datacell_test.cpp b/src/data_cell/attribute_bucket_inverted_datacell_test.cpp index a1ff27edb..3a9e5af8b 100644 --- a/src/data_cell/attribute_bucket_inverted_datacell_test.cpp +++ b/src/data_cell/attribute_bucket_inverted_datacell_test.cpp @@ -18,7 +18,7 @@ #include #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/data_cell/attribute_inverted_datacell.h b/src/data_cell/attribute_inverted_datacell.h index 06e176c2a..d7b60a7b2 100644 --- a/src/data_cell/attribute_inverted_datacell.h +++ b/src/data_cell/attribute_inverted_datacell.h @@ -18,7 +18,7 @@ #include #include -#include "attr_value_map.h" +#include "attr/attr_value_map.h" #include "attribute_inverted_interface.h" #include "vsag_exception.h" diff --git a/src/data_cell/attribute_inverted_datacell_test.cpp b/src/data_cell/attribute_inverted_datacell_test.cpp index 426ada3fd..1edbbc4d2 100644 --- a/src/data_cell/attribute_inverted_datacell_test.cpp +++ b/src/data_cell/attribute_inverted_datacell_test.cpp @@ -18,7 +18,7 @@ #include #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/data_cell/bucket_datacell.h b/src/data_cell/bucket_datacell.h index 429deae1a..6e41733c3 100644 --- a/src/data_cell/bucket_datacell.h +++ b/src/data_cell/bucket_datacell.h @@ -20,6 +20,7 @@ #include "bucket_interface.h" #include "byte_buffer.h" #include "quantization/product_quantization/pq_fastscan_quantizer.h" +#include "simd/fp32_simd.h" namespace vsag { diff --git a/src/data_cell/bucket_datacell_parameter.cpp b/src/data_cell/bucket_datacell_parameter.cpp index 87582df15..2f19e296b 100644 --- a/src/data_cell/bucket_datacell_parameter.cpp +++ b/src/data_cell/bucket_datacell_parameter.cpp @@ -15,7 +15,7 @@ #include "bucket_datacell_parameter.h" -#include +#include #include "inner_string_params.h" diff --git a/src/data_cell/bucket_datacell_test.cpp b/src/data_cell/bucket_datacell_test.cpp index 7e8dd3e7d..08883a4a7 100644 --- a/src/data_cell/bucket_datacell_test.cpp +++ b/src/data_cell/bucket_datacell_test.cpp @@ -20,9 +20,9 @@ #include #include -#include "default_allocator.h" #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/default_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "simd/simd.h" using namespace vsag; diff --git a/src/data_cell/bucket_interface.cpp b/src/data_cell/bucket_interface.cpp index c2af55c7b..df3d0554b 100644 --- a/src/data_cell/bucket_interface.cpp +++ b/src/data_cell/bucket_interface.cpp @@ -15,7 +15,7 @@ #include "bucket_interface.h" -#include +#include #include "bucket_datacell.h" #include "inner_string_params.h" diff --git a/src/data_cell/compressed_graph_datacell_parameter.h b/src/data_cell/compressed_graph_datacell_parameter.h index 7b67cd64d..eb406ac6e 100644 --- a/src/data_cell/compressed_graph_datacell_parameter.h +++ b/src/data_cell/compressed_graph_datacell_parameter.h @@ -15,9 +15,8 @@ #pragma once -#include - #include "graph_interface_parameter.h" +#include "inner_string_params.h" namespace vsag { class CompressedGraphDatacellParameter : public GraphInterfaceParameter { diff --git a/src/data_cell/compressed_graph_datacell_test.cpp b/src/data_cell/compressed_graph_datacell_test.cpp index bb56b091f..916d66655 100644 --- a/src/data_cell/compressed_graph_datacell_test.cpp +++ b/src/data_cell/compressed_graph_datacell_test.cpp @@ -15,14 +15,14 @@ #include "compressed_graph_datacell.h" -#include +#include #include #include #include "graph_datacell_parameter.h" #include "graph_interface_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; void diff --git a/src/data_cell/extra_info_datacell_parameter.cpp b/src/data_cell/extra_info_datacell_parameter.cpp index 20fecabdf..1e7171a5c 100644 --- a/src/data_cell/extra_info_datacell_parameter.cpp +++ b/src/data_cell/extra_info_datacell_parameter.cpp @@ -15,7 +15,7 @@ #include "extra_info_datacell_parameter.h" -#include +#include #include "inner_string_params.h" diff --git a/src/data_cell/extra_info_datacell_test.cpp b/src/data_cell/extra_info_datacell_test.cpp index 11bb05ca2..81db61b31 100644 --- a/src/data_cell/extra_info_datacell_test.cpp +++ b/src/data_cell/extra_info_datacell_test.cpp @@ -20,11 +20,11 @@ #include #include -#include "default_allocator.h" #include "extra_info_interface_test.h" #include "fixtures.h" +#include "impl/allocator/default_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "parameter_test.h" -#include "safe_allocator.h" using namespace vsag; diff --git a/src/data_cell/extra_info_interface.cpp b/src/data_cell/extra_info_interface.cpp index b654d52e3..97632e580 100644 --- a/src/data_cell/extra_info_interface.cpp +++ b/src/data_cell/extra_info_interface.cpp @@ -15,7 +15,7 @@ #include "extra_info_interface.h" -#include +#include #include "extra_info_datacell.h" #include "inner_string_params.h" diff --git a/src/data_cell/extra_info_interface_test.cpp b/src/data_cell/extra_info_interface_test.cpp index 6f43345a6..2e891d781 100644 --- a/src/data_cell/extra_info_interface_test.cpp +++ b/src/data_cell/extra_info_interface_test.cpp @@ -19,9 +19,9 @@ #include #include -#include "default_allocator.h" #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/default_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "simd/simd.h" namespace vsag { diff --git a/src/data_cell/flatten_datacell_parameter.cpp b/src/data_cell/flatten_datacell_parameter.cpp index b0cae4b54..8c2e6c9d0 100644 --- a/src/data_cell/flatten_datacell_parameter.cpp +++ b/src/data_cell/flatten_datacell_parameter.cpp @@ -15,7 +15,7 @@ #include "flatten_datacell_parameter.h" -#include +#include #include "inner_string_params.h" diff --git a/src/data_cell/flatten_datacell_test.cpp b/src/data_cell/flatten_datacell_test.cpp index 3c8813777..01d6cb575 100644 --- a/src/data_cell/flatten_datacell_test.cpp +++ b/src/data_cell/flatten_datacell_test.cpp @@ -20,10 +20,10 @@ #include #include -#include "default_allocator.h" #include "fixtures.h" #include "flatten_interface_test.h" -#include "safe_allocator.h" +#include "impl/allocator/default_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/data_cell/graph_datacell_parameter.cpp b/src/data_cell/graph_datacell_parameter.cpp index ec14c230b..8d2c383ec 100644 --- a/src/data_cell/graph_datacell_parameter.cpp +++ b/src/data_cell/graph_datacell_parameter.cpp @@ -15,7 +15,7 @@ #include "graph_datacell_parameter.h" -#include +#include #include "inner_string_params.h" #include "vsag/constants.h" diff --git a/src/data_cell/graph_datacell_test.cpp b/src/data_cell/graph_datacell_test.cpp index 641d88a61..9bc2418aa 100644 --- a/src/data_cell/graph_datacell_test.cpp +++ b/src/data_cell/graph_datacell_test.cpp @@ -13,14 +13,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include #include #include #include "graph_interface_parameter.h" #include "graph_interface_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/data_cell/graph_interface_test.cpp b/src/data_cell/graph_interface_test.cpp index ffe54ac6a..4947c3838 100644 --- a/src/data_cell/graph_interface_test.cpp +++ b/src/data_cell/graph_interface_test.cpp @@ -19,9 +19,9 @@ #include #include -#include "default_allocator.h" #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/default_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/data_cell/sparse_graph_datacell_parameter.h b/src/data_cell/sparse_graph_datacell_parameter.h index cdd5fe411..8ac243a92 100644 --- a/src/data_cell/sparse_graph_datacell_parameter.h +++ b/src/data_cell/sparse_graph_datacell_parameter.h @@ -16,6 +16,7 @@ #pragma once #include "graph_interface_parameter.h" +#include "inner_string_params.h" #include "vsag/constants.h" namespace vsag { diff --git a/src/data_cell/sparse_graph_datacell_test.cpp b/src/data_cell/sparse_graph_datacell_test.cpp index c75b5641e..dd2397d57 100644 --- a/src/data_cell/sparse_graph_datacell_test.cpp +++ b/src/data_cell/sparse_graph_datacell_test.cpp @@ -15,13 +15,13 @@ #include "sparse_graph_datacell.h" -#include +#include #include #include #include "graph_interface_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "sparse_graph_datacell_parameter.h" using namespace vsag; diff --git a/src/data_cell/sparse_vector_datacell_parameter.h b/src/data_cell/sparse_vector_datacell_parameter.h index 4caf8f1ec..99d80136a 100644 --- a/src/data_cell/sparse_vector_datacell_parameter.h +++ b/src/data_cell/sparse_vector_datacell_parameter.h @@ -15,7 +15,7 @@ #pragma once -#include +#include #include "flatten_interface.h" #include "inner_string_params.h" diff --git a/src/data_cell/sparse_vector_datacell_test.cpp b/src/data_cell/sparse_vector_datacell_test.cpp index 34e36fec0..7532c1484 100644 --- a/src/data_cell/sparse_vector_datacell_test.cpp +++ b/src/data_cell/sparse_vector_datacell_test.cpp @@ -18,9 +18,9 @@ #include "data_cell/sparse_vector_datacell_parameter.h" #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "index/index_common_param.h" #include "quantization/sparse_quantization/sparse_quantizer_parameter.h" -#include "safe_allocator.h" #include "thread_pool.h" namespace vsag { diff --git a/src/dataset_impl.h b/src/dataset_impl.h index e7d8363ff..b3275728b 100644 --- a/src/dataset_impl.h +++ b/src/dataset_impl.h @@ -15,8 +15,6 @@ #pragma once -#include - #include #include #include @@ -46,41 +44,41 @@ class DatasetImpl : public Dataset { return; } - if (allocator_) { - allocator_->Deallocate((void*)this->GetIds()); - allocator_->Deallocate((void*)this->GetDistances()); - allocator_->Deallocate((void*)this->GetInt8Vectors()); - allocator_->Deallocate((void*)this->GetFloat32Vectors()); - allocator_->Deallocate((void*)this->GetPaths()); - allocator_->Deallocate((void*)this->GetExtraInfos()); - - if (this->GetSparseVectors()) { - for (int i = 0; i < this->GetNumElements(); i++) { - allocator_->Deallocate((void*)this->GetSparseVectors()[i].ids_); - allocator_->Deallocate((void*)this->GetSparseVectors()[i].vals_); + if (allocator_ != nullptr) { + allocator_->Deallocate((void*)(DatasetImpl::GetIds())); + allocator_->Deallocate((void*)(DatasetImpl::GetDistances())); + allocator_->Deallocate((void*)(DatasetImpl::GetInt8Vectors())); + allocator_->Deallocate((void*)(DatasetImpl::GetFloat32Vectors())); + allocator_->Deallocate((void*)(DatasetImpl::GetPaths())); + allocator_->Deallocate((void*)(DatasetImpl::GetExtraInfos())); + + if (DatasetImpl::GetSparseVectors() != nullptr) { + for (int i = 0; i < DatasetImpl::GetNumElements(); i++) { + allocator_->Deallocate((void*)DatasetImpl::GetSparseVectors()[i].ids_); + allocator_->Deallocate((void*)DatasetImpl::GetSparseVectors()[i].vals_); } - allocator_->Deallocate((void*)this->GetSparseVectors()); + allocator_->Deallocate((void*)DatasetImpl::GetSparseVectors()); } } else { - delete[] this->GetIds(); - delete[] this->GetDistances(); - delete[] this->GetInt8Vectors(); - delete[] this->GetFloat32Vectors(); - delete[] this->GetPaths(); - delete[] this->GetExtraInfos(); - - if (this->GetSparseVectors()) { - for (int i = 0; i < this->GetNumElements(); i++) { - delete[] this->GetSparseVectors()[i].ids_; - delete[] this->GetSparseVectors()[i].vals_; + delete[] DatasetImpl::GetIds(); + delete[] DatasetImpl::GetDistances(); + delete[] DatasetImpl::GetInt8Vectors(); + delete[] DatasetImpl::GetFloat32Vectors(); + delete[] DatasetImpl::GetPaths(); + delete[] DatasetImpl::GetExtraInfos(); + + if (DatasetImpl::GetSparseVectors() != nullptr) { + for (int i = 0; i < DatasetImpl::GetNumElements(); i++) { + delete[] DatasetImpl::GetSparseVectors()[i].ids_; + delete[] DatasetImpl::GetSparseVectors()[i].vals_; } - delete[] this->GetSparseVectors(); + delete[] DatasetImpl::GetSparseVectors(); } } - if (this->GetAttributeSets()) { - auto* attrsets = this->GetAttributeSets(); - for (int i = 0; i < this->GetNumElements(); ++i) { + if (DatasetImpl::GetAttributeSets() != nullptr) { + const auto* attrsets = DatasetImpl::GetAttributeSets(); + for (int i = 0; i < DatasetImpl::GetNumElements(); ++i) { for (auto* attr : attrsets[i].attrs_) { delete attr; } @@ -273,7 +271,7 @@ class DatasetImpl : public Dataset { MakeEmptyDataset(); private: - bool owner_ = true; + bool owner_{true}; std::unordered_map data_; Allocator* allocator_ = nullptr; }; diff --git a/src/dataset_impl_test.cpp b/src/dataset_impl_test.cpp index a869b4055..996ffea57 100644 --- a/src/dataset_impl_test.cpp +++ b/src/dataset_impl_test.cpp @@ -17,8 +17,8 @@ #include -#include "default_allocator.h" #include "fixtures.h" +#include "impl/allocator/default_allocator.h" #include "vsag/dataset.h" TEST_CASE("Dataset Implement Test", "[ut][dataset]") { diff --git a/src/default_thread_pool.cpp b/src/default_thread_pool.cpp index bca4b9f7b..7f15c1eb4 100644 --- a/src/default_thread_pool.cpp +++ b/src/default_thread_pool.cpp @@ -15,6 +15,8 @@ #include "default_thread_pool.h" +#include "vsag/options.h" + namespace vsag { DefaultThreadPool::DefaultThreadPool(std::size_t threads) { diff --git a/src/default_thread_pool.h b/src/default_thread_pool.h index 1c05e6ce3..0656ef7b9 100644 --- a/src/default_thread_pool.h +++ b/src/default_thread_pool.h @@ -20,7 +20,6 @@ #include #include -#include "vsag/options.h" #include "vsag/thread_pool.h" namespace vsag { diff --git a/src/engine.cpp b/src/engine.cpp index a5bb96718..89a7750a6 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -15,7 +15,7 @@ #include "vsag/engine.h" -#include +#include #include diff --git a/src/impl/CMakeLists.txt b/src/impl/CMakeLists.txt new file mode 100644 index 000000000..b7b477138 --- /dev/null +++ b/src/impl/CMakeLists.txt @@ -0,0 +1,20 @@ + +add_subdirectory (allocator) +add_subdirectory (filter) +add_subdirectory (heap) +add_subdirectory (transform) +add_subdirectory (bitset) + +file (GLOB IMPL_SRCS "*.cpp") +list (FILTER IMPL_SRCS EXCLUDE REGEX "_test.cpp") +list (FILTER IMPL_SRCS EXCLUDE REGEX kmeans_cluster.cpp) + +add_library (kmeans OBJECT kmeans_cluster.cpp) +target_link_libraries (kmeans PRIVATE fmt::fmt) +maybe_add_dependencies (kmeans spdlog openblas mkl) + +add_library (impl OBJECT ${IMPL_SRCS}) +target_link_libraries (impl PUBLIC transform allocator heap bitset filter kmeans fmt::fmt coverage_config) +maybe_add_dependencies (impl spdlog) + +set (IMPL_LIBS transform allocator heap bitset filter impl kmeans PARENT_SCOPE) diff --git a/src/impl/allocator/CMakeLists.txt b/src/impl/allocator/CMakeLists.txt new file mode 100644 index 000000000..cca76e6c1 --- /dev/null +++ b/src/impl/allocator/CMakeLists.txt @@ -0,0 +1,11 @@ + +set (ALLOCATOR_SRC + default_allocator.cpp + default_allocator.h + safe_allocator.h + allocator_wrapper.h +) + +add_library (allocator OBJECT ${ALLOCATOR_SRC}) +target_link_libraries (allocator PRIVATE fmt::fmt coverage_config) +maybe_add_dependencies (allocator spdlog) diff --git a/src/allocator_wrapper.h b/src/impl/allocator/allocator_wrapper.h similarity index 100% rename from src/allocator_wrapper.h rename to src/impl/allocator/allocator_wrapper.h diff --git a/src/default_allocator.cpp b/src/impl/allocator/default_allocator.cpp similarity index 86% rename from src/default_allocator.cpp rename to src/impl/allocator/default_allocator.cpp index 964f481db..b0791e00e 100644 --- a/src/default_allocator.cpp +++ b/src/impl/allocator/default_allocator.cpp @@ -17,7 +17,20 @@ #include +#include "../../logger.h" + namespace vsag { +#ifndef NDEBUG +DefaultAllocator::~DefaultAllocator() { + if (not allocated_ptrs_.empty()) { + logger::error(fmt::format("There is a memory leak in {}.", DefaultAllocator::Name())); + abort(); + } +} +#else +DefaultAllocator::~DefaultAllocator() = default; +#endif + void* DefaultAllocator::Allocate(size_t size) { auto* ptr = malloc(size); diff --git a/src/default_allocator.h b/src/impl/allocator/default_allocator.h similarity index 81% rename from src/default_allocator.h rename to src/impl/allocator/default_allocator.h index bc5177ee1..4cffca62a 100644 --- a/src/default_allocator.h +++ b/src/impl/allocator/default_allocator.h @@ -15,11 +15,9 @@ #pragma once -#include +#include #include -#include -#include "logger.h" #include "vsag/allocator.h" namespace vsag { @@ -27,14 +25,7 @@ namespace vsag { class DefaultAllocator : public Allocator { public: DefaultAllocator() = default; - ~DefaultAllocator() override { -#ifndef NDEBUG - if (not allocated_ptrs_.empty()) { - logger::error(fmt::format("There is a memory leak in {}.", Name())); - abort(); - } -#endif - } + ~DefaultAllocator() override; DefaultAllocator(const DefaultAllocator&) = delete; DefaultAllocator(DefaultAllocator&&) = delete; diff --git a/src/default_allocator_test.cpp b/src/impl/allocator/default_allocator_test.cpp similarity index 100% rename from src/default_allocator_test.cpp rename to src/impl/allocator/default_allocator_test.cpp diff --git a/src/safe_allocator.h b/src/impl/allocator/safe_allocator.h similarity index 90% rename from src/safe_allocator.h rename to src/impl/allocator/safe_allocator.h index f3268a67b..1977ed7bf 100644 --- a/src/safe_allocator.h +++ b/src/impl/allocator/safe_allocator.h @@ -31,7 +31,9 @@ class SafeAllocator : public Allocator { } public: - explicit SafeAllocator(Allocator* raw_allocator, bool owned = false) + explicit SafeAllocator(Allocator* raw_allocator) : SafeAllocator(raw_allocator, false){}; + + explicit SafeAllocator(Allocator* raw_allocator, bool owned) : raw_allocator_(raw_allocator), owned_(owned) { } @@ -79,7 +81,7 @@ class SafeAllocator : public Allocator { } private: - Allocator* const raw_allocator_ = nullptr; + Allocator* const raw_allocator_{nullptr}; std::shared_ptr const raw_allocator_shared_; diff --git a/src/safe_allocator_test.cpp b/src/impl/allocator/safe_allocator_test.cpp similarity index 100% rename from src/safe_allocator_test.cpp rename to src/impl/allocator/safe_allocator_test.cpp diff --git a/src/impl/basic_optimizer.cpp b/src/impl/basic_optimizer.cpp index c4e194214..4ea72cab8 100644 --- a/src/impl/basic_optimizer.cpp +++ b/src/impl/basic_optimizer.cpp @@ -15,6 +15,8 @@ #include "basic_optimizer.h" +#include "basic_searcher.h" + namespace vsag { template diff --git a/src/impl/basic_optimizer.h b/src/impl/basic_optimizer.h index d1781a6ae..5e340d173 100644 --- a/src/impl/basic_optimizer.h +++ b/src/impl/basic_optimizer.h @@ -17,9 +17,7 @@ #include -#include "basic_searcher.h" #include "common.h" -#include "data_cell/flatten_datacell.h" #include "index/index_common_param.h" #include "runtime_parameter.h" #include "typing.h" @@ -30,8 +28,9 @@ template class Optimizer { public: Optimizer(const IndexCommonParam& common_param) - : parameters_(common_param.allocator_.get()), best_params_(common_param.allocator_.get()) { - allocator_ = common_param.allocator_.get(); + : parameters_(common_param.allocator_.get()), + best_params_(common_param.allocator_.get()), + allocator_(common_param.allocator_.get()) { std::random_device rd; gen_.seed(rd()); } @@ -41,11 +40,11 @@ class Optimizer { void RegisterParameter(const RuntimeParameter& runtime_parameter) { - parameters_.push_back(runtime_parameter); + parameters_.emplace_back(runtime_parameter); } private: - Allocator* allocator_{nullptr}; + Allocator* const allocator_{nullptr}; std::mt19937 gen_; diff --git a/src/impl/basic_searcher.cpp b/src/impl/basic_searcher.cpp index 0a1dd98fa..ac1235936 100644 --- a/src/impl/basic_searcher.cpp +++ b/src/impl/basic_searcher.cpp @@ -17,8 +17,9 @@ #include +#include "impl/heap/standard_heap.h" #include "utils/linear_congruential_generator.h" -#include "utils/standard_heap.h" + namespace vsag { BasicSearcher::BasicSearcher(const IndexCommonParam& common_param, MutexArrayPtr mutex_array) diff --git a/src/impl/basic_searcher.h b/src/impl/basic_searcher.h index c3a868871..1b86d8dd3 100644 --- a/src/impl/basic_searcher.h +++ b/src/impl/basic_searcher.h @@ -15,25 +15,18 @@ #pragma once -#include - -#include "algorithm/hnswlib/algorithm_interface.h" #include "attr/executor/executor.h" -#include "basic_optimizer.h" -#include "common.h" #include "data_cell/flatten_interface.h" #include "data_cell/graph_interface.h" +#include "impl/heap/distance_heap.h" #include "index/index_common_param.h" #include "index/iterator_filter.h" #include "lock_strategy.h" -#include "runtime_parameter.h" -#include "utils/distance_heap.h" -#include "utils/linear_congruential_generator.h" #include "utils/visited_list.h" namespace vsag { -static const uint32_t OPTIMIZE_SEARCHER_SAMPLE_SIZE = 10000; +static constexpr uint32_t OPTIMIZE_SEARCHER_SAMPLE_SIZE = 10000; enum InnerSearchMode { KNN_SEARCH = 1, RANGE_SEARCH = 2 }; diff --git a/src/impl/basic_searcher_test.cpp b/src/impl/basic_searcher_test.cpp index 6e3284cb8..a4dad39fc 100644 --- a/src/impl/basic_searcher_test.cpp +++ b/src/impl/basic_searcher_test.cpp @@ -23,10 +23,10 @@ #include "catch2/catch_template_test_macros.hpp" #include "data_cell/flatten_datacell.h" #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "io/memory_io.h" #include "quantization/fp32_quantizer.h" #include "quantization/scalar_quantization/sq4_uniform_quantizer.h" -#include "safe_allocator.h" #include "test_logger.h" #include "utils/visited_list.h" diff --git a/src/impl/bitset/CMakeLists.txt b/src/impl/bitset/CMakeLists.txt new file mode 100644 index 000000000..367d05a17 --- /dev/null +++ b/src/impl/bitset/CMakeLists.txt @@ -0,0 +1,14 @@ + +set (BITSET_SRC + bitset.cpp + computable_bitset.cpp + computable_bitset.h + fast_bitset.cpp + fast_bitset.h + sparse_bitset.cpp + sparse_bitset.h +) + +add_library (bitset OBJECT ${BITSET_SRC}) +target_link_libraries (bitset PRIVATE roaring coverage_config) +maybe_add_dependencies (bitset spdlog) diff --git a/src/impl/bitset/fast_bitset.h b/src/impl/bitset/fast_bitset.h index 02ab35b79..f35b476c7 100644 --- a/src/impl/bitset/fast_bitset.h +++ b/src/impl/bitset/fast_bitset.h @@ -18,8 +18,8 @@ #include #include "computable_bitset.h" -#include "safe_allocator.h" #include "typing.h" +#include "vsag/allocator.h" namespace vsag { class FastBitset : public ComputableBitset { diff --git a/src/impl/bitset/fast_bitset_test.cpp b/src/impl/bitset/fast_bitset_test.cpp index a0db018b3..fc12a6b50 100644 --- a/src/impl/bitset/fast_bitset_test.cpp +++ b/src/impl/bitset/fast_bitset_test.cpp @@ -19,6 +19,7 @@ #include #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "utils/util_functions.h" using namespace vsag; diff --git a/src/impl/bitset/sparse_bitset_test.cpp b/src/impl/bitset/sparse_bitset_test.cpp index f1cdccaa8..4e16ece42 100644 --- a/src/impl/bitset/sparse_bitset_test.cpp +++ b/src/impl/bitset/sparse_bitset_test.cpp @@ -21,7 +21,7 @@ #include #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace roaring; using namespace vsag; diff --git a/src/impl/conjugate_graph.h b/src/impl/conjugate_graph.h index 1859cd72e..45cdfba82 100644 --- a/src/impl/conjugate_graph.h +++ b/src/impl/conjugate_graph.h @@ -25,12 +25,12 @@ namespace vsag { -static const int64_t LOOK_AT_K = 20; -static const int64_t MAXIMUM_DEGREE = 128; +static constexpr int64_t LOOK_AT_K = 20; +static constexpr int64_t MAXIMUM_DEGREE = 128; class ConjugateGraph { public: - ConjugateGraph(Allocator* allocator); + explicit ConjugateGraph(Allocator* allocator); tl::expected AddNeighbor(int64_t from_tag_id, int64_t to_tag_id); diff --git a/src/impl/conjugate_graph_test.cpp b/src/impl/conjugate_graph_test.cpp index 2c866784f..14e4bd05e 100644 --- a/src/impl/conjugate_graph_test.cpp +++ b/src/impl/conjugate_graph_test.cpp @@ -21,7 +21,7 @@ #include #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "storage/stream_reader.h" TEST_CASE("ConjugateGraph Build, Add and Memory Usage", "[ut][ConjugateGraph]") { diff --git a/src/impl/elias_fano_encoder.cpp b/src/impl/elias_fano_encoder.cpp index 1058c440a..70cdb6292 100644 --- a/src/impl/elias_fano_encoder.cpp +++ b/src/impl/elias_fano_encoder.cpp @@ -15,7 +15,6 @@ #include "elias_fano_encoder.h" -#include #include #include diff --git a/src/impl/elias_fano_encoder.h b/src/impl/elias_fano_encoder.h index ac84166e9..13b828361 100644 --- a/src/impl/elias_fano_encoder.h +++ b/src/impl/elias_fano_encoder.h @@ -75,7 +75,8 @@ class EliasFanoEncoder { [[nodiscard]] InnerIdType get_low_bits(size_t index) const; - Allocator* allocator_; +private: + Allocator* const allocator_; }; } // namespace vsag diff --git a/src/impl/elias_fano_encoder_test.cpp b/src/impl/elias_fano_encoder_test.cpp index 17e238dad..fbb56747f 100644 --- a/src/impl/elias_fano_encoder_test.cpp +++ b/src/impl/elias_fano_encoder_test.cpp @@ -19,7 +19,7 @@ #include #include -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" namespace vsag { diff --git a/src/impl/filter/CMakeLists.txt b/src/impl/filter/CMakeLists.txt index 046a5d271..10b2b328b 100644 --- a/src/impl/filter/CMakeLists.txt +++ b/src/impl/filter/CMakeLists.txt @@ -9,5 +9,5 @@ set (FILTER_SRC white_list_filter.cpp ) add_library (filter OBJECT ${FILTER_SRC}) -target_link_libraries (filter PUBLIC fmt::fmt-header-only) +target_link_libraries (filter PRIVATE fmt::fmt coverage_config) maybe_add_dependencies (filter spdlog) diff --git a/src/impl/filter/extrainfo_wrapper_filter.h b/src/impl/filter/extrainfo_wrapper_filter.h index c523c9610..cf59e8252 100644 --- a/src/impl/filter/extrainfo_wrapper_filter.h +++ b/src/impl/filter/extrainfo_wrapper_filter.h @@ -17,9 +17,7 @@ #include -#include "common.h" #include "data_cell/extra_info_interface.h" -#include "label_table.h" #include "typing.h" #include "vsag/bitset.h" #include "vsag/filter.h" diff --git a/src/impl/heap/CMakeLists.txt b/src/impl/heap/CMakeLists.txt new file mode 100644 index 000000000..ede6e1ad8 --- /dev/null +++ b/src/impl/heap/CMakeLists.txt @@ -0,0 +1,11 @@ + +set (HEAP_SRC + distance_heap.cpp + memmove_heap.cpp + standard_heap.cpp +) + +add_library (heap OBJECT ${HEAP_SRC}) +target_link_libraries (heap PUBLIC coverage_config) + +maybe_add_dependencies (heap spdlog fmt::fmt) diff --git a/src/utils/distance_heap.cpp b/src/impl/heap/distance_heap.cpp similarity index 83% rename from src/utils/distance_heap.cpp rename to src/impl/heap/distance_heap.cpp index eb57e115b..6e5ab1799 100644 --- a/src/utils/distance_heap.cpp +++ b/src/impl/heap/distance_heap.cpp @@ -38,4 +38,13 @@ DistanceHeap::MakeInstanceBySize(Allocator* allocator, int64_t max_ template DistHeapPtr DistanceHeap::MakeInstanceBySize(Allocator* allocator, int64_t max_size); +DistanceHeap::DistanceHeap(Allocator* allocator) : DistanceHeap(allocator, -1){}; + +DistanceHeap::DistanceHeap(Allocator* allocator, int64_t max_size) + : allocator_(allocator), max_size_(max_size){}; + +void +DistanceHeap::Push(const DistanceRecord& record) { + return this->Push(record.first, record.second); +} } // namespace vsag diff --git a/src/utils/distance_heap.h b/src/impl/heap/distance_heap.h similarity index 89% rename from src/utils/distance_heap.h rename to src/impl/heap/distance_heap.h index e7ba062ae..b0a45446c 100644 --- a/src/utils/distance_heap.h +++ b/src/impl/heap/distance_heap.h @@ -55,13 +55,14 @@ class DistanceHeap { MakeInstanceBySize(Allocator* allocator, int64_t max_size); public: - DistanceHeap(Allocator* allocator, int64_t max_size = -1) - : allocator_(allocator), max_size_(max_size){}; + explicit DistanceHeap(Allocator* allocator); + + explicit DistanceHeap(Allocator* allocator, int64_t max_size); + + virtual ~DistanceHeap() = default; virtual void - Push(const DistanceRecord& record) { - return this->Push(record.first, record.second); - } + Push(const DistanceRecord& record); virtual void Push(float dist, InnerIdType id) = 0; diff --git a/src/utils/distance_heap_test.cpp b/src/impl/heap/distance_heap_test.cpp similarity index 98% rename from src/utils/distance_heap_test.cpp rename to src/impl/heap/distance_heap_test.cpp index 1d6d1f06c..6832a974a 100644 --- a/src/utils/distance_heap_test.cpp +++ b/src/impl/heap/distance_heap_test.cpp @@ -18,8 +18,8 @@ #include #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "memmove_heap.h" -#include "safe_allocator.h" #include "standard_heap.h" using namespace vsag; diff --git a/src/utils/memmove_heap.h b/src/impl/heap/memmove_heap.cpp similarity index 78% rename from src/utils/memmove_heap.h rename to src/impl/heap/memmove_heap.cpp index 93ed84e6e..b59d57cd4 100644 --- a/src/utils/memmove_heap.h +++ b/src/impl/heap/memmove_heap.cpp @@ -13,48 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#pragma once - -#include - -#include "distance_heap.h" +#include "memmove_heap.h" namespace vsag { -template -class MemmoveHeap : public DistanceHeap { -public: -public: - MemmoveHeap(Allocator* allocator, int64_t max_size); - - void - Push(float dist, InnerIdType id) override; - - [[nodiscard]] const DistanceRecord& - Top() const override { - return this->ordered_buffer_[cur_size_ - 1]; - } - - void - Pop() override { - cur_size_--; - } - - [[nodiscard]] uint64_t - Size() const override { - return this->cur_size_; - } - - [[nodiscard]] bool - Empty() const override { - return this->cur_size_ == 0; - } - -private: - Vector ordered_buffer_; - - int64_t cur_size_{0}; -}; - template MemmoveHeap::MemmoveHeap(Allocator* allocator, int64_t max_size) : DistanceHeap(allocator, max_size), ordered_buffer_(allocator) { @@ -100,4 +61,10 @@ MemmoveHeap::Push(float dist, InnerIdType id) { cur_size_++; } } + +template class MemmoveHeap; +template class MemmoveHeap; +template class MemmoveHeap; +template class MemmoveHeap; + } // namespace vsag diff --git a/src/impl/heap/memmove_heap.h b/src/impl/heap/memmove_heap.h new file mode 100644 index 000000000..b405de1f5 --- /dev/null +++ b/src/impl/heap/memmove_heap.h @@ -0,0 +1,57 @@ + +// Copyright 2024-present the vsag project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "distance_heap.h" + +namespace vsag { +template +class MemmoveHeap : public DistanceHeap { +public: + explicit MemmoveHeap(Allocator* allocator, int64_t max_size); + + ~MemmoveHeap() override = default; + + void + Push(float dist, InnerIdType id) override; + + [[nodiscard]] const DistanceRecord& + Top() const override { + return this->ordered_buffer_[cur_size_ - 1]; + } + + void + Pop() override { + cur_size_--; + } + + [[nodiscard]] uint64_t + Size() const override { + return this->cur_size_; + } + + [[nodiscard]] bool + Empty() const override { + return this->cur_size_ == 0; + } + +private: + Vector ordered_buffer_; + + int64_t cur_size_{0}; +}; + +} // namespace vsag diff --git a/src/impl/heap/standard_heap.cpp b/src/impl/heap/standard_heap.cpp new file mode 100644 index 000000000..028f01f0e --- /dev/null +++ b/src/impl/heap/standard_heap.cpp @@ -0,0 +1,44 @@ + +// Copyright 2024-present the vsag project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "standard_heap.h" + +namespace vsag { +template +StandardHeap::StandardHeap(Allocator* allocator, int64_t max_size) + : DistanceHeap(allocator, max_size), queue_(allocator) { +} + +template +void +StandardHeap::Push(float dist, InnerIdType id) { + if constexpr (fixed_size) { + if (this->queue_.size() < max_size_ or (dist < this->queue_.top().first) == max_heap) { + queue_.emplace(dist, id); + if (this->queue_.size() > this->max_size_) { + this->queue_.pop(); + } + } + } else { + queue_.emplace(dist, id); + } +} + +template class StandardHeap; +template class StandardHeap; +template class StandardHeap; +template class StandardHeap; + +} // namespace vsag \ No newline at end of file diff --git a/src/utils/standard_heap.h b/src/impl/heap/standard_heap.h similarity index 68% rename from src/utils/standard_heap.h rename to src/impl/heap/standard_heap.h index 9be43daaf..5ec695505 100644 --- a/src/utils/standard_heap.h +++ b/src/impl/heap/standard_heap.h @@ -30,7 +30,7 @@ class StandardHeap : public DistanceHeap { std::priority_queue>, CompareMin>; public: - StandardHeap(Allocator* allocator, int64_t max_size); + explicit StandardHeap(Allocator* allocator, int64_t max_size); void Push(float dist, InnerIdType id) override; @@ -58,25 +58,4 @@ class StandardHeap : public DistanceHeap { private: typename std::conditional::type queue_; }; - -template -StandardHeap::StandardHeap(Allocator* allocator, int64_t max_size) - : DistanceHeap(allocator, max_size), queue_(allocator) { -} - -template -void -StandardHeap::Push(float dist, InnerIdType id) { - if constexpr (fixed_size) { - if (this->queue_.size() < max_size_ or (dist < this->queue_.top().first) == max_heap) { - queue_.emplace(dist, id); - if (this->queue_.size() > this->max_size_) { - this->queue_.pop(); - } - } - } else { - queue_.emplace(dist, id); - } -} - } // namespace vsag diff --git a/src/impl/kmeans_cluster.cpp b/src/impl/kmeans_cluster.cpp index 246b047b8..a75b6135e 100644 --- a/src/impl/kmeans_cluster.cpp +++ b/src/impl/kmeans_cluster.cpp @@ -18,15 +18,14 @@ #include #include -#include #include #include "algorithm/inner_index_interface.h" #include "byte_buffer.h" -#include "logger.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "simd/fp32_simd.h" #include "utils/util_functions.h" + namespace vsag { KMeansCluster::KMeansCluster(int32_t dim, Allocator* allocator, SafeThreadPoolPtr thread_pool) : dim_(dim), allocator_(allocator), thread_pool_(std::move(thread_pool)) { diff --git a/src/impl/kmeans_cluster_test.cpp b/src/impl/kmeans_cluster_test.cpp index 137180242..67806d4c8 100644 --- a/src/impl/kmeans_cluster_test.cpp +++ b/src/impl/kmeans_cluster_test.cpp @@ -18,7 +18,7 @@ #include #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" std::vector GenerateDataset(int32_t k, int32_t dim, uint64_t count, std::vector& labels) { diff --git a/src/impl/odescent_graph_builder.cpp b/src/impl/odescent_graph_builder.cpp index bb456065a..727f37e42 100644 --- a/src/impl/odescent_graph_builder.cpp +++ b/src/impl/odescent_graph_builder.cpp @@ -18,6 +18,7 @@ #include #include +#include "simd/simd.h" #include "utils/linear_congruential_generator.h" namespace vsag { diff --git a/src/impl/odescent_graph_builder.h b/src/impl/odescent_graph_builder.h index 9ddae9e5c..481c2abf2 100644 --- a/src/impl/odescent_graph_builder.h +++ b/src/impl/odescent_graph_builder.h @@ -24,10 +24,9 @@ #include "data_cell/flatten_datacell.h" #include "data_cell/graph_datacell.h" #include "data_cell/sparse_graph_datacell.h" +#include "impl/allocator/safe_allocator.h" #include "impl/odescent_graph_parameter.h" #include "logger.h" -#include "safe_allocator.h" -#include "simd/simd.h" #include "utils.h" #include "vsag/dataset.h" @@ -71,7 +70,7 @@ struct Node { struct Linklist { Vector neighbors; float greast_neighbor_distance; - Linklist(Allocator*& allocator) + Linklist(Allocator* allocator) : neighbors(allocator), greast_neighbor_distance(std::numeric_limits::max()) { } }; @@ -157,7 +156,7 @@ class ODescent { const InnerIdType* valid_ids_{nullptr}; bool pruning_{true}; - Allocator* allocator_; + Allocator* const allocator_; const ODescentParameterPtr odescent_param_; diff --git a/src/impl/odescent_graph_builder_test.cpp b/src/impl/odescent_graph_builder_test.cpp index bd8bb4e23..d076fa94b 100644 --- a/src/impl/odescent_graph_builder_test.cpp +++ b/src/impl/odescent_graph_builder_test.cpp @@ -23,9 +23,9 @@ #include "data_cell/flatten_interface.h" #include "data_cell/graph_interface.h" #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "io/memory_io_parameter.h" #include "quantization/fp32_quantizer_parameter.h" -#include "safe_allocator.h" size_t calculate_overlap(const vsag::Vector& vec1, const vsag::Vector& vec2, int K) { diff --git a/src/impl/odescent_graph_parameter.cpp b/src/impl/odescent_graph_parameter.cpp index f487dfd5f..26ed82e49 100644 --- a/src/impl/odescent_graph_parameter.cpp +++ b/src/impl/odescent_graph_parameter.cpp @@ -15,7 +15,7 @@ #include "odescent_graph_parameter.h" -#include +#include #include "vsag/constants.h" diff --git a/src/impl/pruning_strategy.cpp b/src/impl/pruning_strategy.cpp index 2ab77e5d9..b0319a74d 100644 --- a/src/impl/pruning_strategy.cpp +++ b/src/impl/pruning_strategy.cpp @@ -15,7 +15,7 @@ #include "pruning_strategy.h" -#include "utils/standard_heap.h" +#include "impl/heap/standard_heap.h" namespace vsag { diff --git a/src/impl/pruning_strategy.h b/src/impl/pruning_strategy.h index 94f552e3d..5b0014774 100644 --- a/src/impl/pruning_strategy.h +++ b/src/impl/pruning_strategy.h @@ -15,13 +15,11 @@ #pragma once -#include - #include "data_cell/flatten_datacell.h" #include "data_cell/graph_interface.h" +#include "impl/heap/distance_heap.h" #include "lock_strategy.h" #include "typing.h" -#include "utils/distance_heap.h" #include "vsag/allocator.h" namespace vsag { diff --git a/src/impl/reorder.h b/src/impl/reorder.h index 2de03bb30..62fd84985 100644 --- a/src/impl/reorder.h +++ b/src/impl/reorder.h @@ -18,7 +18,8 @@ #include #include "data_cell/flatten_interface.h" -#include "utils/distance_heap.h" +#include "heap/distance_heap.h" + namespace vsag { class Reorder { public: diff --git a/src/impl/transform/CMakeLists.txt b/src/impl/transform/CMakeLists.txt index 367567929..47db054de 100644 --- a/src/impl/transform/CMakeLists.txt +++ b/src/impl/transform/CMakeLists.txt @@ -1,10 +1,14 @@ set (TRANSFORM_SRC vector_transformer.cpp + vector_transformer.h random_orthogonal_transformer.cpp + random_orthogonal_transformer.h fht_kac_rotate_transformer.cpp + fht_kac_rotate_transformer.h pca_transformer.cpp + pca_transformer.h ) add_library (transform OBJECT ${TRANSFORM_SRC}) -target_link_libraries (transform PUBLIC fmt::fmt-header-only) +target_link_libraries (transform PUBLIC fmt::fmt coverage_config) maybe_add_dependencies (transform spdlog openblas mkl) diff --git a/src/impl/transform/fht_kac_rotate_transformer.h b/src/impl/transform/fht_kac_rotate_transformer.h index 018777b40..a50fe9252 100644 --- a/src/impl/transform/fht_kac_rotate_transformer.h +++ b/src/impl/transform/fht_kac_rotate_transformer.h @@ -13,8 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include - +#include #include #include "vector_transformer.h" diff --git a/src/impl/transform/fht_kac_rotate_transformer_test.cpp b/src/impl/transform/fht_kac_rotate_transformer_test.cpp index 7dbe205e4..09022283d 100644 --- a/src/impl/transform/fht_kac_rotate_transformer_test.cpp +++ b/src/impl/transform/fht_kac_rotate_transformer_test.cpp @@ -19,7 +19,7 @@ #include #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/impl/transform/pca_transformer.cpp b/src/impl/transform/pca_transformer.cpp index 5ce02b9a1..565824029 100644 --- a/src/impl/transform/pca_transformer.cpp +++ b/src/impl/transform/pca_transformer.cpp @@ -15,9 +15,12 @@ #include "pca_transformer.h" -#include +#include +#include +#include + +#include -#include "../../logger.h" #include "vsag_exception.h" namespace vsag { diff --git a/src/impl/transform/pca_transformer.h b/src/impl/transform/pca_transformer.h index 92be6aab4..90590d778 100644 --- a/src/impl/transform/pca_transformer.h +++ b/src/impl/transform/pca_transformer.h @@ -15,11 +15,6 @@ #pragma once -#include -#include - -#include - #include "vector_transformer.h" namespace vsag { diff --git a/src/impl/transform/pca_transformer_test.cpp b/src/impl/transform/pca_transformer_test.cpp index 8752a5d84..81a8a6fe2 100644 --- a/src/impl/transform/pca_transformer_test.cpp +++ b/src/impl/transform/pca_transformer_test.cpp @@ -18,7 +18,7 @@ #include #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/impl/transform/random_orthogonal_transformer.cpp b/src/impl/transform/random_orthogonal_transformer.cpp index f4cd21987..a49f780a3 100644 --- a/src/impl/transform/random_orthogonal_transformer.cpp +++ b/src/impl/transform/random_orthogonal_transformer.cpp @@ -16,7 +16,7 @@ #include "random_orthogonal_transformer.h" #include -#include +#include #include #include diff --git a/src/impl/transform/random_orthogonal_transformer_test.cpp b/src/impl/transform/random_orthogonal_transformer_test.cpp index 3eac9b5be..fdd83e404 100644 --- a/src/impl/transform/random_orthogonal_transformer_test.cpp +++ b/src/impl/transform/random_orthogonal_transformer_test.cpp @@ -20,7 +20,7 @@ #include #include "fixtures.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/index/diskann_zparameters.cpp b/src/index/diskann_zparameters.cpp index a6229fbc6..3853d6757 100644 --- a/src/index/diskann_zparameters.cpp +++ b/src/index/diskann_zparameters.cpp @@ -15,7 +15,7 @@ #include "diskann_zparameters.h" -#include +#include #include "common.h" #include "index_common_param.h" diff --git a/src/index/hnsw.cpp b/src/index/hnsw.cpp index 767dfd240..9a7ee3201 100644 --- a/src/index/hnsw.cpp +++ b/src/index/hnsw.cpp @@ -15,7 +15,7 @@ #include "hnsw.h" -#include +#include #include #include @@ -29,11 +29,11 @@ #include "data_cell/flatten_datacell.h" #include "data_cell/graph_datacell_parameter.h" #include "empty_index_binary_set.h" +#include "impl/allocator/safe_allocator.h" #include "impl/odescent_graph_builder.h" #include "index/hnsw_zparameters.h" #include "io/memory_block_io_parameter.h" #include "quantization/fp32_quantizer_parameter.h" -#include "safe_allocator.h" #include "storage/serialization.h" #include "storage/stream_writer.h" #include "utils/slow_task_timer.h" @@ -331,7 +331,7 @@ HNSW::knn_search(const DatasetPtr& query, results.pop(); } auto [dataset_results, dists, ids] = - CreateFastDataset(static_cast(results.size()), search_allocator); + create_fast_dataset(static_cast(results.size()), search_allocator); for (auto j = static_cast(results.size() - 1); j >= 0; --j) { dists[j] = results.top().first; @@ -445,7 +445,7 @@ HNSW::range_search(const DatasetPtr& query, if (limited_size >= 1) { target_size = std::min(limited_size, target_size); } - auto [dataset_results, dists, ids] = CreateFastDataset(target_size, allocator_.get()); + auto [dataset_results, dists, ids] = create_fast_dataset(target_size, allocator_.get()); for (auto j = static_cast(results.size() - 1); j >= 0; --j) { if (j < target_size) { diff --git a/src/index/hnsw.h b/src/index/hnsw.h index feb4fb523..f3c7405da 100644 --- a/src/index/hnsw.h +++ b/src/index/hnsw.h @@ -32,13 +32,12 @@ #include "data_cell/graph_interface.h" #include "data_type.h" #include "hnsw_zparameters.h" +#include "impl/allocator/safe_allocator.h" #include "impl/conjugate_graph.h" #include "impl/filter/filter_headers.h" #include "index_common_param.h" #include "index_feature_list.h" -#include "index_impl.h" #include "logger.h" -#include "safe_allocator.h" #include "typing.h" #include "utils/window_result_queue.h" #include "vsag/binaryset.h" diff --git a/src/index/hnsw_zparameters.cpp b/src/index/hnsw_zparameters.cpp index 467663c6f..6096b895a 100644 --- a/src/index/hnsw_zparameters.cpp +++ b/src/index/hnsw_zparameters.cpp @@ -15,7 +15,7 @@ #include "hnsw_zparameters.h" -#include +#include #include diff --git a/src/index/hnsw_zparameters.h b/src/index/hnsw_zparameters.h index 948fccf47..56b199e7d 100644 --- a/src/index/hnsw_zparameters.h +++ b/src/index/hnsw_zparameters.h @@ -18,8 +18,8 @@ #include #include -#include "../algorithm/hnswlib/hnswlib.h" -#include "../data_type.h" +#include "algorithm/hnswlib/hnswlib.h" +#include "data_type.h" #include "index_common_param.h" namespace vsag { diff --git a/src/index/index_common_param.cpp b/src/index/index_common_param.cpp index 9dcd65be6..393dce541 100644 --- a/src/index/index_common_param.cpp +++ b/src/index/index_common_param.cpp @@ -15,7 +15,7 @@ #include "index_common_param.h" -#include +#include #include "common.h" #include "vsag/constants.h" diff --git a/src/index/index_common_param.h b/src/index/index_common_param.h index d8c303418..3ff7e9c75 100644 --- a/src/index/index_common_param.h +++ b/src/index/index_common_param.h @@ -16,7 +16,6 @@ #pragma once #include -#include #include "data_type.h" #include "metric_type.h" diff --git a/src/index/iterator_filter_test.cpp b/src/index/iterator_filter_test.cpp index e61c61247..64559c9fa 100644 --- a/src/index/iterator_filter_test.cpp +++ b/src/index/iterator_filter_test.cpp @@ -17,8 +17,8 @@ #include -#include "default_allocator.h" #include "fixtures.h" +#include "impl/allocator/default_allocator.h" #include "vsag/iterator_context.h" using namespace vsag; diff --git a/src/index_feature_list.cpp b/src/index_feature_list.cpp index 8a5e7fcd0..20ff0dbdb 100644 --- a/src/index_feature_list.cpp +++ b/src/index_feature_list.cpp @@ -15,7 +15,7 @@ #include "index_feature_list.h" -#include +#include #include diff --git a/src/io/CMakeLists.txt b/src/io/CMakeLists.txt index de96ae44c..bd8bb3290 100644 --- a/src/io/CMakeLists.txt +++ b/src/io/CMakeLists.txt @@ -4,11 +4,13 @@ set (IO_SRC memory_io_parameter.cpp memory_block_io_parameter.cpp buffer_io_parameter.cpp + buffer_io.cpp async_io_parameter.cpp async_io.cpp mmap_io_parameter.cpp + memory_block_io.cpp ) add_library (io OBJECT ${IO_SRC}) -target_link_libraries (io PUBLIC fmt::fmt-header-only aio) +target_link_libraries (io PUBLIC fmt::fmt aio coverage_config) maybe_add_dependencies (io spdlog) diff --git a/src/io/async_io.cpp b/src/io/async_io.cpp index f282e0b1f..81b39f129 100644 --- a/src/io/async_io.cpp +++ b/src/io/async_io.cpp @@ -17,10 +17,25 @@ #include +#include "direct_io_object.h" +#include "io_context.h" + namespace vsag { std::unique_ptr AsyncIO::io_context_pool = std::make_unique(10, nullptr); +AsyncIO::AsyncIO(std::string filename, Allocator* allocator) + : BasicIO(allocator), filepath_(std::move(filename)) { + this->rfd_ = open(filepath_.c_str(), O_CREAT | O_RDWR | O_DIRECT, 0644); + this->wfd_ = open(filepath_.c_str(), O_CREAT | O_RDWR, 0644); +} + +AsyncIO::AsyncIO(const AsyncIOParameterPtr& io_param, const IndexCommonParam& common_param) + : AsyncIO(io_param->path_, common_param.allocator_.get()){}; + +AsyncIO::AsyncIO(const IOParamPtr& param, const IndexCommonParam& common_param) + : AsyncIO(std::dynamic_pointer_cast(param), common_param){}; + AsyncIO::~AsyncIO() { close(this->wfd_); close(this->rfd_); @@ -28,4 +43,103 @@ AsyncIO::~AsyncIO() { std::filesystem::remove(this->filepath_); } +void +AsyncIO::WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset) { + auto ret = pwrite64(this->wfd_, data, size, static_cast(offset)); + if (ret != size) { + throw VsagException(ErrorType::INTERNAL_ERROR, + fmt::format("write bytes {} less than {}", ret, size)); + } + if (size + offset > this->size_) { + this->size_ = size + offset; + } + fsync(wfd_); +} + +bool +AsyncIO::ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const { + bool need_release = true; + const auto* ptr = DirectReadImpl(size, offset, need_release); + memcpy(data, ptr, size); + AsyncIO::ReleaseImpl(ptr); + return true; +} + +const uint8_t* +AsyncIO::DirectReadImpl(uint64_t size, uint64_t offset, bool& need_release) const { + need_release = true; + if (size == 0) { + return nullptr; + } + DirectIOObject obj(size, offset); + auto ret = pread64(this->rfd_, obj.align_data, obj.size, static_cast(obj.offset)); + if (ret < 0) { + throw VsagException(ErrorType::INTERNAL_ERROR, fmt::format("pread64 error {}", ret)); + } + return obj.data; +} + +void +AsyncIO::ReleaseImpl(const uint8_t* data) { + auto* ptr = const_cast(data); + constexpr int64_t align_bit = DirectIOObject::ALIGN_BIT; + auto raw = reinterpret_cast(ptr); + raw &= ~((1ULL << align_bit) - 1); + // NOLINTNEXTLINE(performance-no-int-to-ptr) + free(reinterpret_cast(raw)); +} + +bool +AsyncIO::MultiReadImpl(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const { + auto context = io_context_pool->TakeOne(); + uint8_t* cur_data = datas; + auto all_count = static_cast(count); + while (all_count > 0) { + count = std::min(IOContext::DEFAULT_REQUEST_COUNT, all_count); + auto* cb = context->cb_; + std::vector objs(count); + for (int64_t i = 0; i < count; ++i) { + objs[i].Set(sizes[i], offsets[i]); + auto& obj = objs[i]; + io_prep_pread(cb[i], rfd_, obj.align_data, obj.size, static_cast(obj.offset)); + cb[i]->data = &(objs[i]); + } + + int submitted = io_submit(context->ctx_, static_cast(count), cb); + if (submitted < 0) { + io_context_pool->ReturnOne(context); + for (auto& obj : objs) { + obj.Release(); + } + throw VsagException(ErrorType::INTERNAL_ERROR, "io submit failed"); + } + + struct timespec timeout = {1, 0}; + auto num_events = io_getevents(context->ctx_, + static_cast(count), + static_cast(count), + context->events_, + &timeout); + if (num_events != count) { + io_context_pool->ReturnOne(context); + for (auto& obj : objs) { + obj.Release(); + } + throw VsagException(ErrorType::INTERNAL_ERROR, "io async read failed"); + } + + for (int64_t i = 0; i < count; ++i) { + memcpy(cur_data, objs[i].data, sizes[i]); + cur_data += sizes[i]; + this->ReleaseImpl(objs[i].data); + } + + sizes += count; + offsets += count; + all_count -= static_cast(count); + } + io_context_pool->ReturnOne(context); + return true; +} + } // namespace vsag diff --git a/src/io/async_io.h b/src/io/async_io.h index daaa59e22..331a5e94c 100644 --- a/src/io/async_io.h +++ b/src/io/async_io.h @@ -15,128 +15,43 @@ #pragma once -#include - #include "async_io_parameter.h" #include "basic_io.h" -#include "direct_io_object.h" #include "index/index_common_param.h" #include "io_context.h" namespace vsag { + class AsyncIO : public BasicIO { public: - AsyncIO(std::string filename, Allocator* allocator) - : BasicIO(allocator), filepath_(std::move(filename)) { - this->rfd_ = open(filepath_.c_str(), O_CREAT | O_RDWR | O_DIRECT, 0644); - this->wfd_ = open(filepath_.c_str(), O_CREAT | O_RDWR, 0644); - } + explicit AsyncIO(std::string filename, Allocator* allocator); - explicit AsyncIO(const AsyncIOParameterPtr& io_param, const IndexCommonParam& common_param) - : AsyncIO(io_param->path_, common_param.allocator_.get()){}; + explicit AsyncIO(const AsyncIOParameterPtr& io_param, const IndexCommonParam& common_param); - explicit AsyncIO(const IOParamPtr& param, const IndexCommonParam& common_param) - : AsyncIO(std::dynamic_pointer_cast(param), common_param){}; + explicit AsyncIO(const IOParamPtr& param, const IndexCommonParam& common_param); ~AsyncIO() override; public: - inline void - WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset) { - auto ret = pwrite64(this->wfd_, data, size, offset); - if (ret != size) { - throw VsagException(ErrorType::INTERNAL_ERROR, - fmt::format("write bytes {} less than {}", ret, size)); - } - if (size + offset > this->size_) { - this->size_ = size + offset; - } - fsync(wfd_); - } + void + WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset); - inline bool - ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const { - bool need_release = true; - auto ptr = DirectReadImpl(size, offset, need_release); - memcpy(data, ptr, size); - this->ReleaseImpl(ptr); - return true; - } + bool + ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const; - [[nodiscard]] inline const uint8_t* - DirectReadImpl(uint64_t size, uint64_t offset, bool& need_release) const { - need_release = true; - if (size == 0) { - return nullptr; - } - DirectIOObject obj(size, offset); - auto ret = pread64(this->rfd_, obj.align_data, obj.size, obj.offset); - if (ret < 0) { - throw VsagException(ErrorType::INTERNAL_ERROR, fmt::format("pread64 error {}", ret)); - } - return obj.data; - } + [[nodiscard]] const uint8_t* + DirectReadImpl(uint64_t size, uint64_t offset, bool& need_release) const; - inline void - ReleaseImpl(const uint8_t* data) const { - auto ptr = const_cast(data); - constexpr auto ALIGN_BIT = DirectIOObject::ALIGN_BIT; - free(reinterpret_cast((reinterpret_cast(ptr) >> ALIGN_BIT) << ALIGN_BIT)); - } + static void + ReleaseImpl(const uint8_t* data); - inline bool - MultiReadImpl(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const { - auto context = io_context_pool->TakeOne(); - uint8_t* cur_data = datas; - int64_t all_count = count; - while (all_count > 0) { - count = std::min(IOContext::DEFAULT_REQUEST_COUNT, all_count); - auto* cb = context->cb_; - std::vector objs(count); - for (int64_t i = 0; i < count; ++i) { - objs[i].Set(sizes[i], offsets[i]); - auto& obj = objs[i]; - io_prep_pread(cb[i], rfd_, obj.align_data, obj.size, obj.offset); - cb[i]->data = &(objs[i]); - } - - int submitted = io_submit(context->ctx_, count, cb); - if (submitted < 0) { - io_context_pool->ReturnOne(context); - for (auto& obj : objs) { - obj.Release(); - } - throw VsagException(ErrorType::INTERNAL_ERROR, "io submit failed"); - } - - struct timespec timeout = {1, 0}; - auto num_events = io_getevents(context->ctx_, count, count, context->events_, &timeout); - if (num_events != count) { - io_context_pool->ReturnOne(context); - for (auto& obj : objs) { - obj.Release(); - } - throw VsagException(ErrorType::INTERNAL_ERROR, "io async read failed"); - } - - for (int64_t i = 0; i < count; ++i) { - memcpy(cur_data, objs[i].data, sizes[i]); - cur_data += sizes[i]; - this->ReleaseImpl(objs[i].data); - } - - sizes += count; - offsets += count; - all_count -= count; - } - io_context_pool->ReturnOne(context); - return true; - } + bool + MultiReadImpl(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const; - inline void + void PrefetchImpl(uint64_t offset, uint64_t cache_line = 64){}; - static inline bool + static bool InMemoryImpl() { return false; } diff --git a/src/io/async_io_test.cpp b/src/io/async_io_test.cpp index a92fa71c0..79a5c81ae 100644 --- a/src/io/async_io_test.cpp +++ b/src/io/async_io_test.cpp @@ -19,7 +19,7 @@ #include #include "basic_io_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/io/basic_io.h b/src/io/basic_io.h index fc393c03e..e18270a31 100644 --- a/src/io/basic_io.h +++ b/src/io/basic_io.h @@ -15,7 +15,7 @@ #pragma once -#include +#include #include diff --git a/src/io/buffer_io.cpp b/src/io/buffer_io.cpp new file mode 100644 index 000000000..08b4e8ff8 --- /dev/null +++ b/src/io/buffer_io.cpp @@ -0,0 +1,91 @@ + +// Copyright 2024-present the vsag project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "buffer_io.h" + +#include +#include + +namespace vsag { + +BufferIO::BufferIO(std::string filename, Allocator* allocator) + : BasicIO(allocator), filepath_(std::move(filename)) { + this->fd_ = open(filepath_.c_str(), O_CREAT | O_RDWR, 0644); +} + +BufferIO::BufferIO(const BufferIOParameterPtr& io_param, const IndexCommonParam& common_param) + : BufferIO(io_param->path_, common_param.allocator_.get()){}; + +BufferIO::BufferIO(const IOParamPtr& param, const IndexCommonParam& common_param) + : BufferIO(std::dynamic_pointer_cast(param), common_param){}; + +void +BufferIO::WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset) { + auto ret = pwrite64(this->fd_, data, size, static_cast(offset)); + if (ret != size) { + throw VsagException(ErrorType::INTERNAL_ERROR, + fmt::format("write bytes {} less than {}", ret, size)); + } + if (size + offset > this->size_) { + this->size_ = size + offset; + } +} + +bool +BufferIO::ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const { + if (size == 0) { + return true; + } + auto ret = pread64(this->fd_, data, size, static_cast(offset)); + if (ret != size) { + throw VsagException(ErrorType::INTERNAL_ERROR, + fmt::format("read bytes {} less than {}", ret, size)); + } + return true; +} + +[[nodiscard]] const uint8_t* +BufferIO::DirectReadImpl(uint64_t size, uint64_t offset, bool& need_release) const { + need_release = true; + auto* buf = reinterpret_cast(allocator_->Allocate(size)); + ReadImpl(size, offset, buf); + return buf; +} + +void +BufferIO::ReleaseImpl(const uint8_t* data) const { + auto* ptr = const_cast(data); + allocator_->Deallocate(ptr); +} + +bool +BufferIO::MultiReadImpl(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const { + bool ret = true; + for (uint64_t i = 0; i < count; ++i) { + ret &= ReadImpl(sizes[i], offsets[i], datas); + datas += sizes[i]; + } + return ret; +} + +void +BufferIO::PrefetchImpl(uint64_t offset, uint64_t cache_line){}; + +bool +BufferIO::InMemoryImpl() { + return false; +} + +} // namespace vsag diff --git a/src/io/buffer_io.h b/src/io/buffer_io.h index 9cf5ea2f5..87444fcb9 100644 --- a/src/io/buffer_io.h +++ b/src/io/buffer_io.h @@ -15,9 +15,6 @@ #pragma once -#include -#include - #include "basic_io.h" #include "buffer_io_parameter.h" #include "index/index_common_param.h" @@ -26,16 +23,11 @@ namespace vsag { class BufferIO : public BasicIO { public: - BufferIO(std::string filename, Allocator* allocator) - : BasicIO(allocator), filepath_(std::move(filename)) { - this->fd_ = open(filepath_.c_str(), O_CREAT | O_RDWR, 0644); - } + BufferIO(std::string filename, Allocator* allocator); - explicit BufferIO(const BufferIOParameterPtr& io_param, const IndexCommonParam& common_param) - : BufferIO(io_param->path_, common_param.allocator_.get()){}; + explicit BufferIO(const BufferIOParameterPtr& io_param, const IndexCommonParam& common_param); - explicit BufferIO(const IOParamPtr& param, const IndexCommonParam& common_param) - : BufferIO(std::dynamic_pointer_cast(param), common_param){}; + explicit BufferIO(const IOParamPtr& param, const IndexCommonParam& common_param); ~BufferIO() override { close(this->fd_); @@ -43,62 +35,26 @@ class BufferIO : public BasicIO { std::filesystem::remove(this->filepath_); } - inline void - WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset) { - auto ret = pwrite64(this->fd_, data, size, offset); - if (ret != size) { - throw VsagException(ErrorType::INTERNAL_ERROR, - fmt::format("write bytes {} less than {}", ret, size)); - } - if (size + offset > this->size_) { - this->size_ = size + offset; - } - } + void + WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset); - inline bool - ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const { - if (size == 0) { - return true; - } - auto ret = pread64(this->fd_, data, size, offset); - if (ret != size) { - throw VsagException(ErrorType::INTERNAL_ERROR, - fmt::format("read bytes {} less than {}", ret, size)); - } - return true; - } + bool + ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const; - [[nodiscard]] inline const uint8_t* - DirectReadImpl(uint64_t size, uint64_t offset, bool& need_release) const { - need_release = true; - auto* buf = reinterpret_cast(allocator_->Allocate(size)); - ReadImpl(size, offset, buf); - return buf; - } + [[nodiscard]] const uint8_t* + DirectReadImpl(uint64_t size, uint64_t offset, bool& need_release) const; - inline void - ReleaseImpl(const uint8_t* data) const { - auto ptr = const_cast(data); - allocator_->Deallocate(ptr); - } + void + ReleaseImpl(const uint8_t* data) const; - inline bool - MultiReadImpl(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const { - bool ret = true; - for (uint64_t i = 0; i < count; ++i) { - ret &= ReadImpl(sizes[i], offsets[i], datas); - datas += sizes[i]; - } - return ret; - } + bool + MultiReadImpl(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const; - inline void - PrefetchImpl(uint64_t offset, uint64_t cache_line = 64){}; + void + PrefetchImpl(uint64_t offset, uint64_t cache_line = 64); - static inline bool - InMemoryImpl() { - return false; - } + static bool + InMemoryImpl(); private: std::string filepath_{}; diff --git a/src/io/buffer_io_test.cpp b/src/io/buffer_io_test.cpp index 6d5f95463..2ca2300bd 100644 --- a/src/io/buffer_io_test.cpp +++ b/src/io/buffer_io_test.cpp @@ -19,7 +19,7 @@ #include #include "basic_io_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/io/io_context.h b/src/io/io_context.h index 5c1693d45..8edc90bbb 100644 --- a/src/io/io_context.h +++ b/src/io/io_context.h @@ -20,7 +20,7 @@ #include "utils/resource_object_pool.h" namespace vsag { -class IOContext : ResourceObject { +class IOContext : public ResourceObject { public: IOContext() { memset(&ctx_, 0, sizeof(ctx_)); diff --git a/src/io/memory_block_io.cpp b/src/io/memory_block_io.cpp new file mode 100644 index 000000000..2cb54093d --- /dev/null +++ b/src/io/memory_block_io.cpp @@ -0,0 +1,156 @@ + +// Copyright 2024-present the vsag project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "memory_block_io.h" + +#include +#include + +#include "common.h" +#include "index/index_common_param.h" +#include "inner_string_params.h" +#include "prefetch.h" + +namespace vsag { + +MemoryBlockIO::MemoryBlockIO(Allocator* allocator, uint64_t block_size) + : BasicIO(allocator), + block_size_(MemoryBlockIOParameter::NearestPowerOfTwo(block_size)), + blocks_(0, allocator) { + this->update_by_block_size(); +} + +MemoryBlockIO::MemoryBlockIO(const MemoryBlockIOParamPtr& param, + const IndexCommonParam& common_param) + : MemoryBlockIO(common_param.allocator_.get(), param->block_size_) { +} + +MemoryBlockIO::MemoryBlockIO(const IOParamPtr& param, const IndexCommonParam& common_param) + : MemoryBlockIO(std::dynamic_pointer_cast(param), common_param) { +} + +MemoryBlockIO::~MemoryBlockIO() { + for (auto* block : blocks_) { + this->allocator_->Deallocate(block); + } +} + +void +MemoryBlockIO::WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset) { + check_and_realloc(size + offset); + uint64_t cur_size = 0; + auto start_no = offset >> block_bit_; + auto start_off = offset & in_block_mask_; + auto max_size = block_size_ - start_off; + while (cur_size < size) { + uint8_t* cur_write = blocks_[start_no] + start_off; + auto cur_length = std::min(size - cur_size, max_size); + memcpy(cur_write, data + cur_size, cur_length); + cur_size += cur_length; + max_size = block_size_; + ++start_no; + start_off = 0; + } + if (size + offset > this->size_) { + this->size_ = size + offset; + } +} + +bool +MemoryBlockIO::ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const { + bool ret = check_valid_offset(size + offset); + if (ret) { + uint64_t cur_size = 0; + auto start_no = offset >> block_bit_; + auto start_off = offset & in_block_mask_; + auto max_size = block_size_ - start_off; + while (cur_size < size) { + const uint8_t* cur_read = blocks_[start_no] + start_off; + auto cur_length = std::min(size - cur_size, max_size); + memcpy(data + cur_size, cur_read, cur_length); + cur_size += cur_length; + max_size = block_size_; + ++start_no; + start_off = 0; + } + } + return ret; +} + +const uint8_t* +MemoryBlockIO::DirectReadImpl(uint64_t size, uint64_t offset, bool& need_release) const { + if (check_valid_offset(size + offset)) { + if (check_in_one_block(offset, size + offset)) { + need_release = false; + return this->get_data_ptr(offset); + } + need_release = true; + auto* ptr = reinterpret_cast(this->allocator_->Allocate(size)); + this->ReadImpl(size, offset, ptr); + return ptr; + } + return nullptr; +} +bool +MemoryBlockIO::MultiReadImpl(uint8_t* datas, + uint64_t* sizes, + uint64_t* offsets, + uint64_t count) const { + bool ret = true; + for (uint64_t i = 0; i < count; ++i) { + ret &= this->ReadImpl(sizes[i], offsets[i], datas); + datas += sizes[i]; + } + return ret; +} +void +MemoryBlockIO::PrefetchImpl(uint64_t offset, uint64_t cache_line) { + PrefetchLines(get_data_ptr(offset), cache_line); +} + +void +MemoryBlockIO::check_and_realloc(uint64_t size) { + if (size <= (blocks_.size() << block_bit_)) { + return; + } + const uint64_t new_block_count = (size + this->block_size_ - 1) >> block_bit_; + auto cur_block_size = this->blocks_.size(); + this->blocks_.reserve(new_block_count); + while (cur_block_size < new_block_count) { + this->blocks_.emplace_back((uint8_t*)(this->allocator_->Allocate(block_size_))); + ++cur_block_size; + } +} + +static int +countr_zero(uint64_t x) { + if (x == 0) { + return 64; + } + int count = 0; + while ((x & 1) == 0) { + x >>= 1; + ++count; + } + return count; +} + +void +MemoryBlockIO::update_by_block_size() { + this->block_bit_ = countr_zero(this->block_size_); + this->in_block_mask_ = this->block_size_ - 1; +} + +} // namespace vsag diff --git a/src/io/memory_block_io.h b/src/io/memory_block_io.h index d97dad503..34c4fa9ba 100644 --- a/src/io/memory_block_io.h +++ b/src/io/memory_block_io.h @@ -15,100 +15,65 @@ #pragma once -#include -#include -#include -#include -#include - #include "basic_io.h" -#include "common.h" -#include "index/index_common_param.h" -#include "inner_string_params.h" #include "memory_block_io_parameter.h" -#include "prefetch.h" #include "vsag/allocator.h" namespace vsag { +class IndexCommonParam; class MemoryBlockIO : public BasicIO { public: - explicit MemoryBlockIO(Allocator* allocator, uint64_t block_size) - : BasicIO(allocator), - block_size_(MemoryBlockIOParameter::NearestPowerOfTwo(block_size)), - blocks_(0, allocator) { - this->update_by_block_size(); - } + explicit MemoryBlockIO(Allocator* allocator, uint64_t block_size); - explicit MemoryBlockIO(const MemoryBlockIOParamPtr& param, const IndexCommonParam& common_param) - : MemoryBlockIO(common_param.allocator_.get(), param->block_size_){}; + explicit MemoryBlockIO(const MemoryBlockIOParamPtr& param, + const IndexCommonParam& common_param); - explicit MemoryBlockIO(const IOParamPtr& param, const IndexCommonParam& common_param) - : MemoryBlockIO(std::dynamic_pointer_cast(param), common_param){}; + explicit MemoryBlockIO(const IOParamPtr& param, const IndexCommonParam& common_param); - ~MemoryBlockIO() override { - for (auto* block : blocks_) { - this->allocator_->Deallocate(block); - } - } + ~MemoryBlockIO() override; - inline void + void WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset); - inline bool + bool ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const; - [[nodiscard]] inline const uint8_t* + [[nodiscard]] const uint8_t* DirectReadImpl(uint64_t size, uint64_t offset, bool& need_release) const; - inline void + void ReleaseImpl(const uint8_t* data) const { auto ptr = const_cast(data); this->allocator_->Deallocate(ptr); }; - inline bool + bool MultiReadImpl(uint8_t* datas, uint64_t* sizes, uint64_t* offsets, uint64_t count) const; - inline void + void PrefetchImpl(uint64_t offset, uint64_t cache_line = 64); - static inline bool + static bool InMemoryImpl() { return true; } private: - inline int - countr_zero(uint64_t x) { - if (x == 0) - return 64; - - int count = 0; - while ((x & 1) == 0) { - x >>= 1; - ++count; - } - return count; - } + void + update_by_block_size(); - inline void - update_by_block_size() { - block_bit_ = countr_zero(block_size_); - in_block_mask_ = block_size_ - 1; - } - - inline void + void check_and_realloc(uint64_t size); - [[nodiscard]] inline const uint8_t* + [[nodiscard]] const uint8_t* get_data_ptr(uint64_t offset) const { auto block_no = offset >> block_bit_; auto block_off = offset & in_block_mask_; return blocks_[block_no] + block_off; } - [[nodiscard]] inline bool + [[nodiscard]] bool check_in_one_block(uint64_t off1, uint64_t off2) const { return (off1 ^ off2) < block_size_; } @@ -126,93 +91,4 @@ class MemoryBlockIO : public BasicIO { uint64_t in_block_mask_ = (1 << DEFAULT_BLOCK_BIT) - 1; }; - -void -MemoryBlockIO::WriteImpl(const uint8_t* data, uint64_t size, uint64_t offset) { - check_and_realloc(size + offset); - uint64_t cur_size = 0; - auto start_no = offset >> block_bit_; - auto start_off = offset & in_block_mask_; - auto max_size = block_size_ - start_off; - while (cur_size < size) { - uint8_t* cur_write = blocks_[start_no] + start_off; - auto cur_length = std::min(size - cur_size, max_size); - memcpy(cur_write, data + cur_size, cur_length); - cur_size += cur_length; - max_size = block_size_; - ++start_no; - start_off = 0; - } - if (size + offset > this->size_) { - this->size_ = size + offset; - } -} - -bool -MemoryBlockIO::ReadImpl(uint64_t size, uint64_t offset, uint8_t* data) const { - bool ret = check_valid_offset(size + offset); - if (ret) { - uint64_t cur_size = 0; - auto start_no = offset >> block_bit_; - auto start_off = offset & in_block_mask_; - auto max_size = block_size_ - start_off; - while (cur_size < size) { - const uint8_t* cur_read = blocks_[start_no] + start_off; - auto cur_length = std::min(size - cur_size, max_size); - memcpy(data + cur_size, cur_read, cur_length); - cur_size += cur_length; - max_size = block_size_; - ++start_no; - start_off = 0; - } - } - return ret; -} - -const uint8_t* -MemoryBlockIO::DirectReadImpl(uint64_t size, uint64_t offset, bool& need_release) const { - if (check_valid_offset(size + offset)) { - if (check_in_one_block(offset, size + offset)) { - need_release = false; - return this->get_data_ptr(offset); - } else { - need_release = true; - auto* ptr = reinterpret_cast(this->allocator_->Allocate(size)); - this->ReadImpl(size, offset, ptr); - return ptr; - } - } - return nullptr; -} -bool -MemoryBlockIO::MultiReadImpl(uint8_t* datas, - uint64_t* sizes, - uint64_t* offsets, - uint64_t count) const { - bool ret = true; - for (uint64_t i = 0; i < count; ++i) { - ret &= this->ReadImpl(sizes[i], offsets[i], datas); - datas += sizes[i]; - } - return ret; -} -void -MemoryBlockIO::PrefetchImpl(uint64_t offset, uint64_t cache_line) { - PrefetchLines(get_data_ptr(offset), cache_line); -} - -void -MemoryBlockIO::check_and_realloc(uint64_t size) { - if (size <= (blocks_.size() << block_bit_)) { - return; - } - const uint64_t new_block_count = (size + this->block_size_ - 1) >> block_bit_; - auto cur_block_size = this->blocks_.size(); - this->blocks_.reserve(new_block_count); - while (cur_block_size < new_block_count) { - this->blocks_.emplace_back((uint8_t*)(this->allocator_->Allocate(block_size_))); - ++cur_block_size; - } -} - } // namespace vsag diff --git a/src/io/memory_block_io_test.cpp b/src/io/memory_block_io_test.cpp index 49d7e44a3..702f61d3a 100644 --- a/src/io/memory_block_io_test.cpp +++ b/src/io/memory_block_io_test.cpp @@ -19,7 +19,7 @@ #include #include "basic_io_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/io/memory_io_test.cpp b/src/io/memory_io_test.cpp index 6f15b386b..88f5f674d 100644 --- a/src/io/memory_io_test.cpp +++ b/src/io/memory_io_test.cpp @@ -19,7 +19,7 @@ #include #include "basic_io_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/io/mmap_io_test.cpp b/src/io/mmap_io_test.cpp index b4741bf20..03951ea3a 100644 --- a/src/io/mmap_io_test.cpp +++ b/src/io/mmap_io_test.cpp @@ -19,7 +19,7 @@ #include #include "basic_io_test.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" using namespace vsag; diff --git a/src/label_table.h b/src/label_table.h index aa100de0e..66fd6cc44 100644 --- a/src/label_table.h +++ b/src/label_table.h @@ -15,7 +15,7 @@ #pragma once -#include +#include #include diff --git a/src/parameter.h b/src/parameter.h index b06532fd1..d52315a96 100644 --- a/src/parameter.h +++ b/src/parameter.h @@ -16,7 +16,6 @@ #pragma once #include "common.h" -#include "inner_string_params.h" #include "typing.h" namespace vsag { diff --git a/src/quantization/CMakeLists.txt b/src/quantization/CMakeLists.txt index 8b6c59573..2682860bc 100644 --- a/src/quantization/CMakeLists.txt +++ b/src/quantization/CMakeLists.txt @@ -12,6 +12,11 @@ set (QUANTIZER_SRC rabitq_quantization/rabitq_quantizer_parameter.cpp product_quantization/product_quantizer_parameter.cpp product_quantization/pq_fastscan_quantizer_parameter.cpp + product_quantization/pq_fastscan_quantizer.cpp + product_quantization/product_quantizer.cpp ) add_library (quantizer OBJECT ${QUANTIZER_SRC}) +target_link_libraries(quantizer PUBLIC fmt::fmt coverage_config) + +maybe_add_dependencies (quantizer spdlog openblas mkl) diff --git a/src/quantization/fp32_quantizer_test.cpp b/src/quantization/fp32_quantizer_test.cpp index dc1a73666..543178918 100644 --- a/src/quantization/fp32_quantizer_test.cpp +++ b/src/quantization/fp32_quantizer_test.cpp @@ -18,10 +18,10 @@ #include #include -#include "default_allocator.h" #include "fixtures.h" +#include "impl/allocator/default_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "quantizer_test.h" -#include "safe_allocator.h" using namespace vsag; diff --git a/src/quantization/product_quantization/pq_fastscan_quantizer.cpp b/src/quantization/product_quantization/pq_fastscan_quantizer.cpp new file mode 100644 index 000000000..8adc186d9 --- /dev/null +++ b/src/quantization/product_quantization/pq_fastscan_quantizer.cpp @@ -0,0 +1,369 @@ + +// Copyright 2024-present the vsag project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "pq_fastscan_quantizer.h" + +#include + +#include "impl/kmeans_cluster.h" +#include "index/index_common_param.h" +#include "pq_fastscan_quantizer_parameter.h" +#include "prefetch.h" +#include "quantization/quantizer.h" +#include "quantization/scalar_quantization/scalar_quantization_trainer.h" +#include "simd/fp32_simd.h" +#include "simd/normalize.h" +#include "simd/pqfs_simd.h" + +namespace vsag { + +template +PQFastScanQuantizer::PQFastScanQuantizer(int dim, int64_t pq_dim, Allocator* allocator) + : Quantizer>(dim, allocator), + pq_dim_(pq_dim), + codebooks_(allocator) { + if (dim % pq_dim != 0) { + throw VsagException( + ErrorType::INVALID_ARGUMENT, + fmt::format("pq_dim({}) does not divide evenly into dim({})", pq_dim, dim)); + } + this->code_size_ = (this->pq_dim_ + 1) / 2; + this->subspace_dim_ = this->dim_ / pq_dim; + this->metric_ = metric; + codebooks_.resize(this->dim_ * CENTROIDS_PER_SUBSPACE); +} + +template +PQFastScanQuantizer::PQFastScanQuantizer(const PQFastScanQuantizerParamPtr& param, + const IndexCommonParam& common_param) + : PQFastScanQuantizer( + common_param.dim_, param->pq_dim_, common_param.allocator_.get()) { +} + +template +PQFastScanQuantizer::PQFastScanQuantizer(const QuantizerParamPtr& param, + const IndexCommonParam& common_param) + : PQFastScanQuantizer(std::dynamic_pointer_cast(param), + common_param) { +} + +template +bool +PQFastScanQuantizer::TrainImpl(const vsag::DataType* data, uint64_t count) { + if (this->is_trained_) { + return true; + } + count = std::min(count, 65536UL); + Vector slice(this->allocator_); + slice.resize(count * subspace_dim_); + Vector norm_data(this->allocator_); + const float* train_data = data; + if constexpr (metric == MetricType::METRIC_TYPE_COSINE) { + norm_data.resize(count * this->dim_); + for (int64_t i = 0; i < count; ++i) { + Normalize(data + i * this->dim_, norm_data.data() + i * this->dim_, this->dim_); + } + train_data = norm_data.data(); + } + + for (int64_t i = 0; i < pq_dim_; ++i) { + for (int64_t j = 0; j < count; ++j) { + memcpy(slice.data() + j * subspace_dim_, + train_data + j * this->dim_ + i * subspace_dim_, + subspace_dim_ * sizeof(float)); + } + KMeansCluster cluster(subspace_dim_, this->allocator_); + cluster.Run(CENTROIDS_PER_SUBSPACE, slice.data(), count); + memcpy(this->codebooks_.data() + i * CENTROIDS_PER_SUBSPACE * subspace_dim_, + cluster.k_centroids_, + CENTROIDS_PER_SUBSPACE * subspace_dim_ * sizeof(float)); + } + + this->is_trained_ = true; + return true; +} + +template +bool +PQFastScanQuantizer::EncodeOneImpl(const DataType* data, uint8_t* codes) { + const DataType* cur = data; + Vector tmp(this->allocator_); + if constexpr (metric == MetricType::METRIC_TYPE_COSINE) { + tmp.resize(this->dim_); + Normalize(data, tmp.data(), this->dim_); + cur = tmp.data(); + } + memset(codes, 0, this->code_size_); + for (int i = 0; i < pq_dim_; ++i) { + // TODO(LHT): use blas + float nearest_dis = std::numeric_limits::max(); + uint8_t nearest_id = 0; + const float* query = cur + i * subspace_dim_; + const float* base = this->codebooks_.data() + i * subspace_dim_ * CENTROIDS_PER_SUBSPACE; + for (int j = 0; j < CENTROIDS_PER_SUBSPACE; ++j) { + float dist = FP32ComputeL2Sqr(query, base + j * subspace_dim_, subspace_dim_); + if (dist < nearest_dis) { + nearest_dis = dist; + nearest_id = static_cast(j); + } + } + if (i % 2 == 1) { + nearest_id <<= 4L; + } + codes[i / 2] |= nearest_id; + } + return true; +} + +template +bool +PQFastScanQuantizer::EncodeBatchImpl(const DataType* data, uint8_t* codes, uint64_t count) { + for (uint64_t i = 0; i < count; ++i) { + this->EncodeOneImpl(data + i * this->dim_, codes + i * this->code_size_); + } + return true; +} + +template +bool +PQFastScanQuantizer::DecodeBatchImpl(const uint8_t* codes, DataType* data, uint64_t count) { + for (uint64_t i = 0; i < count; ++i) { + this->DecodeOneImpl(codes + i * this->code_size_, data + i * this->dim_); + } + return true; +} + +template +bool +PQFastScanQuantizer::DecodeOneImpl(const uint8_t* codes, DataType* data) { + for (int i = 0; i < pq_dim_; ++i) { + auto idx = codes[i / 2]; + if (i % 2 == 0) { + idx &= 0x0F; + } else { + idx >>= 4L; + } + memcpy(data + i * subspace_dim_, + this->get_codebook_data(i, idx), + subspace_dim_ * sizeof(float)); + } + return true; +} + +template +inline float +PQFastScanQuantizer::ComputeImpl(const uint8_t* codes1, const uint8_t* codes2) { + throw VsagException(ErrorType::INTERNAL_ERROR, "PQFastScan doesn't support ComputeCodes"); +} + +template +void +PQFastScanQuantizer::ComputeDistImpl(Computer& computer, + const uint8_t* codes, + float* dists) const { + throw VsagException(ErrorType::INTERNAL_ERROR, + "PQFastScan doesn't support ComputeDist, only support ComputeBatchDist"); +} + +template +void +PQFastScanQuantizer::ScanBatchDistImpl(Computer>& computer, + uint64_t count, + const uint8_t* codes, + float* dists) const { + auto* sq_info = + reinterpret_cast(computer.buf_ + this->pq_dim_ * CENTROIDS_PER_SUBSPACE); + auto diff = sq_info[0]; + auto lower = sq_info[1]; + auto map_int32_to_float = [&](const int32_t* from, float* to, int64_t map_count) { + for (int j = 0; j < map_count; ++j) { + to[j] = static_cast(from[j]) / 255.0F * diff + lower; + if constexpr (metric == MetricType::METRIC_TYPE_COSINE or + metric == MetricType::METRIC_TYPE_IP) { + to[j] = 1.0F - to[j]; + } + } + }; + + uint64_t block_count = count / BLOCK_SIZE_PACKAGE; + Vector tmp_dist(BLOCK_SIZE_PACKAGE, 0, this->allocator_); + for (int64_t i = 0; i < block_count; ++i) { + PQFastScanLookUp32(computer.buf_, codes, this->pq_dim_, tmp_dist.data()); + map_int32_to_float(tmp_dist.data(), dists, BLOCK_SIZE_PACKAGE); + codes += BLOCK_SIZE_PACKAGE * this->code_size_; + dists += BLOCK_SIZE_PACKAGE; + memset(tmp_dist.data(), 0, BLOCK_SIZE_PACKAGE * sizeof(int32_t)); + } + + if (count > block_count * BLOCK_SIZE_PACKAGE) { + PQFastScanLookUp32(computer.buf_, codes, this->pq_dim_, tmp_dist.data()); + map_int32_to_float(tmp_dist.data(), dists, count - block_count * BLOCK_SIZE_PACKAGE); + } +} + +template +void +PQFastScanQuantizer::SerializeImpl(StreamWriter& writer) { + StreamWriter::WriteObj(writer, this->pq_dim_); + StreamWriter::WriteObj(writer, this->subspace_dim_); + StreamWriter::WriteVector(writer, this->codebooks_); +} + +template +void +PQFastScanQuantizer::DeserializeImpl(StreamReader& reader) { + StreamReader::ReadObj(reader, this->pq_dim_); + StreamReader::ReadObj(reader, this->subspace_dim_); + StreamReader::ReadVector(reader, this->codebooks_); +} + +template +void +PQFastScanQuantizer::ReleaseComputerImpl( + Computer>& computer) const { + this->allocator_->Deallocate(computer.buf_); +} + +template +void +PQFastScanQuantizer::Package32(const uint8_t* codes, + uint8_t* packaged_codes, + int64_t valid_size) const { + constexpr int32_t mapper[32] = {0, 16, 8, 24, 1, 17, 9, 25, 2, 18, 10, 26, 3, 19, 11, 27, + 4, 20, 12, 28, 5, 21, 13, 29, 6, 22, 14, 30, 7, 23, 15, 31}; + if (valid_size == -1) { + valid_size = BLOCK_SIZE_PACKAGE; + } + + auto get_code = [&](int64_t vector_index, int64_t space_index) -> uint8_t { + if (vector_index >= valid_size) { + return 0; + } + uint8_t code = codes[vector_index * this->code_size_ + space_index / 2]; + if (space_index % 2 == 0) { + return code & 0x0F; + } + return code >> 4L; + }; + memset(packaged_codes, 0, this->code_size_ * BLOCK_SIZE_PACKAGE); + for (int i = 0; i < this->pq_dim_; ++i) { + for (int j = 0; j < BLOCK_SIZE_PACKAGE; ++j) { + auto code = get_code(mapper[j], i); + if (j % 2 == 1) { + code <<= 4L; + } + packaged_codes[i * BLOCK_SIZE_PACKAGE / 2 + j / 2] |= code; + } + } +} + +template +void +PQFastScanQuantizer::Unpack32(const uint8_t* packaged_codes, uint8_t* codes) const { + constexpr int32_t mapper[32] = {0, 16, 8, 24, 1, 17, 9, 25, 2, 18, 10, 26, 3, 19, 11, 27, + 4, 20, 12, 28, 5, 21, 13, 29, 6, 22, 14, 30, 7, 23, 15, 31}; + + for (int64_t i = 0; i < this->pq_dim_; ++i) { + for (int64_t j = 0; j < BLOCK_SIZE_PACKAGE; ++j) { + int64_t block_base = i * (BLOCK_SIZE_PACKAGE / 2) + (j / 2); + uint8_t byte = packaged_codes[block_base]; + + uint8_t code; + if (j % 2 == 0) { + code = byte & 0x0F; + } else { + code = byte >> 4; + } + int64_t vector_index = mapper[j]; + + int64_t code_offset = vector_index * this->code_size_ + (i / 2); + + if (i % 2 == 0) { + codes[code_offset] = (codes[code_offset] & 0xF0) | code; + } else { + codes[code_offset] = (codes[code_offset] & 0x0F) | (code << 4); + } + } + } +} + +template +void +PQFastScanQuantizer::ProcessQueryImpl(const DataType* query, + Computer& computer) const { + try { + const float* cur_query = query; + Vector norm_vec(this->allocator_); + if constexpr (metric == MetricType::METRIC_TYPE_COSINE) { + norm_vec.resize(this->dim_); + Normalize(query, norm_vec.data(), this->dim_); + cur_query = norm_vec.data(); + } + Vector lookup_table(this->pq_dim_ * CENTROIDS_PER_SUBSPACE, this->allocator_); + computer.buf_ = reinterpret_cast(this->allocator_->Allocate( + this->pq_dim_ * CENTROIDS_PER_SUBSPACE * sizeof(uint8_t) + 2 * sizeof(float))); + for (int i = 0; i < pq_dim_; ++i) { + const auto* per_query = cur_query + i * subspace_dim_; + const auto* per_code_book = get_codebook_data(i, 0); + auto* per_result = lookup_table.data() + i * CENTROIDS_PER_SUBSPACE; + if constexpr (metric == MetricType::METRIC_TYPE_IP or + metric == MetricType::METRIC_TYPE_COSINE) { + cblas_sgemv(CblasRowMajor, + CblasNoTrans, + CENTROIDS_PER_SUBSPACE, + subspace_dim_, + 1.0F, + per_code_book, + subspace_dim_, + per_query, + 1, + 0.0F, + per_result, + 1); + } else if constexpr (metric == MetricType::METRIC_TYPE_L2SQR) { + // TODO(LHT): use blas opt + for (int64_t j = 0; j < CENTROIDS_PER_SUBSPACE; ++j) { + per_result[j] = FP32ComputeL2Sqr( + per_query, per_code_book + j * subspace_dim_, subspace_dim_); + } + } + } + + ScalarQuantizationTrainer trainer(CENTROIDS_PER_SUBSPACE, 8); + float upper; + float lower; + trainer.TrainUniform( + lookup_table.data(), pq_dim_, upper, lower, false, SQTrainMode::CLASSIC); + auto diff = upper - lower; + int64_t j = 0; + for (; j < this->pq_dim_ * CENTROIDS_PER_SUBSPACE; ++j) { + computer.buf_[j] = (lookup_table[j] - lower) / diff * 255; + } + auto* sq_info = reinterpret_cast(computer.buf_ + j); + sq_info[0] = diff; + sq_info[1] = lower; + } catch (const std::bad_alloc& e) { + if (computer.buf_ != nullptr) { + this->allocator_->Deallocate(computer.buf_); + } + computer.buf_ = nullptr; + throw VsagException(ErrorType::NO_ENOUGH_MEMORY, "bad alloc when init computer buf"); + } +} + +template class PQFastScanQuantizer; +template class PQFastScanQuantizer; +template class PQFastScanQuantizer; +} // namespace vsag diff --git a/src/quantization/product_quantization/pq_fastscan_quantizer.h b/src/quantization/product_quantization/pq_fastscan_quantizer.h index 75a288710..d223dc793 100644 --- a/src/quantization/product_quantization/pq_fastscan_quantizer.h +++ b/src/quantization/product_quantization/pq_fastscan_quantizer.h @@ -15,24 +15,16 @@ #pragma once -#include - #include #include #include #include #include -#include "impl/kmeans_cluster.h" #include "index/index_common_param.h" #include "inner_string_params.h" #include "pq_fastscan_quantizer_parameter.h" -#include "prefetch.h" #include "quantization/quantizer.h" -#include "quantization/scalar_quantization/scalar_quantization_trainer.h" -#include "simd/fp32_simd.h" -#include "simd/normalize.h" -#include "simd/pqfs_simd.h" namespace vsag { @@ -46,7 +38,7 @@ class PQFastScanQuantizer : public Quantizer> { PQFastScanQuantizer(const QuantizerParamPtr& param, const IndexCommonParam& common_param); - ~PQFastScanQuantizer() = default; + ~PQFastScanQuantizer() override = default; bool TrainImpl(const DataType* data, uint64_t count); @@ -63,30 +55,30 @@ class PQFastScanQuantizer : public Quantizer> { bool DecodeBatchImpl(const uint8_t* codes, DataType* data, uint64_t count); - inline float + float ComputeImpl(const uint8_t* codes1, const uint8_t* codes2); - inline void + void ProcessQueryImpl(const DataType* query, Computer& computer) const; - inline void + void ComputeDistImpl(Computer& computer, const uint8_t* codes, float* dists) const; - inline void + void ScanBatchDistImpl(Computer>& computer, uint64_t count, const uint8_t* codes, float* dists) const; - inline void + void SerializeImpl(StreamWriter& writer); - inline void + void DeserializeImpl(StreamReader& reader); - inline void + void ReleaseComputerImpl(Computer>& computer) const; [[nodiscard]] inline std::string @@ -119,337 +111,4 @@ class PQFastScanQuantizer : public Quantizer> { Vector codebooks_; }; -template -PQFastScanQuantizer::PQFastScanQuantizer(int dim, int64_t pq_dim, Allocator* allocator) - : Quantizer>(dim, allocator), - pq_dim_(pq_dim), - codebooks_(allocator) { - if (dim % pq_dim != 0) { - throw VsagException( - ErrorType::INVALID_ARGUMENT, - fmt::format("pq_dim({}) does not divide evenly into dim({})", pq_dim, dim)); - } - this->code_size_ = (this->pq_dim_ + 1) / 2; - this->subspace_dim_ = this->dim_ / pq_dim; - this->metric_ = metric; - codebooks_.resize(this->dim_ * CENTROIDS_PER_SUBSPACE); -} - -template -PQFastScanQuantizer::PQFastScanQuantizer(const PQFastScanQuantizerParamPtr& param, - const IndexCommonParam& common_param) - : PQFastScanQuantizer( - common_param.dim_, param->pq_dim_, common_param.allocator_.get()) { -} - -template -PQFastScanQuantizer::PQFastScanQuantizer(const QuantizerParamPtr& param, - const IndexCommonParam& common_param) - : PQFastScanQuantizer(std::dynamic_pointer_cast(param), - common_param) { -} - -template -bool -PQFastScanQuantizer::TrainImpl(const vsag::DataType* data, uint64_t count) { - if (this->is_trained_) { - return true; - } - count = std::min(count, 65536UL); - Vector slice(this->allocator_); - slice.resize(count * subspace_dim_); - Vector norm_data(this->allocator_); - const float* train_data = data; - if constexpr (metric == MetricType::METRIC_TYPE_COSINE) { - norm_data.resize(count * this->dim_); - for (int64_t i = 0; i < count; ++i) { - Normalize(data + i * this->dim_, norm_data.data() + i * this->dim_, this->dim_); - } - train_data = norm_data.data(); - } - - for (int64_t i = 0; i < pq_dim_; ++i) { - for (int64_t j = 0; j < count; ++j) { - memcpy(slice.data() + j * subspace_dim_, - train_data + j * this->dim_ + i * subspace_dim_, - subspace_dim_ * sizeof(float)); - } - KMeansCluster cluster(subspace_dim_, this->allocator_); - cluster.Run(CENTROIDS_PER_SUBSPACE, slice.data(), count); - memcpy(this->codebooks_.data() + i * CENTROIDS_PER_SUBSPACE * subspace_dim_, - cluster.k_centroids_, - CENTROIDS_PER_SUBSPACE * subspace_dim_ * sizeof(float)); - } - - this->is_trained_ = true; - return true; -} - -template -bool -PQFastScanQuantizer::EncodeOneImpl(const DataType* data, uint8_t* codes) { - const DataType* cur = data; - Vector tmp(this->allocator_); - if constexpr (metric == MetricType::METRIC_TYPE_COSINE) { - tmp.resize(this->dim_); - Normalize(data, tmp.data(), this->dim_); - cur = tmp.data(); - } - memset(codes, 0, this->code_size_); - for (int i = 0; i < pq_dim_; ++i) { - // TODO(LHT): use blas - float nearest_dis = std::numeric_limits::max(); - uint8_t nearest_id = 0; - const float* query = cur + i * subspace_dim_; - const float* base = this->codebooks_.data() + i * subspace_dim_ * CENTROIDS_PER_SUBSPACE; - for (int j = 0; j < CENTROIDS_PER_SUBSPACE; ++j) { - float dist = FP32ComputeL2Sqr(query, base + j * subspace_dim_, subspace_dim_); - if (dist < nearest_dis) { - nearest_dis = dist; - nearest_id = static_cast(j); - } - } - if (i % 2 == 1) { - nearest_id <<= 4L; - } - codes[i / 2] |= nearest_id; - } - return true; -} - -template -bool -PQFastScanQuantizer::EncodeBatchImpl(const DataType* data, uint8_t* codes, uint64_t count) { - for (uint64_t i = 0; i < count; ++i) { - this->EncodeOneImpl(data + i * this->dim_, codes + i * this->code_size_); - } - return true; -} - -template -bool -PQFastScanQuantizer::DecodeBatchImpl(const uint8_t* codes, DataType* data, uint64_t count) { - for (uint64_t i = 0; i < count; ++i) { - this->DecodeOneImpl(codes + i * this->code_size_, data + i * this->dim_); - } - return true; -} - -template -bool -PQFastScanQuantizer::DecodeOneImpl(const uint8_t* codes, DataType* data) { - for (int i = 0; i < pq_dim_; ++i) { - auto idx = codes[i / 2]; - if (i % 2 == 0) { - idx &= 0x0F; - } else { - idx >>= 4L; - } - memcpy(data + i * subspace_dim_, - this->get_codebook_data(i, idx), - subspace_dim_ * sizeof(float)); - } - return true; -} - -template -inline float -PQFastScanQuantizer::ComputeImpl(const uint8_t* codes1, const uint8_t* codes2) { - throw VsagException(ErrorType::INTERNAL_ERROR, "PQFastScan doesn't support ComputeCodes"); -} - -template -void -PQFastScanQuantizer::ProcessQueryImpl(const DataType* query, - Computer& computer) const { - try { - const float* cur_query = query; - Vector norm_vec(this->allocator_); - if constexpr (metric == MetricType::METRIC_TYPE_COSINE) { - norm_vec.resize(this->dim_); - Normalize(query, norm_vec.data(), this->dim_); - cur_query = norm_vec.data(); - } - Vector lookup_table(this->pq_dim_ * CENTROIDS_PER_SUBSPACE, this->allocator_); - computer.buf_ = reinterpret_cast(this->allocator_->Allocate( - this->pq_dim_ * CENTROIDS_PER_SUBSPACE * sizeof(uint8_t) + 2 * sizeof(float))); - for (int i = 0; i < pq_dim_; ++i) { - const auto* per_query = cur_query + i * subspace_dim_; - const auto* per_code_book = get_codebook_data(i, 0); - auto* per_result = lookup_table.data() + i * CENTROIDS_PER_SUBSPACE; - if constexpr (metric == MetricType::METRIC_TYPE_IP or - metric == MetricType::METRIC_TYPE_COSINE) { - cblas_sgemv(CblasRowMajor, - CblasNoTrans, - CENTROIDS_PER_SUBSPACE, - subspace_dim_, - 1.0F, - per_code_book, - subspace_dim_, - per_query, - 1, - 0.0F, - per_result, - 1); - } else if constexpr (metric == MetricType::METRIC_TYPE_L2SQR) { - // TODO(LHT): use blas opt - for (int64_t j = 0; j < CENTROIDS_PER_SUBSPACE; ++j) { - per_result[j] = FP32ComputeL2Sqr( - per_query, per_code_book + j * subspace_dim_, subspace_dim_); - } - } - } - - ScalarQuantizationTrainer trainer(CENTROIDS_PER_SUBSPACE, 8); - float upper, lower; - trainer.TrainUniform( - lookup_table.data(), pq_dim_, upper, lower, false, SQTrainMode::CLASSIC); - auto diff = upper - lower; - int64_t j = 0; - for (; j < this->pq_dim_ * CENTROIDS_PER_SUBSPACE; ++j) { - computer.buf_[j] = (lookup_table[j] - lower) / diff * 255; - } - auto* sq_info = reinterpret_cast(computer.buf_ + j); - sq_info[0] = diff; - sq_info[1] = lower; - } catch (const std::bad_alloc& e) { - if (computer.buf_ != nullptr) { - this->allocator_->Deallocate(computer.buf_); - } - computer.buf_ = nullptr; - throw VsagException(ErrorType::NO_ENOUGH_MEMORY, "bad alloc when init computer buf"); - } -} - -template -void -PQFastScanQuantizer::ComputeDistImpl(Computer& computer, - const uint8_t* codes, - float* dists) const { - throw VsagException(ErrorType::INTERNAL_ERROR, - "PQFastScan doesn't support ComputeDist, only support ComputeBatchDist"); -} - -template -void -PQFastScanQuantizer::ScanBatchDistImpl(Computer>& computer, - uint64_t count, - const uint8_t* codes, - float* dists) const { - auto* sq_info = - reinterpret_cast(computer.buf_ + this->pq_dim_ * CENTROIDS_PER_SUBSPACE); - auto diff = sq_info[0]; - auto lower = sq_info[1]; - auto map_int32_to_float = [&](int32_t* from, float* to, int64_t map_count) { - for (int j = 0; j < map_count; ++j) { - to[j] = from[j] / 255.0F * diff + lower; - if constexpr (metric == MetricType::METRIC_TYPE_COSINE or - metric == MetricType::METRIC_TYPE_IP) { - to[j] = 1.0F - to[j]; - } - } - }; - - uint64_t block_count = count / BLOCK_SIZE_PACKAGE; - Vector tmp_dist(BLOCK_SIZE_PACKAGE, 0, this->allocator_); - for (int64_t i = 0; i < block_count; ++i) { - PQFastScanLookUp32(computer.buf_, codes, this->pq_dim_, tmp_dist.data()); - map_int32_to_float(tmp_dist.data(), dists, BLOCK_SIZE_PACKAGE); - codes += BLOCK_SIZE_PACKAGE * this->code_size_; - dists += BLOCK_SIZE_PACKAGE; - memset(tmp_dist.data(), 0, BLOCK_SIZE_PACKAGE * sizeof(int32_t)); - } - - if (count > block_count * BLOCK_SIZE_PACKAGE) { - PQFastScanLookUp32(computer.buf_, codes, this->pq_dim_, tmp_dist.data()); - map_int32_to_float(tmp_dist.data(), dists, count - block_count * BLOCK_SIZE_PACKAGE); - } -} - -template -void -PQFastScanQuantizer::SerializeImpl(StreamWriter& writer) { - StreamWriter::WriteObj(writer, this->pq_dim_); - StreamWriter::WriteObj(writer, this->subspace_dim_); - StreamWriter::WriteVector(writer, this->codebooks_); -} - -template -void -PQFastScanQuantizer::DeserializeImpl(StreamReader& reader) { - StreamReader::ReadObj(reader, this->pq_dim_); - StreamReader::ReadObj(reader, this->subspace_dim_); - StreamReader::ReadVector(reader, this->codebooks_); -} - -template -void -PQFastScanQuantizer::ReleaseComputerImpl( - Computer>& computer) const { - this->allocator_->Deallocate(computer.buf_); -} - -template -void -PQFastScanQuantizer::Package32(const uint8_t* codes, - uint8_t* packaged_codes, - int64_t valid_size) const { - constexpr int32_t mapper[32] = {0, 16, 8, 24, 1, 17, 9, 25, 2, 18, 10, 26, 3, 19, 11, 27, - 4, 20, 12, 28, 5, 21, 13, 29, 6, 22, 14, 30, 7, 23, 15, 31}; - if (valid_size == -1) { - valid_size = BLOCK_SIZE_PACKAGE; - } - - auto get_code = [&](int64_t vector_index, int64_t space_index) -> uint8_t { - if (vector_index >= valid_size) { - return 0; - } - uint8_t code = codes[vector_index * this->code_size_ + space_index / 2]; - if (space_index % 2 == 0) { - return code & 0x0F; - } - return code >> 4L; - }; - memset(packaged_codes, 0, this->code_size_ * BLOCK_SIZE_PACKAGE); - for (int i = 0; i < this->pq_dim_; ++i) { - for (int j = 0; j < BLOCK_SIZE_PACKAGE; ++j) { - auto code = get_code(mapper[j], i); - if (j % 2 == 1) { - code <<= 4L; - } - packaged_codes[i * BLOCK_SIZE_PACKAGE / 2 + j / 2] |= code; - } - } -} - -template -void -PQFastScanQuantizer::Unpack32(const uint8_t* packaged_codes, uint8_t* codes) const { - constexpr int32_t mapper[32] = {0, 16, 8, 24, 1, 17, 9, 25, 2, 18, 10, 26, 3, 19, 11, 27, - 4, 20, 12, 28, 5, 21, 13, 29, 6, 22, 14, 30, 7, 23, 15, 31}; - - for (int i = 0; i < this->pq_dim_; ++i) { - for (int j = 0; j < BLOCK_SIZE_PACKAGE; ++j) { - int block_base = i * (BLOCK_SIZE_PACKAGE / 2) + (j / 2); - uint8_t byte = packaged_codes[block_base]; - - uint8_t code; - if (j % 2 == 0) { - code = byte & 0x0F; - } else { - code = byte >> 4; - } - int64_t vector_index = mapper[j]; - - int64_t code_offset = vector_index * this->code_size_ + (i / 2); - - if (i % 2 == 0) { - codes[code_offset] = (codes[code_offset] & 0xF0) | code; - } else { - codes[code_offset] = (codes[code_offset] & 0x0F) | (code << 4); - } - } - } -} - } // namespace vsag diff --git a/src/quantization/product_quantization/pq_fastscan_quantizer_test.cpp b/src/quantization/product_quantization/pq_fastscan_quantizer_test.cpp index eda6ecc19..8fe210d63 100644 --- a/src/quantization/product_quantization/pq_fastscan_quantizer_test.cpp +++ b/src/quantization/product_quantization/pq_fastscan_quantizer_test.cpp @@ -19,8 +19,8 @@ #include #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "quantization/quantizer_test.h" -#include "safe_allocator.h" using namespace vsag; diff --git a/src/quantization/product_quantization/product_quantizer.cpp b/src/quantization/product_quantization/product_quantizer.cpp new file mode 100644 index 000000000..da5a054ae --- /dev/null +++ b/src/quantization/product_quantization/product_quantizer.cpp @@ -0,0 +1,79 @@ + +// Copyright 2024-present the vsag project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "product_quantizer.h" + +#include + +namespace vsag { + +template +void +ProductQuantizer::ProcessQueryImpl(const DataType* query, + Computer& computer) const { + try { + const float* cur_query = query; + Vector norm_vec(this->allocator_); + if constexpr (metric == MetricType::METRIC_TYPE_COSINE) { + norm_vec.resize(this->dim_); + Normalize(query, norm_vec.data(), this->dim_); + cur_query = norm_vec.data(); + } + auto* lookup_table = reinterpret_cast( + this->allocator_->Allocate(this->pq_dim_ * CENTROIDS_PER_SUBSPACE * sizeof(float))); + + for (int i = 0; i < pq_dim_; ++i) { + const auto* per_query = cur_query + i * subspace_dim_; + const auto* per_code_book = get_codebook_data(i, 0); + auto* per_result = lookup_table + i * CENTROIDS_PER_SUBSPACE; + if constexpr (metric == MetricType::METRIC_TYPE_IP or + metric == MetricType::METRIC_TYPE_COSINE) { + cblas_sgemv(CblasRowMajor, + CblasNoTrans, + CENTROIDS_PER_SUBSPACE, + subspace_dim_, + 1.0F, + per_code_book, + subspace_dim_, + per_query, + 1, + 0.0F, + per_result, + 1); + } else if constexpr (metric == MetricType::METRIC_TYPE_L2SQR) { + // TODO(LHT): use blas opt + for (int64_t j = 0; j < CENTROIDS_PER_SUBSPACE; ++j) { + per_result[j] = FP32ComputeL2Sqr( + per_query, per_code_book + j * subspace_dim_, subspace_dim_); + } + } + } + + computer.buf_ = reinterpret_cast(lookup_table); + + } catch (const std::bad_alloc& e) { + if (computer.buf_ != nullptr) { + this->allocator_->Deallocate(computer.buf_); + } + computer.buf_ = nullptr; + throw VsagException(ErrorType::NO_ENOUGH_MEMORY, "bad alloc when init computer buf"); + } +} + +template class ProductQuantizer; +template class ProductQuantizer; +template class ProductQuantizer; + +} // namespace vsag \ No newline at end of file diff --git a/src/quantization/product_quantization/product_quantizer.h b/src/quantization/product_quantization/product_quantizer.h index 95db788ba..acedef482 100644 --- a/src/quantization/product_quantization/product_quantizer.h +++ b/src/quantization/product_quantization/product_quantizer.h @@ -63,7 +63,7 @@ class ProductQuantizer : public Quantizer> { inline float ComputeImpl(const uint8_t* codes1, const uint8_t* codes2); - inline void + void ProcessQueryImpl(const DataType* query, Computer& computer) const; inline void @@ -266,81 +266,6 @@ ProductQuantizer::ComputeImpl(const uint8_t* codes1, const uint8_t* code return dist; } -template -void -ProductQuantizer::ProcessQueryImpl(const DataType* query, - Computer& computer) const { - try { - const float* cur_query = query; - Vector norm_vec(this->allocator_); - if constexpr (metric == MetricType::METRIC_TYPE_COSINE) { - norm_vec.resize(this->dim_); - Normalize(query, norm_vec.data(), this->dim_); - cur_query = norm_vec.data(); - } - auto* lookup_table = reinterpret_cast( - this->allocator_->Allocate(this->pq_dim_ * CENTROIDS_PER_SUBSPACE * sizeof(float))); - if (true) { - for (int i = 0; i < pq_dim_; ++i) { - const auto* per_query = cur_query + i * subspace_dim_; - const auto* per_code_book = get_codebook_data(i, 0); - auto* per_result = lookup_table + i * CENTROIDS_PER_SUBSPACE; - if constexpr (metric == MetricType::METRIC_TYPE_IP or - metric == MetricType::METRIC_TYPE_COSINE) { - cblas_sgemv(CblasRowMajor, - CblasNoTrans, - CENTROIDS_PER_SUBSPACE, - subspace_dim_, - 1.0F, - per_code_book, - subspace_dim_, - per_query, - 1, - 0.0F, - per_result, - 1); - } else if constexpr (metric == MetricType::METRIC_TYPE_L2SQR) { - // TODO(LHT): use blas opt - for (int64_t j = 0; j < CENTROIDS_PER_SUBSPACE; ++j) { - per_result[j] = FP32ComputeL2Sqr( - per_query, per_code_book + j * subspace_dim_, subspace_dim_); - } - } - } - } else { - Vector tmp(this->allocator_); - tmp.resize(this->dim_); - for (int i = 0; i < CENTROIDS_PER_SUBSPACE; ++i) { - if constexpr (metric == MetricType::METRIC_TYPE_IP or - metric == MetricType::METRIC_TYPE_COSINE) { - FP32Mul(reverse_codebooks_.data() + i * this->dim_, - cur_query, - tmp.data(), - this->dim_); - } else if constexpr (metric == MetricType::METRIC_TYPE_L2SQR) { - FP32Sub(reverse_codebooks_.data() + i * this->dim_, - cur_query, - tmp.data(), - this->dim_); - FP32Mul(tmp.data(), tmp.data(), tmp.data(), this->dim_); - } - for (int j = 0; j < pq_dim_; ++j) { - lookup_table[j * CENTROIDS_PER_SUBSPACE + i] = - FP32ReduceAdd(tmp.data() + j * subspace_dim_, subspace_dim_); - } - } - } - computer.buf_ = reinterpret_cast(lookup_table); - - } catch (const std::bad_alloc& e) { - if (computer.buf_ != nullptr) { - this->allocator_->Deallocate(computer.buf_); - } - computer.buf_ = nullptr; - throw VsagException(ErrorType::NO_ENOUGH_MEMORY, "bad alloc when init computer buf"); - } -} - template void ProductQuantizer::ComputeDistImpl(Computer& computer, diff --git a/src/quantization/product_quantization/product_quantizer_test.cpp b/src/quantization/product_quantization/product_quantizer_test.cpp index be5416c8b..59cceb3d3 100644 --- a/src/quantization/product_quantization/product_quantizer_test.cpp +++ b/src/quantization/product_quantization/product_quantizer_test.cpp @@ -19,8 +19,8 @@ #include #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "quantization/quantizer_test.h" -#include "safe_allocator.h" using namespace vsag; diff --git a/src/quantization/quantizer.h b/src/quantization/quantizer.h index 20b2a6d41..49551aed4 100644 --- a/src/quantization/quantizer.h +++ b/src/quantization/quantizer.h @@ -38,7 +38,7 @@ class Quantizer { explicit Quantizer(int dim, Allocator* allocator) : dim_(dim), code_size_(dim * sizeof(DataType)), allocator_(allocator){}; - ~Quantizer() = default; + virtual ~Quantizer() = default; /** * @brief Trains the model using the provided data. diff --git a/src/quantization/quantizer_parameter.cpp b/src/quantization/quantizer_parameter.cpp index da9a3e90a..41d1b3c4b 100644 --- a/src/quantization/quantizer_parameter.cpp +++ b/src/quantization/quantizer_parameter.cpp @@ -15,7 +15,7 @@ #include "quantizer_parameter.h" -#include +#include #include "fp32_quantizer_parameter.h" #include "inner_string_params.h" diff --git a/src/quantization/rabitq_quantization/rabitq_quantizer_test.cpp b/src/quantization/rabitq_quantization/rabitq_quantizer_test.cpp index da3a94d19..6ad178d3e 100644 --- a/src/quantization/rabitq_quantization/rabitq_quantizer_test.cpp +++ b/src/quantization/rabitq_quantization/rabitq_quantizer_test.cpp @@ -19,12 +19,11 @@ #include #include -#include "../scalar_quantization/sq4_uniform_quantizer.h" -#include "default_allocator.h" #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "logger.h" #include "quantization/quantizer_test.h" -#include "safe_allocator.h" +#include "quantization/scalar_quantization/sq4_uniform_quantizer.h" using namespace vsag; diff --git a/src/quantization/scalar_quantization/bf16_quantizer_test.cpp b/src/quantization/scalar_quantization/bf16_quantizer_test.cpp index c210d700d..291f56a14 100644 --- a/src/quantization/scalar_quantization/bf16_quantizer_test.cpp +++ b/src/quantization/scalar_quantization/bf16_quantizer_test.cpp @@ -18,10 +18,10 @@ #include #include -#include "default_allocator.h" #include "fixtures.h" +#include "impl/allocator/default_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "quantization/quantizer_test.h" -#include "safe_allocator.h" using namespace vsag; diff --git a/src/quantization/scalar_quantization/fp16_quantizer_test.cpp b/src/quantization/scalar_quantization/fp16_quantizer_test.cpp index 9a674c6dc..90654f180 100644 --- a/src/quantization/scalar_quantization/fp16_quantizer_test.cpp +++ b/src/quantization/scalar_quantization/fp16_quantizer_test.cpp @@ -19,8 +19,8 @@ #include #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "quantization/quantizer_test.h" -#include "safe_allocator.h" using namespace vsag; diff --git a/src/quantization/scalar_quantization/sq4_quantizer_test.cpp b/src/quantization/scalar_quantization/sq4_quantizer_test.cpp index 8d2253766..6630e5a63 100644 --- a/src/quantization/scalar_quantization/sq4_quantizer_test.cpp +++ b/src/quantization/scalar_quantization/sq4_quantizer_test.cpp @@ -18,10 +18,10 @@ #include #include -#include "default_allocator.h" #include "fixtures.h" +#include "impl/allocator/default_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "quantization/quantizer_test.h" -#include "safe_allocator.h" using namespace vsag; diff --git a/src/quantization/scalar_quantization/sq4_uniform_quantizer_test.cpp b/src/quantization/scalar_quantization/sq4_uniform_quantizer_test.cpp index 0631cf676..cf2af12ca 100644 --- a/src/quantization/scalar_quantization/sq4_uniform_quantizer_test.cpp +++ b/src/quantization/scalar_quantization/sq4_uniform_quantizer_test.cpp @@ -19,8 +19,8 @@ #include #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "quantization/quantizer_test.h" -#include "safe_allocator.h" using namespace vsag; diff --git a/src/quantization/scalar_quantization/sq8_quantizer_test.cpp b/src/quantization/scalar_quantization/sq8_quantizer_test.cpp index 4fc9f4d63..979c788ba 100644 --- a/src/quantization/scalar_quantization/sq8_quantizer_test.cpp +++ b/src/quantization/scalar_quantization/sq8_quantizer_test.cpp @@ -18,10 +18,10 @@ #include #include -#include "default_allocator.h" #include "fixtures.h" +#include "impl/allocator/default_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "quantization/quantizer_test.h" -#include "safe_allocator.h" using namespace vsag; diff --git a/src/quantization/scalar_quantization/sq8_uniform_quantizer_test.cpp b/src/quantization/scalar_quantization/sq8_uniform_quantizer_test.cpp index 08e84c0c4..73ac34228 100644 --- a/src/quantization/scalar_quantization/sq8_uniform_quantizer_test.cpp +++ b/src/quantization/scalar_quantization/sq8_uniform_quantizer_test.cpp @@ -19,8 +19,8 @@ #include #include "fixtures.h" +#include "impl/allocator/safe_allocator.h" #include "quantization/quantizer_test.h" -#include "safe_allocator.h" using namespace vsag; diff --git a/src/resource.cpp b/src/resource.cpp index 7b0295abb..9312bd7bf 100644 --- a/src/resource.cpp +++ b/src/resource.cpp @@ -15,7 +15,7 @@ #include "vsag/resource.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "safe_thread_pool.h" namespace vsag { diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 2515cf507..070d52fef 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -7,5 +7,5 @@ set (STORAGE_SRC ) add_library (storage OBJECT ${STORAGE_SRC}) -target_link_libraries (storage PUBLIC fmt::fmt-header-only) +target_link_libraries (storage PUBLIC fmt::fmt coverage_config) add_dependencies (storage spdlog) diff --git a/src/storage/footer.h b/src/storage/footer.h index 130804d96..db5fe3ea7 100644 --- a/src/storage/footer.h +++ b/src/storage/footer.h @@ -14,7 +14,8 @@ // limitations under the License. #pragma once -#include "fmt/format.h" +#include + #include "nlohmann/json.hpp" #include "stream_reader.h" #include "typing.h" diff --git a/src/storage/stream_reader.cpp b/src/storage/stream_reader.cpp index 192b0741e..1b7850147 100644 --- a/src/storage/stream_reader.cpp +++ b/src/storage/stream_reader.cpp @@ -15,7 +15,7 @@ #include "stream_reader.h" -#include +#include #include #include diff --git a/src/typing.h b/src/typing.h index b62078fbe..07e3c5a05 100644 --- a/src/typing.h +++ b/src/typing.h @@ -19,7 +19,7 @@ #include #include -#include "allocator_wrapper.h" +#include "impl/allocator/allocator_wrapper.h" #include "nlohmann/json.hpp" #include "tsl/robin_map.h" #include "tsl/robin_set.h" diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt new file mode 100644 index 000000000..d70df1fb1 --- /dev/null +++ b/src/utils/CMakeLists.txt @@ -0,0 +1,13 @@ + +set (UTILS_SRC + util_functions.cpp + linear_congruential_generator.cpp + visited_list.cpp + slow_task_timer.cpp + timer.cpp + window_result_queue.cpp +) + +add_library (utils OBJECT ${UTILS_SRC}) +target_link_libraries (utils PUBLIC coverage_config) +add_dependencies (utils spdlog fmt::fmt) diff --git a/src/utils/linear_congruential_generator.cpp b/src/utils/linear_congruential_generator.cpp new file mode 100644 index 000000000..b15305e67 --- /dev/null +++ b/src/utils/linear_congruential_generator.cpp @@ -0,0 +1,33 @@ + +// Copyright 2024-present the vsag project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "linear_congruential_generator.h" + +#include + +namespace vsag { +LinearCongruentialGenerator::LinearCongruentialGenerator() { + auto now = std::chrono::steady_clock::now(); + auto timestamp = + std::chrono::duration_cast(now.time_since_epoch()).count(); + current_ = static_cast(timestamp); +} + +float +LinearCongruentialGenerator::NextFloat() { + current_ = (A * current_ + C) % M; + return static_cast(current_) / static_cast(M); +} +} // namespace vsag diff --git a/src/utils/linear_congruential_generator.h b/src/utils/linear_congruential_generator.h index 38b42f36f..3c70a1f54 100644 --- a/src/utils/linear_congruential_generator.h +++ b/src/utils/linear_congruential_generator.h @@ -15,28 +15,20 @@ #pragma once -#include +#include namespace vsag { class LinearCongruentialGenerator { public: - LinearCongruentialGenerator() { - auto now = std::chrono::steady_clock::now(); - auto timestamp = - std::chrono::duration_cast(now.time_since_epoch()).count(); - current_ = static_cast(timestamp); - } + LinearCongruentialGenerator(); float - NextFloat() { - current_ = (A * current_ + C) % M; - return static_cast(current_) / static_cast(M); - } + NextFloat(); private: unsigned int current_; - static const uint32_t A = 1664525; - static const uint32_t C = 1013904223; - static const uint32_t M = 4294967295; // 2^32 - 1 + static constexpr uint32_t A = 1664525; + static constexpr uint32_t C = 1013904223; + static constexpr uint32_t M = 4294967295; // 2^32 - 1 }; } // namespace vsag diff --git a/src/utils/resource_object_pool.h b/src/utils/resource_object_pool.h index 1b2ac06ac..910fec39e 100644 --- a/src/utils/resource_object_pool.h +++ b/src/utils/resource_object_pool.h @@ -22,8 +22,8 @@ #include #include +#include "impl/allocator/safe_allocator.h" #include "resource_object.h" -#include "safe_allocator.h" #include "typing.h" namespace vsag { diff --git a/src/utils/util_functions.cpp b/src/utils/util_functions.cpp index 247ed52bd..264212cc1 100644 --- a/src/utils/util_functions.cpp +++ b/src/utils/util_functions.cpp @@ -17,6 +17,8 @@ #include +#include "vsag_exception.h" + namespace vsag { std::string @@ -55,7 +57,7 @@ mapping_external_param_to_inner(const JsonType& external_json, } std::tuple -CreateFastDataset(int64_t dim, Allocator* allocator) { +create_fast_dataset(int64_t dim, Allocator* allocator) { auto dataset = Dataset::Make(); dataset->Dim(static_cast(dim))->NumElements(1)->Owner(true, allocator); auto* ids = reinterpret_cast(allocator->Allocate(sizeof(int64_t) * dim)); @@ -143,7 +145,7 @@ split_string(const std::string& str, const char delimiter) { std::stringstream ss(str); std::string token; while (std::getline(ss, token, delimiter)) { - tokens.push_back(token); + tokens.emplace_back(token); } return tokens; } diff --git a/src/utils/util_functions.h b/src/utils/util_functions.h index 9ff81e10e..26b6f991b 100644 --- a/src/utils/util_functions.h +++ b/src/utils/util_functions.h @@ -20,8 +20,6 @@ #include #include "index/index_common_param.h" -#include "logger.h" -#include "spdlog/spdlog.h" #include "vsag/dataset.h" #include "vsag/expected.hpp" #include "vsag_exception.h" @@ -66,7 +64,7 @@ mapping_external_param_to_inner(const JsonType& external_json, JsonType& inner_json); std::tuple -CreateFastDataset(int64_t dim, Allocator* allocator); +create_fast_dataset(int64_t dim, Allocator* allocator); std::vector select_k_numbers(int64_t n, int k); diff --git a/src/utils/visited_list.cpp b/src/utils/visited_list.cpp new file mode 100644 index 000000000..dca1ad6fd --- /dev/null +++ b/src/utils/visited_list.cpp @@ -0,0 +1,41 @@ + +// Copyright 2024-present the vsag project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "visited_list.h" + +#include + +namespace vsag { +VisitedList::VisitedList(InnerIdType max_size, Allocator* allocator) + : max_size_(max_size), allocator_(allocator) { + this->list_ = reinterpret_cast( + allocator_->Allocate((uint64_t)max_size * sizeof(VisitedListType))); + memset(list_, 0, max_size_ * sizeof(VisitedListType)); + tag_ = 1; +} + +VisitedList::~VisitedList() { + allocator_->Deallocate(list_); +} + +void +VisitedList::Reset() { + if (tag_ == std::numeric_limits::max()) { + memset(list_, 0, max_size_ * sizeof(VisitedListType)); + tag_ = 0; + } + ++tag_; +} +} // namespace vsag diff --git a/src/utils/visited_list.h b/src/utils/visited_list.h index f25f328b5..be1109f26 100644 --- a/src/utils/visited_list.h +++ b/src/utils/visited_list.h @@ -15,7 +15,6 @@ #pragma once #include -#include #include "prefetch.h" #include "resource_object.h" @@ -30,41 +29,26 @@ class VisitedList : public ResourceObject { using VisitedListType = uint16_t; public: - explicit VisitedList(InnerIdType max_size, Allocator* allocator) - : max_size_(max_size), allocator_(allocator) { - this->list_ = reinterpret_cast( - allocator_->Allocate((uint64_t)max_size * sizeof(VisitedListType))); - memset(list_, 0, max_size_ * sizeof(VisitedListType)); - tag_ = 1; - } - - ~VisitedList() override { - allocator_->Deallocate(list_); - } + explicit VisitedList(InnerIdType max_size, Allocator* allocator); + ~VisitedList() override; - inline void + void Set(const InnerIdType& id) { this->list_[id] = this->tag_; } - inline bool + [[nodiscard]] bool Get(const InnerIdType& id) { return this->list_[id] == this->tag_; } - inline void + void Prefetch(const InnerIdType& id) { PrefetchLines(this->list_ + id, 64); } void - Reset() override { - if (tag_ == std::numeric_limits::max()) { - memset(list_, 0, max_size_ * sizeof(VisitedListType)); - tag_ = 0; - } - ++tag_; - } + Reset() override; private: Allocator* const allocator_{nullptr}; diff --git a/src/utils/visited_list_test.cpp b/src/utils/visited_list_test.cpp index 8432bf093..9c25361e0 100644 --- a/src/utils/visited_list_test.cpp +++ b/src/utils/visited_list_test.cpp @@ -18,7 +18,7 @@ #include #include "catch2/catch_test_macros.hpp" -#include "default_allocator.h" +#include "impl/allocator/default_allocator.h" using namespace vsag; TEST_CASE("VisitedList Basic Test", "[ut][VisitedList]") { diff --git a/tests/test_fc_visitor.cpp b/tests/test_fc_visitor.cpp index 0102f9fd7..e8bdf6a4d 100644 --- a/tests/test_fc_visitor.cpp +++ b/tests/test_fc_visitor.cpp @@ -22,7 +22,7 @@ #undef EOF #include "attr/expression_visitor.h" -#include "safe_allocator.h" +#include "impl/allocator/safe_allocator.h" #include "vsag_exception.h" using namespace vsag;