risingwavelabs · wenym1 · Apr 29, 2025 · May 22, 2025 · May 6, 2025 · May 6, 2025
diff --git a/Cargo.toml b/Cargo.toml
@@ -380,6 +380,7 @@ debug = "line-tables-only"
 opt-level = 2
 
 [patch.crates-io]
+faiss = { git = "https://github.com/risingwavelabs/faiss-rs.git", rev = "aa605e3" }
 # Patch third-party crates for deterministic simulation.
 getrandom = { git = "https://github.com/madsim-rs/getrandom.git", rev = "e79a7ae" }
 # Don't patch `tokio-stream`, but only use the madsim version for **direct** dependencies.

diff --git a/ci/.env b/ci/.env
@@ -1 +1 @@
-BUILD_ENV_VERSION=v20250509-1
+BUILD_ENV_VERSION=FAISS-v20250506
diff --git a/ci/Dockerfile b/ci/Dockerfile
@@ -16,6 +16,7 @@ RUN apt-get update -yy && \
     python3.12 python3.12-dev \
     golang perl \
     dumb-init \
+    libblas-dev liblapack-dev libomp-dev \
     && rm -rf /var/lib/{apt,dpkg,cache,log}/
 ENV PYO3_PYTHON=python3.12
 

diff --git a/src/storage/Cargo.toml b/src/storage/Cargo.toml
@@ -19,6 +19,7 @@ dyn-clone = "1.0.14"
 either = "1"
 enum-as-inner = "0.6"
 fail = "0.5"
+faiss = { version = "0.12.2-alpha.0", features = ["static"] }
 foyer = { workspace = true }
 futures = { version = "0.3", default-features = false, features = ["alloc"] }
 futures-async-stream = { workspace = true }

diff --git a/src/storage/src/lib.rs b/src/storage/src/lib.rs
@@ -35,6 +35,7 @@
 #![feature(iter_from_coroutine)]
 #![feature(get_mut_unchecked)]
 #![feature(map_try_insert)]
+#![feature(portable_simd)]
 
 pub mod hummock;
 pub mod memory;

diff --git a/src/storage/src/store.rs b/src/storage/src/store.rs
@@ -435,7 +435,7 @@ pub struct VectorNearestOptions {
     pub measure: DistanceMeasurement,
 }
 
-pub trait OnNearestItemFn<O> = OnNearestItem<O> + Send + 'static;
+pub trait OnNearestItemFn<O> = OnNearestItem<O> + Send + Sync + 'static;
 
 pub trait StateStoreReadVector: StaticSendSync {
     fn nearest<O: Send + 'static>(

diff --git a/src/storage/src/vector/distance.rs b/src/storage/src/vector/distance.rs
@@ -12,6 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::simd::Simd;
+use std::simd::num::SimdFloat;
+
 use crate::vector::{
     MeasureDistance, MeasureDistanceBuilder, VectorDistance, VectorItem, VectorRef,
 };
@@ -155,14 +158,42 @@ impl MeasureDistanceBuilder for InnerProductDistance {
     }
 }
 
+/// Scalar reference implementation of the inner-product distance.
+///
+/// Returns the negated sum of the element-wise products of `first` and `second`.
+/// Panics if the two vectors do not have the same length.
+#[cfg_attr(not(test), expect(dead_code))]
+fn inner_product_trivial(first: VectorRef<'_>, second: VectorRef<'_>) -> VectorDistance {
+    assert_eq!(first.0.len(), second.0.len());
+    let dot = first
+        .0
+        .iter()
+        .zip(second.0.iter())
+        .map(|(lhs, rhs)| lhs * rhs)
+        .sum::<VectorItem>();
+    -dot
+}
+
+/// `std::simd` implementation of the inner-product distance.
+///
+/// Full 32-element chunks are multiplied lane-wise and reduced with `reduce_sum`;
+/// the leftover tail (fewer than 32 elements) is accumulated with scalar arithmetic.
+/// Returns the negated total. Panics if the two vectors do not have the same length.
+#[cfg_attr(not(test), expect(dead_code))]
+fn inner_product_simd(first: VectorRef<'_>, second: VectorRef<'_>) -> VectorDistance {
+    const LANES: usize = 32;
+    assert_eq!(first.0.len(), second.0.len());
+
+    let lhs_chunks = first.0.chunks_exact(LANES);
+    let rhs_chunks = second.0.chunks_exact(LANES);
+    // Grab the tails up front: `zip` below consumes the chunk iterators.
+    let lhs_tail = lhs_chunks.remainder();
+    let rhs_tail = rhs_chunks.remainder();
+
+    let mut simd_sum = 0.0;
+    for (lhs, rhs) in lhs_chunks.zip(rhs_chunks) {
+        let this = Simd::<VectorItem, LANES>::from_slice(lhs);
+        let target = Simd::<VectorItem, LANES>::from_slice(rhs);
+        simd_sum += (this * target).reduce_sum();
+    }
+
+    let tail_sum = lhs_tail
+        .iter()
+        .zip(rhs_tail.iter())
+        .map(|(lhs, rhs)| lhs * rhs)
+        .sum::<VectorDistance>();
+    -(tail_sum + simd_sum)
+}
+
+/// Inner-product distance computed through `faiss::utils::fvec_inner_product`.
+///
+/// Returns the negated inner product of `first` and `second`.
+/// Panics if the two vectors do not have the same length.
+fn inner_product_faiss(first: VectorRef<'_>, second: VectorRef<'_>) -> VectorDistance {
+    // Keep the same precondition as the scalar and SIMD variants so that a length
+    // mismatch fails loudly here instead of depending on what the faiss binding does.
+    assert_eq!(first.0.len(), second.0.len());
+    -faiss::utils::fvec_inner_product(first.0, second.0)
+}
+
 impl<'a> MeasureDistance for InnerProductDistanceMeasure<'a> {
     fn measure(&self, other: VectorRef<'_>) -> VectorDistance {
-        // TODO: use some library with simd support
-        let len = self.0.0.len();
-        assert_eq!(len, other.0.len());
-        -(0..len)
-            .map(|i| self.0.0[i] * other.0[i])
-            .sum::<VectorDistance>()
+        // Delegate to the faiss-backed routine in place of the hand-written scalar loop.
+        inner_product_faiss(self.0, other)
     }
 }
 
@@ -172,6 +203,7 @@ mod tests {
     use expect_test::expect;
 
     use super::*;
+    use crate::vector::test_utils::gen_vector;
     use crate::vector::{MeasureDistanceBuilder, Vector, VectorInner};
 
     const VECTOR_LEN: usize = 10;
@@ -186,6 +218,14 @@ mod tests {
         0.22877127, 0.97690505, 0.44438475,
     ];
 
+        const FLOAT_ALLOWED_BIAS: f32 = 1e-5;
+
+    // Asserts that two floats are equal within `FLOAT_ALLOWED_BIAS`.
+    //
+    // The absolute difference is compared: a signed difference would accept any
+    // `$first` smaller than `$second`, no matter how far apart the two values are.
+    macro_rules! assert_eq_float {
+        ($first:expr, $second:expr) => {{
+            // Evaluate each operand exactly once.
+            let (first, second) = ($first, $second);
+            assert!(
+                (first - second).abs() < FLOAT_ALLOWED_BIAS,
+                "floats differ by more than {}: {} vs {}",
+                FLOAT_ALLOWED_BIAS,
+                first,
+                second
+            )
+        }};
+    }
+
     #[test]
     fn test_distance() {
         let first_vec = [0.238474, 0.578234];
@@ -223,6 +263,13 @@ mod tests {
             InnerProductDistance::distance(first_vec, second_vec),
             -(v1_1 * v2_1 + v1_2 * v2_2)
         );
+        {
+            let v1 = gen_vector(128);
+            let v2 = gen_vector(128);
+            let trivial = inner_product_trivial(v1.to_ref(), v2.to_ref());
+            assert_eq_float!(inner_product_simd(v1.to_ref(), v2.to_ref()), trivial);
+            assert_eq_float!(inner_product_faiss(v1.to_ref(), v2.to_ref()), trivial);
+        }
     }
 
     #[test]
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		BUILD_ENV_VERSION=v20250509-1
		BUILD_ENV_VERSION=FAISS-v20250506
Review comment by hzxa21 (Collaborator), May 21, 2025: Is this expected? Is it related to a new image that includes the new linear-algebra dependencies `libblas-dev liblapack-dev libomp-dev`?