diff --git a/Cargo.lock b/Cargo.lock index a927291..609d782 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -143,6 +143,36 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "base" +version = "0.0.0" +dependencies = [ + "arc-swap", + "bincode", + "bytemuck", + "byteorder", + "c", + "crc32fast", + "crossbeam", + "dashmap", + "detect", + "half 2.3.1", + "libc", + "log", + "memmap2", + "multiversion", + "num-traits", + "parking_lot", + "rand", + "rayon", + "rustix", + "serde", + "serde_json", + "thiserror", + "uuid", + "validator", +] + [[package]] name = "base64" version = "0.21.7" @@ -1739,6 +1769,7 @@ name = "service" version = "0.0.0" dependencies = [ "arc-swap", + "base", "bincode", "bytemuck", "byteorder", @@ -2197,6 +2228,7 @@ name = "vectors" version = "0.0.0" dependencies = [ "arrayvec", + "base", "bincode", "bytemuck", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 1a932d4..5b7e589 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ serde.workspace = true serde_json.workspace = true thiserror.workspace = true validator.workspace = true +base = { path = "crates/base" } detect = { path = "crates/detect" } send_fd = { path = "crates/send_fd" } service = { path = "crates/service" } diff --git a/crates/base/Cargo.toml b/crates/base/Cargo.toml new file mode 100644 index 0000000..79c2f8d --- /dev/null +++ b/crates/base/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "base" +version.workspace = true +edition.workspace = true + +[dependencies] +bincode.workspace = true +bytemuck.workspace = true +byteorder.workspace = true +half.workspace = true +libc.workspace = true +log.workspace = true +memmap2.workspace = true +num-traits.workspace = true +rand.workspace = true +rustix.workspace = true +serde.workspace = true +serde_json.workspace = true +thiserror.workspace = true +uuid.workspace = true +validator.workspace = true +c = { path = "../c" } +detect = { path = "../detect" } +crc32fast = "1.4.0" +crossbeam = "0.8.4" +dashmap = "5.5.3" +parking_lot = "0.12.1" +rayon = "1.8.1" +arc-swap = "1.6.0" +multiversion = "0.7.3" + +[lints] +clippy.derivable_impls = "allow" +clippy.len_without_is_empty = "allow" +clippy.needless_range_loop = "allow" +clippy.too_many_arguments = "allow" +rust.internal_features = "allow" +rust.unsafe_op_in_unsafe_fn = "forbid" +rust.unused_lifetimes = "warn" +rust.unused_qualifications = "warn" diff --git a/crates/service/src/prelude/error.rs b/crates/base/src/error.rs similarity index 100% rename from crates/service/src/prelude/error.rs rename to crates/base/src/error.rs diff --git a/crates/base/src/lib.rs b/crates/base/src/lib.rs new file mode 100644 index 0000000..653153e --- /dev/null +++ b/crates/base/src/lib.rs @@ -0,0 +1,7 @@ +#![feature(core_intrinsics)] + +pub mod error; +pub mod scalar; +pub mod search; +pub mod sys; +pub mod vector; diff --git a/crates/service/src/prelude/scalar/f16.rs b/crates/base/src/scalar/f16.rs similarity index 99% rename from crates/service/src/prelude/scalar/f16.rs rename to crates/base/src/scalar/f16.rs index 467542f..da5735b 100644 --- a/crates/service/src/prelude/scalar/f16.rs +++ b/crates/base/src/scalar/f16.rs @@ -1,4 +1,4 @@ -use crate::prelude::global::FloatCast; +use super::FloatCast; use half::f16; use serde::{Deserialize, Serialize}; use std::cmp::Ordering; diff --git a/crates/service/src/prelude/scalar/f32.rs b/crates/base/src/scalar/f32.rs similarity index 99% rename from crates/service/src/prelude/scalar/f32.rs rename to crates/base/src/scalar/f32.rs index a4e70a1..c6e431b 100644 --- a/crates/service/src/prelude/scalar/f32.rs +++ b/crates/base/src/scalar/f32.rs @@ -1,4 +1,4 @@ -use crate::prelude::global::FloatCast; +use super::FloatCast; use serde::{Deserialize, Serialize}; use std::cmp::Ordering; use std::fmt::{Debug, Display}; diff --git a/crates/base/src/scalar/mod.rs b/crates/base/src/scalar/mod.rs new file mode 100644 index 0000000..8e30d33 --- /dev/null +++ b/crates/base/src/scalar/mod.rs @@ -0,0 +1,16 @@ +mod f16; +mod f32; + +pub use f16::F16; +pub use f32::F32; + +pub trait FloatCast: Sized { + fn from_f32(x: f32) -> Self; + fn to_f32(self) -> f32; + fn from_f(x: F32) -> Self { + Self::from_f32(x.0) + } + fn to_f(self) -> F32 { + F32(Self::to_f32(self)) + } +} diff --git a/crates/service/src/prelude/search.rs b/crates/base/src/search.rs similarity index 90% rename from crates/service/src/prelude/search.rs rename to crates/base/src/search.rs index 2009730..c5e946b 100644 --- a/crates/service/src/prelude/search.rs +++ b/crates/base/src/search.rs @@ -1,4 +1,4 @@ -use crate::prelude::F32; +use crate::scalar::F32; pub type Payload = u64; diff --git a/crates/service/src/prelude/sys.rs b/crates/base/src/sys.rs similarity index 100% rename from crates/service/src/prelude/sys.rs rename to crates/base/src/sys.rs diff --git a/crates/base/src/vector/mod.rs b/crates/base/src/vector/mod.rs new file mode 100644 index 0000000..8f61177 --- /dev/null +++ b/crates/base/src/vector/mod.rs @@ -0,0 +1,19 @@ +mod sparse_f32; + +pub use sparse_f32::{SparseF32, SparseF32Ref}; + +pub trait Vector { + fn dims(&self) -> u16; +} + +impl Vector for Vec { + fn dims(&self) -> u16 { + self.len().try_into().unwrap() + } +} + +impl<'a, T> Vector for &'a [T] { + fn dims(&self) -> u16 { + self.len().try_into().unwrap() + } +} diff --git a/crates/service/src/prelude/scalar/sparse_f32.rs b/crates/base/src/vector/sparse_f32.rs similarity index 95% rename from crates/service/src/prelude/scalar/sparse_f32.rs rename to crates/base/src/vector/sparse_f32.rs index d8d28a2..d529032 100644 --- a/crates/service/src/prelude/scalar/sparse_f32.rs +++ b/crates/base/src/vector/sparse_f32.rs @@ -1,4 +1,6 @@ -use crate::prelude::*; +use super::Vector; +use crate::scalar::F32; +use num_traits::Zero; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/service/Cargo.toml b/crates/service/Cargo.toml index 9f71438..8cd06be 100644 --- a/crates/service/Cargo.toml +++ b/crates/service/Cargo.toml @@ -19,6 +19,7 @@ serde_json.workspace = true thiserror.workspace = true uuid.workspace = true validator.workspace = true +base = { path = "../base" } c = { path = "../c" } detect = { path = "../detect" } crc32fast = "1.4.0" diff --git a/crates/service/src/algorithms/clustering/elkan_k_means.rs b/crates/service/src/algorithms/clustering/elkan_k_means.rs index fa3ad4a..e746568 100644 --- a/crates/service/src/algorithms/clustering/elkan_k_means.rs +++ b/crates/service/src/algorithms/clustering/elkan_k_means.rs @@ -1,5 +1,6 @@ use crate::prelude::*; use crate::utils::vec2::Vec2; +use base::scalar::FloatCast; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use std::ops::{Index, IndexMut}; diff --git a/crates/service/src/algorithms/quantization/scalar.rs b/crates/service/src/algorithms/quantization/scalar.rs index f613df8..17467b4 100644 --- a/crates/service/src/algorithms/quantization/scalar.rs +++ b/crates/service/src/algorithms/quantization/scalar.rs @@ -5,6 +5,7 @@ use crate::index::IndexOptions; use crate::prelude::*; use crate::utils::dir_ops::sync_dir; use crate::utils::mmap_array::MmapArray; +use base::scalar::FloatCast; use serde::{Deserialize, Serialize}; use std::path::Path; use std::sync::Arc; diff --git a/crates/service/src/prelude/global/f16.rs b/crates/service/src/prelude/global/f16.rs index d877b5a..be5c560 100644 --- a/crates/service/src/prelude/global/f16.rs +++ b/crates/service/src/prelude/global/f16.rs @@ -1,4 +1,5 @@ use crate::prelude::*; +use base::scalar::FloatCast; pub fn cosine(lhs: &[F16], rhs: &[F16]) -> F32 { #[inline(always)] diff --git a/crates/service/src/prelude/global/f16_cos.rs b/crates/service/src/prelude/global/f16_cos.rs index 3e9a350..a8e7f33 100644 --- a/crates/service/src/prelude/global/f16_cos.rs +++ b/crates/service/src/prelude/global/f16_cos.rs @@ -1,6 +1,6 @@ -use std::borrow::Cow; - use crate::prelude::*; +use base::scalar::FloatCast; +use std::borrow::Cow; #[derive(Debug, Clone, Copy)] pub enum F16Cos {} diff --git a/crates/service/src/prelude/global/f16_dot.rs b/crates/service/src/prelude/global/f16_dot.rs index 4f5d2d7..353efba 100644 --- a/crates/service/src/prelude/global/f16_dot.rs +++ b/crates/service/src/prelude/global/f16_dot.rs @@ -1,6 +1,6 @@ -use std::borrow::Cow; - use crate::prelude::*; +use base::scalar::FloatCast; +use std::borrow::Cow; #[derive(Debug, Clone, Copy)] pub enum F16Dot {} diff --git a/crates/service/src/prelude/global/f16_l2.rs b/crates/service/src/prelude/global/f16_l2.rs index 5313554..46d9568 100644 --- a/crates/service/src/prelude/global/f16_l2.rs +++ b/crates/service/src/prelude/global/f16_l2.rs @@ -1,6 +1,6 @@ -use std::borrow::Cow; - use crate::prelude::*; +use base::scalar::FloatCast; +use std::borrow::Cow; #[derive(Debug, Clone, Copy)] pub enum F16L2 {} diff --git a/crates/service/src/prelude/global/f32_l2.rs b/crates/service/src/prelude/global/f32_l2.rs index fd9ea6f..9836f4b 100644 --- a/crates/service/src/prelude/global/f32_l2.rs +++ b/crates/service/src/prelude/global/f32_l2.rs @@ -1,6 +1,5 @@ -use std::borrow::Cow; - use crate::prelude::*; +use std::borrow::Cow; #[derive(Debug, Clone, Copy)] pub enum F32L2 {} diff --git a/crates/service/src/prelude/global/mod.rs b/crates/service/src/prelude/global/mod.rs index aa0cbf2..802b09a 100644 --- a/crates/service/src/prelude/global/mod.rs +++ b/crates/service/src/prelude/global/mod.rs @@ -43,7 +43,7 @@ pub trait G: Copy + Debug + 'static { + Zero + num_traits::NumOps + num_traits::NumAssignOps - + FloatCast; + + base::scalar::FloatCast; type Storage: for<'a> Storage = Self::VectorRef<'a>>; type L2: for<'a> G = &'a [Self::Scalar]>; type VectorOwned: Vector + Clone + Serialize + for<'a> Deserialize<'a>; @@ -103,33 +103,6 @@ pub trait G: Copy + Debug + 'static { ) -> F32; } -pub trait FloatCast: Sized { - fn from_f32(x: f32) -> Self; - fn to_f32(self) -> f32; - fn from_f(x: F32) -> Self { - Self::from_f32(x.0) - } - fn to_f(self) -> F32 { - F32(Self::to_f32(self)) - } -} - -pub trait Vector { - fn dims(&self) -> u16; -} - -impl Vector for Vec { - fn dims(&self) -> u16 { - self.len().try_into().unwrap() - } -} - -impl<'a, T> Vector for &'a [T] { - fn dims(&self) -> u16 { - self.len().try_into().unwrap() - } -} - #[derive(Debug, Clone, Serialize, Deserialize)] pub enum DynamicVector { F32(Vec), diff --git a/crates/service/src/prelude/mod.rs b/crates/service/src/prelude/mod.rs index 67c1b16..a559acb 100644 --- a/crates/service/src/prelude/mod.rs +++ b/crates/service/src/prelude/mod.rs @@ -1,15 +1,13 @@ -mod error; mod global; -mod scalar; -mod search; mod storage; -mod sys; -pub use self::error::*; pub use self::global::*; -pub use self::scalar::{SparseF32, SparseF32Ref, F16, F32}; -pub use self::search::{Element, Filter, Payload}; pub use self::storage::{DenseMmap, SparseMmap, Storage}; -pub use self::sys::{Handle, Pointer}; + +pub use base::error::*; +pub use base::scalar::{F16, F32}; +pub use base::search::{Element, Filter, Payload}; +pub use base::sys::{Handle, Pointer}; +pub use base::vector::{SparseF32, SparseF32Ref, Vector}; pub use num_traits::{Float, Zero}; diff --git a/crates/service/src/prelude/scalar/mod.rs b/crates/service/src/prelude/scalar/mod.rs deleted file mode 100644 index 1be763f..0000000 --- a/crates/service/src/prelude/scalar/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod f16; -mod f32; -mod sparse_f32; - -pub use f16::F16; -pub use f32::F32; -pub use sparse_f32::{SparseF32, SparseF32Ref}; diff --git a/src/datatype/casts_f32.rs b/src/datatype/casts_f32.rs index d28f7d3..123434c 100644 --- a/src/datatype/casts_f32.rs +++ b/src/datatype/casts_f32.rs @@ -2,6 +2,7 @@ use crate::datatype::svecf32::{SVecf32, SVecf32Input, SVecf32Output}; use crate::datatype::vecf16::{Vecf16, Vecf16Input, Vecf16Output}; use crate::datatype::vecf32::{Vecf32, Vecf32Input, Vecf32Output}; use crate::prelude::check_value_dimensions; +use base::scalar::FloatCast; use service::prelude::*; #[pgrx::pg_extern(immutable, parallel_safe, strict)] diff --git a/src/datatype/operators_svecf32.rs b/src/datatype/operators_svecf32.rs index 88fa790..03f3616 100644 --- a/src/datatype/operators_svecf32.rs +++ b/src/datatype/operators_svecf32.rs @@ -1,5 +1,6 @@ use crate::datatype::svecf32::{SVecf32, SVecf32Input, SVecf32Output}; use crate::prelude::*; +use base::scalar::FloatCast; use service::prelude::*; use std::ops::Deref; diff --git a/src/datatype/operators_vecf16.rs b/src/datatype/operators_vecf16.rs index c817f8e..191c0e3 100644 --- a/src/datatype/operators_vecf16.rs +++ b/src/datatype/operators_vecf16.rs @@ -1,5 +1,6 @@ use crate::datatype::vecf16::{Vecf16, Vecf16Input, Vecf16Output}; use crate::prelude::*; +use base::scalar::FloatCast; use service::prelude::*; use std::ops::Deref; diff --git a/src/datatype/operators_vecf32.rs b/src/datatype/operators_vecf32.rs index 31c6360..50649f8 100644 --- a/src/datatype/operators_vecf32.rs +++ b/src/datatype/operators_vecf32.rs @@ -1,5 +1,6 @@ use crate::datatype::vecf32::{Vecf32, Vecf32Input, Vecf32Output}; use crate::prelude::*; +use base::scalar::FloatCast; use service::prelude::*; use std::ops::Deref;