You've already forked pgvecto.rs
mirror of
https://github.com/tensorchord/pgvecto.rs.git
synced 2025-08-01 06:46:52 +03:00
C code tests & avx512f f16 implement (#183)
* test: add tests for c code

  Signed-off-by: usamoi <usamoi@outlook.com>

* fix: relax EPSILON for tests

  Signed-off-by: usamoi <usamoi@outlook.com>

---------

Signed-off-by: usamoi <usamoi@outlook.com>
This commit is contained in:
@ -16,7 +16,7 @@ bincode.workspace = true
|
||||
half.workspace = true
|
||||
num-traits.workspace = true
|
||||
c = { path = "../c" }
|
||||
std_detect = { git = "https://github.com/tensorchord/stdarch.git", branch = "avx512fp16" }
|
||||
detect = { path = "../detect" }
|
||||
rand = "0.8.5"
|
||||
crc32fast = "1.3.2"
|
||||
crossbeam = "0.8.2"
|
||||
@ -32,7 +32,6 @@ arc-swap = "1.6.0"
|
||||
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
|
||||
serde_with = "3.4.0"
|
||||
multiversion = "0.7.3"
|
||||
ctor = "0.2.6"
|
||||
|
||||
[target.'cfg(target_os = "macos")'.dependencies]
|
||||
ulock-sys = "0.1.0"
|
||||
|
@ -22,7 +22,7 @@ pub fn cosine(lhs: &[F16], rhs: &[F16]) -> F32 {
|
||||
xy / (x2 * y2).sqrt()
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if crate::utils::detect::x86_64::detect_avx512fp16() {
|
||||
if detect::x86_64::detect_avx512fp16() {
|
||||
assert!(lhs.len() == rhs.len());
|
||||
let n = lhs.len();
|
||||
unsafe {
|
||||
@ -30,7 +30,15 @@ pub fn cosine(lhs: &[F16], rhs: &[F16]) -> F32 {
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if crate::utils::detect::x86_64::detect_v3() {
|
||||
if detect::x86_64::detect_v4() {
|
||||
assert!(lhs.len() == rhs.len());
|
||||
let n = lhs.len();
|
||||
unsafe {
|
||||
return c::v_f16_cosine_v4(lhs.as_ptr().cast(), rhs.as_ptr().cast(), n).into();
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if detect::x86_64::detect_v3() {
|
||||
assert!(lhs.len() == rhs.len());
|
||||
let n = lhs.len();
|
||||
unsafe {
|
||||
@ -58,7 +66,7 @@ pub fn dot(lhs: &[F16], rhs: &[F16]) -> F32 {
|
||||
xy
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if crate::utils::detect::x86_64::detect_avx512fp16() {
|
||||
if detect::x86_64::detect_avx512fp16() {
|
||||
assert!(lhs.len() == rhs.len());
|
||||
let n = lhs.len();
|
||||
unsafe {
|
||||
@ -66,7 +74,15 @@ pub fn dot(lhs: &[F16], rhs: &[F16]) -> F32 {
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if crate::utils::detect::x86_64::detect_v3() {
|
||||
if detect::x86_64::detect_v4() {
|
||||
assert!(lhs.len() == rhs.len());
|
||||
let n = lhs.len();
|
||||
unsafe {
|
||||
return c::v_f16_dot_v4(lhs.as_ptr().cast(), rhs.as_ptr().cast(), n).into();
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if detect::x86_64::detect_v3() {
|
||||
assert!(lhs.len() == rhs.len());
|
||||
let n = lhs.len();
|
||||
unsafe {
|
||||
@ -95,7 +111,7 @@ pub fn sl2(lhs: &[F16], rhs: &[F16]) -> F32 {
|
||||
d2
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if crate::utils::detect::x86_64::detect_avx512fp16() {
|
||||
if detect::x86_64::detect_avx512fp16() {
|
||||
assert!(lhs.len() == rhs.len());
|
||||
let n = lhs.len();
|
||||
unsafe {
|
||||
@ -103,7 +119,15 @@ pub fn sl2(lhs: &[F16], rhs: &[F16]) -> F32 {
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if crate::utils::detect::x86_64::detect_v3() {
|
||||
if detect::x86_64::detect_v4() {
|
||||
assert!(lhs.len() == rhs.len());
|
||||
let n = lhs.len();
|
||||
unsafe {
|
||||
return c::v_f16_sl2_v4(lhs.as_ptr().cast(), rhs.as_ptr().cast(), n).into();
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if detect::x86_64::detect_v3() {
|
||||
assert!(lhs.len() == rhs.len());
|
||||
let n = lhs.len();
|
||||
unsafe {
|
||||
|
@ -1 +0,0 @@
|
||||
pub mod x86_64;
|
@ -1,85 +0,0 @@
|
||||
#![cfg(target_arch = "x86_64")]
|
||||
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
static ATOMIC_AVX512FP16: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
pub fn test_avx512fp16() -> bool {
|
||||
std_detect::is_x86_feature_detected!("avx512fp16") && test_v4()
|
||||
}
|
||||
|
||||
#[ctor::ctor]
|
||||
fn ctor_avx512fp16() {
|
||||
ATOMIC_AVX512FP16.store(test_avx512fp16(), Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn detect_avx512fp16() -> bool {
|
||||
ATOMIC_AVX512FP16.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
static ATOMIC_V4: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
pub fn test_v4() -> bool {
|
||||
std_detect::is_x86_feature_detected!("avx512bw")
|
||||
&& std_detect::is_x86_feature_detected!("avx512cd")
|
||||
&& std_detect::is_x86_feature_detected!("avx512dq")
|
||||
&& std_detect::is_x86_feature_detected!("avx512f")
|
||||
&& std_detect::is_x86_feature_detected!("avx512vl")
|
||||
&& test_v3()
|
||||
}
|
||||
|
||||
#[ctor::ctor]
|
||||
fn ctor_v4() {
|
||||
ATOMIC_V4.store(test_v4(), Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn _detect_v4() -> bool {
|
||||
ATOMIC_V4.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
static ATOMIC_V3: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
pub fn test_v3() -> bool {
|
||||
std_detect::is_x86_feature_detected!("avx")
|
||||
&& std_detect::is_x86_feature_detected!("avx2")
|
||||
&& std_detect::is_x86_feature_detected!("bmi1")
|
||||
&& std_detect::is_x86_feature_detected!("bmi2")
|
||||
&& std_detect::is_x86_feature_detected!("f16c")
|
||||
&& std_detect::is_x86_feature_detected!("fma")
|
||||
&& std_detect::is_x86_feature_detected!("lzcnt")
|
||||
&& std_detect::is_x86_feature_detected!("movbe")
|
||||
&& std_detect::is_x86_feature_detected!("xsave")
|
||||
&& test_v2()
|
||||
}
|
||||
|
||||
#[ctor::ctor]
|
||||
fn ctor_v3() {
|
||||
ATOMIC_V3.store(test_v3(), Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn detect_v3() -> bool {
|
||||
ATOMIC_V3.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
static ATOMIC_V2: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
pub fn test_v2() -> bool {
|
||||
std_detect::is_x86_feature_detected!("cmpxchg16b")
|
||||
&& std_detect::is_x86_feature_detected!("fxsr")
|
||||
&& std_detect::is_x86_feature_detected!("popcnt")
|
||||
&& std_detect::is_x86_feature_detected!("sse")
|
||||
&& std_detect::is_x86_feature_detected!("sse2")
|
||||
&& std_detect::is_x86_feature_detected!("sse3")
|
||||
&& std_detect::is_x86_feature_detected!("sse4.1")
|
||||
&& std_detect::is_x86_feature_detected!("sse4.2")
|
||||
&& std_detect::is_x86_feature_detected!("ssse3")
|
||||
}
|
||||
|
||||
#[ctor::ctor]
|
||||
fn ctor_v2() {
|
||||
ATOMIC_V2.store(test_v2(), Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub fn _detect_v2() -> bool {
|
||||
ATOMIC_V2.load(Ordering::Relaxed)
|
||||
}
|
@ -1,6 +1,5 @@
|
||||
pub mod cells;
|
||||
pub mod clean;
|
||||
pub mod detect;
|
||||
pub mod dir_ops;
|
||||
pub mod file_atomic;
|
||||
pub mod file_wal;
|
||||
|
Reference in New Issue
Block a user