Mirror of https://github.com/tensorchord/pgvecto.rs.git, synced 2025-04-18 21:44:00 +03:00

feat: pyo3 bindings of indexing (#565)

* feat: pyo3 bindings of indexing
* fix: ci

Signed-off-by: usamoi <usamoi@outlook.com>

This commit is contained in:
parent 20e84ca9a2
commit ab1edc9777
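This commit adds a pyvectors crate (see the crates/pyvectors files later in the diff) that exposes the indexing code to Python via pyo3 0.21 and maturin. As a rough orientation only, here is a hedged sketch of what a minimal pyo3 extension module of this shape looks like; the Index type and its methods are illustrative stand-ins and are not the bindings added by this commit.

```rust
// Illustrative sketch only: a minimal pyo3 0.21 extension module, not code from this commit.
use pyo3::prelude::*;

/// Hypothetical wrapper type; the real crate exposes indexing functionality instead.
#[pyclass]
struct Index {
    dims: u32,
}

#[pymethods]
impl Index {
    #[new]
    fn new(dims: u32) -> Self {
        Self { dims }
    }

    /// Returns the dimensionality this index was created with.
    fn dims(&self) -> u32 {
        self.dims
    }
}

/// The module function name must match the module name maturin is configured to build
/// ("vectors" in this commit's pyproject.toml).
#[pymodule]
fn vectors(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<Index>()?;
    Ok(())
}
```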
.github/workflows/psql.yml (vendored, 49 changed lines)
@@ -42,16 +42,19 @@ env:
SCCACHE_GHA_ENABLED: true
RUSTC_WRAPPER: sccache
RUSTFLAGS: "-Dwarnings"
CARGO_PROFILE_OPT_BUILD_OVERRIDE_DEBUG: true

jobs:
test:
strategy:
matrix:
version: [14, 15, 16]
arch: ["x86_64"]
runs-on: ubuntu-latest
env:
SEMVER: "0.0.0"
VERSION: ${{ matrix.version }}
ARCH: ${{ matrix.arch }}
steps:
- name: Checkout
uses: actions/checkout@v4
@@ -81,41 +84,41 @@ jobs:
sudo apt-get update
sudo apt-get install -y clang-16
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-16 128
- name: Set up Pgrx
run: |
# pg_config
mkdir -p ~/.pg_config
touch ~/.pg_config/pg_config
chmod 777 ~/.pg_config/pg_config
echo "#!/usr/bin/env bash" >> ~/.pg_config/pg_config
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg${VERSION}_$(uname --machine)-unknown-linux-gnu.txt" >> ~/.pg_config/pg_config
mkdir -p ~/.pgrx && echo "configs.pg$VERSION=\"$HOME/.pg_config/pg_config\"" > ~/.pgrx/config.toml
# pgrx_binding
mkdir -p ~/.pgrx_binding
cp ./vendor/pgrx_binding/pg${VERSION}_$(uname --machine)-unknown-linux-gnu.rs ~/.pgrx_binding/pg${VERSION}_raw_bindings.rs
echo PGRX_TARGET_INFO_PATH_PG$VERSION=$HOME/.pgrx_binding >> "$GITHUB_ENV"
- name: Build Release
run: |
cargo build --lib --features "pg$VERSION" --profile opt
./tools/schema.sh --features "pg$VERSION" --profile opt | expand -t 4 > ./target/vectors--$SEMVER.sql
- name: Set up PostgreSQL
run: |
mkdir -p ~/.pg14/pg_binding && touch ~/.pg14/pg_config && chmod 777 ~/.pg14/pg_config
mkdir -p ~/.pg15/pg_binding && touch ~/.pg15/pg_config && chmod 777 ~/.pg15/pg_config
mkdir -p ~/.pg16/pg_binding && touch ~/.pg16/pg_config && chmod 777 ~/.pg16/pg_config
cp ./vendor/pgrx_binding/pg14_${ARCH}-unknown-linux-gnu.rs ~/.pg14/pg_binding/pg14_raw_bindings.rs
cp ./vendor/pgrx_binding/pg15_${ARCH}-unknown-linux-gnu.rs ~/.pg15/pg_binding/pg15_raw_bindings.rs
cp ./vendor/pgrx_binding/pg16_${ARCH}-unknown-linux-gnu.rs ~/.pg16/pg_binding/pg16_raw_bindings.rs
echo "#!/usr/bin/env bash" >> ~/.pg14/pg_config
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg14_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg14/pg_config
echo "#!/usr/bin/env bash" >> ~/.pg15/pg_config
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg15_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg15/pg_config
echo "#!/usr/bin/env bash" >> ~/.pg16/pg_config
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg16_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg16/pg_config
- name: Build
run: |
export PGRX_PG_CONFIG_PATH=$HOME/.pg$VERSION/pg_config
export PGRX_TARGET_INFO_PATH_PG$VERSION=$HOME/.pg$VERSION/pg_binding
cargo build --package pgvectors --lib --features pg$VERSION --target $ARCH-unknown-linux-gnu --profile opt
./tools/schema.sh --features pg$VERSION --target $ARCH-unknown-linux-gnu --profile opt | expand -t 4 > ./target/schema.sql
- name: Install
run: |
sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" >> /etc/apt/sources.list.d/pgdg.list'
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
sudo apt-get update
sudo apt-get -y install postgresql-$VERSION

echo "local all all trust" | sudo tee /etc/postgresql/$VERSION/main/pg_hba.conf
echo "host all all 127.0.0.1/32 trust" | sudo tee -a /etc/postgresql/$VERSION/main/pg_hba.conf
echo "host all all ::1/128 trust" | sudo tee -a /etc/postgresql/$VERSION/main/pg_hba.conf
sudo systemctl restart postgresql

sudo -iu postgres createuser -s -r $USER
createdb
- name: Install Release
run: |
sudo cp ./target/vectors--$SEMVER.sql /usr/share/postgresql/$VERSION/extension/vectors--$SEMVER.sql
sudo cp ./target/opt/libvectors.so "/usr/lib/postgresql/$VERSION/lib/vectors.so"

sudo cp ./target/schema.sql /usr/share/postgresql/$VERSION/extension/vectors--$SEMVER.sql
sudo cp ./target/$ARCH-unknown-linux-gnu/opt/libvectors.so "/usr/lib/postgresql/$VERSION/lib/vectors.so"
sed -e "s/@CARGO_VERSION@/$SEMVER/g" < ./vectors.control | sudo tee "/usr/share/postgresql/$VERSION/extension/vectors.control"

psql -c 'ALTER SYSTEM SET shared_preload_libraries = "vectors.so"'
.github/workflows/release.yml (vendored, 31 changed lines)
@@ -78,23 +78,26 @@ jobs:
sudo apt-get update
sudo apt-get install -y clang-16
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-16 128
- name: Set up Pgrx
- name: Set up PostgreSQL
run: |
# pg_config
mkdir -p ~/.pg_config
touch ~/.pg_config/pg_config
chmod 777 ~/.pg_config/pg_config
echo "#!/usr/bin/env bash" >> ~/.pg_config/pg_config
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg${VERSION}_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg_config/pg_config
mkdir -p ~/.pgrx && echo "configs.pg$VERSION=\"$HOME/.pg_config/pg_config\"" > ~/.pgrx/config.toml
# pgrx_binding
mkdir -p ~/.pgrx_binding
cp ./vendor/pgrx_binding/pg${VERSION}_$(uname --machine)-unknown-linux-gnu.rs ~/.pgrx_binding/pg${VERSION}_raw_bindings.rs
echo PGRX_TARGET_INFO_PATH_PG$VERSION=$HOME/.pgrx_binding >> "$GITHUB_ENV"
mkdir -p ~/.pg14/pg_binding && touch ~/.pg14/pg_config && chmod 777 ~/.pg14/pg_config
mkdir -p ~/.pg15/pg_binding && touch ~/.pg15/pg_config && chmod 777 ~/.pg15/pg_config
mkdir -p ~/.pg16/pg_binding && touch ~/.pg16/pg_config && chmod 777 ~/.pg16/pg_config
cp ./vendor/pgrx_binding/pg14_${ARCH}-unknown-linux-gnu.rs ~/.pg14/pg_binding/pg14_raw_bindings.rs
cp ./vendor/pgrx_binding/pg15_${ARCH}-unknown-linux-gnu.rs ~/.pg15/pg_binding/pg15_raw_bindings.rs
cp ./vendor/pgrx_binding/pg16_${ARCH}-unknown-linux-gnu.rs ~/.pg16/pg_binding/pg16_raw_bindings.rs
echo "#!/usr/bin/env bash" >> ~/.pg14/pg_config
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg14_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg14/pg_config
echo "#!/usr/bin/env bash" >> ~/.pg15/pg_config
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg15_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg15/pg_config
echo "#!/usr/bin/env bash" >> ~/.pg16/pg_config
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg16_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg16/pg_config
- name: Build
run: |
cargo build --lib --features pg$VERSION --release --target $ARCH-unknown-linux-gnu
./tools/schema.sh --features pg$VERSION --release --target $ARCH-unknown-linux-gnu | expand -t 4 > ./target/vectors--$SEMVER.sql
export PGRX_PG_CONFIG_PATH=$HOME/.pg$VERSION/pg_config
export PGRX_TARGET_INFO_PATH_PG$VERSION=$HOME/.pg$VERSION/pg_binding
cargo build --package pgvectors --lib --features pg$VERSION --target $ARCH-unknown-linux-gnu --release
./tools/schema.sh --features pg$VERSION --target $ARCH-unknown-linux-gnu --release | expand -t 4 > ./target/schema.sql
- name: Package
run: |
export PLATFORM=$(echo $ARCH | sed 's/aarch64/arm64/; s/x86_64/amd64/')
.github/workflows/rust.yml (vendored, 126 changed lines)
@ -42,83 +42,15 @@ env:
|
||||
SCCACHE_GHA_ENABLED: true
|
||||
RUSTC_WRAPPER: sccache
|
||||
RUSTFLAGS: "-Dwarnings"
|
||||
CARGO_PROFILE_OPT_BUILD_OVERRIDE_DEBUG: true
|
||||
|
||||
jobs:
|
||||
check:
|
||||
strategy:
|
||||
matrix:
|
||||
version: [14, 15, 16]
|
||||
arch: ["x86_64", "aarch64"]
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
SEMVER: "0.0.0"
|
||||
VERSION: ${{ matrix.version }}
|
||||
ARCH: ${{ matrix.arch }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Environment
|
||||
run: |
|
||||
sudo apt-get remove -y '^postgres.*' '^libpq.*' '^clang.*' '^llvm.*' '^libclang.*' '^libllvm.*' '^mono-llvm.*'
|
||||
sudo apt-get purge -y '^postgres.*' '^libpq.*' '^clang.*' '^llvm.*' '^libclang.*' '^libllvm.*' '^mono-llvm.*'
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y build-essential crossbuild-essential-arm64
|
||||
sudo apt-get install -y qemu-user-static
|
||||
touch ~/.cargo/config.toml
|
||||
echo 'target.aarch64-unknown-linux-gnu.linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config.toml
|
||||
echo 'target.aarch64-unknown-linux-gnu.runner = ["qemu-aarch64-static", "-L", "/usr/aarch64-linux-gnu"]' >> ~/.cargo/config.toml
|
||||
- name: Set up Sccache
|
||||
uses: mozilla-actions/sccache-action@v0.0.4
|
||||
- name: Set up Cache
|
||||
uses: actions/cache/restore@v4
|
||||
id: cache
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry/index/
|
||||
~/.cargo/registry/cache/
|
||||
~/.cargo/git/db/
|
||||
key: ${{ github.job }}-${{ hashFiles('./Cargo.lock') }}-${{ matrix.version }}-${{ matrix.arch }}
|
||||
- name: Set up Clang-16
|
||||
run: |
|
||||
sudo sh -c 'echo "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-16 main" >> /etc/apt/sources.list'
|
||||
wget --quiet -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y clang-16
|
||||
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-16 128
|
||||
- name: Set up Pgrx
|
||||
run: |
|
||||
# pg_config
|
||||
mkdir -p ~/.pg_config
|
||||
touch ~/.pg_config/pg_config
|
||||
chmod 777 ~/.pg_config/pg_config
|
||||
echo "#!/usr/bin/env bash" >> ~/.pg_config/pg_config
|
||||
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg${VERSION}_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg_config/pg_config
|
||||
mkdir -p ~/.pgrx && echo "configs.pg$VERSION=\"$HOME/.pg_config/pg_config\"" > ~/.pgrx/config.toml
|
||||
# pgrx_binding
|
||||
mkdir -p ~/.pgrx_binding
|
||||
cp ./vendor/pgrx_binding/pg${VERSION}_$(uname --machine)-unknown-linux-gnu.rs ~/.pgrx_binding/pg${VERSION}_raw_bindings.rs
|
||||
echo PGRX_TARGET_INFO_PATH_PG$VERSION=$HOME/.pgrx_binding >> "$GITHUB_ENV"
|
||||
- name: Clippy
|
||||
run: cargo clippy --features "pg$VERSION" --target $ARCH-unknown-linux-gnu
|
||||
- name: Build
|
||||
run: cargo build --lib --features "pg$VERSION" --target $ARCH-unknown-linux-gnu
|
||||
- name: Post Set up Cache
|
||||
uses: actions/cache/save@v4
|
||||
if: ${{ !steps.cache.outputs.cache-hit }}
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry/index/
|
||||
~/.cargo/registry/cache/
|
||||
~/.cargo/git/db/
|
||||
key: ${{ github.job }}-${{ hashFiles('./Cargo.lock') }}-${{ matrix.version }}-${{ matrix.arch }}
|
||||
test:
|
||||
check_and_test:
|
||||
strategy:
|
||||
matrix:
|
||||
arch: ["x86_64", "aarch64"]
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
SEMVER: "0.0.0"
|
||||
VERSION: "16"
|
||||
ARCH: ${{ matrix.arch }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
@ -151,28 +83,54 @@ jobs:
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y clang-16
|
||||
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-16 128
|
||||
- name: Set up Pgrx
|
||||
- name: Set up PostgreSQL
|
||||
run: |
|
||||
# pg_config
|
||||
mkdir -p ~/.pg_config
|
||||
touch ~/.pg_config/pg_config
|
||||
chmod 777 ~/.pg_config/pg_config
|
||||
echo "#!/usr/bin/env bash" >> ~/.pg_config/pg_config
|
||||
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg${VERSION}_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg_config/pg_config
|
||||
mkdir -p ~/.pgrx && echo "configs.pg$VERSION=\"$HOME/.pg_config/pg_config\"" > ~/.pgrx/config.toml
|
||||
# pgrx_binding
|
||||
mkdir -p ~/.pgrx_binding
|
||||
cp ./vendor/pgrx_binding/pg${VERSION}_$(uname --machine)-unknown-linux-gnu.rs ~/.pgrx_binding/pg${VERSION}_raw_bindings.rs
|
||||
echo PGRX_TARGET_INFO_PATH_PG$VERSION=$HOME/.pgrx_binding >> "$GITHUB_ENV"
|
||||
mkdir -p ~/.pg14/pg_binding && touch ~/.pg14/pg_config && chmod 777 ~/.pg14/pg_config
|
||||
mkdir -p ~/.pg15/pg_binding && touch ~/.pg15/pg_config && chmod 777 ~/.pg15/pg_config
|
||||
mkdir -p ~/.pg16/pg_binding && touch ~/.pg16/pg_config && chmod 777 ~/.pg16/pg_config
|
||||
cp ./vendor/pgrx_binding/pg14_${ARCH}-unknown-linux-gnu.rs ~/.pg14/pg_binding/pg14_raw_bindings.rs
|
||||
cp ./vendor/pgrx_binding/pg15_${ARCH}-unknown-linux-gnu.rs ~/.pg15/pg_binding/pg15_raw_bindings.rs
|
||||
cp ./vendor/pgrx_binding/pg16_${ARCH}-unknown-linux-gnu.rs ~/.pg16/pg_binding/pg16_raw_bindings.rs
|
||||
echo "#!/usr/bin/env bash" >> ~/.pg14/pg_config
|
||||
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg14_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg14/pg_config
|
||||
echo "#!/usr/bin/env bash" >> ~/.pg15/pg_config
|
||||
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg15_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg15/pg_config
|
||||
echo "#!/usr/bin/env bash" >> ~/.pg16/pg_config
|
||||
echo "$(pwd)/tools/pg_config.sh \"\$@\" < $(pwd)/vendor/pg_config/pg16_${ARCH}-unknown-linux-gnu.txt" >> ~/.pg16/pg_config
|
||||
- name: Clippy
|
||||
run: |
|
||||
cargo clippy --workspace --exclude pgvectors --exclude pyvectors --target $ARCH-unknown-linux-gnu
|
||||
export PGRX_PG_CONFIG_PATH=$HOME/.pg14/pg_config
|
||||
export PGRX_TARGET_INFO_PATH_PG14=$HOME/.pg14/pg_binding
|
||||
cargo clippy --package pgvectors --features pg14 --no-deps --target $ARCH-unknown-linux-gnu
|
||||
export PGRX_PG_CONFIG_PATH=$HOME/.pg15/pg_config
|
||||
export PGRX_TARGET_INFO_PATH_PG15=$HOME/.pg15/pg_binding
|
||||
cargo clippy --package pgvectors --features pg15 --no-deps --target $ARCH-unknown-linux-gnu
|
||||
export PGRX_PG_CONFIG_PATH=$HOME/.pg16/pg_config
|
||||
export PGRX_TARGET_INFO_PATH_PG16=$HOME/.pg16/pg_binding
|
||||
cargo clippy --package pgvectors --features pg16 --no-deps --target $ARCH-unknown-linux-gnu
|
||||
- name: Build
|
||||
run: |
|
||||
cargo build --workspace --exclude pgvectors --exclude pyvectors --target $ARCH-unknown-linux-gnu
|
||||
export PGRX_PG_CONFIG_PATH=$HOME/.pg14/pg_config
|
||||
export PGRX_TARGET_INFO_PATH_PG14=$HOME/.pg14/pg_binding
|
||||
cargo build --package pgvectors --lib --features pg14 --target $ARCH-unknown-linux-gnu
|
||||
export PGRX_PG_CONFIG_PATH=$HOME/.pg15/pg_config
|
||||
export PGRX_TARGET_INFO_PATH_PG15=$HOME/.pg15/pg_binding
|
||||
cargo build --package pgvectors --lib --features pg15 --target $ARCH-unknown-linux-gnu
|
||||
export PGRX_PG_CONFIG_PATH=$HOME/.pg16/pg_config
|
||||
export PGRX_TARGET_INFO_PATH_PG16=$HOME/.pg16/pg_binding
|
||||
cargo build --package pgvectors --lib --features pg16 --target $ARCH-unknown-linux-gnu
|
||||
- name: Test
|
||||
run: cargo test --all --no-fail-fast --features "pg$VERSION" --target $ARCH-unknown-linux-gnu -- --nocapture
|
||||
run: |
|
||||
cargo test --workspace --exclude pgvectors --exclude pyvectors --no-fail-fast --target $ARCH-unknown-linux-gnu
|
||||
- name: Test (x86_64)
|
||||
if: matrix.arch == 'x86_64'
|
||||
run: |
|
||||
ASSETS=$(mktemp -d)
|
||||
wget https://downloadmirror.intel.com/813591/sde-external-9.33.0-2024-01-07-lin.tar.xz -O $ASSETS/sde-external.tar.xz
|
||||
tar -xf $ASSETS/sde-external.tar.xz -C $ASSETS
|
||||
cargo --config "target.x86_64-unknown-linux-gnu.runner = [\"$ASSETS/sde-external-9.33.0-2024-01-07-lin/sde64\", \"-spr\", \"--\"]" test "_v4" --all --no-fail-fast --features "pg$VERSION" --target $ARCH-unknown-linux-gnu -- --nocapture
|
||||
cargo --config "target.x86_64-unknown-linux-gnu.runner = [\"$ASSETS/sde-external-9.33.0-2024-01-07-lin/sde64\", \"-spr\", \"--\"]" test "_v4" --workspace --exclude pgvectors --exclude pyvectors --no-fail-fast --target $ARCH-unknown-linux-gnu
|
||||
- name: Post Set up Cache
|
||||
uses: actions/cache/save@v4
|
||||
if: ${{ !steps.cache.outputs.cache-hit }}
|
||||
|
Cargo.lock (generated, 249 changed lines)
@ -827,11 +827,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dashmap"
|
||||
version = "5.5.3"
|
||||
version = "6.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
|
||||
checksum = "804c8821570c3f8b70230c2ba75ffa5c0f9a4189b9a432b6656c536712acae28"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"hashbrown",
|
||||
"lock_api",
|
||||
"once_cell",
|
||||
@ -1242,6 +1243,12 @@ dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
@ -1527,24 +1534,30 @@ dependencies = [
|
||||
"crc32fast",
|
||||
"crossbeam",
|
||||
"dashmap",
|
||||
"flat",
|
||||
"hnsw",
|
||||
"inverted",
|
||||
"ivf",
|
||||
"k_means",
|
||||
"indexing",
|
||||
"log",
|
||||
"parking_lot",
|
||||
"quantization",
|
||||
"rabitq",
|
||||
"rand",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"stoppable_rayon",
|
||||
"storage",
|
||||
"thiserror",
|
||||
"validator",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexing"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"base",
|
||||
"flat",
|
||||
"hnsw",
|
||||
"inverted",
|
||||
"ivf",
|
||||
"rabitq",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indexmap"
|
||||
version = "2.3.0"
|
||||
@ -1555,6 +1568,12 @@ dependencies = [
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indoc"
|
||||
version = "2.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
|
||||
|
||||
[[package]]
|
||||
name = "instant"
|
||||
version = "0.1.13"
|
||||
@ -1825,6 +1844,15 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mime"
|
||||
version = "0.3.17"
|
||||
@ -1887,6 +1915,19 @@ dependencies = [
|
||||
"syn 2.0.72",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ndarray"
|
||||
version = "0.15.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32"
|
||||
dependencies = [
|
||||
"matrixmultiply",
|
||||
"num-complex",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
"rawpointer",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "new_debug_unreachable"
|
||||
version = "1.0.6"
|
||||
@ -1952,6 +1993,21 @@ dependencies = [
|
||||
"libm",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "numpy"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec170733ca37175f5d75a5bea5911d6ff45d2cd52849ce98b685394e4f2f37f4"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"ndarray",
|
||||
"num-complex",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
"pyo3",
|
||||
"rustc-hash 1.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.36.3"
|
||||
@ -2137,6 +2193,38 @@ dependencies = [
|
||||
"unescape",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pgvectors"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"base",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
"chrono",
|
||||
"detect",
|
||||
"embedding",
|
||||
"interprocess_atomic_wait",
|
||||
"libc",
|
||||
"log",
|
||||
"memfd",
|
||||
"memmap2",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"pgrx",
|
||||
"rand",
|
||||
"rustix 0.38.34",
|
||||
"scopeguard",
|
||||
"send_fd",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"service",
|
||||
"thiserror",
|
||||
"tikv-jemallocator",
|
||||
"toml",
|
||||
"validator",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.10.0"
|
||||
@ -2226,6 +2314,12 @@ dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic"
|
||||
version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265"
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.20"
|
||||
@ -2274,6 +2368,97 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3"
|
||||
version = "0.21.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"indoc",
|
||||
"libc",
|
||||
"memoffset",
|
||||
"parking_lot",
|
||||
"portable-atomic",
|
||||
"pyo3-build-config",
|
||||
"pyo3-ffi",
|
||||
"pyo3-macros",
|
||||
"unindent",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-build-config"
|
||||
version = "0.21.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"target-lexicon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-ffi"
|
||||
version = "0.21.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"pyo3-build-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros"
|
||||
version = "0.21.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"pyo3-macros-backend",
|
||||
"quote",
|
||||
"syn 2.0.72",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyo3-macros-backend"
|
||||
version = "0.21.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"pyo3-build-config",
|
||||
"quote",
|
||||
"syn 2.0.72",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pythonize"
|
||||
version = "0.21.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9d0664248812c38cc55a4ed07f88e4df516ce82604b93b1ffdc041aa77a6cb3c"
|
||||
dependencies = [
|
||||
"pyo3",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyvectors"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"base",
|
||||
"detect",
|
||||
"indexing",
|
||||
"ndarray",
|
||||
"num-traits",
|
||||
"numpy",
|
||||
"pyo3",
|
||||
"pythonize",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"stoppable_rayon",
|
||||
"validator",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quantization"
|
||||
version = "0.0.0"
|
||||
@ -2982,6 +3167,12 @@ version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
|
||||
|
||||
[[package]]
|
||||
name = "target-lexicon"
|
||||
version = "0.12.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
|
||||
|
||||
[[package]]
|
||||
name = "term"
|
||||
version = "0.7.0"
|
||||
@ -3242,6 +3433,12 @@ version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
|
||||
|
||||
[[package]]
|
||||
name = "unindent"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.9.0"
|
||||
@ -3310,38 +3507,6 @@ version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a84c137d37ab0142f0f2ddfe332651fdbf252e7b7dbb4e67b6c1f1b2e925101"
|
||||
|
||||
[[package]]
|
||||
name = "vectors"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"base",
|
||||
"bincode",
|
||||
"byteorder",
|
||||
"chrono",
|
||||
"detect",
|
||||
"embedding",
|
||||
"interprocess_atomic_wait",
|
||||
"libc",
|
||||
"log",
|
||||
"memfd",
|
||||
"memmap2",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"pgrx",
|
||||
"rand",
|
||||
"rustix 0.38.34",
|
||||
"scopeguard",
|
||||
"send_fd",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"service",
|
||||
"thiserror",
|
||||
"tikv-jemallocator",
|
||||
"toml",
|
||||
"validator",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.5"
|
||||
|
Cargo.toml (15 changed lines)
@@ -1,13 +1,14 @@
[package]
name = "vectors"
name = "pgvectors"
version.workspace = true
edition.workspace = true

[lib]
name = "vectors"
crate-type = ["cdylib", "lib"]

[[bin]]
name = "pgrx_embed_vectors"
name = "pgrx_embed_pgvectors"
path = "./src/bin/pgrx_embed.rs"

[features]
@@ -49,13 +50,8 @@ tikv-jemallocator = { version = "0.6.0", features = [
"disable_initial_exec_tls",
] }

[patch.crates-io]
pgrx = { git = "https://github.com/tensorchord/pgrx.git", branch = "v0.12.0-alpha.1-patch3" }

[lints]
rust.unsafe_op_in_unsafe_fn = "forbid"
rust.unused_lifetimes = "warn"
rust.unused_qualifications = "warn"
workspace = true

[workspace]
resolver = "2"
@@ -90,6 +86,9 @@ rust.unsafe_op_in_unsafe_fn = "deny"
rust.unused_lifetimes = "warn"
rust.unused_qualifications = "warn"

[patch.crates-io]
pgrx = { git = "https://github.com/tensorchord/pgrx.git", branch = "v0.12.0-alpha.1-patch3" }

[profile.opt]
inherits = "dev"
opt-level = 3
@ -560,31 +560,106 @@ impl Default for ProductQuantizationOptions {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Validate, Alter)]
|
||||
pub struct SearchOptions {
|
||||
#[serde(default = "SearchOptions::default_flat_sq_rerank_size")]
|
||||
#[validate(range(min = 0, max = 65535))]
|
||||
pub flat_sq_rerank_size: u32,
|
||||
#[serde(default = "SearchOptions::default_flat_sq_fast_scan")]
|
||||
pub flat_sq_fast_scan: bool,
|
||||
#[serde(default = "SearchOptions::default_flat_pq_rerank_size")]
|
||||
#[validate(range(min = 0, max = 65535))]
|
||||
pub flat_pq_rerank_size: u32,
|
||||
#[serde(default = "SearchOptions::default_flat_pq_fast_scan")]
|
||||
pub flat_pq_fast_scan: bool,
|
||||
#[serde(default = "SearchOptions::default_ivf_sq_rerank_size")]
|
||||
#[validate(range(min = 0, max = 65535))]
|
||||
pub ivf_sq_rerank_size: u32,
|
||||
#[serde(default = "SearchOptions::default_ivf_sq_fast_scan")]
|
||||
pub ivf_sq_fast_scan: bool,
|
||||
#[serde(default = "SearchOptions::default_ivf_pq_rerank_size")]
|
||||
#[validate(range(min = 0, max = 65535))]
|
||||
pub ivf_pq_rerank_size: u32,
|
||||
#[serde(default = "SearchOptions::default_ivf_pq_fast_scan")]
|
||||
pub ivf_pq_fast_scan: bool,
|
||||
#[serde(default = "SearchOptions::default_ivf_nprobe")]
|
||||
#[validate(range(min = 1, max = 65535))]
|
||||
pub ivf_nprobe: u32,
|
||||
#[serde(default = "SearchOptions::default_hnsw_ef_search")]
|
||||
#[validate(range(min = 1, max = 65535))]
|
||||
pub hnsw_ef_search: u32,
|
||||
#[serde(default = "SearchOptions::default_rabitq_nprobe")]
|
||||
#[validate(range(min = 1, max = 65535))]
|
||||
pub rabitq_nprobe: u32,
|
||||
#[serde(default = "SearchOptions::default_rabitq_fast_scan")]
|
||||
pub rabitq_fast_scan: bool,
|
||||
#[serde(default = "SearchOptions::default_diskann_ef_search")]
|
||||
#[validate(range(min = 1, max = 65535))]
|
||||
pub diskann_ef_search: u32,
|
||||
}
|
||||
|
||||
impl SearchOptions {
|
||||
pub const fn default_flat_sq_rerank_size() -> u32 {
|
||||
0
|
||||
}
|
||||
pub const fn default_flat_sq_fast_scan() -> bool {
|
||||
false
|
||||
}
|
||||
pub const fn default_flat_pq_rerank_size() -> u32 {
|
||||
0
|
||||
}
|
||||
pub const fn default_flat_pq_fast_scan() -> bool {
|
||||
false
|
||||
}
|
||||
pub const fn default_ivf_sq_rerank_size() -> u32 {
|
||||
0
|
||||
}
|
||||
pub const fn default_ivf_sq_fast_scan() -> bool {
|
||||
false
|
||||
}
|
||||
pub const fn default_ivf_pq_rerank_size() -> u32 {
|
||||
0
|
||||
}
|
||||
pub const fn default_ivf_pq_fast_scan() -> bool {
|
||||
false
|
||||
}
|
||||
pub const fn default_ivf_nprobe() -> u32 {
|
||||
10
|
||||
}
|
||||
pub const fn default_hnsw_ef_search() -> u32 {
|
||||
100
|
||||
}
|
||||
pub const fn default_rabitq_nprobe() -> u32 {
|
||||
10
|
||||
}
|
||||
pub const fn default_rabitq_fast_scan() -> bool {
|
||||
true
|
||||
}
|
||||
pub const fn default_diskann_ef_search() -> u32 {
|
||||
100
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for SearchOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
flat_sq_rerank_size: Self::default_flat_sq_rerank_size(),
|
||||
flat_sq_fast_scan: Self::default_flat_sq_fast_scan(),
|
||||
flat_pq_rerank_size: Self::default_flat_pq_rerank_size(),
|
||||
flat_pq_fast_scan: Self::default_flat_pq_fast_scan(),
|
||||
ivf_sq_rerank_size: Self::default_ivf_sq_rerank_size(),
|
||||
ivf_sq_fast_scan: Self::default_ivf_sq_fast_scan(),
|
||||
ivf_pq_rerank_size: Self::default_ivf_pq_rerank_size(),
|
||||
ivf_pq_fast_scan: Self::default_ivf_pq_fast_scan(),
|
||||
ivf_nprobe: Self::default_ivf_nprobe(),
|
||||
hnsw_ef_search: Self::default_hnsw_ef_search(),
|
||||
rabitq_nprobe: Self::default_rabitq_nprobe(),
|
||||
rabitq_fast_scan: Self::default_rabitq_fast_scan(),
|
||||
diskann_ef_search: Self::default_diskann_ef_search(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct IndexStat {
|
||||
pub indexing: bool,
|
||||
|
@ -1,6 +1,6 @@
|
||||
use crate::always_equal::AlwaysEqual;
|
||||
use crate::operator::{Borrowed, Operator};
|
||||
use crate::scalar::F32;
|
||||
use crate::vector::VectorOwned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::any::Any;
|
||||
use std::fmt::Display;
|
||||
@ -73,17 +73,17 @@ pub struct Element {
|
||||
pub payload: AlwaysEqual<Payload>,
|
||||
}
|
||||
|
||||
pub trait Vectors<O: Operator>: Send + Sync {
|
||||
pub trait Vectors<V: VectorOwned> {
|
||||
fn dims(&self) -> u32;
|
||||
fn len(&self) -> u32;
|
||||
fn vector(&self, i: u32) -> Borrowed<'_, O>;
|
||||
fn vector(&self, i: u32) -> V::Borrowed<'_>;
|
||||
}
|
||||
|
||||
pub trait Collection<O: Operator>: Vectors<O> {
|
||||
pub trait Collection {
|
||||
fn payload(&self, i: u32) -> Payload;
|
||||
}
|
||||
|
||||
pub trait Source<O: Operator>: Collection<O> {
|
||||
pub trait Source {
|
||||
fn get_main<T: Any>(&self) -> Option<&T>;
|
||||
fn get_main_len(&self) -> u32;
|
||||
fn check_existing(&self, i: u32) -> bool;
|
||||
|
@ -1,5 +1,5 @@
|
||||
use base::operator::*;
|
||||
use base::search::*;
|
||||
use base::vector::VectorOwned;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
pub fn remap(
|
||||
@ -36,14 +36,14 @@ pub fn remap(
|
||||
remap
|
||||
}
|
||||
|
||||
pub struct RemappedCollection<'a, O: Operator, C: Collection<O>> {
|
||||
pub struct RemappedCollection<'a, V: VectorOwned, C: Collection> {
|
||||
collection: &'a C,
|
||||
remap: Vec<u32>,
|
||||
barrier: u32,
|
||||
_phantom: PhantomData<fn(O) -> O>,
|
||||
_phantom: PhantomData<fn(V) -> V>,
|
||||
}
|
||||
|
||||
impl<'a, O: Operator, S: Source<O>> RemappedCollection<'a, O, S> {
|
||||
impl<'a, V: VectorOwned, S: Vectors<V> + Collection + Source> RemappedCollection<'a, V, S> {
|
||||
pub fn from_source(source: &'a S) -> Self {
|
||||
let barrier = source.get_main_len();
|
||||
let remap = remap(source.len(), barrier, |i| source.check_existing(i));
|
||||
@ -56,7 +56,7 @@ impl<'a, O: Operator, S: Source<O>> RemappedCollection<'a, O, S> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, O: Operator, C: Collection<O>> RemappedCollection<'a, O, C> {
|
||||
impl<'a, V: VectorOwned, C: Vectors<V> + Collection> RemappedCollection<'a, V, C> {
|
||||
pub fn from_collection(collection: &'a C, remap: Vec<u32>) -> Self {
|
||||
assert_eq!(remap.len(), collection.len() as usize);
|
||||
let barrier = collection.len();
|
||||
@ -69,7 +69,7 @@ impl<'a, O: Operator, C: Collection<O>> RemappedCollection<'a, O, C> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, O: Operator, C: Collection<O>> RemappedCollection<'a, O, C> {
|
||||
impl<V: VectorOwned, C: Collection> RemappedCollection<'_, V, C> {
|
||||
#[inline(always)]
|
||||
pub fn skip(&self, x: u32) -> bool {
|
||||
x < self.barrier && (x as usize) < self.remap.len() && self.remap[x as usize] == x
|
||||
@ -80,7 +80,7 @@ impl<'a, O: Operator, C: Collection<O>> RemappedCollection<'a, O, C> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<O: Operator, C: Collection<O>> Vectors<O> for RemappedCollection<'_, O, C> {
|
||||
impl<V: VectorOwned, C: Vectors<V> + Collection> Vectors<V> for RemappedCollection<'_, V, C> {
|
||||
fn dims(&self) -> u32 {
|
||||
self.collection.dims()
|
||||
}
|
||||
@ -89,12 +89,12 @@ impl<O: Operator, C: Collection<O>> Vectors<O> for RemappedCollection<'_, O, C>
|
||||
self.remap.len() as u32
|
||||
}
|
||||
|
||||
fn vector(&self, i: u32) -> Borrowed<'_, O> {
|
||||
fn vector(&self, i: u32) -> V::Borrowed<'_> {
|
||||
self.collection.vector(self.remap[i as usize])
|
||||
}
|
||||
}
|
||||
|
||||
impl<O: Operator, C: Collection<O>> Collection<O> for RemappedCollection<'_, O, C> {
|
||||
impl<V: VectorOwned, C: Collection> Collection for RemappedCollection<'_, V, C> {
|
||||
fn payload(&self, i: u32) -> Payload {
|
||||
self.collection.payload(self.remap[i as usize])
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ use base::vector::VectorOwned;
|
||||
|
||||
const SAMPLES: usize = 65536;
|
||||
|
||||
pub fn sample<O: Operator>(vectors: &impl Vectors<O>) -> Vec2<Scalar<O>> {
|
||||
pub fn sample<O: Operator>(vectors: &impl Vectors<Owned<O>>) -> Vec2<Scalar<O>> {
|
||||
let n = vectors.len();
|
||||
let m = std::cmp::min(SAMPLES as u32, n);
|
||||
let f = super::rand::sample_u32(&mut rand::thread_rng(), n, m);
|
||||
@ -20,7 +20,7 @@ pub fn sample<O: Operator>(vectors: &impl Vectors<O>) -> Vec2<Scalar<O>> {
|
||||
samples
|
||||
}
|
||||
|
||||
pub fn sample_cast<O: Operator>(vectors: &impl Vectors<O>) -> Vec2<F32> {
|
||||
pub fn sample_cast<O: Operator>(vectors: &impl Vectors<Owned<O>>) -> Vec2<F32> {
|
||||
let n = vectors.len();
|
||||
let m = std::cmp::min(SAMPLES as u32, n);
|
||||
let f = super::rand::sample_u32(&mut rand::thread_rng(), n, m);
|
||||
@ -38,7 +38,7 @@ pub fn sample_cast<O: Operator>(vectors: &impl Vectors<O>) -> Vec2<F32> {
|
||||
}
|
||||
|
||||
pub fn sample_subvector_transform<O: Operator>(
|
||||
vectors: &impl Vectors<O>,
|
||||
vectors: &impl Vectors<Owned<O>>,
|
||||
s: usize,
|
||||
e: usize,
|
||||
transform: impl Fn(Borrowed<'_, O>) -> Owned<O>,
|
||||
|
@ -25,7 +25,11 @@ pub struct Flat<O: OperatorFlat> {
|
||||
}
|
||||
|
||||
impl<O: OperatorFlat> Flat<O> {
|
||||
pub fn create(path: impl AsRef<Path>, options: IndexOptions, source: &impl Source<O>) -> Self {
|
||||
pub fn create(
|
||||
path: impl AsRef<Path>,
|
||||
options: IndexOptions,
|
||||
source: &(impl Vectors<Owned<O>> + Collection + Source + Sync),
|
||||
) -> Self {
|
||||
let remapped = RemappedCollection::from_source(source);
|
||||
from_nothing(path, options, &remapped)
|
||||
}
|
||||
@ -62,6 +66,10 @@ impl<O: OperatorFlat> Flat<O> {
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn dims(&self) -> u32 {
|
||||
self.storage.dims()
|
||||
}
|
||||
|
||||
pub fn len(&self) -> u32 {
|
||||
self.storage.len()
|
||||
}
|
||||
@ -78,12 +86,12 @@ impl<O: OperatorFlat> Flat<O> {
|
||||
fn from_nothing<O: OperatorFlat>(
|
||||
path: impl AsRef<Path>,
|
||||
options: IndexOptions,
|
||||
collection: &impl Collection<O>,
|
||||
collection: &(impl Vectors<Owned<O>> + Collection + Sync),
|
||||
) -> Flat<O> {
|
||||
create_dir(path.as_ref()).unwrap();
|
||||
let flat_indexing_options = options.indexing.clone().unwrap_flat();
|
||||
let storage = O::Storage::create(path.as_ref().join("storage"), collection);
|
||||
let quantization = Quantization::create(
|
||||
let quantization = Quantization::<O>::create(
|
||||
path.as_ref().join("quantization"),
|
||||
options.vector,
|
||||
flat_indexing_options.quantization,
|
||||
|
@ -46,7 +46,7 @@ impl<O: OperatorHnsw> Hnsw<O> {
|
||||
pub fn create(
|
||||
path: impl AsRef<Path>,
|
||||
options: IndexOptions,
|
||||
source: &(impl Source<O> + Sync),
|
||||
source: &(impl Vectors<Owned<O>> + Collection + Source + Sync),
|
||||
) -> Self {
|
||||
let remapped = RemappedCollection::from_source(source);
|
||||
if let Some(main) = source.get_main::<Self>() {
|
||||
@ -97,6 +97,10 @@ impl<O: OperatorHnsw> Hnsw<O> {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn dims(&self) -> u32 {
|
||||
self.storage.dims()
|
||||
}
|
||||
|
||||
pub fn len(&self) -> u32 {
|
||||
self.storage.len()
|
||||
}
|
||||
@ -113,7 +117,7 @@ impl<O: OperatorHnsw> Hnsw<O> {
|
||||
fn from_nothing<O: OperatorHnsw>(
|
||||
path: impl AsRef<Path>,
|
||||
options: IndexOptions,
|
||||
collection: &(impl Collection<O> + Sync),
|
||||
collection: &(impl Vectors<Owned<O>> + Collection + Sync),
|
||||
) -> Hnsw<O> {
|
||||
create_dir(path.as_ref()).unwrap();
|
||||
let HnswIndexingOptions {
|
||||
@ -134,7 +138,7 @@ fn from_nothing<O: OperatorHnsw>(
|
||||
finish(&mut g, m);
|
||||
let storage = O::Storage::create(path.as_ref().join("storage"), collection);
|
||||
rayon::check();
|
||||
let quantization = Quantization::create(
|
||||
let quantization = Quantization::<O>::create(
|
||||
path.as_ref().join("quantization"),
|
||||
options.vector,
|
||||
quantization_options,
|
||||
@ -195,7 +199,7 @@ fn from_nothing<O: OperatorHnsw>(
|
||||
fn from_main<O: OperatorHnsw>(
|
||||
path: impl AsRef<Path>,
|
||||
options: IndexOptions,
|
||||
remapped: &RemappedCollection<O, impl Collection<O> + Sync>,
|
||||
remapped: &RemappedCollection<Owned<O>, impl Vectors<Owned<O>> + Collection + Sync>,
|
||||
main: &Hnsw<O>,
|
||||
) -> Hnsw<O> {
|
||||
create_dir(path.as_ref()).unwrap();
|
||||
@ -232,7 +236,7 @@ fn from_main<O: OperatorHnsw>(
|
||||
finish(&mut g, m);
|
||||
let storage = O::Storage::create(path.as_ref().join("storage"), remapped);
|
||||
rayon::check();
|
||||
let quantization = Quantization::create(
|
||||
let quantization = Quantization::<O>::create(
|
||||
path.as_ref().join("quantization"),
|
||||
options.vector,
|
||||
quantization_options,
|
||||
|
@ -9,7 +9,7 @@ bincode.workspace = true
|
||||
byteorder.workspace = true
|
||||
crc32fast = "1.4.0"
|
||||
crossbeam = "0.8.4"
|
||||
dashmap = "5.5.3"
|
||||
dashmap = "6.0.1"
|
||||
log.workspace = true
|
||||
parking_lot.workspace = true
|
||||
rand.workspace = true
|
||||
@ -20,17 +20,8 @@ validator.workspace = true
|
||||
|
||||
base = { path = "../base" }
|
||||
common = { path = "../common" }
|
||||
k_means = { path = "../k_means" }
|
||||
quantization = { path = "../quantization" }
|
||||
indexing = { path = "../indexing" }
|
||||
stoppable_rayon = { path = "../stoppable_rayon" }
|
||||
storage = { path = "../storage" }
|
||||
|
||||
# algorithms
|
||||
flat = { path = "../flat" }
|
||||
hnsw = { path = "../hnsw" }
|
||||
inverted = { path = "../inverted" }
|
||||
ivf = { path = "../ivf" }
|
||||
rabitq = { path = "../rabitq" }
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
@ -1,52 +0,0 @@
|
||||
use crate::Op;
|
||||
use base::index::*;
|
||||
use base::operator::*;
|
||||
use base::search::*;
|
||||
use std::convert::Infallible;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
#[error("`GrowingIndexing` is read-only.")]
|
||||
pub struct GrowingIndexingInsertError;
|
||||
|
||||
pub enum GrowingIndexing<O: Op> {
|
||||
Infallible(Infallible, fn(O) -> O),
|
||||
}
|
||||
|
||||
impl<O: Op> GrowingIndexing<O> {
|
||||
pub fn new(_: VectorOptions, _: usize) -> Self {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn is_full(&self) -> bool {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn seal(&self) {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn insert(&self, _: O::VectorOwned, _: Payload) -> Result<(), GrowingIndexingInsertError> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn vbase<'a>(
|
||||
&'a self,
|
||||
_: Borrowed<'a, O>,
|
||||
_: &'a SearchOptions,
|
||||
) -> Box<dyn Iterator<Item = Element> + 'a> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn len(&self) -> u32 {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn vector(&self, _i: u32) -> Borrowed<'_, O> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn payload(&self, _i: u32) -> Payload {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
@ -1,2 +0,0 @@
|
||||
pub mod growing;
|
||||
pub mod sealed;
|
@ -1,7 +1,6 @@
|
||||
#![allow(clippy::len_without_is_empty)]
|
||||
|
||||
pub mod delete;
|
||||
pub mod indexing;
|
||||
pub mod optimizing;
|
||||
pub mod segment;
|
||||
|
||||
@ -25,11 +24,8 @@ use common::dir_ops::sync_walk_from_dir;
|
||||
use common::file_atomic::FileAtomic;
|
||||
use crossbeam::atomic::AtomicCell;
|
||||
use crossbeam::channel::Sender;
|
||||
use inverted::operator::OperatorInvertedIndex;
|
||||
use ivf::operator::OperatorIvf;
|
||||
use indexing::OperatorIndexing;
|
||||
use parking_lot::Mutex;
|
||||
use quantization::operator::OperatorQuantization;
|
||||
use rabitq::operator::OperatorRabitq;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
@ -39,30 +35,12 @@ use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::thread::JoinHandle;
|
||||
use std::time::Instant;
|
||||
use storage::OperatorStorage;
|
||||
use thiserror::Error;
|
||||
use validator::Validate;
|
||||
|
||||
pub trait Op:
|
||||
Operator
|
||||
+ OperatorQuantization
|
||||
+ OperatorStorage
|
||||
+ OperatorIvf
|
||||
+ OperatorInvertedIndex
|
||||
+ OperatorRabitq
|
||||
{
|
||||
}
|
||||
pub trait Op: OperatorIndexing {}
|
||||
|
||||
impl<
|
||||
T: Operator
|
||||
+ OperatorQuantization
|
||||
+ OperatorStorage
|
||||
+ OperatorIvf
|
||||
+ OperatorInvertedIndex
|
||||
+ OperatorRabitq,
|
||||
> Op for T
|
||||
{
|
||||
}
|
||||
impl<T: OperatorIndexing> Op for T {}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
#[error("The index view is outdated.")]
|
||||
@ -338,7 +316,7 @@ impl<O: Op> Index<O> {
|
||||
}
|
||||
pub fn create_sealed_segment(
|
||||
&self,
|
||||
source: &(impl Source<O> + Sync),
|
||||
source: &(impl Vectors<Owned<O>> + Collection + Source + Sync),
|
||||
sealed_segment_ids: &[NonZeroU128],
|
||||
growing_segment_ids: &[NonZeroU128],
|
||||
) -> Option<Arc<SealedSegment<O>>> {
|
||||
|
@ -2,20 +2,22 @@ use crate::delete::Delete;
|
||||
use crate::Op;
|
||||
use crate::{GrowingSegment, SealedSegment};
|
||||
use base::index::IndexOptions;
|
||||
use base::operator::Borrowed;
|
||||
use base::operator::{Borrowed, Owned};
|
||||
use base::search::*;
|
||||
use std::any::Any;
|
||||
use std::fmt::Debug;
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub struct IndexSource<O: Op> {
|
||||
pub struct IndexSource<V, O: Op> {
|
||||
pub(super) sealed: Option<Arc<SealedSegment<O>>>,
|
||||
pub(super) growing: Vec<Arc<GrowingSegment<O>>>,
|
||||
pub(super) dims: u32,
|
||||
pub(super) delete: Arc<Delete>,
|
||||
_phantom: PhantomData<fn(V) -> V>,
|
||||
}
|
||||
|
||||
impl<O: Op> IndexSource<O> {
|
||||
impl<O: Op> IndexSource<Owned<O>, O> {
|
||||
pub fn new(
|
||||
options: IndexOptions,
|
||||
sealed: Option<Arc<SealedSegment<O>>>,
|
||||
@ -27,11 +29,12 @@ impl<O: Op> IndexSource<O> {
|
||||
growing,
|
||||
dims: options.vector.dims,
|
||||
delete,
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<O: Op> Vectors<O> for IndexSource<O> {
|
||||
impl<O: Op> Vectors<Owned<O>> for IndexSource<Owned<O>, O> {
|
||||
fn dims(&self) -> u32 {
|
||||
self.dims
|
||||
}
|
||||
@ -58,7 +61,7 @@ impl<O: Op> Vectors<O> for IndexSource<O> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<O: Op> Collection<O> for IndexSource<O> {
|
||||
impl<O: Op> Collection for IndexSource<Owned<O>, O> {
|
||||
fn payload(&self, mut index: u32) -> Payload {
|
||||
for x in self.sealed.iter() {
|
||||
if index < x.len() {
|
||||
@ -76,7 +79,7 @@ impl<O: Op> Collection<O> for IndexSource<O> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<O: Op> Source<O> for IndexSource<O> {
|
||||
impl<O: Op> Source for IndexSource<Owned<O>, O> {
|
||||
fn get_main<T: Any>(&self) -> Option<&T> {
|
||||
let x = self.sealed.as_ref()?;
|
||||
Some(
|
||||
@ -95,12 +98,13 @@ impl<O: Op> Source<O> for IndexSource<O> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RoGrowingCollection<O: Op> {
|
||||
pub struct RoGrowingCollection<V, O: Op> {
|
||||
pub(super) growing: Vec<Arc<GrowingSegment<O>>>,
|
||||
pub(super) dims: u32,
|
||||
_phantom: PhantomData<fn(V) -> V>,
|
||||
}
|
||||
|
||||
impl<O: Op> Debug for RoGrowingCollection<O> {
|
||||
impl<O: Op> Debug for RoGrowingCollection<Owned<O>, O> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("RoGrowingCollection")
|
||||
.field("growing", &self.growing)
|
||||
@ -109,7 +113,7 @@ impl<O: Op> Debug for RoGrowingCollection<O> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<O: Op> Vectors<O> for RoGrowingCollection<O> {
|
||||
impl<O: Op> Vectors<Owned<O>> for RoGrowingCollection<Owned<O>, O> {
|
||||
fn dims(&self) -> u32 {
|
||||
self.dims
|
||||
}
|
||||
@ -129,7 +133,7 @@ impl<O: Op> Vectors<O> for RoGrowingCollection<O> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<O: Op> Collection<O> for RoGrowingCollection<O> {
|
||||
impl<O: Op> Collection for RoGrowingCollection<Owned<O>, O> {
|
||||
fn payload(&self, mut index: u32) -> Payload {
|
||||
for x in self.growing.iter() {
|
||||
if index < x.len() {
|
||||
|
@ -1,13 +1,14 @@
|
||||
use crate::optimizing::index_source::IndexSource;
|
||||
use crate::Index;
|
||||
use crate::Op;
|
||||
use base::operator::Owned;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub fn scan<O: Op>(
|
||||
index: Arc<Index<O>>,
|
||||
capacity: u32,
|
||||
delete_threshold: f64,
|
||||
) -> Option<IndexSource<O>> {
|
||||
) -> Option<IndexSource<Owned<O>, O>> {
|
||||
let (sealed, growing) = 'a: {
|
||||
let protect = index.protect.lock();
|
||||
// approach 1: merge small segments to a big segment
|
||||
@ -86,7 +87,7 @@ pub fn scan<O: Op>(
|
||||
))
|
||||
}
|
||||
|
||||
pub fn make<O: Op>(index: Arc<Index<O>>, source: IndexSource<O>) {
|
||||
pub fn make<O: Op>(index: Arc<Index<O>>, source: IndexSource<Owned<O>, O>) {
|
||||
let _ = index.create_sealed_segment(
|
||||
&source,
|
||||
&source.sealed.iter().map(|x| x.id()).collect::<Vec<_>>(),
|
||||
|
@ -1,4 +1,3 @@
|
||||
use crate::indexing::sealed::SealedIndexing;
|
||||
use crate::utils::dir_ops::dir_size;
|
||||
use crate::IndexTracker;
|
||||
use crate::Op;
|
||||
@ -6,6 +5,7 @@ use base::index::*;
|
||||
use base::operator::*;
|
||||
use base::search::*;
|
||||
use crossbeam::atomic::AtomicCell;
|
||||
use indexing::SealedIndexing;
|
||||
use std::any::Any;
|
||||
use std::fmt::Debug;
|
||||
use std::num::NonZeroU128;
|
||||
@ -37,7 +37,7 @@ impl<O: Op> SealedSegment<O> {
|
||||
path: PathBuf,
|
||||
id: NonZeroU128,
|
||||
options: IndexOptions,
|
||||
source: &(impl Source<O> + Sync),
|
||||
source: &(impl Vectors<Owned<O>> + Collection + Source + Sync),
|
||||
) -> Arc<Self> {
|
||||
let indexing = SealedIndexing::create(&path, options, source);
|
||||
Arc::new(Self {
|
||||
|
crates/indexing/Cargo.toml (new file, 19 lines)
@@ -0,0 +1,19 @@
[package]
name = "indexing"
version.workspace = true
edition.workspace = true

[dependencies]
thiserror.workspace = true

base = { path = "../base" }

# algorithms
flat = { path = "../flat" }
hnsw = { path = "../hnsw" }
inverted = { path = "../inverted" }
ivf = { path = "../ivf" }
rabitq = { path = "../rabitq" }

[lints]
workspace = true
crates/indexing/src/lib.rs (new file, 15 lines)
@@ -0,0 +1,15 @@
pub mod sealed;

pub use sealed::SealedIndexing;

use base::operator::Operator;
use inverted::operator::OperatorInvertedIndex;
use ivf::operator::OperatorIvf;
use rabitq::operator::OperatorRabitq;

pub trait OperatorIndexing:
    Operator + OperatorIvf + OperatorInvertedIndex + OperatorRabitq
{
}

impl<T: Operator + OperatorIvf + OperatorInvertedIndex + OperatorRabitq> OperatorIndexing for T {}
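The empty supertrait plus blanket impl above is what lets crates/index replace its long bound list with the single `pub trait Op: OperatorIndexing {}` shown earlier in this diff. A minimal, self-contained sketch of the same pattern follows; the trait and type names here are stand-ins for illustration, not the crate's real operator traits.

```rust
// Illustrative sketch of the blanket-supertrait pattern used by `OperatorIndexing`.
trait Quantize {}
trait Store {}

// One alias trait that bundles every capability an index needs.
trait Indexable: Quantize + Store {}

// Blanket impl: any type with all the capabilities is automatically `Indexable`.
impl<T: Quantize + Store> Indexable for T {}

struct DenseVectors;
impl Quantize for DenseVectors {}
impl Store for DenseVectors {}

// Downstream code can now write one bound instead of repeating the full list.
fn build_index<T: Indexable>(_vectors: &T) {}

fn main() {
    build_index(&DenseVectors);
}
```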
@ -1,4 +1,4 @@
|
||||
use crate::Op;
|
||||
use crate::OperatorIndexing;
|
||||
use base::index::*;
|
||||
use base::operator::*;
|
||||
use base::search::*;
|
||||
@ -9,7 +9,7 @@ use ivf::Ivf;
|
||||
use rabitq::Rabitq;
|
||||
use std::path::Path;
|
||||
|
||||
pub enum SealedIndexing<O: Op> {
|
||||
pub enum SealedIndexing<O: OperatorIndexing> {
|
||||
Flat(Flat<O>),
|
||||
Ivf(Ivf<O>),
|
||||
Hnsw(Hnsw<O>),
|
||||
@ -17,11 +17,11 @@ pub enum SealedIndexing<O: Op> {
|
||||
Rabitq(Rabitq<O>),
|
||||
}
|
||||
|
||||
impl<O: Op> SealedIndexing<O> {
|
||||
impl<O: OperatorIndexing> SealedIndexing<O> {
|
||||
pub fn create(
|
||||
path: impl AsRef<Path>,
|
||||
options: IndexOptions,
|
||||
source: &(impl Source<O> + Sync),
|
||||
source: &(impl Vectors<Owned<O>> + Collection + Source + Sync),
|
||||
) -> Self {
|
||||
match options.indexing {
|
||||
IndexingOptions::Flat(_) => Self::Flat(Flat::create(path, options, source)),
|
||||
@ -57,8 +57,20 @@ impl<O: Op> SealedIndexing<O> {
|
||||
SealedIndexing::Rabitq(x) => x.vbase(vector, opts),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn len(&self) -> u32 {
|
||||
impl<O: OperatorIndexing> Vectors<Owned<O>> for SealedIndexing<O> {
|
||||
fn dims(&self) -> u32 {
|
||||
match self {
|
||||
SealedIndexing::Flat(x) => x.dims(),
|
||||
SealedIndexing::Ivf(x) => x.dims(),
|
||||
SealedIndexing::Hnsw(x) => x.dims(),
|
||||
SealedIndexing::InvertedIndex(x) => x.dims(),
|
||||
SealedIndexing::Rabitq(x) => x.dims(),
|
||||
}
|
||||
}
|
||||
|
||||
fn len(&self) -> u32 {
|
||||
match self {
|
||||
SealedIndexing::Flat(x) => x.len(),
|
||||
SealedIndexing::Ivf(x) => x.len(),
|
||||
@ -68,7 +80,7 @@ impl<O: Op> SealedIndexing<O> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn vector(&self, i: u32) -> Borrowed<'_, O> {
|
||||
fn vector(&self, i: u32) -> Borrowed<'_, O> {
|
||||
match self {
|
||||
SealedIndexing::Flat(x) => x.vector(i),
|
||||
SealedIndexing::Ivf(x) => x.vector(i),
|
||||
@ -77,8 +89,10 @@ impl<O: Op> SealedIndexing<O> {
|
||||
SealedIndexing::Rabitq(x) => x.vector(i),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn payload(&self, i: u32) -> Payload {
|
||||
impl<O: OperatorIndexing> Collection for SealedIndexing<O> {
|
||||
fn payload(&self, i: u32) -> Payload {
|
||||
match self {
|
||||
SealedIndexing::Flat(x) => x.payload(i),
|
||||
SealedIndexing::Ivf(x) => x.payload(i),
|
@ -5,7 +5,7 @@ pub mod operator;
|
||||
use self::operator::OperatorInvertedIndex;
|
||||
use base::always_equal::AlwaysEqual;
|
||||
use base::index::{IndexOptions, SearchOptions};
|
||||
use base::operator::Borrowed;
|
||||
use base::operator::{Borrowed, Owned};
|
||||
use base::scalar::{ScalarLike, F32};
|
||||
use base::search::{Collection, Element, Payload, Source, Vectors};
|
||||
use common::json::Json;
|
||||
@ -29,7 +29,11 @@ pub struct InvertedIndex<O: OperatorInvertedIndex> {
|
||||
}
|
||||
|
||||
impl<O: OperatorInvertedIndex> InvertedIndex<O> {
|
||||
pub fn create(path: impl AsRef<Path>, options: IndexOptions, source: &impl Source<O>) -> Self {
|
||||
pub fn create(
|
||||
path: impl AsRef<Path>,
|
||||
options: IndexOptions,
|
||||
source: &(impl Vectors<Owned<O>> + Collection + Source),
|
||||
) -> Self {
|
||||
let remapped = RemappedCollection::from_source(source);
|
||||
from_nothing(path, options, &remapped)
|
||||
}
|
||||
@ -65,6 +69,10 @@ impl<O: OperatorInvertedIndex> InvertedIndex<O> {
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn dims(&self) -> u32 {
|
||||
self.storage.dims()
|
||||
}
|
||||
|
||||
pub fn len(&self) -> u32 {
|
||||
self.storage.len()
|
||||
}
|
||||
@ -81,7 +89,7 @@ impl<O: OperatorInvertedIndex> InvertedIndex<O> {
|
||||
fn from_nothing<O: OperatorInvertedIndex>(
|
||||
path: impl AsRef<Path>,
|
||||
opts: IndexOptions,
|
||||
collection: &impl Collection<O>,
|
||||
collection: &(impl Vectors<Owned<O>> + Collection),
|
||||
) -> InvertedIndex<O> {
|
||||
create_dir(path.as_ref()).expect("failed to create path for inverted index");
|
||||
|
||||
|
@ -26,7 +26,11 @@ pub struct IvfNaive<O: Op> {
|
||||
}
|
||||
|
||||
impl<O: Op> IvfNaive<O> {
|
||||
pub fn create(path: impl AsRef<Path>, options: IndexOptions, source: &impl Source<O>) -> Self {
|
||||
pub fn create(
|
||||
path: impl AsRef<Path>,
|
||||
options: IndexOptions,
|
||||
source: &(impl Vectors<Owned<O>> + Collection + Source + Sync),
|
||||
) -> Self {
|
||||
let remapped = RemappedCollection::from_source(source);
|
||||
from_nothing(path, options, &remapped)
|
||||
}
|
||||
@ -35,6 +39,10 @@ impl<O: Op> IvfNaive<O> {
|
||||
open(path)
|
||||
}
|
||||
|
||||
pub fn dims(&self) -> u32 {
|
||||
self.storage.dims()
|
||||
}
|
||||
|
||||
pub fn len(&self) -> u32 {
|
||||
self.storage.len()
|
||||
}
|
||||
@ -87,7 +95,7 @@ impl<O: Op> IvfNaive<O> {
|
||||
fn from_nothing<O: Op>(
|
||||
path: impl AsRef<Path>,
|
||||
options: IndexOptions,
|
||||
collection: &impl Collection<O>,
|
||||
collection: &(impl Vectors<Owned<O>> + Collection + Sync),
|
||||
) -> IvfNaive<O> {
|
||||
create_dir(path.as_ref()).unwrap();
|
||||
let IvfIndexingOptions {
|
||||
@ -96,7 +104,7 @@ fn from_nothing<O: Op>(
|
||||
residual_quantization: _,
|
||||
quantization: quantization_options,
|
||||
} = options.indexing.clone().unwrap_ivf();
|
||||
let samples = common::sample::sample(collection);
|
||||
let samples = common::sample::sample::<O>(collection);
|
||||
rayon::check();
|
||||
let centroids = k_means(nlist as usize, samples, spherical_centroids);
|
||||
rayon::check();
|
||||
@ -115,7 +123,7 @@ fn from_nothing<O: Op>(
|
||||
let collection = RemappedCollection::from_collection(collection, remap);
|
||||
rayon::check();
|
||||
let storage = O::Storage::create(path.as_ref().join("storage"), &collection);
|
||||
let quantization = Quantization::create(
|
||||
let quantization = Quantization::<O>::create(
|
||||
path.as_ref().join("quantization"),
|
||||
options.vector,
|
||||
quantization_options,
|
||||
|
@@ -26,7 +26,11 @@ pub struct IvfResidual<O: Op> {
}

impl<O: Op> IvfResidual<O> {
-   pub fn create(path: impl AsRef<Path>, options: IndexOptions, source: &impl Source<O>) -> Self {
+   pub fn create(
+       path: impl AsRef<Path>,
+       options: IndexOptions,
+       source: &(impl Vectors<Owned<O>> + Collection + Source + Sync),
+   ) -> Self {
        let remapped = RemappedCollection::from_source(source);
        from_nothing(path, options, &remapped)
    }
@@ -35,6 +39,10 @@ impl<O: Op> IvfResidual<O> {
        open(path)
    }

    pub fn dims(&self) -> u32 {
        self.storage.dims()
    }

    pub fn len(&self) -> u32 {
        self.storage.len()
    }
@@ -89,7 +97,7 @@ impl<O: Op> IvfResidual<O> {
fn from_nothing<O: Op>(
    path: impl AsRef<Path>,
    options: IndexOptions,
-   collection: &impl Collection<O>,
+   collection: &(impl Vectors<Owned<O>> + Collection + Sync),
) -> IvfResidual<O> {
    create_dir(path.as_ref()).unwrap();
    let IvfIndexingOptions {
@@ -98,7 +106,7 @@ fn from_nothing<O: Op>(
        residual_quantization: _,
        quantization: quantization_options,
    } = options.indexing.clone().unwrap_ivf();
-   let samples = common::sample::sample(collection);
+   let samples = common::sample::sample::<O>(collection);
    rayon::check();
    let centroids = k_means(nlist as usize, samples, spherical_centroids);
    rayon::check();
@@ -117,7 +125,7 @@ fn from_nothing<O: Op>(
    let collection = RemappedCollection::from_collection(collection, remap);
    rayon::check();
    let storage = O::Storage::create(path.as_ref().join("storage"), &collection);
-   let quantization = Quantization::create(
+   let quantization = Quantization::<O>::create(
        path.as_ref().join("quantization"),
        options.vector,
        quantization_options,
@@ -20,7 +20,11 @@ pub enum Ivf<O: OperatorIvf> {
}

impl<O: OperatorIvf> Ivf<O> {
-   pub fn create(path: impl AsRef<Path>, options: IndexOptions, source: &impl Source<O>) -> Self {
+   pub fn create(
+       path: impl AsRef<Path>,
+       options: IndexOptions,
+       source: &(impl Vectors<Owned<O>> + Collection + Source + Sync),
+   ) -> Self {
        let IvfIndexingOptions {
            quantization: quantization_options,
            residual_quantization,
@@ -54,6 +58,13 @@ impl<O: OperatorIvf> Ivf<O> {
        }
    }

    pub fn dims(&self) -> u32 {
        match self {
            Ivf::Naive(x) => x.dims(),
            Ivf::Residual(x) => x.dims(),
        }
    }

    pub fn len(&self) -> u32 {
        match self {
            Ivf::Naive(x) => x.len(),
30  crates/pyvectors/Cargo.toml  Normal file
@@ -0,0 +1,30 @@
[package]
name = "pyvectors"
version.workspace = true
edition.workspace = true

[lib]
name = "pyvectors"
crate-type = ["cdylib"]

[dependencies]
num-traits.workspace = true
serde.workspace = true
serde_json.workspace = true
validator.workspace = true

# python
ndarray = { version = "0.15" }
numpy = { version = "0.21" }
pyo3 = { version = "0.21", features = ["extension-module"] }
pythonize = "0.21"

base = { path = "../base" }
detect = { path = "../detect" }
indexing = { path = "../indexing" }
stoppable_rayon = { path = "../stoppable_rayon" }

[lints]
rust.unsafe_op_in_unsafe_fn = "allow"
rust.unused_lifetimes = "warn"
rust.unused_qualifications = "warn"
9  crates/pyvectors/pyproject.toml  Normal file
@@ -0,0 +1,9 @@
[build-system]
requires = ["maturin>=1,<2"]
build-backend = "maturin"

[tool.maturin]
module-name = "vectors"

[project]
name = "vectors"
41  crates/pyvectors/src/dataset.rs  Normal file
@@ -0,0 +1,41 @@
use base::scalar::F32;
use base::search::Vectors;
use base::vector::{Vecf32Borrowed, Vecf32Owned};
use ndarray::{s, ArrayView2};

pub struct Dataset<'a> {
    underlying: ArrayView2<'a, f32>,
}

impl<'a> Dataset<'a> {
    pub fn new(dataset: ArrayView2<'a, f32>) -> Self {
        assert!(1 <= dataset.dim().1 && dataset.dim().1 <= 65535);
        assert!(dataset.dim().1 <= u32::MAX as usize);
        assert!(dataset.dim().0 <= u32::MAX as usize);
        Self {
            underlying: dataset,
        }
    }
}

impl<'a> Vectors<Vecf32Owned> for Dataset<'a> {
    fn dims(&self) -> u32 {
        self.underlying.dim().1 as _
    }

    fn len(&self) -> u32 {
        self.underlying.dim().0 as _
    }

    fn vector(&self, i: u32) -> Vecf32Borrowed<'_> {
        let s = self
            .underlying
            .slice(s!(i as usize, ..))
            .to_slice()
            .expect("memory is non continuous");
        fn cast(x: &[f32]) -> &[F32] {
            unsafe { std::mem::transmute(x) }
        }
        Vecf32Borrowed::new(cast(s))
    }
}
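
For orientation, a minimal sketch (not part of the patch) of feeding Dataset from a row-major ndarray on the Rust side; it assumes the code sits in the same crate as the module above, and the helper name is illustrative:

use base::search::Vectors;
use ndarray::Array2;

fn demo_dataset() {
    // Three 4-dimensional vectors, stored row-major so that every row is
    // contiguous, which is what Dataset::vector's to_slice() expects.
    let data = Array2::from_shape_vec((3, 4), (0..12).map(|x| x as f32).collect::<Vec<f32>>()).unwrap();
    let ds = Dataset::new(data.view());
    assert_eq!(ds.dims(), 4);
    assert_eq!(ds.len(), 3);
    let _first = ds.vector(0); // a Vecf32Borrowed over the first row
}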
122  crates/pyvectors/src/indexing.rs  Normal file
@@ -0,0 +1,122 @@
use base::distance::DistanceKind;
use base::index::{IndexOptions, SearchOptions};
use base::operator::*;
use base::scalar::F32;
use base::search::{Collection, Element, Pointer, Source, Vectors};
use base::vector::*;
use std::path::Path;

#[allow(dead_code)]
pub enum Indexing {
    Vecf32Dot(indexing::SealedIndexing<Vecf32Dot>),
    Vecf32L2(indexing::SealedIndexing<Vecf32L2>),
    Vecf16Dot(indexing::SealedIndexing<Vecf16Dot>),
    Vecf16L2(indexing::SealedIndexing<Vecf16L2>),
    BVectorDot(indexing::SealedIndexing<BVectorDot>),
    BVectorHamming(indexing::SealedIndexing<BVectorHamming>),
    BVectorJaccard(indexing::SealedIndexing<BVectorJaccard>),
    SVecf32Dot(indexing::SealedIndexing<SVecf32Dot>),
    SVecf32L2(indexing::SealedIndexing<SVecf32L2>),
}

impl Indexing {
    pub fn create(
        path: impl AsRef<Path>,
        index_options: IndexOptions,
        source: impl Vectors<Vecf32Owned> + Collection + Source + Sync,
    ) -> Self {
        let path = path.as_ref();
        match (index_options.vector.v, index_options.vector.d) {
            (VectorKind::Vecf32, DistanceKind::L2) => Self::Vecf32L2(
                stoppable_rayon::ThreadPoolBuilder::new()
                    .build_scoped(|pool| {
                        pool.install(|| {
                            let x = indexing::SealedIndexing::create(
                                &path,
                                index_options.clone(),
                                &source,
                            );
                            // write options
                            std::fs::write(
                                path.join(".index_options"),
                                serde_json::to_string(&index_options).unwrap(),
                            )
                            .unwrap();
                            x
                        })
                    })
                    .unwrap()
                    .unwrap(),
            ),
            (VectorKind::Vecf32, DistanceKind::Dot) => Self::Vecf32Dot(
                stoppable_rayon::ThreadPoolBuilder::new()
                    .build_scoped(|pool| {
                        pool.install(|| {
                            let x = indexing::SealedIndexing::create(
                                &path,
                                index_options.clone(),
                                &source,
                            );
                            // write options
                            std::fs::write(
                                path.join(".index_options"),
                                serde_json::to_string(&index_options).unwrap(),
                            )
                            .unwrap();
                            x
                        })
                    })
                    .unwrap()
                    .unwrap(),
            ),
            _ => unimplemented!(),
        }
    }
    pub fn open(path: impl AsRef<Path>) -> Self {
        let path = path.as_ref();
        // read options
        let index_options: IndexOptions =
            serde_json::from_slice(&std::fs::read(path.join(".index_options")).unwrap()).unwrap();
        match (index_options.vector.v, index_options.vector.d) {
            (VectorKind::Vecf32, DistanceKind::L2) => {
                Self::Vecf32L2(indexing::SealedIndexing::open(path, index_options))
            }
            (VectorKind::Vecf32, DistanceKind::Dot) => {
                Self::Vecf32Dot(indexing::SealedIndexing::open(path, index_options))
            }
            _ => unimplemented!(),
        }
    }
    pub fn vbase<'a>(
        &'a self,
        vector: BorrowedVector<'a>,
        opts: &'a SearchOptions,
    ) -> impl Iterator<Item = (F32, Pointer)> + 'a {
        match (self, vector) {
            (Self::Vecf32L2(x), BorrowedVector::Vecf32(vector)) => x.vbase(vector, opts),
            (Self::Vecf32Dot(x), BorrowedVector::Vecf32(vector)) => x.vbase(vector, opts),
            (Self::Vecf16Dot(x), BorrowedVector::Vecf16(vector)) => x.vbase(vector, opts),
            (Self::Vecf16L2(x), BorrowedVector::Vecf16(vector)) => x.vbase(vector, opts),
            (Self::BVectorDot(x), BorrowedVector::BVector(vector)) => x.vbase(vector, opts),
            (Self::BVectorHamming(x), BorrowedVector::BVector(vector)) => x.vbase(vector, opts),
            (Self::BVectorJaccard(x), BorrowedVector::BVector(vector)) => x.vbase(vector, opts),
            (Self::SVecf32Dot(x), BorrowedVector::SVecf32(vector)) => x.vbase(vector, opts),
            (Self::SVecf32L2(x), BorrowedVector::SVecf32(vector)) => x.vbase(vector, opts),
            _ => panic!("invalid vector type"),
        }
        .map(|Element { distance, payload }| (distance, payload.0.pointer()))
    }
    pub fn dims(&self) -> u32 {
        match self {
            Indexing::Vecf32Dot(x) => x.dims(),
            Indexing::Vecf32L2(x) => x.dims(),
            Indexing::Vecf16Dot(x) => x.dims(),
            Indexing::Vecf16L2(x) => x.dims(),
            Indexing::BVectorDot(x) => x.dims(),
            Indexing::BVectorHamming(x) => x.dims(),
            Indexing::BVectorJaccard(x) => x.dims(),
            Indexing::SVecf32Dot(x) => x.dims(),
            Indexing::SVecf32L2(x) => x.dims(),
        }
    }
}
117  crates/pyvectors/src/lib.rs  Normal file
@@ -0,0 +1,117 @@
mod dataset;
mod indexing;
mod with_labels;

use base::distance::DistanceKind;
use base::index::*;
use base::search::Vectors;
use base::vector::{BorrowedVector, VectorKind};
use dataset::Dataset;
use ndarray::{Array1, Array2};
use numpy::{IntoPyArray, PyArray2, PyReadonlyArray1, PyReadonlyArray2};
use pyo3::prelude::*;
use pyo3::types::PyDict;
use std::path::PathBuf;
use validator::Validate;
use with_labels::WithLabels;

#[pymodule]
fn vectors(m: &Bound<'_, PyModule>) -> PyResult<()> {
    detect::init();
    m.add_class::<Indexing>()?;
    Ok(())
}

#[pyclass]
pub struct Indexing(indexing::Indexing);

#[pymethods]
impl Indexing {
    #[staticmethod]
    #[pyo3(signature = (path, distance, dims, dataset, labels, **indexing_options))]
    pub fn create(
        path: &str,
        distance: &str,
        dims: u32,
        dataset: PyReadonlyArray2<'_, f32>,
        labels: PyReadonlyArray1<'_, i64>,
        indexing_options: Option<Bound<'_, PyDict>>,
    ) -> Self {
        // path
        let path = PathBuf::from(path);
        assert_eq!(std::fs::exists(&path).ok(), Some(false), "file exists");
        // distance, dims
        assert!(matches!(dims, 1..=65535));
        let vector_options = VectorOptions {
            dims,
            v: VectorKind::Vecf32,
            d: match distance {
                "dot" => DistanceKind::Dot,
                "l2" => DistanceKind::L2,
                "hamming" => DistanceKind::Hamming,
                "jaccard" => DistanceKind::Jaccard,
                _ => unimplemented!("distance type {distance} is not implemented"),
            },
        };
        vector_options.validate().expect("not valid vector options");
        // dataset
        let dataset = dataset.as_array();
        assert!(dataset.dim().1 == dims as usize, "bad dataset");
        let dataset = Dataset::new(dataset);
        let source = WithLabels::new(
            dataset,
            labels.as_slice().expect("memory is non continuous"),
        );
        // indexing_options
        let indexing_options: IndexingOptions = indexing_options
            .map(|obj| pythonize::depythonize_bound(obj.into_any()).expect("failed to deserialize"))
            .unwrap_or_default();
        let index_options = IndexOptions {
            vector: vector_options,
            indexing: indexing_options,
        };
        index_options.validate().expect("not valid index options");
        // build
        Self(indexing::Indexing::create(
            &path,
            index_options.clone(),
            source,
        ))
    }
    #[staticmethod]
    pub fn open(path: &str) -> Self {
        Self(indexing::Indexing::open(path))
    }
    #[pyo3(signature = (dataset, k, **search_options))]
    pub fn search<'py>(
        &self,
        py: Python<'py>,
        dataset: PyReadonlyArray2<'py, f32>,
        k: u32,
        search_options: Option<Bound<'py, PyDict>>,
    ) -> (Bound<'py, PyArray2<f32>>, Bound<'py, PyArray2<i64>>) {
        // dataset
        let dataset = dataset.as_array();
        assert!(dataset.dim().1 == self.0.dims() as usize, "bad dataset");
        let dataset = Dataset::new(dataset);
        // search_options
        let search_options: SearchOptions = search_options
            .map(|obj| pythonize::depythonize_bound(obj.into_any()).expect("failed to deserialize"))
            .unwrap_or_default();
        // results
        let mut d = Array2::zeros((0, k as usize));
        let mut l = Array2::zeros((0, k as usize));
        for i in 0..dataset.len() {
            let (distances, labels) = self
                .0
                .vbase(BorrowedVector::Vecf32(dataset.vector(i)), &search_options)
                .map(|(distance, label)| (distance.0, label.as_u64() as i64))
                .chain(std::iter::repeat((f32::INFINITY, i64::MAX)))
                .take(k as usize)
                .unzip::<_, _, Vec<_>, Vec<_>>();
            d.push_row(Array1::from_vec(distances).view()).unwrap();
            l.push_row(Array1::from_vec(labels).view()).unwrap();
        }
        (d.into_pyarray_bound(py), l.into_pyarray_bound(py))
    }
}
48  crates/pyvectors/src/with_labels.rs  Normal file
@@ -0,0 +1,48 @@
use base::search::{Collection, Payload, Pointer, Source, Vectors};
use base::vector::*;

pub struct WithLabels<'a, T> {
    dataset: T,
    labels: &'a [i64],
}

impl<'a, T: Vectors<Vecf32Owned>> WithLabels<'a, T> {
    pub fn new(dataset: T, labels: &'a [i64]) -> Self {
        assert!(dataset.len() as usize == labels.len());
        Self { dataset, labels }
    }
}

impl<'a, V: VectorOwned, T: Vectors<V>> Vectors<V> for WithLabels<'a, T> {
    fn dims(&self) -> u32 {
        self.dataset.dims()
    }

    fn len(&self) -> u32 {
        self.dataset.len()
    }

    fn vector(&self, i: u32) -> V::Borrowed<'_> {
        self.dataset.vector(i)
    }
}

impl<T> Collection for WithLabels<'_, T> {
    fn payload(&self, i: u32) -> Payload {
        Payload::new(Pointer::new(self.labels[i as usize] as u64), 0)
    }
}

impl<T> Source for WithLabels<'_, T> {
    fn get_main<X: std::any::Any>(&self) -> Option<&X> {
        None
    }

    fn get_main_len(&self) -> u32 {
        0
    }

    fn check_existing(&self, _: u32) -> bool {
        true
    }
}
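
Read together with dataset.rs, indexing.rs, and lib.rs above, the intended composition on the Rust side can be sketched as follows (not part of the patch; the path and helper name are illustrative, and it assumes IndexingOptions::default() selects a usable algorithm):

use base::distance::DistanceKind;
use base::index::{IndexOptions, IndexingOptions, SearchOptions, VectorOptions};
use base::search::Vectors;
use base::vector::{BorrowedVector, VectorKind};
use ndarray::Array2;

fn build_and_query(rows: &Array2<f32>, labels: &[i64]) {
    // labels must hold exactly one entry per row; WithLabels::new asserts this.
    let source = crate::with_labels::WithLabels::new(crate::dataset::Dataset::new(rows.view()), labels);
    let index_options = IndexOptions {
        vector: VectorOptions {
            dims: rows.dim().1 as u32,
            v: VectorKind::Vecf32,
            d: DistanceKind::L2,
        },
        indexing: IndexingOptions::default(),
    };
    // The target directory must not exist yet; create() builds the index there
    // and records the options in ".index_options" for later open() calls.
    let index = crate::indexing::Indexing::create("/tmp/demo_index", index_options, source);
    // Query with the first row, reading results back as (distance, label) pairs.
    let queries = crate::dataset::Dataset::new(rows.view());
    let search_options = SearchOptions::default();
    for (distance, pointer) in index
        .vbase(BorrowedVector::Vecf32(queries.vector(0)), &search_options)
        .take(10)
    {
        println!("{} -> {}", distance.0, pointer.as_u64());
    }
}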
@@ -45,7 +45,7 @@ impl<O: OperatorQuantization> Quantizer<O> {
    pub fn train(
        vector_options: VectorOptions,
        quantization_options: QuantizationOptions,
-       vectors: &impl Vectors<O>,
+       vectors: &(impl Vectors<Owned<O>> + Sync),
        transform: impl Fn(Borrowed<'_, O>) -> Owned<O> + Copy + Send + Sync,
    ) -> Self {
        use QuantizationOptions::*;
@@ -91,7 +91,7 @@ impl<O: OperatorQuantization> Quantization<O> {
        path: impl AsRef<Path>,
        vector_options: VectorOptions,
        quantization_options: QuantizationOptions,
-       vectors: &impl Vectors<O>,
+       vectors: &(impl Vectors<Owned<O>> + Sync),
        transform: impl Fn(Borrowed<'_, O>) -> Owned<O> + Copy + Send + Sync,
    ) -> Self {
        std::fs::create_dir(path.as_ref()).unwrap();
@@ -240,7 +240,7 @@ impl<O: OperatorQuantization> Quantization<O> {

    pub fn process(
        &self,
-       vectors: &impl Vectors<O>,
+       vectors: &impl Vectors<Owned<O>>,
        preprocessed: &QuantizationPreprocessed<O>,
        u: u32,
    ) -> F32 {
@@ -33,7 +33,7 @@ impl<O: OperatorProductQuantization> ProductQuantizer<O> {
    pub fn train(
        vector_options: VectorOptions,
        product_quantization_options: ProductQuantizationOptions,
-       vectors: &impl Vectors<O>,
+       vectors: &(impl Vectors<Owned<O>> + Sync),
        transform: impl Fn(Borrowed<'_, O>) -> Owned<O> + Copy + Send + Sync,
    ) -> Self {
        let dims = vector_options.dims;
@@ -46,7 +46,7 @@ impl<O: OperatorProductQuantization> ProductQuantizer<O> {
                let subdims = std::cmp::min(ratio, dims - ratio * p);
                let start = (p * ratio) as usize;
                let end = start + subdims as usize;
-               let subsamples = sample_subvector_transform(vectors, start, end, transform);
+               let subsamples = sample_subvector_transform::<O>(vectors, start, end, transform);
                k_means(1 << bits, subsamples, false)
            })
            .collect::<Vec<_>>();
@@ -33,7 +33,7 @@ impl<O: OperatorScalarQuantization> ScalarQuantizer<O> {
    pub fn train(
        vector_options: VectorOptions,
        scalar_quantization_options: ScalarQuantizationOptions,
-       vectors: &impl Vectors<O>,
+       vectors: &impl Vectors<Owned<O>>,
        transform: impl Fn(Borrowed<'_, O>) -> Owned<O> + Copy,
    ) -> Self {
        let dims = vector_options.dims;
@@ -26,7 +26,7 @@ impl<O: OperatorTrivialQuantization> TrivialQuantizer<O> {
    pub fn train(
        vector_options: VectorOptions,
        _: TrivialQuantizationOptions,
-       _: &impl Vectors<O>,
+       _: &impl Vectors<Owned<O>>,
        _: impl Fn(Borrowed<'_, O>) -> Owned<O> + Copy,
    ) -> Self {
        Self {
@@ -11,7 +11,7 @@ use crate::operator::OperatorRabitq as Op;
use crate::quant::quantization::Quantization;
use base::always_equal::AlwaysEqual;
use base::index::{IndexOptions, RabitqIndexingOptions, SearchOptions};
-use base::operator::Borrowed;
+use base::operator::{Borrowed, Owned};
use base::scalar::F32;
use base::search::RerankerPop;
use base::search::{Collection, Element, Payload, Source, Vectors};
@@ -35,7 +35,11 @@ pub struct Rabitq<O: Op> {
}

impl<O: Op> Rabitq<O> {
-   pub fn create(path: impl AsRef<Path>, options: IndexOptions, source: &impl Source<O>) -> Self {
+   pub fn create(
+       path: impl AsRef<Path>,
+       options: IndexOptions,
+       source: &(impl Vectors<Owned<O>> + Collection + Source),
+   ) -> Self {
        let remapped = RemappedCollection::from_source(source);
        from_nothing(path, options, &remapped)
    }
@@ -44,6 +48,10 @@ impl<O: Op> Rabitq<O> {
        open(path)
    }

    pub fn dims(&self) -> u32 {
        self.storage.dims()
    }

    pub fn len(&self) -> u32 {
        self.storage.len()
    }
@@ -96,7 +104,7 @@ impl<O: Op> Rabitq<O> {
fn from_nothing<O: Op>(
    path: impl AsRef<Path>,
    options: IndexOptions,
-   collection: &impl Collection<O>,
+   collection: &(impl Vectors<Owned<O>> + Collection),
) -> Rabitq<O> {
    create_dir(path.as_ref()).unwrap();
    let RabitqIndexingOptions { nlist } = options.indexing.clone().unwrap_rabitq();
@@ -119,7 +127,7 @@ fn from_nothing<O: Op>(
        }
        projection
    };
-   let samples = common::sample::sample_cast(collection);
+   let samples = common::sample::sample_cast::<O>(collection);
    rayon::check();
    let centroids: Vec2<F32> = k_means(nlist as usize, samples, false);
    rayon::check();
@@ -1,5 +1,4 @@
use crate::Storage;
-use base::operator::Operator;
use base::search::*;
use base::vector::*;
use common::json::Json;
@@ -12,7 +11,7 @@ pub struct BVectorStorage {
    slice: MmapArray<u64>,
}

-impl<O: Operator<VectorOwned = BVectorOwned>> Vectors<O> for BVectorStorage {
+impl Vectors<BVectorOwned> for BVectorStorage {
    fn dims(&self) -> u32 {
        *self.dims
    }
@@ -29,8 +28,8 @@ impl<O: Operator<VectorOwned = BVectorOwned>> Vectors<O> for BVectorStorage {
    }
}

-impl<O: Operator<VectorOwned = BVectorOwned>> Storage<O> for BVectorStorage {
-   fn create(path: impl AsRef<Path>, vectors: &impl Vectors<O>) -> Self {
+impl Storage<BVectorOwned> for BVectorStorage {
+   fn create(path: impl AsRef<Path>, vectors: &impl Vectors<BVectorOwned>) -> Self {
        std::fs::create_dir(path.as_ref()).unwrap();
        let dims = Json::create(path.as_ref().join("dims"), vectors.dims());
        let len = Json::create(path.as_ref().join("len"), vectors.len());
@@ -5,15 +5,16 @@ mod vec;
use base::operator::*;
use base::scalar::*;
use base::search::*;
+use base::vector::VectorOwned;
use std::path::Path;

-pub trait Storage<O: Operator>: Vectors<O> {
+pub trait Storage<V: VectorOwned>: Vectors<V> {
    fn open(path: impl AsRef<Path>) -> Self;
-   fn create(path: impl AsRef<Path>, vectors: &impl Vectors<O>) -> Self;
+   fn create(path: impl AsRef<Path>, vectors: &impl Vectors<V>) -> Self;
}

pub trait OperatorStorage: Operator {
-   type Storage: Storage<Self> + Send + Sync;
+   type Storage: Storage<Owned<Self>> + Send + Sync;
}

impl OperatorStorage for SVecf32Dot {
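
With Storage now keyed by the owned vector type rather than the operator, callers can stay generic over any V: VectorOwned. A small sketch (not part of the patch; the helper is hypothetical):

use base::search::Vectors;
use base::vector::VectorOwned;
use std::path::Path;

// Open an existing storage directory, or build one from the given vectors.
fn open_or_create<V: VectorOwned, S: Storage<V>>(path: &Path, vectors: &impl Vectors<V>) -> S {
    if path.exists() {
        S::open(path)
    } else {
        S::create(path, vectors)
    }
}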
@@ -1,5 +1,4 @@
use crate::Storage;
-use base::operator::Operator;
use base::scalar::*;
use base::search::*;
use base::vector::*;
@@ -15,7 +14,7 @@ pub struct SVecStorage {
    offsets: MmapArray<usize>,
}

-impl<O: Operator<VectorOwned = SVecf32Owned>> Vectors<O> for SVecStorage {
+impl Vectors<SVecf32Owned> for SVecStorage {
    fn dims(&self) -> u32 {
        *self.dims
    }
@@ -33,8 +32,8 @@ impl<O: Operator<VectorOwned = SVecf32Owned>> Vectors<O> for SVecStorage {
    }
}

-impl<O: Operator<VectorOwned = SVecf32Owned>> Storage<O> for SVecStorage {
-   fn create(path: impl AsRef<Path>, vectors: &impl Vectors<O>) -> Self {
+impl Storage<SVecf32Owned> for SVecStorage {
+   fn create(path: impl AsRef<Path>, vectors: &impl Vectors<SVecf32Owned>) -> Self {
        std::fs::create_dir(path.as_ref()).unwrap();
        let dims = Json::create(path.as_ref().join("dims"), vectors.dims());
        let len = Json::create(path.as_ref().join("len"), vectors.len());
@@ -1,5 +1,4 @@
use crate::Storage;
-use base::operator::Operator;
use base::scalar::*;
use base::search::*;
use base::vector::*;
@@ -13,7 +12,7 @@ pub struct VecStorage<T> {
    slice: MmapArray<T>,
}

-impl<O: Operator<VectorOwned = Vecf32Owned>> Vectors<O> for VecStorage<F32> {
+impl Vectors<Vecf32Owned> for VecStorage<F32> {
    fn dims(&self) -> u32 {
        *self.dims
    }
@@ -29,8 +28,8 @@ impl<O: Operator<VectorOwned = Vecf32Owned>> Vectors<O> for VecStorage<F32> {
    }
}

-impl<O: Operator<VectorOwned = Vecf32Owned>> Storage<O> for VecStorage<F32> {
-   fn create(path: impl AsRef<Path>, vectors: &impl Vectors<O>) -> Self {
+impl Storage<Vecf32Owned> for VecStorage<F32> {
+   fn create(path: impl AsRef<Path>, vectors: &impl Vectors<Vecf32Owned>) -> Self {
        std::fs::create_dir(path.as_ref()).unwrap();
        let dims = Json::create(path.as_ref().join("dims"), vectors.dims());
        let len = Json::create(path.as_ref().join("len"), vectors.len());
@@ -49,7 +48,7 @@ impl<O: Operator<VectorOwned = Vecf32Owned>> Storage<O> for VecStorage<F32> {
    }
}

-impl<O: Operator<VectorOwned = Vecf16Owned>> Vectors<O> for VecStorage<F16> {
+impl Vectors<Vecf16Owned> for VecStorage<F16> {
    fn dims(&self) -> u32 {
        *self.dims
    }
@@ -65,8 +64,8 @@ impl<O: Operator<VectorOwned = Vecf16Owned>> Vectors<O> for VecStorage<F16> {
    }
}

-impl<O: Operator<VectorOwned = Vecf16Owned>> Storage<O> for VecStorage<F16> {
-   fn create(path: impl AsRef<Path>, vectors: &impl Vectors<O>) -> Self {
+impl Storage<Vecf16Owned> for VecStorage<F16> {
+   fn create(path: impl AsRef<Path>, vectors: &impl Vectors<Vecf16Owned>) -> Self {
        std::fs::create_dir(path.as_ref()).unwrap();
        let dims = Json::create(path.as_ref().join("dims"), vectors.dims());
        let len = Json::create(path.as_ref().join("len"), vectors.len());
@@ -13,7 +13,7 @@ rm -rf ./build/vectors-pg${VERSION}_${SEMVER}_${PLATFORM}.deb

mkdir -p ./build/dir_zip
cp -a ./sql/upgrade/. ./build/dir_zip/
-cp ./target/vectors--$SEMVER.sql ./build/dir_zip/vectors--$SEMVER.sql
+cp ./target/schema.sql ./build/dir_zip/vectors--$SEMVER.sql
sed -e "s/@CARGO_VERSION@/$SEMVER/g" < ./vectors.control > ./build/dir_zip/vectors.control
cp ./target/${ARCH}-unknown-linux-gnu/release/libvectors.so ./build/dir_zip/vectors.so
zip ./build/vectors-pg${VERSION}_${ARCH}-unknown-linux-gnu_${SEMVER}.zip -j ./build/dir_zip/*
@@ -1,31 +1,44 @@
use base::index::*;
use pgrx::guc::{GucContext, GucFlags, GucRegistry, GucSetting};

-static FLAT_SQ_RERANK_SIZE: GucSetting<i32> = GucSetting::<i32>::new(0);
+static FLAT_SQ_RERANK_SIZE: GucSetting<i32> =
+   GucSetting::<i32>::new(SearchOptions::default_flat_sq_rerank_size() as i32);

-static FLAT_SQ_FAST_SCAN: GucSetting<bool> = GucSetting::<bool>::new(false);
+static FLAT_SQ_FAST_SCAN: GucSetting<bool> =
+   GucSetting::<bool>::new(SearchOptions::default_flat_sq_fast_scan());

-static FLAT_PQ_RERANK_SIZE: GucSetting<i32> = GucSetting::<i32>::new(0);
+static FLAT_PQ_RERANK_SIZE: GucSetting<i32> =
+   GucSetting::<i32>::new(SearchOptions::default_flat_pq_rerank_size() as i32);

-static FLAT_PQ_FAST_SCAN: GucSetting<bool> = GucSetting::<bool>::new(false);
+static FLAT_PQ_FAST_SCAN: GucSetting<bool> =
+   GucSetting::<bool>::new(SearchOptions::default_flat_pq_fast_scan());

-static IVF_SQ_RERANK_SIZE: GucSetting<i32> = GucSetting::<i32>::new(0);
+static IVF_SQ_RERANK_SIZE: GucSetting<i32> =
+   GucSetting::<i32>::new(SearchOptions::default_ivf_sq_rerank_size() as i32);

-static IVF_SQ_FAST_SCAN: GucSetting<bool> = GucSetting::<bool>::new(false);
+static IVF_SQ_FAST_SCAN: GucSetting<bool> =
+   GucSetting::<bool>::new(SearchOptions::default_ivf_sq_fast_scan());

-static IVF_PQ_RERANK_SIZE: GucSetting<i32> = GucSetting::<i32>::new(0);
+static IVF_PQ_RERANK_SIZE: GucSetting<i32> =
+   GucSetting::<i32>::new(SearchOptions::default_ivf_pq_rerank_size() as i32);

-static IVF_PQ_FAST_SCAN: GucSetting<bool> = GucSetting::<bool>::new(false);
+static IVF_PQ_FAST_SCAN: GucSetting<bool> =
+   GucSetting::<bool>::new(SearchOptions::default_ivf_pq_fast_scan());

-static IVF_NPROBE: GucSetting<i32> = GucSetting::<i32>::new(10);
+static IVF_NPROBE: GucSetting<i32> =
+   GucSetting::<i32>::new(SearchOptions::default_ivf_nprobe() as i32);

-static HNSW_EF_SEARCH: GucSetting<i32> = GucSetting::<i32>::new(100);
+static HNSW_EF_SEARCH: GucSetting<i32> =
+   GucSetting::<i32>::new(SearchOptions::default_hnsw_ef_search() as i32);

-static RABITQ_NPROBE: GucSetting<i32> = GucSetting::<i32>::new(10);
+static RABITQ_NPROBE: GucSetting<i32> =
+   GucSetting::<i32>::new(SearchOptions::default_rabitq_nprobe() as i32);

-static RABITQ_FAST_SCAN: GucSetting<bool> = GucSetting::<bool>::new(true);
+static RABITQ_FAST_SCAN: GucSetting<bool> =
+   GucSetting::<bool>::new(SearchOptions::default_rabitq_fast_scan());

-static DISKANN_EF_SEARCH: GucSetting<i32> = GucSetting::<i32>::new(100);
+static DISKANN_EF_SEARCH: GucSetting<i32> =
+   GucSetting::<i32>::new(SearchOptions::default_diskann_ef_search() as i32);

pub unsafe fn init() {
    GucRegistry::define_int_guc(
@@ -10,11 +10,10 @@ if [[ " $@ " =~ --target' '([^ ]+) ]]; then
        DIR="./target/$TARGET/debug"
    fi
else
-   TARGET=""
    if [[ " $@ " =~ " --release " ]]; then
        DIR="./target/release"
    elif [[ " $@ " =~ " --profile opt " ]]; then
-       DIR="./target/$TARGET/opt"
+       DIR="./target/opt"
    else
        DIR="./target/debug"
    fi
@@ -43,6 +42,6 @@ code=$(mktemp)
chmod 700 $code
CONTROL_FILEPATH="./vectors.control" SO_FILEPATH="$DIR/libvectors.so" $(dirname "$0")/schema-codegen.sh >> $code

-PGRX_EMBED=$code cargo rustc --bin pgrx_embed_vectors "$@" -- --cfg pgrx_embed
+PGRX_EMBED=$code cargo rustc --package pgvectors --bin pgrx_embed_pgvectors "$@" -- --cfg pgrx_embed

-CARGO_PKG_VERSION="0.0.0" QEMU_LD_PREFIX=$QEMU_LD_PREFIX "${RUNNER[@]}" "$DIR/pgrx_embed_vectors" | expand -t 4
+CARGO_PKG_VERSION="0.0.0" QEMU_LD_PREFIX=$QEMU_LD_PREFIX "${RUNNER[@]}" "$DIR/pgrx_embed_pgvectors" | expand -t 4