From 7b9ae2eef0701e17e60e27572ac69c5cdb58f201 Mon Sep 17 00:00:00 2001 From: Usamoi Date: Mon, 19 Feb 2024 17:36:16 +0800 Subject: [PATCH 1/6] refactor: define detailed error types (#356) * refactor: define detailed error types Signed-off-by: usamoi * do not check version in _vectors_main Signed-off-by: usamoi * rename NORMAL to STARTED Signed-off-by: usamoi * fix MacOS and FreeBSD implementation Signed-off-by: usamoi * fix 512KB memory leak for each session Signed-off-by: usamoi * remove skip on delete_threshold for fixing bincode Signed-off-by: usamoi * add WorkerOperations and InstanceViewOperations for name conflicts Signed-off-by: usamoi --------- Signed-off-by: usamoi --- Cargo.lock | 309 +++++++----- Cargo.toml | 50 +- crates/c/Cargo.toml | 4 +- crates/detect/Cargo.toml | 2 +- crates/interprocess-atomic-wait/Cargo.toml | 16 + crates/interprocess-atomic-wait/src/lib.rs | 91 ++++ crates/memfd/Cargo.toml | 15 + crates/memfd/src/lib.rs | 70 +++ crates/send_fd/Cargo.toml | 15 + .../send_fd/src/lib.rs | 6 +- crates/service/Cargo.toml | 35 +- crates/service/src/algorithms/vamana.rs.txt | 456 ------------------ crates/service/src/index/mod.rs | 54 ++- crates/service/src/index/optimizing/mod.rs | 8 +- crates/service/src/instance/mod.rs | 85 ++-- crates/service/src/prelude/error.rs | 120 +++-- crates/service/src/prelude/mod.rs | 2 +- crates/service/src/worker/mod.rs | 93 +++- src/bgworker/mod.rs | 49 +- src/bgworker/normal.rs | 213 ++++---- src/bgworker/upgrade.rs | 78 --- src/datatype/casts_f32.rs | 6 +- src/datatype/operators_svecf32.rs | 103 +--- src/datatype/operators_vecf16.rs | 90 +--- src/datatype/operators_vecf32.rs | 90 +--- src/datatype/svecf32.rs | 59 +-- src/datatype/typmod.rs | 19 +- src/datatype/vecf16.rs | 15 +- src/datatype/vecf32.rs | 15 +- src/gucs/executing.rs | 7 +- src/index/am_build.rs | 23 +- src/index/am_scan.rs | 25 +- src/index/am_setup.rs | 4 +- src/index/am_update.rs | 24 +- src/index/functions.rs | 8 +- src/index/hook_transaction.rs | 14 +- src/index/views.rs | 11 +- src/ipc/client/mod.rs | 269 ----------- src/ipc/mod.rs | 335 +++++++++++-- src/ipc/packet/basic.rs | 19 - src/ipc/packet/create.rs | 6 - src/ipc/packet/delete.rs | 6 - src/ipc/packet/drop.rs | 6 - src/ipc/packet/flush.rs | 6 - src/ipc/packet/insert.rs | 6 - src/ipc/packet/list.rs | 19 - src/ipc/packet/mod.rs | 58 --- src/ipc/packet/stat.rs | 7 - src/ipc/packet/upgrade.rs | 6 - src/ipc/packet/vbase.rs | 19 - src/ipc/server/mod.rs | 449 ----------------- src/ipc/transport/mmap.rs | 62 +-- src/ipc/transport/mod.rs | 71 +-- src/ipc/transport/unix.rs | 8 +- src/lib.rs | 2 +- src/prelude/error.rs | 204 +++++--- src/prelude/mod.rs | 2 +- src/sql/finalize.sql | 6 +- src/utils/mod.rs | 2 - src/utils/os.rs | 190 -------- tests/sqllogictest/error.slt | 4 +- 61 files changed, 1476 insertions(+), 2570 deletions(-) create mode 100644 crates/interprocess-atomic-wait/Cargo.toml create mode 100644 crates/interprocess-atomic-wait/src/lib.rs create mode 100644 crates/memfd/Cargo.toml create mode 100644 crates/memfd/src/lib.rs create mode 100644 crates/send_fd/Cargo.toml rename src/utils/file_socket.rs => crates/send_fd/src/lib.rs (97%) delete mode 100644 crates/service/src/algorithms/vamana.rs.txt delete mode 100644 src/bgworker/upgrade.rs delete mode 100644 src/ipc/client/mod.rs delete mode 100644 src/ipc/packet/basic.rs delete mode 100644 src/ipc/packet/create.rs delete mode 100644 src/ipc/packet/delete.rs delete mode 100644 src/ipc/packet/drop.rs delete mode 100644 src/ipc/packet/flush.rs delete mode 100644 src/ipc/packet/insert.rs delete mode 100644 src/ipc/packet/list.rs delete mode 100644 src/ipc/packet/mod.rs delete mode 100644 src/ipc/packet/stat.rs delete mode 100644 src/ipc/packet/upgrade.rs delete mode 100644 src/ipc/packet/vbase.rs delete mode 100644 src/ipc/server/mod.rs delete mode 100644 src/utils/os.rs diff --git a/Cargo.lock b/Cargo.lock index 0ef1ea6..a927291 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,16 +27,58 @@ dependencies = [ ] [[package]] -name = "anstyle" -version = "1.0.5" +name = "anstream" +version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2faccea4cc4ab4a667ce676a30e8ec13922a692c99bb8f5b11f1502c72e04220" +checksum = "96b09b5178381e0874812a9b157f7fe84982617e48f71f4e3235482775e5b540" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] [[package]] name = "anyhow" -version = "1.0.79" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" +checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" [[package]] name = "arc-swap" @@ -49,9 +91,6 @@ name = "arrayvec" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" -dependencies = [ - "serde", -] [[package]] name = "async-trait" @@ -61,7 +100,7 @@ checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -121,22 +160,22 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.69.2" +version = "0.69.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c69fae65a523209d34240b60abe0c42d33d1045d445c0839d8a4894a736e2d" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" dependencies = [ "bitflags 2.4.2", "cexpr", "clang-sys", + "itertools", "lazy_static", "lazycell", - "peeking_take_while", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -189,15 +228,15 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.14.0" +version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +checksum = "d32a994c2b3ca201d9b263612a374263f05e7adde37c4707f693dcd375076d1f" [[package]] name = "bytemuck" -version = "1.14.1" +version = "1.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed2490600f404f2b94c167e31d3ed1d5f3c225a0f3b80230053b3e0b7b962bd9" +checksum = "a2ef034f05691a48569bd920a96c81b9d91bbad1ab5ac7c4616c1f6ef36cb79f" dependencies = [ "bytemuck_derive", ] @@ -210,7 +249,7 @@ checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -282,9 +321,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.18" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" +checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da" dependencies = [ "clap_builder", "clap_derive", @@ -302,9 +341,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.18" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" +checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb" dependencies = [ "anstyle", "clap_lex", @@ -312,21 +351,27 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.4.7" +version = "4.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" +checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] name = "clap_lex" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "convert_case" @@ -354,9 +399,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] @@ -506,9 +551,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "enum-map" @@ -527,20 +572,30 @@ checksum = "f282cfdfe92516eb26c2af8589c274c7c17681f5ecc03c18255fe741c6aa64eb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", +] + +[[package]] +name = "env_filter" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +dependencies = [ + "log", + "regex", ] [[package]] name = "env_logger" -version = "0.10.2" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" +checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d" dependencies = [ + "anstream", + "anstyle", + "env_filter", "humantime", - "is-terminal", "log", - "regex", - "termcolor", ] [[package]] @@ -638,7 +693,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -756,12 +811,6 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" -[[package]] -name = "hermit-abi" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" - [[package]] name = "hmac" version = "0.12.1" @@ -811,23 +860,29 @@ checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" [[package]] name = "indexmap" -version = "2.2.2" +version = "2.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520" +checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" dependencies = [ "equivalent", "hashbrown", ] [[package]] -name = "is-terminal" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" +name = "interprocess_atomic_wait" +version = "0.0.0" dependencies = [ - "hermit-abi", - "rustix", - "windows-sys 0.52.0", + "libc", + "ulock-sys", +] + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", ] [[package]] @@ -838,9 +893,9 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "js-sys" -version = "0.3.67" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" +checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" dependencies = [ "wasm-bindgen", ] @@ -928,6 +983,15 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +[[package]] +name = "memfd" +version = "0.0.0" +dependencies = [ + "detect", + "rand", + "rustix", +] + [[package]] name = "memmap2" version = "0.9.4" @@ -954,9 +1018,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] @@ -1015,9 +1079,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", "libm", @@ -1073,6 +1137,12 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + [[package]] name = "pathsearch" version = "0.2.0" @@ -1083,12 +1153,6 @@ dependencies = [ "libc", ] -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - [[package]] name = "percent-encoding" version = "2.3.1" @@ -1097,9 +1161,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.7.6" +version = "2.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f200d8d83c44a45b21764d1916299752ca035d15ecd46faca3e9a2a2bf6ad06" +checksum = "219c0dcc30b6a27553f9cc242972b67f75b60eb0db71f0b5462f38b058c41546" dependencies = [ "memchr", "thiserror", @@ -1526,7 +1590,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ - "semver 1.0.21", + "semver 1.0.22", ] [[package]] @@ -1556,9 +1620,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "same-file" @@ -1592,9 +1656,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" +checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" [[package]] name = "semver-parser" @@ -1605,6 +1669,15 @@ dependencies = [ "pest", ] +[[package]] +name = "send_fd" +version = "0.0.0" +dependencies = [ + "libc", + "log", + "rustix", +] + [[package]] name = "seq-macro" version = "0.3.5" @@ -1638,7 +1711,7 @@ checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -1666,7 +1739,6 @@ name = "service" version = "0.0.0" dependencies = [ "arc-swap", - "arrayvec", "bincode", "bytemuck", "byteorder", @@ -1679,7 +1751,6 @@ dependencies = [ "libc", "log", "memmap2", - "memoffset", "multiversion", "num-traits", "parking_lot", @@ -1689,7 +1760,6 @@ dependencies = [ "serde", "serde_json", "thiserror", - "ulock-sys", "uuid", "validator", ] @@ -1802,9 +1872,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.48" +version = "2.0.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +checksum = "915aea9e586f80826ee59f8453c1101f9d1c4b3964cd2460185ee8e299ada496" dependencies = [ "proc-macro2", "quote", @@ -1840,44 +1910,34 @@ checksum = "cfb5fa503293557c5158bd215fdc225695e567a77e453f5d4452a50a193969bd" [[package]] name = "tempfile" -version = "3.9.0" +version = "3.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" +checksum = "a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", "rustix", "windows-sys 0.52.0", ] -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - [[package]] name = "thiserror" -version = "1.0.56" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.56" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -1897,9 +1957,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.35.1" +version = "1.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104" +checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" dependencies = [ "backtrace", "bytes", @@ -1952,9 +2012,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.9" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6a4b9e8023eb94392d3dca65d717c53abc5dad49c07cb65bb8fcd87115fa325" +checksum = "9a9aad4a3066010876e8dcf5a8a06e70a558751117a145c6ce2b82c2e2054290" dependencies = [ "serde", "serde_spanned", @@ -1973,9 +2033,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.21.1" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +checksum = "2c1b5fd4128cc8d3e0cb74d4ed9a9cc7c7284becd4df68f5f940e1ad123606f6" dependencies = [ "indexmap", "serde", @@ -2059,9 +2119,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" [[package]] name = "url" @@ -2074,6 +2134,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + [[package]] name = "uuid" version = "1.7.0" @@ -2137,13 +2203,18 @@ dependencies = [ "detect", "env_logger", "half 2.3.1", + "interprocess_atomic_wait", "libc", "log", + "memfd", + "memmap2", "num-traits", + "paste", "pgrx", "pgrx-tests", "rand", "rustix", + "send_fd", "serde", "serde_json", "service", @@ -2185,9 +2256,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" +checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -2195,24 +2266,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" +checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" +checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2220,28 +2291,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" +checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" +checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" [[package]] name = "web-sys" -version = "0.3.67" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed" +checksum = "96565907687f7aceb35bc5fc03770a8a0471d82e479f25832f54a0e3f4b28446" dependencies = [ "js-sys", "wasm-bindgen", @@ -2422,9 +2493,9 @@ checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" [[package]] name = "winnow" -version = "0.5.36" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "818ce546a11a9986bc24f93d0cdf38a8a1a400f1473ea8c82e59f6e0ffab9249" +checksum = "d90f4e0f530c4c69f62b80d839e9ef3855edc9cba471a160c4d692deed62b401" dependencies = [ "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index 9e6deef..1a932d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,25 +14,30 @@ pg16 = ["pgrx/pg16", "pgrx-tests/pg16"] pg_test = [] [dependencies] +arrayvec.workspace = true +bincode.workspace = true +bytemuck.workspace = true +byteorder.workspace = true +half.workspace = true libc.workspace = true log.workspace = true +memmap2.workspace = true +num-traits.workspace = true +paste.workspace = true +rand.workspace = true +rustix.workspace = true serde.workspace = true serde_json.workspace = true -validator.workspace = true -rustix.workspace = true thiserror.workspace = true -byteorder.workspace = true -bincode.workspace = true -half.workspace = true -num-traits.workspace = true -rand.workspace = true -bytemuck.workspace = true -service = { path = "crates/service" } +validator.workspace = true detect = { path = "crates/detect" } +send_fd = { path = "crates/send_fd" } +service = { path = "crates/service" } +interprocess_atomic_wait = { path = "crates/interprocess-atomic-wait" } +memfd = { path = "crates/memfd" } pgrx = { version = "0.11.3", default-features = false, features = [] } -env_logger = "0.10.0" -toml = "0.8.8" -arrayvec = "0.7.4" +env_logger = "0.11.2" +toml = "0.8.10" [dev-dependencies] pgrx-tests = "0.11.3" @@ -58,24 +63,29 @@ version = "0.0.0" edition = "2021" [workspace.dependencies] -libc = "~0.2" -log = "~0.4" -serde = "~1.0" -serde_json = "1" -thiserror = "~1.0" +arrayvec = "~0.7" bincode = "~1.3" -byteorder = "~1.5" bytemuck = { version = "~1.14", features = ["extern_crate_alloc"] } +byteorder = "~1.5" half = { version = "~2.3", features = [ "bytemuck", "num-traits", "serde", "use-intrinsics", + "rand_distr", ] } +libc = "~0.2" +log = "~0.4" +memmap2 = "0.9.4" num-traits = "~0.2" -validator = { version = "~0.16", features = ["derive"] } +paste = "~1.0" +rand = "0.8.5" rustix = { version = "~0.38", features = ["fs", "net", "mm"] } -rand = "~0.8" +serde = "~1.0" +serde_json = "~1.0" +thiserror = "~1.0" +uuid = { version = "1.7.0", features = ["v4", "serde"] } +validator = { version = "~0.16", features = ["derive"] } [profile.dev] panic = "unwind" diff --git a/crates/c/Cargo.toml b/crates/c/Cargo.toml index f0f0274..1c86319 100644 --- a/crates/c/Cargo.toml +++ b/crates/c/Cargo.toml @@ -4,9 +4,9 @@ version.workspace = true edition.workspace = true [dev-dependencies] -half = { version = "~2.3", features = ["use-intrinsics", "rand_distr"] } +half.workspace = true +rand.workspace = true detect = { path = "../detect" } -rand = "0.8.5" [build-dependencies] cc = "1.0" diff --git a/crates/detect/Cargo.toml b/crates/detect/Cargo.toml index aaae192..1bc7a99 100644 --- a/crates/detect/Cargo.toml +++ b/crates/detect/Cargo.toml @@ -4,5 +4,5 @@ version.workspace = true edition.workspace = true [dependencies] -std_detect = { git = "https://github.com/tensorchord/stdarch.git", branch = "avx512fp16" } rustix.workspace = true +std_detect = { git = "https://github.com/tensorchord/stdarch.git", branch = "avx512fp16" } diff --git a/crates/interprocess-atomic-wait/Cargo.toml b/crates/interprocess-atomic-wait/Cargo.toml new file mode 100644 index 0000000..2f36d9e --- /dev/null +++ b/crates/interprocess-atomic-wait/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "interprocess_atomic_wait" +version.workspace = true +edition.workspace = true + +[dependencies] +libc.workspace = true + +[target.'cfg(target_os = "macos")'.dependencies] +ulock-sys = "0.1.0" + +[lints] +rust.internal_features = "allow" +rust.unsafe_op_in_unsafe_fn = "forbid" +rust.unused_lifetimes = "warn" +rust.unused_qualifications = "warn" diff --git a/crates/interprocess-atomic-wait/src/lib.rs b/crates/interprocess-atomic-wait/src/lib.rs new file mode 100644 index 0000000..324bd8a --- /dev/null +++ b/crates/interprocess-atomic-wait/src/lib.rs @@ -0,0 +1,91 @@ +use std::sync::atomic::AtomicU32; +use std::time::Duration; + +#[cfg(target_os = "linux")] +#[inline(always)] +pub fn wait(futex: &AtomicU32, value: u32, timeout: Duration) { + let timeout = libc::timespec { + tv_sec: i64::try_from(timeout.as_secs()).expect("Timeout is overflow."), + tv_nsec: timeout.subsec_nanos().into(), + }; + unsafe { + libc::syscall( + libc::SYS_futex, + futex.as_ptr(), + libc::FUTEX_WAIT, + value, + &timeout, + ); + } +} + +#[cfg(target_os = "linux")] +#[inline(always)] +pub fn wake(futex: &AtomicU32) { + unsafe { + libc::syscall(libc::SYS_futex, futex.as_ptr(), libc::FUTEX_WAKE, i32::MAX); + } +} + +#[cfg(target_os = "macos")] +#[inline(always)] +pub fn wait(futex: &AtomicU32, value: u32, timeout: Duration) { + let timeout = u32::try_from(timeout.as_millis()).expect("Timeout is overflow."); + unsafe { + // https://github.com/apple-oss-distributions/xnu/blob/main/bsd/kern/sys_ulock.c#L531 + ulock_sys::__ulock_wait( + ulock_sys::darwin19::UL_COMPARE_AND_WAIT_SHARED, + futex.as_ptr().cast(), + value as _, + timeout, + ); + } +} + +#[cfg(target_os = "macos")] +#[inline(always)] +pub fn wake(futex: &AtomicU32) { + unsafe { + ulock_sys::__ulock_wake( + ulock_sys::darwin19::UL_COMPARE_AND_WAIT_SHARED, + futex.as_ptr().cast(), + 0, + ); + } +} + +#[cfg(target_os = "freebsd")] +#[inline(always)] +pub fn wait(futex: &AtomicU32, value: u32, timeout: Duration) { + let ptr: *const AtomicU32 = futex; + let mut timeout = libc::timespec { + tv_sec: i64::try_from(timeout.as_secs()).expect("Timeout is overflow."), + tv_nsec: timeout.subsec_nanos().into(), + }; + unsafe { + // https://github.com/freebsd/freebsd-src/blob/main/sys/kern/kern_umtx.c#L3943 + // https://github.com/freebsd/freebsd-src/blob/main/sys/kern/kern_umtx.c#L3836 + libc::_umtx_op( + ptr as *mut libc::c_void, + libc::UMTX_OP_WAIT_UINT, + value as libc::c_ulong, + std::mem::size_of_val(&timeout) as *mut std::ffi::c_void, + std::ptr::addr_of_mut!(timeout).cast(), + ); + }; +} + +#[cfg(target_os = "freebsd")] +#[inline(always)] +pub fn wake(futex: &AtomicU32) { + let ptr: *const AtomicU32 = futex; + unsafe { + libc::_umtx_op( + ptr as *mut libc::c_void, + libc::UMTX_OP_WAKE, + i32::MAX as libc::c_ulong, + core::ptr::null_mut(), + core::ptr::null_mut(), + ); + }; +} diff --git a/crates/memfd/Cargo.toml b/crates/memfd/Cargo.toml new file mode 100644 index 0000000..2bf02ee --- /dev/null +++ b/crates/memfd/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "memfd" +version.workspace = true +edition.workspace = true + +[dependencies] +rand.workspace = true +rustix.workspace = true +detect = { path = "../detect" } + +[lints] +rust.internal_features = "allow" +rust.unsafe_op_in_unsafe_fn = "forbid" +rust.unused_lifetimes = "warn" +rust.unused_qualifications = "warn" diff --git a/crates/memfd/src/lib.rs b/crates/memfd/src/lib.rs new file mode 100644 index 0000000..54cdd42 --- /dev/null +++ b/crates/memfd/src/lib.rs @@ -0,0 +1,70 @@ +use std::os::fd::OwnedFd; + +#[cfg(target_os = "linux")] +pub fn memfd_create() -> std::io::Result { + if detect::linux::detect_memfd() { + use rustix::fs::MemfdFlags; + Ok(rustix::fs::memfd_create( + format!(".memfd.MEMFD.{:x}", std::process::id()), + MemfdFlags::empty(), + )?) + } else { + use rustix::fs::Mode; + use rustix::fs::OFlags; + // POSIX fcntl locking do not support shmem, so we use a regular file here. + // reference: https://man7.org/linux/man-pages/man3/fcntl.3p.html + // However, Linux shmem supports fcntl locking. + let name = format!( + ".shm.MEMFD.{:x}.{:x}", + std::process::id(), + rand::random::() + ); + let fd = rustix::fs::open( + &name, + OFlags::RDWR | OFlags::CREATE | OFlags::EXCL, + Mode::RUSR | Mode::WUSR, + )?; + rustix::fs::unlink(&name)?; + Ok(fd) + } +} + +#[cfg(target_os = "macos")] +pub fn memfd_create() -> std::io::Result { + use rustix::fs::Mode; + use rustix::fs::OFlags; + // POSIX fcntl locking do not support shmem, so we use a regular file here. + // reference: https://man7.org/linux/man-pages/man3/fcntl.3p.html + let name = format!( + ".shm.MEMFD.{:x}.{:x}", + std::process::id(), + rand::random::() + ); + let fd = rustix::fs::open( + &name, + OFlags::RDWR | OFlags::CREATE | OFlags::EXCL, + Mode::RUSR | Mode::WUSR, + )?; + rustix::fs::unlink(&name)?; + Ok(fd) +} + +#[cfg(target_os = "freebsd")] +pub fn memfd_create() -> std::io::Result { + use rustix::fs::Mode; + use rustix::fs::OFlags; + // POSIX fcntl locking do not support shmem, so we use a regular file here. + // reference: https://man7.org/linux/man-pages/man3/fcntl.3p.html + let name = format!( + ".shm.MEMFD.{:x}.{:x}", + std::process::id(), + rand::random::() + ); + let fd = rustix::fs::open( + &name, + OFlags::RDWR | OFlags::CREATE | OFlags::EXCL, + Mode::RUSR | Mode::WUSR, + )?; + rustix::fs::unlink(&name)?; + Ok(fd) +} diff --git a/crates/send_fd/Cargo.toml b/crates/send_fd/Cargo.toml new file mode 100644 index 0000000..fc50260 --- /dev/null +++ b/crates/send_fd/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "send_fd" +version.workspace = true +edition.workspace = true + +[dependencies] +libc.workspace = true +log.workspace = true +rustix.workspace = true + +[lints] +rust.internal_features = "allow" +rust.unsafe_op_in_unsafe_fn = "forbid" +rust.unused_lifetimes = "warn" +rust.unused_qualifications = "warn" diff --git a/src/utils/file_socket.rs b/crates/send_fd/src/lib.rs similarity index 97% rename from src/utils/file_socket.rs rename to crates/send_fd/src/lib.rs index 3d7a2a4..a1adc42 100644 --- a/src/utils/file_socket.rs +++ b/crates/send_fd/src/lib.rs @@ -6,12 +6,12 @@ use std::io::{IoSlice, IoSliceMut}; use std::os::unix::net::UnixStream; #[repr(C)] -pub struct FileSocket { +pub struct SendFd { tx: OwnedFd, rx: OwnedFd, } -impl FileSocket { +impl SendFd { pub fn new() -> std::io::Result { let (tx, rx) = UnixStream::pair()?; Ok(Self { @@ -47,7 +47,7 @@ fn recv_fd(rx: BorrowedFd<'_>) -> std::io::Result { let mut control = RecvAncillaryBuffer::new(&mut buffer.0); let mut buffer_ios = [b'.']; let ios = IoSliceMut::new(&mut buffer_ios); - let returned = rustix::net::recvmsg(rx, &mut [ios], &mut control, RecvFlags::CMSG_CLOEXEC)?; + let returned = rustix::net::recvmsg(rx, &mut [ios], &mut control, RecvFlags::empty())?; if returned.flags.bits() & libc::MSG_CTRUNC as u32 != 0 { log::warn!("Ancillary is truncated."); } diff --git a/crates/service/Cargo.toml b/crates/service/Cargo.toml index a7d7b3e..9f71438 100644 --- a/crates/service/Cargo.toml +++ b/crates/service/Cargo.toml @@ -4,36 +4,31 @@ version.workspace = true edition.workspace = true [dependencies] +bincode.workspace = true +bytemuck.workspace = true +byteorder.workspace = true +half.workspace = true libc.workspace = true log.workspace = true -serde.workspace = true -serde_json.workspace = true -validator.workspace = true -rustix.workspace = true -thiserror.workspace = true -byteorder.workspace = true -bincode.workspace = true -half.workspace = true +memmap2.workspace = true num-traits.workspace = true rand.workspace = true -bytemuck.workspace = true +rustix.workspace = true +serde.workspace = true +serde_json.workspace = true +thiserror.workspace = true +uuid.workspace = true +validator.workspace = true c = { path = "../c" } detect = { path = "../detect" } -crc32fast = "1.3.2" -crossbeam = "0.8.2" -dashmap = "5.4.0" +crc32fast = "1.4.0" +crossbeam = "0.8.4" +dashmap = "5.5.3" parking_lot = "0.12.1" -memoffset = "0.9.0" -arrayvec = { version = "0.7.3", features = ["serde"] } -memmap2 = "0.9.0" -rayon = "1.6.1" -uuid = { version = "1.6.1", features = ["v4", "serde"] } +rayon = "1.8.1" arc-swap = "1.6.0" multiversion = "0.7.3" -[target.'cfg(target_os = "macos")'.dependencies] -ulock-sys = "0.1.0" - [lints] clippy.derivable_impls = "allow" clippy.len_without_is_empty = "allow" diff --git a/crates/service/src/algorithms/vamana.rs.txt b/crates/service/src/algorithms/vamana.rs.txt deleted file mode 100644 index 98f6b72..0000000 --- a/crates/service/src/algorithms/vamana.rs.txt +++ /dev/null @@ -1,456 +0,0 @@ -#![allow(unused)] - -use crate::algorithms::raw::Raw; -use crate::prelude::*; -use crossbeam::atomic::AtomicCell; -use parking_lot::RwLock; -use parking_lot::RwLockReadGuard; -use parking_lot::RwLockWriteGuard; -use rand::distributions::Uniform; -use rand::prelude::SliceRandom; -use rand::Rng; -use rayon::prelude::*; -use std::cmp::Reverse; -use std::collections::{BTreeMap, BinaryHeap, HashSet}; -use std::sync::Arc; - -pub struct VertexWithDistance { - pub id: u32, - pub distance: Scalar, -} - -impl VertexWithDistance { - pub fn new(id: u32, distance: Scalar) -> Self { - Self { id, distance } - } -} - -impl PartialEq for VertexWithDistance { - fn eq(&self, other: &Self) -> bool { - self.distance.eq(&other.distance) - } -} - -impl Eq for VertexWithDistance {} - -impl PartialOrd for VertexWithDistance { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.distance.cmp(&other.distance)) - } -} - -impl Ord for VertexWithDistance { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.distance.cmp(&other.distance) - } -} - -/// DiskANN search state. -pub struct SearchState { - pub visited: HashSet, - candidates: BTreeMap, - heap: BinaryHeap>, - heap_visited: HashSet, - l: usize, - /// Number of results to return. - //TODO: used during search. - k: usize, -} - -impl SearchState { - /// Creates a new search state. - pub(crate) fn new(k: usize, l: usize) -> Self { - Self { - visited: HashSet::new(), - candidates: BTreeMap::new(), - heap: BinaryHeap::new(), - heap_visited: HashSet::new(), - k, - l, - } - } - - /// Return the next unvisited vertex. - fn pop(&mut self) -> Option { - while let Some(vertex) = self.heap.pop() { - if !self.candidates.contains_key(&vertex.0.distance) { - // The vertex has been removed from the candidate lists, - // from [`push()`]. - continue; - } - - self.visited.insert(vertex.0.id); - return Some(vertex.0.id); - } - - None - } - - /// Push a new (unvisited) vertex into the search state. - fn push(&mut self, vertex_id: u32, distance: Scalar) { - assert!(!self.visited.contains(&vertex_id)); - self.heap_visited.insert(vertex_id); - self.heap - .push(Reverse(VertexWithDistance::new(vertex_id, distance))); - self.candidates.insert(distance, vertex_id); - if self.candidates.len() > self.l { - self.candidates.pop_last(); - } - } - - /// Mark a vertex as visited. - fn visit(&mut self, vertex_id: u32) { - self.visited.insert(vertex_id); - } - - // Returns true if the vertex has been visited. - fn is_visited(&self, vertex_id: u32) -> bool { - self.visited.contains(&vertex_id) || self.heap_visited.contains(&vertex_id) - } -} - -pub struct VamanaImpl { - raw: Arc, - - /// neighbors[vertex_id*r..(vertex_id+1)*r] records r neighbors for each vertex - neighbors: Vec>, - - /// neighbor_size[vertex_id] records the actual number of neighbors for each vertex - /// the RwLock is for protecting both the data for size and original data - neighbor_size: Vec>, - - /// the entry for the entire graph, the closet vector to centroid - medoid: u32, - - dims: u16, - r: u32, - alpha: f32, - l: usize, - - d: Distance, -} - -unsafe impl Send for VamanaImpl {} -unsafe impl Sync for VamanaImpl {} - -impl VamanaImpl { - pub fn new( - raw: Arc, - n: u32, - dims: u16, - r: u32, - alpha: f32, - l: usize, - d: Distance, - ) -> Self { - let neighbors = { - let mut result = Vec::new(); - result.resize_with(r as usize * n as usize, || AtomicCell::new(0)); - result - }; - let neighbor_size = unsafe { - let mut result = Vec::new(); - result.resize_with(n as usize, || RwLock::new(0)); - result - }; - let medoid = 0; - - let mut new_vamana = Self { - raw, - neighbors, - neighbor_size, - medoid, - dims, - r, - alpha, - l, - d, - }; - - // 1. init graph with r random neighbors for each node - let rng = rand::thread_rng(); - new_vamana._init_graph(n, rng.clone()); - - // 2. find medoid - new_vamana.medoid = new_vamana._find_medoid(n); - - // 3. iterate pass - new_vamana._one_pass(n, 1.0, r, l, rng.clone()); - - new_vamana._one_pass(n, alpha, r, l, rng.clone()); - - new_vamana - } - - pub fn search(&self, target: Box<[Scalar]>, k: usize, f: F) -> Vec<(Scalar, Payload)> - where - F: FnMut(Payload) -> bool, - { - // TODO: filter - let state = self._greedy_search_with_filter(0, &target, k, k * 2, f); - - let mut results = BinaryHeap::<(Scalar, u32)>::new(); - for (distance, row) in state.candidates { - if results.len() == k { - break; - } - - results.push((distance, row)); - } - let mut res_vec: Vec<(Scalar, Payload)> = results - .iter() - .map(|x| (x.0, self.raw.payload(x.1))) - .collect(); - res_vec.sort(); - res_vec - } - - fn _greedy_search_with_filter( - &self, - start: u32, - query: &[Scalar], - k: usize, - search_size: usize, - mut f: F, - ) -> SearchState - where - F: FnMut(Payload) -> bool, - { - let mut state = SearchState::new(k, search_size); - - let dist = self.d.distance(query, self.raw.vector(start)); - state.push(start, dist); - while let Some(id) = state.pop() { - // only pop id in the search list but not visited - state.visit(id); - { - let guard = self.neighbor_size[id as usize].read(); - let neighbor_ids = self._get_neighbors(id, &guard); - for neighbor_id in neighbor_ids { - let neighbor_id = neighbor_id.load(); - if state.is_visited(neighbor_id) { - continue; - } - - if f(self.raw.payload(neighbor_id)) { - let dist = self.d.distance(query, self.raw.vector(neighbor_id)); - state.push(neighbor_id, dist); // push and retain closet l nodes - } - } - } - } - - state - } - - fn _init_graph(&self, n: u32, mut rng: impl Rng) { - let distribution = Uniform::new(0, n); - for i in 0..n { - let mut neighbor_ids: HashSet = HashSet::new(); - if self.r < n { - while neighbor_ids.len() < self.r as usize { - let neighbor_id = rng.sample(distribution); - if neighbor_id != i { - neighbor_ids.insert(neighbor_id); - } - } - } else { - neighbor_ids = (0..n).collect(); - } - - { - let mut guard = self.neighbor_size[i as usize].write(); - self._set_neighbors(i, &neighbor_ids, &mut guard); - } - } - } - - fn _set_neighbors( - &self, - vertex_index: u32, - neighbor_ids: &HashSet, - guard: &mut RwLockWriteGuard, - ) { - assert!(neighbor_ids.len() <= self.r as usize); - for (i, item) in neighbor_ids.iter().enumerate() { - self.neighbors[vertex_index as usize * self.r as usize + i].store(*item); - } - **guard = neighbor_ids.len() as u32; - } - - fn _get_neighbors( - &self, - vertex_index: u32, - guard: &RwLockReadGuard, - ) -> &[AtomicCell] { - //TODO: store neighbor length - let size = **guard; - &self.neighbors[(vertex_index as usize * self.r as usize) - ..(vertex_index as usize * self.r as usize + size as usize)] - } - - fn _get_neighbors_with_write_guard( - &self, - vertex_index: u32, - guard: &RwLockWriteGuard, - ) -> &[AtomicCell] { - let size = **guard; - &self.neighbors[(vertex_index as usize * self.r as usize) - ..(vertex_index as usize * self.r as usize + size as usize)] - } - - fn _find_medoid(&self, n: u32) -> u32 { - let centroid = self._compute_centroid(n); - let centroid_arr: &[Scalar] = ¢roid; - - let mut medoid_index = 0; - let mut min_dis = Scalar::INFINITY; - for i in 0..n { - let dis = self.d.distance(centroid_arr, self.raw.vector(i)); - if dis < min_dis { - min_dis = dis; - medoid_index = i; - } - } - medoid_index - } - - fn _compute_centroid(&self, n: u32) -> Vec { - let dim = self.dims as usize; - let mut sum = vec![0_f64; dim]; // change to f32 to avoid overflow - for i in 0..n { - let vec = self.raw.vector(i); - for j in 0..dim { - sum[j] += f32::from(vec[j]) as f64; - } - } - - let collection: Vec = sum - .iter() - .map(|v| Scalar::from((*v / n as f64) as f32)) - .collect(); - collection - } - - // r and l leave here for multiple pass extension - fn _one_pass(&self, n: u32, alpha: f32, r: u32, l: usize, mut rng: impl Rng) { - let mut ids = (0..n).collect::>(); - ids.shuffle(&mut rng); - - ids.into_par_iter() - .for_each(|id| self.search_and_prune_for_one_vertex(id, alpha, r, l)); - } - - fn search_and_prune_for_one_vertex(&self, id: u32, alpha: f32, r: u32, l: usize) { - let query = self.raw.vector(id); - let mut state = self._greedy_search(self.medoid, query, 1, l); - state.visited.remove(&id); // in case visited has id itself - let mut new_neighbor_ids: HashSet = HashSet::new(); - { - let mut guard = self.neighbor_size[id as usize].write(); - let neighbor_ids = self._get_neighbors_with_write_guard(id, &guard); - state.visited.extend(neighbor_ids.iter().map(|x| x.load())); - let neighbor_ids = self._robust_prune(id, state.visited, alpha, r); - let neighbor_ids: HashSet = neighbor_ids.into_iter().collect(); - self._set_neighbors(id, &neighbor_ids, &mut guard); - new_neighbor_ids = neighbor_ids; - } - - for &neighbor_id in new_neighbor_ids.iter() { - { - let mut guard = self.neighbor_size[neighbor_id as usize].write(); - let old_neighbors = self._get_neighbors_with_write_guard(neighbor_id, &guard); - let mut old_neighbors: HashSet = - old_neighbors.iter().map(|x| x.load()).collect(); - old_neighbors.insert(id); - if old_neighbors.len() > r as usize { - // need robust prune - let new_neighbors = self._robust_prune(neighbor_id, old_neighbors, alpha, r); - let new_neighbors: HashSet = new_neighbors.into_iter().collect(); - self._set_neighbors(neighbor_id, &new_neighbors, &mut guard); - } else { - self._set_neighbors(neighbor_id, &old_neighbors, &mut guard); - } - } - } - } - - fn _greedy_search( - &self, - start: u32, - query: &[Scalar], - k: usize, - search_size: usize, - ) -> SearchState { - let mut state = SearchState::new(k, search_size); - - let dist = self.d.distance(query, self.raw.vector(start)); - state.push(start, dist); - while let Some(id) = state.pop() { - // only pop id in the search list but not visited - state.visit(id); - { - let guard = self.neighbor_size[id as usize].read(); - let neighbor_ids = self._get_neighbors(id, &guard); - for neighbor_id in neighbor_ids { - let neighbor_id = neighbor_id.load(); - if state.is_visited(neighbor_id) { - continue; - } - - let dist = self.d.distance(query, self.raw.vector(neighbor_id)); - state.push(neighbor_id, dist); // push and retain closet l nodes - } - } - } - - state - } - - fn _robust_prune(&self, id: u32, mut visited: HashSet, alpha: f32, r: u32) -> Vec { - let mut heap: BinaryHeap = visited - .iter() - .map(|v| { - let dist = self.d.distance(self.raw.vector(id), self.raw.vector(*v)); - VertexWithDistance { - id: *v, - distance: dist, - } - }) - .collect(); - - let mut new_neighbor_ids: Vec = vec![]; - while !visited.is_empty() { - if let Some(mut p) = heap.pop() { - while !visited.contains(&p.id) { - match heap.pop() { - Some(value) => { - p = value; - } - None => { - return new_neighbor_ids; - } - } - } - new_neighbor_ids.push(p.id); - if new_neighbor_ids.len() >= r as usize { - break; - } - let mut to_remove: HashSet = HashSet::new(); - for pv in visited.iter() { - let dist_prime = self.d.distance(self.raw.vector(p.id), self.raw.vector(*pv)); - let dist_query = self.d.distance(self.raw.vector(id), self.raw.vector(*pv)); - if Scalar::from(alpha) * dist_prime <= dist_query { - to_remove.insert(*pv); - } - } - for pv in to_remove.iter() { - visited.remove(pv); - } - } else { - return new_neighbor_ids; - } - } - new_neighbor_ids - } -} diff --git a/crates/service/src/index/mod.rs b/crates/service/src/index/mod.rs index 0ce8ccc..0093fad 100644 --- a/crates/service/src/index/mod.rs +++ b/crates/service/src/index/mod.rs @@ -90,13 +90,10 @@ pub struct SegmentStat { } #[derive(Debug, Serialize, Deserialize)] -pub enum IndexStat { - Normal { - indexing: bool, - segments: Vec, - options: IndexOptions, - }, - Upgrade, +pub struct IndexStat { + pub indexing: bool, + pub segments: Vec, + pub options: IndexOptions, } pub struct Index { @@ -111,10 +108,10 @@ pub struct Index { } impl Index { - pub fn create(path: PathBuf, options: IndexOptions) -> Result, ServiceError> { + pub fn create(path: PathBuf, options: IndexOptions) -> Result, CreateError> { if let Err(err) = options.validate() { - return Err(ServiceError::BadOption { - validation: err.to_string(), + return Err(CreateError::InvalidIndexOptions { + reason: err.to_string(), }); } std::fs::create_dir(&path).unwrap(); @@ -275,7 +272,7 @@ impl Index { } pub fn stat(&self) -> IndexStat { let view = self.view(); - IndexStat::Normal { + IndexStat { indexing: self.instant_index.load() < self.instant_write.load(), options: self.options().clone(), segments: { @@ -324,9 +321,14 @@ impl IndexView { vector: S::VectorRef<'_>, opts: &'a SearchOptions, filter: F, - ) -> Result + 'a, ServiceError> { + ) -> Result + 'a, BasicError> { if self.options.vector.dims != vector.dims() { - return Err(ServiceError::Unmatched); + return Err(BasicError::InvalidVector); + } + if let Err(err) = opts.validate() { + return Err(BasicError::InvalidSearchOptions { + reason: err.to_string(), + }); } struct Comparer(std::collections::BinaryHeap>); @@ -397,9 +399,14 @@ impl IndexView { vector: S::VectorRef<'a>, opts: &'a SearchOptions, filter: F, - ) -> Result + 'a, ServiceError> { + ) -> Result + 'a, VbaseError> { if self.options.vector.dims != vector.dims() { - return Err(ServiceError::Unmatched); + return Err(VbaseError::InvalidVector); + } + if let Err(err) = opts.validate() { + return Err(VbaseError::InvalidSearchOptions { + reason: err.to_string(), + }); } struct Filtering<'a, F: 'a> { @@ -461,7 +468,7 @@ impl IndexView { } })) } - pub fn list(&self) -> impl Iterator + '_ { + pub fn list(&self) -> Result + '_, ListError> { let sealed = self .sealed .values() @@ -475,18 +482,19 @@ impl IndexView { .iter() .map(|(_, x)| x) .flat_map(|x| (0..x.len()).map(|i| x.payload(i))); - sealed + let iter = sealed .chain(growing) .chain(write) - .filter_map(|p| self.delete.check(p)) + .filter_map(|p| self.delete.check(p)); + Ok(iter) } pub fn insert( &self, vector: S::VectorOwned, pointer: Pointer, - ) -> Result, ServiceError> { + ) -> Result, InsertError> { if self.options.vector.dims != vector.dims() { - return Err(ServiceError::Unmatched); + return Err(InsertError::InvalidVector); } let payload = (pointer.as_u48() << 16) | self.delete.version(pointer) as Payload; @@ -500,14 +508,16 @@ impl IndexView { Ok(Err(OutdatedError)) } } - pub fn delete(&self, p: Pointer) { + pub fn delete(&self, p: Pointer) -> Result<(), DeleteError> { self.delete.delete(p); + Ok(()) } - pub fn flush(&self) { + pub fn flush(&self) -> Result<(), FlushError> { self.delete.flush(); if let Some((_, write)) = &self.write { write.flush(); } + Ok(()) } } diff --git a/crates/service/src/index/optimizing/mod.rs b/crates/service/src/index/optimizing/mod.rs index 67972a9..2525b50 100644 --- a/crates/service/src/index/optimizing/mod.rs +++ b/crates/service/src/index/optimizing/mod.rs @@ -14,9 +14,9 @@ pub struct OptimizingOptions { #[serde(default = "OptimizingOptions::default_sealing_size")] #[validate(range(min = 1, max = 4_000_000_000))] pub sealing_size: u32, - #[serde(default = "OptimizingOptions::default_deleted_threshold", skip)] + #[serde(default = "OptimizingOptions::default_delete_threshold")] #[validate(range(min = 0.01, max = 1.00))] - pub deleted_threshold: f64, + pub delete_threshold: f64, #[serde(default = "OptimizingOptions::default_optimizing_threads")] #[validate(range(min = 1, max = 65535))] pub optimizing_threads: usize, @@ -29,7 +29,7 @@ impl OptimizingOptions { fn default_sealing_size() -> u32 { 1 } - fn default_deleted_threshold() -> f64 { + fn default_delete_threshold() -> f64 { 0.2 } fn default_optimizing_threads() -> usize { @@ -45,7 +45,7 @@ impl Default for OptimizingOptions { Self { sealing_secs: Self::default_sealing_secs(), sealing_size: Self::default_sealing_size(), - deleted_threshold: Self::default_deleted_threshold(), + delete_threshold: Self::default_delete_threshold(), optimizing_threads: Self::default_optimizing_threads(), } } diff --git a/crates/service/src/instance/mod.rs b/crates/service/src/instance/mod.rs index 794b198..3007458 100644 --- a/crates/service/src/instance/mod.rs +++ b/crates/service/src/instance/mod.rs @@ -10,6 +10,22 @@ use crate::prelude::*; use std::path::PathBuf; use std::sync::Arc; +pub trait InstanceViewOperations { + fn basic<'a, F: Fn(Pointer) -> bool + Clone + 'a>( + &'a self, + vector: &'a DynamicVector, + opts: &'a SearchOptions, + filter: F, + ) -> Result + 'a>, BasicError>; + fn vbase<'a, F: FnMut(Pointer) -> bool + Clone + 'a>( + &'a self, + vector: &'a DynamicVector, + opts: &'a SearchOptions, + filter: F, + ) -> Result + 'a>, VbaseError>; + fn list(&self) -> Result + '_>, ListError>; +} + #[derive(Clone)] pub enum Instance { F32Cos(Arc>), @@ -25,7 +41,7 @@ pub enum Instance { } impl Instance { - pub fn create(path: PathBuf, options: IndexOptions) -> Result { + pub fn create(path: PathBuf, options: IndexOptions) -> Result { match (options.vector.d, options.vector.k) { (Distance::Cos, Kind::F32) => { let index = Index::create(path.clone(), options)?; @@ -121,18 +137,18 @@ impl Instance { Instance::Upgrade => None, } } - pub fn stat(&self) -> IndexStat { + pub fn stat(&self) -> Option { match self { - Instance::F32Cos(x) => x.stat(), - Instance::F32Dot(x) => x.stat(), - Instance::F32L2(x) => x.stat(), - Instance::F16Cos(x) => x.stat(), - Instance::F16Dot(x) => x.stat(), - Instance::F16L2(x) => x.stat(), - Instance::SparseF32L2(x) => x.stat(), - Instance::SparseF32Cos(x) => x.stat(), - Instance::SparseF32Dot(x) => x.stat(), - Instance::Upgrade => IndexStat::Upgrade, + Instance::F32Cos(x) => Some(x.stat()), + Instance::F32Dot(x) => Some(x.stat()), + Instance::F32L2(x) => Some(x.stat()), + Instance::F16Cos(x) => Some(x.stat()), + Instance::F16Dot(x) => Some(x.stat()), + Instance::F16L2(x) => Some(x.stat()), + Instance::SparseF32L2(x) => Some(x.stat()), + Instance::SparseF32Cos(x) => Some(x.stat()), + Instance::SparseF32Dot(x) => Some(x.stat()), + Instance::Upgrade => None, } } } @@ -149,13 +165,13 @@ pub enum InstanceView { SparseF32L2(Arc>), } -impl InstanceView { - pub fn basic<'a, F: Fn(Pointer) -> bool + Clone + 'a>( +impl InstanceViewOperations for InstanceView { + fn basic<'a, F: Fn(Pointer) -> bool + Clone + 'a>( &'a self, vector: &'a DynamicVector, opts: &'a SearchOptions, filter: F, - ) -> Result + 'a, ServiceError> { + ) -> Result + 'a>, BasicError> { match (self, vector) { (InstanceView::F32Cos(x), DynamicVector::F32(vector)) => { Ok(Box::new(x.basic(vector, opts, filter)?) as Box>) @@ -184,15 +200,15 @@ impl InstanceView { (InstanceView::SparseF32L2(x), DynamicVector::SparseF32(vector)) => { Ok(Box::new(x.basic(vector.into(), opts, filter)?)) } - _ => Err(ServiceError::Unmatched), + _ => Err(BasicError::InvalidVector), } } - pub fn vbase<'a, F: FnMut(Pointer) -> bool + Clone + 'a>( + fn vbase<'a, F: FnMut(Pointer) -> bool + Clone + 'a>( &'a self, vector: &'a DynamicVector, opts: &'a SearchOptions, filter: F, - ) -> Result + '_, ServiceError> { + ) -> Result + 'a>, VbaseError> { match (self, vector) { (InstanceView::F32Cos(x), DynamicVector::F32(vector)) => { Ok(Box::new(x.vbase(vector, opts, filter)?) as Box>) @@ -221,27 +237,30 @@ impl InstanceView { (InstanceView::SparseF32L2(x), DynamicVector::SparseF32(vector)) => { Ok(Box::new(x.vbase(vector.into(), opts, filter)?)) } - _ => Err(ServiceError::Unmatched), + _ => Err(VbaseError::InvalidVector), } } - pub fn list(&self) -> impl Iterator + '_ { + fn list(&self) -> Result + '_>, ListError> { match self { - InstanceView::F32Cos(x) => Box::new(x.list()) as Box>, - InstanceView::F32Dot(x) => Box::new(x.list()), - InstanceView::F32L2(x) => Box::new(x.list()), - InstanceView::F16Cos(x) => Box::new(x.list()), - InstanceView::F16Dot(x) => Box::new(x.list()), - InstanceView::F16L2(x) => Box::new(x.list()), - InstanceView::SparseF32Cos(x) => Box::new(x.list()), - InstanceView::SparseF32Dot(x) => Box::new(x.list()), - InstanceView::SparseF32L2(x) => Box::new(x.list()), + InstanceView::F32Cos(x) => Ok(Box::new(x.list()?) as Box>), + InstanceView::F32Dot(x) => Ok(Box::new(x.list()?)), + InstanceView::F32L2(x) => Ok(Box::new(x.list()?)), + InstanceView::F16Cos(x) => Ok(Box::new(x.list()?)), + InstanceView::F16Dot(x) => Ok(Box::new(x.list()?)), + InstanceView::F16L2(x) => Ok(Box::new(x.list()?)), + InstanceView::SparseF32Cos(x) => Ok(Box::new(x.list()?)), + InstanceView::SparseF32Dot(x) => Ok(Box::new(x.list()?)), + InstanceView::SparseF32L2(x) => Ok(Box::new(x.list()?)), } } +} + +impl InstanceView { pub fn insert( &self, vector: DynamicVector, pointer: Pointer, - ) -> Result, ServiceError> { + ) -> Result, InsertError> { match (self, vector) { (InstanceView::F32Cos(x), DynamicVector::F32(vector)) => x.insert(vector, pointer), (InstanceView::F32Dot(x), DynamicVector::F32(vector)) => x.insert(vector, pointer), @@ -258,10 +277,10 @@ impl InstanceView { (InstanceView::SparseF32L2(x), DynamicVector::SparseF32(vector)) => { x.insert(vector, pointer) } - _ => Err(ServiceError::Unmatched), + _ => Err(InsertError::InvalidVector), } } - pub fn delete(&self, pointer: Pointer) { + pub fn delete(&self, pointer: Pointer) -> Result<(), DeleteError> { match self { InstanceView::F32Cos(x) => x.delete(pointer), InstanceView::F32Dot(x) => x.delete(pointer), @@ -274,7 +293,7 @@ impl InstanceView { InstanceView::SparseF32L2(x) => x.delete(pointer), } } - pub fn flush(&self) { + pub fn flush(&self) -> Result<(), FlushError> { match self { InstanceView::F32Cos(x) => x.flush(), InstanceView::F32Dot(x) => x.flush(), diff --git a/crates/service/src/prelude/error.rs b/crates/service/src/prelude/error.rs index f3fa22d..91781b7 100644 --- a/crates/service/src/prelude/error.rs +++ b/crates/service/src/prelude/error.rs @@ -1,37 +1,95 @@ use serde::{Deserialize, Serialize}; use thiserror::Error; +// control plane + #[must_use] #[derive(Debug, Clone, Error, Serialize, Deserialize)] -#[rustfmt::skip] -pub enum ServiceError { - #[error("\ -The given index option is invalid. -INFORMATION: reason = {validation:?}\ -")] - BadOption { validation: String }, - #[error("\ -The index is not existing in the background worker. -ADVICE: Drop or rebuild the index.\ -")] - UnknownIndex, -#[error("\ -The index is already existing in the background worker.\ -")] - KnownIndex, - #[error("\ -The given vector is invalid for input. -ADVICE: Check if dimensions and scalar type of the vector is matched with the index.\ -")] - Unmatched, - #[error("\ -The extension is upgraded so all index files are outdated. -ADVICE: Delete all index files. Please read `https://docs.pgvecto.rs/admin/upgrading.html`.\ -")] - Upgrade, - #[error("\ -The extension is upgraded so this index is outdated. -ADVICE: Rebuild the index. Please read `https://docs.pgvecto.rs/admin/upgrading.html`.\ -")] - Upgrade2, +pub enum CreateError { + #[error("Index of given name already exists.")] + Exist, + #[error("Invalid index options.")] + InvalidIndexOptions { reason: String }, +} + +#[must_use] +#[derive(Debug, Clone, Error, Serialize, Deserialize)] +pub enum DropError { + #[error("Index not found.")] + NotExist, +} + +// data plane + +#[must_use] +#[derive(Debug, Clone, Error, Serialize, Deserialize)] +pub enum FlushError { + #[error("Index not found.")] + NotExist, + #[error("Maintenance should be done.")] + Upgrade, +} + +#[must_use] +#[derive(Debug, Clone, Error, Serialize, Deserialize)] +pub enum InsertError { + #[error("Index not found.")] + NotExist, + #[error("Maintenance should be done.")] + Upgrade, + #[error("Invalid vector.")] + InvalidVector, +} + +#[must_use] +#[derive(Debug, Clone, Error, Serialize, Deserialize)] +pub enum DeleteError { + #[error("Index not found.")] + NotExist, + #[error("Maintenance should be done.")] + Upgrade, +} + +#[must_use] +#[derive(Debug, Clone, Error, Serialize, Deserialize)] +pub enum BasicError { + #[error("Index not found.")] + NotExist, + #[error("Maintenance should be done.")] + Upgrade, + #[error("Invalid vector.")] + InvalidVector, + #[error("Invalid search options.")] + InvalidSearchOptions { reason: String }, +} + +#[must_use] +#[derive(Debug, Clone, Error, Serialize, Deserialize)] +pub enum VbaseError { + #[error("Index not found.")] + NotExist, + #[error("Maintenance should be done.")] + Upgrade, + #[error("Invalid vector.")] + InvalidVector, + #[error("Invalid search options.")] + InvalidSearchOptions { reason: String }, +} + +#[must_use] +#[derive(Debug, Clone, Error, Serialize, Deserialize)] +pub enum ListError { + #[error("Index not found.")] + NotExist, + #[error("Maintenance should be done.")] + Upgrade, +} + +#[must_use] +#[derive(Debug, Clone, Error, Serialize, Deserialize)] +pub enum StatError { + #[error("Index not found.")] + NotExist, + #[error("Maintenance should be done.")] + Upgrade, } diff --git a/crates/service/src/prelude/mod.rs b/crates/service/src/prelude/mod.rs index 1ea2b02..67c1b16 100644 --- a/crates/service/src/prelude/mod.rs +++ b/crates/service/src/prelude/mod.rs @@ -5,7 +5,7 @@ mod search; mod storage; mod sys; -pub use self::error::ServiceError; +pub use self::error::*; pub use self::global::*; pub use self::scalar::{SparseF32, SparseF32Ref, F16, F32}; pub use self::search::{Element, Filter, Payload}; diff --git a/crates/service/src/worker/mod.rs b/crates/service/src/worker/mod.rs index de9d5f0..3ccc778 100644 --- a/crates/service/src/worker/mod.rs +++ b/crates/service/src/worker/mod.rs @@ -1,7 +1,7 @@ pub mod metadata; -use crate::index::IndexOptions; -use crate::instance::Instance; +use crate::index::{IndexOptions, IndexStat}; +use crate::instance::{Instance, InstanceView, InstanceViewOperations}; use crate::prelude::*; use crate::utils::clean::clean; use crate::utils::dir_ops::sync_dir; @@ -13,6 +13,25 @@ use std::collections::{HashMap, HashSet}; use std::path::PathBuf; use std::sync::Arc; +pub trait WorkerOperations { + type InstanceView: InstanceViewOperations; + + fn create(&self, handle: Handle, options: IndexOptions) -> Result<(), CreateError>; + fn drop(&self, handle: Handle) -> Result<(), DropError>; + fn flush(&self, handle: Handle) -> Result<(), FlushError>; + fn insert( + &self, + handle: Handle, + vector: DynamicVector, + pointer: Pointer, + ) -> Result<(), InsertError>; + fn delete(&self, handle: Handle, pointer: Pointer) -> Result<(), DeleteError>; + fn basic_view(&self, handle: Handle) -> Result; + fn vbase_view(&self, handle: Handle) -> Result; + fn list_view(&self, handle: Handle) -> Result; + fn stat(&self, handle: Handle) -> Result; +} + pub struct Worker { path: PathBuf, protect: Mutex, @@ -65,11 +84,12 @@ impl Worker { pub fn view(&self) -> Arc { self.view.load_full() } - pub fn instance_create( - &self, - handle: Handle, - options: IndexOptions, - ) -> Result<(), ServiceError> { +} + +impl WorkerOperations for Worker { + type InstanceView = InstanceView; + + fn create(&self, handle: Handle, options: IndexOptions) -> Result<(), CreateError> { use std::collections::hash_map::Entry; let mut protect = self.protect.lock(); match protect.indexes.entry(handle) { @@ -80,15 +100,70 @@ impl Worker { protect.maintain(&self.view); Ok(()) } - Entry::Occupied(_) => Err(ServiceError::KnownIndex), + Entry::Occupied(_) => Err(CreateError::Exist), } } - pub fn instance_destroy(&self, handle: Handle) { + fn drop(&self, handle: Handle) -> Result<(), DropError> { let mut protect = self.protect.lock(); if protect.indexes.remove(&handle).is_some() { protect.maintain(&self.view); + Ok(()) + } else { + Err(DropError::NotExist) } } + fn flush(&self, handle: Handle) -> Result<(), FlushError> { + let view = self.view(); + let instance = view.get(handle).ok_or(FlushError::NotExist)?; + let view = instance.view().ok_or(FlushError::Upgrade)?; + view.flush()?; + Ok(()) + } + fn insert( + &self, + handle: Handle, + vector: DynamicVector, + pointer: Pointer, + ) -> Result<(), InsertError> { + let view = self.view(); + let instance = view.get(handle).ok_or(InsertError::NotExist)?; + loop { + let view = instance.view().ok_or(InsertError::Upgrade)?; + match view.insert(vector.clone(), pointer)? { + Ok(()) => break, + Err(_) => instance.refresh(), + } + } + Ok(()) + } + fn delete(&self, handle: Handle, pointer: Pointer) -> Result<(), DeleteError> { + let view = self.view(); + let instance = view.get(handle).ok_or(DeleteError::NotExist)?; + let view = instance.view().ok_or(DeleteError::Upgrade)?; + view.delete(pointer)?; + Ok(()) + } + fn basic_view(&self, handle: Handle) -> Result { + let view = self.view(); + let instance = view.get(handle).ok_or(BasicError::NotExist)?; + instance.view().ok_or(BasicError::Upgrade) + } + fn vbase_view(&self, handle: Handle) -> Result { + let view = self.view(); + let instance = view.get(handle).ok_or(VbaseError::NotExist)?; + instance.view().ok_or(VbaseError::Upgrade) + } + fn list_view(&self, handle: Handle) -> Result { + let view = self.view(); + let instance = view.get(handle).ok_or(ListError::NotExist)?; + instance.view().ok_or(ListError::Upgrade) + } + fn stat(&self, handle: Handle) -> Result { + let view = self.view(); + let instance = view.get(handle).ok_or(StatError::NotExist)?; + let stat = instance.stat().ok_or(StatError::Upgrade)?; + Ok(stat) + } } pub struct WorkerView { diff --git a/src/bgworker/mod.rs b/src/bgworker/mod.rs index 04c3767..4e98a39 100644 --- a/src/bgworker/mod.rs +++ b/src/bgworker/mod.rs @@ -1,26 +1,35 @@ pub mod normal; -pub mod upgrade; + +use std::sync::atomic::{AtomicBool, Ordering}; + +static STARTED: AtomicBool = AtomicBool::new(false); pub unsafe fn init() { - use pgrx::bgworkers::BackgroundWorkerBuilder; - use pgrx::bgworkers::BgWorkerStartTime; - use std::time::Duration; - BackgroundWorkerBuilder::new("vectors") - .set_library("vectors") - .set_function("_vectors_main") - .set_argument(None) - .enable_shmem_access(None) - .set_start_time(BgWorkerStartTime::PostmasterStart) - .set_restart_time(Some(Duration::from_secs(1))) - .load(); + use service::worker::Worker; + let path = std::path::Path::new("pg_vectors"); + if !path.try_exists().unwrap() || Worker::check(path.to_owned()) { + use pgrx::bgworkers::BackgroundWorkerBuilder; + use pgrx::bgworkers::BgWorkerStartTime; + use std::time::Duration; + BackgroundWorkerBuilder::new("vectors") + .set_library("vectors") + .set_function("_vectors_main") + .set_argument(None) + .enable_shmem_access(None) + .set_start_time(BgWorkerStartTime::PostmasterStart) + .set_restart_time(Some(Duration::from_secs(15))) + .load(); + STARTED.store(true, Ordering::Relaxed); + } } +pub fn is_started() -> bool { + STARTED.load(Ordering::Relaxed) +} + +#[pgrx::pg_guard] #[no_mangle] extern "C" fn _vectors_main(_arg: pgrx::pg_sys::Datum) { - let _ = std::panic::catch_unwind(main); -} - -fn main() { pub struct AllocErrorPanicPayload { pub layout: std::alloc::Layout, } @@ -60,12 +69,8 @@ fn main() { use std::path::Path; let path = Path::new("pg_vectors"); if path.try_exists().unwrap() { - if Worker::check(path.to_owned()) { - let worker = Worker::open(path.to_owned()); - self::normal::normal(worker); - } else { - self::upgrade::upgrade(); - } + let worker = Worker::open(path.to_owned()); + self::normal::normal(worker); } else { let worker = Worker::create(path.to_owned()); self::normal::normal(worker); diff --git a/src/bgworker/normal.rs b/src/bgworker/normal.rs index cf87f8b..c96fbc7 100644 --- a/src/bgworker/normal.rs +++ b/src/bgworker/normal.rs @@ -1,7 +1,5 @@ -use crate::ipc::server::RpcHandler; use crate::ipc::ConnectionError; -use service::index::OutdatedError; -use service::prelude::ServiceError; +use crate::ipc::ServerRpcHandler; use service::worker::Worker; use std::sync::Arc; @@ -59,167 +57,128 @@ pub fn normal(worker: Arc) { }); } -fn session(worker: Arc, handler: RpcHandler) -> Result { - use crate::ipc::server::RpcHandle; +fn session(worker: Arc, handler: ServerRpcHandler) -> Result { + use crate::ipc::ServerRpcHandle; + use service::instance::InstanceViewOperations; + use service::worker::WorkerOperations; let mut handler = handler; loop { match handler.handle()? { - // transaction - RpcHandle::Flush { handle, x } => { - let view = worker.view(); - if let Some(instance) = view.get(handle) { - if let Some(view) = instance.view() { - view.flush(); - } - } - handler = x.leave()?; + // control plane + ServerRpcHandle::Create { handle, options, x } => { + handler = x.leave(WorkerOperations::create(worker.as_ref(), handle, options))?; } - RpcHandle::Drop { handle, x } => { - worker.instance_destroy(handle); - handler = x.leave()?; + ServerRpcHandle::Drop { handle, x } => { + handler = x.leave(WorkerOperations::drop(worker.as_ref(), handle))?; } - RpcHandle::Create { handle, options, x } => { - match worker.instance_create(handle, options) { - Ok(()) => (), - Err(e) => x.reset(e)?, - }; - handler = x.leave()?; + // data plane + ServerRpcHandle::Flush { handle, x } => { + handler = x.leave(worker.flush(handle))?; } - // instance - RpcHandle::Insert { + ServerRpcHandle::Insert { handle, vector, pointer, x, } => { - let view = worker.view(); - let Some(instance) = view.get(handle) else { - x.reset(ServiceError::UnknownIndex)?; - }; - loop { - let instance_view = match instance.view() { - Some(x) => x, - None => x.reset(ServiceError::Upgrade2)?, - }; - match instance_view.insert(vector.clone(), pointer) { - Ok(Ok(())) => break, - Ok(Err(OutdatedError)) => instance.refresh(), - Err(e) => x.reset(e)?, - } - } - handler = x.leave()?; + handler = x.leave(worker.insert(handle, vector, pointer))?; } - RpcHandle::Delete { handle, pointer, x } => { - let view = worker.view(); - let Some(instance) = view.get(handle) else { - x.reset(ServiceError::UnknownIndex)?; - }; - let instance_view = match instance.view() { - Some(x) => x, - None => x.reset(ServiceError::Upgrade2)?, - }; - instance_view.delete(pointer); - handler = x.leave()?; + ServerRpcHandle::Delete { handle, pointer, x } => { + handler = x.leave(worker.delete(handle, pointer))?; } - RpcHandle::Stat { handle, x } => { - let view = worker.view(); - let Some(instance) = view.get(handle) else { - x.reset(ServiceError::UnknownIndex)?; - }; - let r = instance.stat(); - handler = x.leave(r)? + ServerRpcHandle::Stat { handle, x } => { + handler = x.leave(worker.stat(handle))?; } - RpcHandle::Basic { + ServerRpcHandle::Basic { handle, vector, opts, x, } => { - use crate::ipc::server::BasicHandle::*; - let view = worker.view(); - let Some(instance) = view.get(handle) else { - x.reset(ServiceError::UnknownIndex)?; - }; - let view = match instance.view() { - Some(x) => x, - None => x.reset(ServiceError::Upgrade2)?, - }; - let mut it = match view.basic(&vector, &opts, |_| true) { + let v = match worker.basic_view(handle) { Ok(x) => x, - Err(e) => x.reset(e)?, + Err(e) => { + handler = x.error_err(e)?; + continue; + } }; - let mut x = x.error()?; - loop { - match x.handle()? { - Next { x: y } => { - x = y.leave(it.next())?; - } - Leave { x } => { - handler = x; - break; + match v.basic(&vector, &opts, |_| true) { + Ok(mut iter) => { + use crate::ipc::ServerBasicHandle; + let mut x = x.error_ok()?; + loop { + match x.handle()? { + ServerBasicHandle::Next { x: y } => { + x = y.leave(iter.next())?; + } + ServerBasicHandle::Leave { x } => { + handler = x; + break; + } + } } } - } + Err(e) => handler = x.error_err(e)?, + }; } - RpcHandle::Vbase { + ServerRpcHandle::Vbase { handle, vector, opts, x, } => { - use crate::ipc::server::VbaseHandle::*; - let view = worker.view(); - let Some(instance) = view.get(handle) else { - x.reset(ServiceError::UnknownIndex)?; - }; - let view = match instance.view() { - Some(x) => x, - None => x.reset(ServiceError::Upgrade2)?, - }; - let mut it = match view.vbase(&vector, &opts, |_| true) { + let v = match worker.vbase_view(handle) { Ok(x) => x, - Err(e) => x.reset(e)?, + Err(e) => { + handler = x.error_err(e)?; + continue; + } }; - let mut x = x.error()?; - loop { - match x.handle()? { - Next { x: y } => { - x = y.leave(it.next())?; - } - Leave { x } => { - handler = x; - break; + match v.vbase(&vector, &opts, |_| true) { + Ok(mut iter) => { + use crate::ipc::ServerVbaseHandle; + let mut x = x.error_ok()?; + loop { + match x.handle()? { + ServerVbaseHandle::Next { x: y } => { + x = y.leave(iter.next())?; + } + ServerVbaseHandle::Leave { x } => { + handler = x; + break; + } + } } } - } + Err(e) => handler = x.error_err(e)?, + }; } - RpcHandle::List { handle, x } => { - use crate::ipc::server::ListHandle::*; - let view = worker.view(); - let Some(instance) = view.get(handle) else { - x.reset(ServiceError::UnknownIndex)?; + ServerRpcHandle::List { handle, x } => { + let v = match worker.list_view(handle) { + Ok(x) => x, + Err(e) => { + handler = x.error_err(e)?; + continue; + } }; - let view = match instance.view() { - Some(x) => x, - None => x.reset(ServiceError::Upgrade2)?, - }; - let mut it = view.list(); - let mut x = x.error()?; - loop { - match x.handle()? { - Next { x: y } => { - x = y.leave(it.next())?; - } - Leave { x } => { - handler = x; - break; + match v.list() { + Ok(mut iter) => { + use crate::ipc::ServerListHandle; + let mut x = x.error_ok()?; + loop { + match x.handle()? { + ServerListHandle::Next { x: y } => { + x = y.leave(iter.next())?; + } + ServerListHandle::Leave { x } => { + handler = x; + break; + } + } } } - } - } - // admin - RpcHandle::Upgrade { x } => { - handler = x.leave()?; + Err(e) => handler = x.error_err(e)?, + }; } } } diff --git a/src/bgworker/upgrade.rs b/src/bgworker/upgrade.rs deleted file mode 100644 index 7accc5f..0000000 --- a/src/bgworker/upgrade.rs +++ /dev/null @@ -1,78 +0,0 @@ -use crate::ipc::server::RpcHandler; -use crate::ipc::ConnectionError; -use service::prelude::*; - -pub fn upgrade() { - std::thread::scope(|scope| { - scope.spawn({ - move || { - for rpc_handler in crate::ipc::listen_unix() { - std::thread::spawn({ - move || { - log::trace!("Session established."); - let _ = session(rpc_handler); - log::trace!("Session closed."); - } - }); - } - } - }); - scope.spawn({ - move || { - for rpc_handler in crate::ipc::listen_mmap() { - std::thread::spawn({ - move || { - log::trace!("Session established."); - let _ = session(rpc_handler); - log::trace!("Session closed."); - } - }); - } - } - }); - loop { - let mut sig: i32 = 0; - unsafe { - let mut set: libc::sigset_t = std::mem::zeroed(); - libc::sigemptyset(&mut set); - libc::sigaddset(&mut set, libc::SIGHUP); - libc::sigaddset(&mut set, libc::SIGTERM); - libc::sigwait(&set, &mut sig); - } - match sig { - libc::SIGHUP => { - std::process::exit(0); - } - libc::SIGTERM => { - std::process::exit(0); - } - _ => (), - } - } - }); -} - -fn session(handler: RpcHandler) -> Result<(), ConnectionError> { - use crate::ipc::server::RpcHandle; - let mut handler = handler; - loop { - match handler.handle()? { - RpcHandle::Drop { x, .. } => { - // false drop - handler = x.leave()?; - } - RpcHandle::Flush { x, .. } => x.reset(ServiceError::Upgrade)?, - RpcHandle::Create { x, .. } => x.reset(ServiceError::Upgrade)?, - RpcHandle::Insert { x, .. } => x.reset(ServiceError::Upgrade)?, - RpcHandle::Delete { x, .. } => x.reset(ServiceError::Upgrade)?, - RpcHandle::Stat { x, .. } => x.reset(ServiceError::Upgrade)?, - RpcHandle::Basic { x, .. } => x.reset(ServiceError::Upgrade)?, - RpcHandle::Vbase { x, .. } => x.reset(ServiceError::Upgrade)?, - RpcHandle::List { x, .. } => x.reset(ServiceError::Upgrade)?, - RpcHandle::Upgrade { x } => { - let _ = std::fs::remove_dir_all("./pg_vectors"); - handler = x.leave()?; - } - } - } -} diff --git a/src/datatype/casts_f32.rs b/src/datatype/casts_f32.rs index 5c0b6fc..d28f7d3 100644 --- a/src/datatype/casts_f32.rs +++ b/src/datatype/casts_f32.rs @@ -1,7 +1,7 @@ use crate::datatype::svecf32::{SVecf32, SVecf32Input, SVecf32Output}; use crate::datatype::vecf16::{Vecf16, Vecf16Input, Vecf16Output}; use crate::datatype::vecf32::{Vecf32, Vecf32Input, Vecf32Output}; -use crate::prelude::{FriendlyError, SessionError}; +use crate::prelude::check_value_dimensions; use service::prelude::*; #[pgrx::pg_extern(immutable, parallel_safe, strict)] @@ -10,9 +10,7 @@ fn _vectors_cast_array_to_vecf32( _typmod: i32, _explicit: bool, ) -> Vecf32Output { - if array.is_empty() || array.len() > 65535 { - SessionError::BadValueDimensions.friendly(); - } + check_value_dimensions(array.len()); let mut data = vec![F32::zero(); array.len()]; for (i, x) in array.iter().enumerate() { data[i] = F32(x.unwrap_or(f32::NAN)); diff --git a/src/datatype/operators_svecf32.rs b/src/datatype/operators_svecf32.rs index 0f7180f..88fa790 100644 --- a/src/datatype/operators_svecf32.rs +++ b/src/datatype/operators_svecf32.rs @@ -1,19 +1,11 @@ -use crate::{ - datatype::svecf32::{SVecf32, SVecf32Input, SVecf32Output}, - prelude::{FriendlyError, SessionError}, -}; +use crate::datatype::svecf32::{SVecf32, SVecf32Input, SVecf32Output}; +use crate::prelude::*; use service::prelude::*; use std::ops::Deref; #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_svecf32_operator_add(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) -> SVecf32Output { - if lhs.dims() != rhs.dims() { - SessionError::Unmatched { - left_dimensions: lhs.dims() as _, - right_dimensions: rhs.dims() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.dims() as _, rhs.dims() as _); let size1 = lhs.len(); let size2 = rhs.len(); @@ -58,13 +50,7 @@ fn _vectors_svecf32_operator_add(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) - #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_svecf32_operator_minus(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) -> SVecf32Output { - if lhs.dims() != rhs.dims() { - SessionError::Unmatched { - left_dimensions: lhs.dims() as _, - right_dimensions: rhs.dims() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.dims() as _, rhs.dims() as _); let size1 = lhs.len(); let size2 = rhs.len(); @@ -109,117 +95,54 @@ fn _vectors_svecf32_operator_minus(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_svecf32_operator_lt(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) -> bool { - if lhs.dims() != rhs.dims() { - SessionError::Unmatched { - left_dimensions: lhs.dims() as _, - right_dimensions: rhs.dims() as _, - } - .friendly(); - } - + check_matched_dimensions(lhs.dims() as _, rhs.dims() as _); lhs.deref() < rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_svecf32_operator_lte(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) -> bool { - if lhs.dims() != rhs.dims() { - SessionError::Unmatched { - left_dimensions: lhs.dims() as _, - right_dimensions: rhs.dims() as _, - } - .friendly(); - } - + check_matched_dimensions(lhs.dims() as _, rhs.dims() as _); lhs.deref() <= rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_svecf32_operator_gt(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) -> bool { - if lhs.dims() != rhs.dims() { - SessionError::Unmatched { - left_dimensions: lhs.dims() as _, - right_dimensions: rhs.dims() as _, - } - .friendly(); - } - + check_matched_dimensions(lhs.dims() as _, rhs.dims() as _); lhs.deref() > rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_svecf32_operator_gte(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) -> bool { - if lhs.dims() != rhs.dims() { - SessionError::Unmatched { - left_dimensions: lhs.dims() as _, - right_dimensions: rhs.dims() as _, - } - .friendly(); - } - + check_matched_dimensions(lhs.dims() as _, rhs.dims() as _); lhs.deref() >= rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_svecf32_operator_eq(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) -> bool { - if lhs.dims() != rhs.dims() { - SessionError::Unmatched { - left_dimensions: lhs.dims() as _, - right_dimensions: rhs.dims() as _, - } - .friendly(); - } - + check_matched_dimensions(lhs.dims() as _, rhs.dims() as _); lhs.deref() == rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_svecf32_operator_neq(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) -> bool { - if lhs.dims() != rhs.dims() { - SessionError::Unmatched { - left_dimensions: lhs.dims() as _, - right_dimensions: rhs.dims() as _, - } - .friendly(); - } - + check_matched_dimensions(lhs.dims() as _, rhs.dims() as _); lhs.deref() != rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_svecf32_operator_cosine(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) -> f32 { - if lhs.dims() != rhs.dims() { - SessionError::Unmatched { - left_dimensions: lhs.dims() as _, - right_dimensions: rhs.dims() as _, - } - .friendly(); - } - + check_matched_dimensions(lhs.dims() as _, rhs.dims() as _); SparseF32Cos::distance(lhs.data(), rhs.data()).to_f32() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_svecf32_operator_dot(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) -> f32 { - if lhs.dims() != rhs.dims() { - SessionError::Unmatched { - left_dimensions: lhs.dims() as _, - right_dimensions: rhs.dims() as _, - } - .friendly(); - } - + check_matched_dimensions(lhs.dims() as _, rhs.dims() as _); SparseF32Dot::distance(lhs.data(), rhs.data()).to_f32() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_svecf32_operator_l2(lhs: SVecf32Input<'_>, rhs: SVecf32Input<'_>) -> f32 { - if lhs.dims() != rhs.dims() { - SessionError::Unmatched { - left_dimensions: lhs.dims() as _, - right_dimensions: rhs.dims() as _, - } - .friendly(); - } - + check_matched_dimensions(lhs.dims() as _, rhs.dims() as _); SparseF32L2::distance(lhs.data(), rhs.data()).to_f32() } diff --git a/src/datatype/operators_vecf16.rs b/src/datatype/operators_vecf16.rs index 1f3e65a..c817f8e 100644 --- a/src/datatype/operators_vecf16.rs +++ b/src/datatype/operators_vecf16.rs @@ -5,14 +5,7 @@ use std::ops::Deref; #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf16_operator_add(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> Vecf16Output { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } - let n = lhs.len(); + let n = check_matched_dimensions(lhs.len(), rhs.len()); let mut v = vec![F16::zero(); n]; for i in 0..n { v[i] = lhs[i] + rhs[i]; @@ -22,14 +15,7 @@ fn _vectors_vecf16_operator_add(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> V #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf16_operator_minus(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> Vecf16Output { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } - let n = lhs.len(); + let n = check_matched_dimensions(lhs.len(), rhs.len()); let mut v = vec![F16::zero(); n]; for i in 0..n { v[i] = lhs[i] - rhs[i]; @@ -39,108 +25,54 @@ fn _vectors_vecf16_operator_minus(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf16_operator_lt(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() < rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf16_operator_lte(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() <= rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf16_operator_gt(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() > rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf16_operator_gte(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() >= rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf16_operator_eq(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() == rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf16_operator_neq(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() != rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf16_operator_cosine(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> f32 { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); F16Cos::distance(&lhs, &rhs).to_f32() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf16_operator_dot(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> f32 { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); F16Dot::distance(&lhs, &rhs).to_f32() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf16_operator_l2(lhs: Vecf16Input<'_>, rhs: Vecf16Input<'_>) -> f32 { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); F16L2::distance(&lhs, &rhs).to_f32() } diff --git a/src/datatype/operators_vecf32.rs b/src/datatype/operators_vecf32.rs index 098e92f..31c6360 100644 --- a/src/datatype/operators_vecf32.rs +++ b/src/datatype/operators_vecf32.rs @@ -5,14 +5,7 @@ use std::ops::Deref; #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf32_operator_add(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> Vecf32Output { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } - let n = lhs.len(); + let n = check_matched_dimensions(lhs.len(), rhs.len()); let mut v = vec![F32::zero(); n]; for i in 0..n { v[i] = lhs[i] + rhs[i]; @@ -22,14 +15,7 @@ fn _vectors_vecf32_operator_add(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> V #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf32_operator_minus(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> Vecf32Output { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } - let n = lhs.len(); + let n = check_matched_dimensions(lhs.len(), rhs.len()); let mut v = vec![F32::zero(); n]; for i in 0..n { v[i] = lhs[i] - rhs[i]; @@ -39,108 +25,54 @@ fn _vectors_vecf32_operator_minus(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf32_operator_lt(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() < rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf32_operator_lte(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() <= rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf32_operator_gt(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() > rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf32_operator_gte(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() >= rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf32_operator_eq(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() == rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf32_operator_neq(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> bool { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); lhs.deref() != rhs.deref() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf32_operator_cosine(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> f32 { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); F32Cos::distance(&lhs, &rhs).to_f32() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf32_operator_dot(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> f32 { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); F32Dot::distance(&lhs, &rhs).to_f32() } #[pgrx::pg_extern(immutable, parallel_safe)] fn _vectors_vecf32_operator_l2(lhs: Vecf32Input<'_>, rhs: Vecf32Input<'_>) -> f32 { - if lhs.len() != rhs.len() { - SessionError::Unmatched { - left_dimensions: lhs.len() as _, - right_dimensions: rhs.len() as _, - } - .friendly(); - } + check_matched_dimensions(lhs.len(), rhs.len()); F32L2::distance(&lhs, &rhs).to_f32() } diff --git a/src/datatype/svecf32.rs b/src/datatype/svecf32.rs index b882a7f..2f585d1 100644 --- a/src/datatype/svecf32.rs +++ b/src/datatype/svecf32.rs @@ -275,10 +275,7 @@ fn _vectors_svecf32_in(input: &CStr, _oid: Oid, _typmod: i32) -> SVecf32Output { if let Some(x) = option { x } else { - SessionError::BadLiteral { - hint: hint.to_string(), - } - .friendly() + bad_literal(hint); } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -312,7 +309,7 @@ fn _vectors_svecf32_in(input: &CStr, _oid: Oid, _typmod: i32) -> SVecf32Output { } index = match index.checked_add(1) { Some(x) => x, - None => SessionError::BadValueDimensions.friendly(), + None => check_value_dimensions(65536).get(), }; } (Reading, b']') => { @@ -324,31 +321,22 @@ fn _vectors_svecf32_in(input: &CStr, _oid: Oid, _typmod: i32) -> SVecf32Output { } index = match index.checked_add(1) { Some(x) => x, - None => SessionError::BadValueDimensions.friendly(), + None => check_value_dimensions(65536).get(), }; } state = MatchedRight; } (_, b' ') => {} _ => { - SessionError::BadLiteral { - hint: format!("Bad character with ascii {:#x}.", c), - } - .friendly(); + bad_literal(&format!("Bad character with ascii {:#x}.", c)); } } } if state != MatchedRight { - SessionError::BadLiteral { - hint: "Bad sequence.".to_string(), - } - .friendly(); - } - if index == 0 { - SessionError::BadValueDimensions.friendly(); + bad_literal("Bad sequence"); } SVecf32::new_in_postgres(SparseF32Ref { - dims: index, + dims: check_value_dimensions(index as usize).get(), indexes: &indexes, values: &values, }) @@ -616,15 +604,12 @@ fn _vectors_svecf32_recv(internal: pgrx::Internal, _oid: Oid, _typmod: i32) -> S if len > 1 { for i in 0..len as usize - 1 { if indexes[i] >= indexes[i + 1] { - SessionError::BadLiteral { - hint: "Indexes are not sorted or duplicated.".to_string(), - } - .friendly(); + pgrx::error!("data corruption is detected"); } } } if indexes[len as usize - 1] >= dims { - SessionError::BadValueDimensions.friendly(); + pgrx::error!("data corruption is detected"); } output.dims = dims; @@ -643,33 +628,24 @@ fn _vectors_svecf32_recv(internal: pgrx::Internal, _oid: Oid, _typmod: i32) -> S } #[pgrx::pg_extern(immutable, parallel_safe, strict)] -fn _vectors_svector_from_array( +fn _vectors_to_svector( dims: i32, index: pgrx::Array, value: pgrx::Array, ) -> SVecf32Output { - let dims: u16 = match dims.try_into() { - Ok(x) => x, - Err(_) => SessionError::BadValueDimensions.friendly(), - }; + let dims = check_value_dimensions(dims as usize); if index.len() != value.len() { - SessionError::BadLiteral { - hint: "Lengths of index and value are not matched.".to_string(), - } - .friendly(); + bad_literal("Lengths of index and value are not matched."); } if index.contains_nulls() || value.contains_nulls() { - SessionError::BadLiteral { - hint: "Index or value contains nulls.".to_string(), - } - .friendly(); + bad_literal("Index or value contains nulls."); } let mut vector: Vec<(u16, F32)> = index .iter_deny_null() .zip(value.iter_deny_null()) .map(|(index, value)| { - if index < 0 || index >= dims as i32 { - SessionError::BadValueDimensions.friendly(); + if index < 0 || index >= dims.get() as i32 { + bad_literal("Index out of bound."); } (index as u16, F32(value)) }) @@ -678,10 +654,7 @@ fn _vectors_svector_from_array( if vector.len() > 1 { for i in 0..vector.len() - 1 { if vector[i].0 == vector[i + 1].0 { - SessionError::BadLiteral { - hint: "Duplicated index.".to_string(), - } - .friendly(); + bad_literal("Duplicated index."); } } } @@ -693,7 +666,7 @@ fn _vectors_svector_from_array( values.push(x.1); } SVecf32::new_in_postgres(SparseF32Ref { - dims, + dims: dims.get(), indexes: &indexes, values: &values, }) diff --git a/src/datatype/typmod.rs b/src/datatype/typmod.rs index de48d21..ea88024 100644 --- a/src/datatype/typmod.rs +++ b/src/datatype/typmod.rs @@ -11,14 +11,6 @@ pub enum Typmod { } impl Typmod { - pub fn parse_from_str(s: &str) -> Option { - use Typmod::*; - if let Ok(x) = s.parse::() { - Some(Dims(x)) - } else { - None - } - } pub fn parse_from_i32(x: i32) -> Option { use Typmod::*; if x == -1 { @@ -43,11 +35,11 @@ impl Typmod { Dims(x) => i32::from(x.get()), } } - pub fn dims(self) -> Option { + pub fn dims(self) -> Option { use Typmod::*; match self { Any => None, - Dims(dims) => Some(dims.get()), + Dims(dims) => Some(dims), } } } @@ -58,12 +50,11 @@ fn _vectors_typmod_in(list: Array<&CStr>) -> i32 { -1 } else if list.len() == 1 { let s = list.get(0).unwrap().unwrap().to_str().unwrap(); - let typmod = Typmod::parse_from_str(s) - .ok_or(SessionError::BadTypeDimensions) - .friendly(); + let typmod = Typmod::Dims(check_type_dimensions(s.parse::().ok())); typmod.into_i32() } else { - SessionError::BadTypeDimensions.friendly(); + check_type_dimensions(None); + unreachable!() } } diff --git a/src/datatype/vecf16.rs b/src/datatype/vecf16.rs index df43c0c..24e3bfd 100644 --- a/src/datatype/vecf16.rs +++ b/src/datatype/vecf16.rs @@ -268,19 +268,18 @@ unsafe impl SqlTranslatable for Vecf16Output { #[pgrx::pg_extern(immutable, parallel_safe, strict)] fn _vectors_vecf16_in(input: &CStr, _oid: Oid, typmod: i32) -> Vecf16Output { use crate::utils::parse::parse_vector; - let reserve = Typmod::parse_from_i32(typmod).unwrap().dims().unwrap_or(0); + let reserve = Typmod::parse_from_i32(typmod) + .unwrap() + .dims() + .map(|x| x.get()) + .unwrap_or(0); let v = parse_vector(input.to_bytes(), reserve as usize, |s| s.parse().ok()); match v { Err(e) => { - SessionError::BadLiteral { - hint: e.to_string(), - } - .friendly(); + bad_literal(&e.to_string()); } Ok(vector) => { - if vector.is_empty() || vector.len() > 65535 { - SessionError::BadValueDimensions.friendly(); - } + check_value_dimensions(vector.len()); Vecf16::new_in_postgres(&vector) } } diff --git a/src/datatype/vecf32.rs b/src/datatype/vecf32.rs index c910bca..dd59882 100644 --- a/src/datatype/vecf32.rs +++ b/src/datatype/vecf32.rs @@ -268,19 +268,18 @@ unsafe impl SqlTranslatable for Vecf32Output { #[pgrx::pg_extern(immutable, parallel_safe, strict)] fn _vectors_vecf32_in(input: &CStr, _oid: Oid, typmod: i32) -> Vecf32Output { use crate::utils::parse::parse_vector; - let reserve = Typmod::parse_from_i32(typmod).unwrap().dims().unwrap_or(0); + let reserve = Typmod::parse_from_i32(typmod) + .unwrap() + .dims() + .map(|x| x.get()) + .unwrap_or(0); let v = parse_vector(input.to_bytes(), reserve as usize, |s| s.parse().ok()); match v { Err(e) => { - SessionError::BadLiteral { - hint: e.to_string(), - } - .friendly(); + bad_literal(&e.to_string()); } Ok(vector) => { - if vector.is_empty() || vector.len() > 65535 { - SessionError::BadValueDimensions.friendly(); - } + check_value_dimensions(vector.len()); Vecf32::new_in_postgres(&vector) } } diff --git a/src/gucs/executing.rs b/src/gucs/executing.rs index 0708d26..08d8748 100644 --- a/src/gucs/executing.rs +++ b/src/gucs/executing.rs @@ -1,6 +1,5 @@ use pgrx::{GucContext, GucFlags, GucRegistry, GucSetting}; use service::index::SearchOptions; -use validator::Validate; static ENABLE_PREFILTER: GucSetting = GucSetting::::new(true); @@ -40,11 +39,9 @@ pub unsafe fn init() { } pub fn search_options() -> SearchOptions { - let options = SearchOptions { + SearchOptions { prefilter_enable: ENABLE_PREFILTER.get(), hnsw_ef_search: HNSW_EF_SEARCH.get() as usize, ivf_nprobe: IVF_NPROBE.get() as u32, - }; - assert!(options.validate().is_ok()); - options + } } diff --git a/src/index/am_build.rs b/src/index/am_build.rs index fb39f9c..38652c3 100644 --- a/src/index/am_build.rs +++ b/src/index/am_build.rs @@ -1,14 +1,14 @@ #![allow(unsafe_op_in_unsafe_fn)] +use crate::index::am_setup::options; use crate::index::utils::from_datum; -use crate::ipc::client::ClientGuard; +use crate::ipc::ClientRpc; use crate::prelude::*; -use crate::{index::am_setup::options, ipc::client::Rpc}; use pgrx::pg_sys::{IndexBuildResult, IndexInfo, RelationData}; use service::prelude::*; pub struct Builder { - pub rpc: ClientGuard, + pub rpc: ClientRpc, pub heap_relation: *mut RelationData, pub index_info: *mut IndexInfo, pub result: *mut IndexBuildResult, @@ -24,8 +24,14 @@ pub unsafe fn build( let oid = (*index).rd_locator.relNumber; let id = Handle::from_sys(oid); let options = options(index); - let mut rpc = crate::ipc::client::borrow_mut(); - rpc.create(id, options); + let mut rpc = check_client(crate::ipc::client()); + match rpc.create(id, options) { + Ok(()) => (), + Err(CreateError::Exist) => bad_service_exists(), + Err(CreateError::InvalidIndexOptions { reason }) => { + bad_service_invalid_index_options(&reason) + } + } if let Some((heap_relation, index_info, result)) = data { let mut builder = Builder { rpc, @@ -60,7 +66,12 @@ unsafe extern "C" fn callback( let state = &mut *(state as *mut Builder); let vector = from_datum(*values.add(0)); let pointer = Pointer::from_sys(*ctid); - state.rpc.insert(id, vector, pointer); + match state.rpc.insert(id, vector, pointer) { + Ok(()) => (), + Err(InsertError::NotExist) => bad_service_not_exist(), + Err(InsertError::Upgrade) => bad_service_upgrade(), + Err(InsertError::InvalidVector) => bad_service_invalid_vector(), + } (*state.result).heap_tuples += 1.0; (*state.result).index_tuples += 1.0; } diff --git a/src/index/am_scan.rs b/src/index/am_scan.rs index 5ad1ad6..6bbf097 100644 --- a/src/index/am_scan.rs +++ b/src/index/am_scan.rs @@ -4,8 +4,7 @@ use crate::gucs::executing::search_options; use crate::gucs::planning::Mode; use crate::gucs::planning::SEARCH_MODE; use crate::index::utils::from_datum; -use crate::ipc::client::ClientGuard; -use crate::ipc::client::{Basic, Vbase}; +use crate::ipc::{ClientBasic, ClientVbase}; use crate::prelude::*; use pgrx::FromDatum; use service::prelude::*; @@ -17,11 +16,11 @@ pub enum Scanner { }, Basic { node: *mut pgrx::pg_sys::IndexScanState, - basic: ClientGuard, + basic: ClientBasic, }, Vbase { node: *mut pgrx::pg_sys::IndexScanState, - vbase: ClientGuard, + vbase: ClientVbase, }, } @@ -97,17 +96,29 @@ pub unsafe fn next_scan(scan: pgrx::pg_sys::IndexScanDesc) -> bool { let oid = (*(*scan).indexRelation).rd_locator.relNumber; let id = Handle::from_sys(oid); - let rpc = crate::ipc::client::borrow_mut(); + let rpc = check_client(crate::ipc::client()); match SEARCH_MODE.get() { Mode::basic => { let opts = search_options(); - let basic = rpc.basic(id, vector.clone(), opts); + let basic = match rpc.basic(id, vector.clone(), opts) { + Ok(x) => x, + Err((_, BasicError::NotExist)) => bad_service_not_exist(), + Err((_, BasicError::Upgrade)) => bad_service_upgrade(), + Err((_, BasicError::InvalidVector)) => bad_service_invalid_vector(), + Err((_, BasicError::InvalidSearchOptions { reason: _ })) => unreachable!(), + }; *scanner = Scanner::Basic { node, basic }; } Mode::vbase => { let opts = search_options(); - let vbase = rpc.vbase(id, vector.clone(), opts); + let vbase = match rpc.vbase(id, vector.clone(), opts) { + Ok(x) => x, + Err((_, VbaseError::NotExist)) => bad_service_not_exist(), + Err((_, VbaseError::Upgrade)) => bad_service_upgrade(), + Err((_, VbaseError::InvalidVector)) => bad_service_invalid_vector(), + Err((_, VbaseError::InvalidSearchOptions { reason: _ })) => unreachable!(), + }; *scanner = Scanner::Vbase { node, vbase }; } } diff --git a/src/index/am_setup.rs b/src/index/am_setup.rs index e084940..6e1b1bf 100644 --- a/src/index/am_setup.rs +++ b/src/index/am_setup.rs @@ -74,7 +74,7 @@ pub unsafe fn convert_opfamily_to_distance(opfamily: pgrx::pg_sys::Oid) -> (Dist } else if operator == regoperatorin("vectors.<=>(vectors.svector,vectors.svector)") { result = (Distance::Cos, Kind::SparseF32); } else { - SessionError::BadOptions2.friendly(); + bad_opclass(); }; pgrx::pg_sys::ReleaseCatCacheList(list); pgrx::pg_sys::ReleaseSysCache(tuple); @@ -91,7 +91,7 @@ pub unsafe fn options(index_relation: pgrx::pg_sys::Relation) -> IndexOptions { let attrs = (*(*index_relation).rd_att).attrs.as_slice(1); let attr = &attrs[0]; let typmod = Typmod::parse_from_i32(attr.type_mod()).unwrap(); - let dims = typmod.dims().ok_or(SessionError::BadOption1).friendly(); + let dims = check_column_dimensions(typmod.dims()).get(); // get other options let parsed = get_parsed_from_varlena((*index_relation).rd_options); IndexOptions { diff --git a/src/index/am_update.rs b/src/index/am_update.rs index 04647ac..638db50 100644 --- a/src/index/am_update.rs +++ b/src/index/am_update.rs @@ -6,18 +6,32 @@ pub fn update_insert(handle: Handle, vector: DynamicVector, tid: pgrx::pg_sys::I callback_dirty(handle); let pointer = Pointer::from_sys(tid); - let mut rpc = crate::ipc::client::borrow_mut(); - rpc.insert(handle, vector, pointer); + let mut rpc = check_client(crate::ipc::client()); + + match rpc.insert(handle, vector, pointer) { + Ok(()) => (), + Err(InsertError::NotExist) => bad_service_not_exist(), + Err(InsertError::Upgrade) => bad_service_upgrade(), + Err(InsertError::InvalidVector) => bad_service_invalid_vector(), + } } pub fn update_delete(handle: Handle, f: impl Fn(Pointer) -> bool) { callback_dirty(handle); - let mut rpc_list = crate::ipc::client::borrow_mut().list(handle); - let mut rpc = crate::ipc::client::borrow_mut(); + let mut rpc_list = match check_client(crate::ipc::client()).list(handle) { + Ok(x) => x, + Err((_, ListError::NotExist)) => bad_service_not_exist(), + Err((_, ListError::Upgrade)) => bad_service_upgrade(), + }; + let mut rpc = check_client(crate::ipc::client()); while let Some(p) = rpc_list.next() { if f(p) { - rpc.delete(handle, p); + match rpc.delete(handle, p) { + Ok(()) => (), + Err(DeleteError::NotExist) => (), + Err(DeleteError::Upgrade) => (), + } } } rpc_list.leave(); diff --git a/src/index/functions.rs b/src/index/functions.rs index 8f88132..bc88556 100644 --- a/src/index/functions.rs +++ b/src/index/functions.rs @@ -1,8 +1,4 @@ -use crate::ipc::client; - -#[pgrx::pg_extern(immutable, parallel_safe, strict)] +#[pgrx::pg_extern(volatile, strict)] fn _vectors_pgvectors_upgrade() { - let mut client = client::borrow_mut(); - client.upgrade(); - pgrx::warning!("pgvecto.rs is upgraded. Restart PostgreSQL to take effects."); + let _ = std::fs::remove_dir_all("pg_vectors"); } diff --git a/src/index/hook_transaction.rs b/src/index/hook_transaction.rs index ab9cc28..530fd6a 100644 --- a/src/index/hook_transaction.rs +++ b/src/index/hook_transaction.rs @@ -16,12 +16,14 @@ pub fn commit() { if pending_deletes.is_empty() && pending_dirty.is_empty() { return; } - let mut rpc = crate::ipc::client::borrow_mut(); + let Some(mut rpc) = crate::ipc::client() else { + return; + }; for handle in pending_dirty { - rpc.flush(handle); + let _ = rpc.flush(handle); } for handle in pending_deletes { - rpc.drop(handle); + let _ = rpc.drop(handle); } } @@ -31,9 +33,11 @@ pub fn abort() { if pending_deletes.is_empty() { return; } - let mut rpc = crate::ipc::client::borrow_mut(); + let Some(mut rpc) = crate::ipc::client() else { + return; + }; for handle in pending_deletes { - rpc.drop(handle); + let _ = rpc.drop(handle); } } diff --git a/src/index/views.rs b/src/index/views.rs index 384c826..5b2d316 100644 --- a/src/index/views.rs +++ b/src/index/views.rs @@ -9,14 +9,14 @@ fn _vectors_index_stat( use service::index::IndexStat; let id = Handle::from_sys(oid); let mut res = PgHeapTuple::new_composite_type("vectors.vector_index_stat").unwrap(); - let mut rpc = crate::ipc::client::borrow_mut(); + let mut rpc = check_client(crate::ipc::client()); let stat = rpc.stat(id); match stat { - IndexStat::Normal { + Ok(IndexStat { indexing, options, segments, - } => { + }) => { res.set_by_name("idx_status", "NORMAL").unwrap(); res.set_by_name("idx_indexing", indexing).unwrap(); res.set_by_name( @@ -60,7 +60,10 @@ fn _vectors_index_stat( .unwrap(); res } - IndexStat::Upgrade => { + Err(StatError::NotExist) => { + bad_service_not_exist(); + } + Err(StatError::Upgrade) => { res.set_by_name("idx_status", "UPGRADE").unwrap(); res } diff --git a/src/ipc/client/mod.rs b/src/ipc/client/mod.rs deleted file mode 100644 index c1841fd..0000000 --- a/src/ipc/client/mod.rs +++ /dev/null @@ -1,269 +0,0 @@ -use super::packet::*; -use super::transport::ClientSocket; -use crate::gucs::internal::{Transport, TRANSPORT}; -use crate::prelude::*; -use crate::utils::cells::PgRefCell; -use service::index::IndexOptions; -use service::index::IndexStat; -use service::index::SearchOptions; -use service::prelude::*; -use std::mem::ManuallyDrop; -use std::ops::Deref; -use std::ops::DerefMut; - -pub trait ClientLike: 'static { - fn from_socket(socket: ClientSocket) -> Self; - fn to_socket(self) -> ClientSocket; -} - -pub struct ClientGuard(pub ManuallyDrop); - -impl ClientGuard { - fn map(mut self) -> ClientGuard { - unsafe { - let t = ManuallyDrop::take(&mut self.0); - std::mem::forget(self); - ClientGuard::new(U::from_socket(t.to_socket())) - } - } -} - -impl Deref for ClientGuard { - type Target = T; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl DerefMut for ClientGuard { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -pub struct Rpc { - socket: ClientSocket, -} - -impl Rpc { - pub fn new(socket: ClientSocket) -> Self { - Self { socket } - } -} - -impl ClientGuard { - pub fn flush(&mut self, handle: Handle) { - let packet = RpcPacket::Flush { handle }; - self.socket.ok(packet).friendly(); - let flush::FlushPacket::Leave {} = self.socket.recv().friendly(); - } - pub fn drop(&mut self, handle: Handle) { - let packet = RpcPacket::Drop { handle }; - self.socket.ok(packet).friendly(); - let drop::DropPacket::Leave {} = self.socket.recv().friendly(); - } - pub fn create(&mut self, handle: Handle, options: IndexOptions) { - let packet = RpcPacket::Create { handle, options }; - self.socket.ok(packet).friendly(); - let create::CreatePacket::Leave {} = self.socket.recv().friendly(); - } - pub fn basic( - mut self, - handle: Handle, - vector: DynamicVector, - opts: SearchOptions, - ) -> ClientGuard { - let packet = RpcPacket::Basic { - handle, - vector, - opts, - }; - self.socket.ok(packet).friendly(); - let basic::BasicErrorPacket {} = self.socket.recv().friendly(); - ClientGuard::map(self) - } - pub fn delete(&mut self, handle: Handle, pointer: Pointer) { - let packet = RpcPacket::Delete { handle, pointer }; - self.socket.ok(packet).friendly(); - let delete::DeletePacket::Leave {} = self.socket.recv().friendly(); - } - pub fn insert(&mut self, handle: Handle, vector: DynamicVector, pointer: Pointer) { - let packet = RpcPacket::Insert { - handle, - vector, - pointer, - }; - self.socket.ok(packet).friendly(); - let insert::InsertPacket::Leave {} = self.socket.recv().friendly(); - } - pub fn stat(&mut self, handle: Handle) -> IndexStat { - let packet = RpcPacket::Stat { handle }; - self.socket.ok(packet).friendly(); - let stat::StatPacket::Leave { result } = self.socket.recv().friendly(); - result - } - pub fn vbase( - mut self, - handle: Handle, - vector: DynamicVector, - opts: SearchOptions, - ) -> ClientGuard { - let packet = RpcPacket::Vbase { - handle, - vector, - opts, - }; - self.socket.ok(packet).friendly(); - let vbase::VbaseErrorPacket {} = self.socket.recv().friendly(); - ClientGuard::map(self) - } - pub fn list(mut self, handle: Handle) -> ClientGuard { - let packet = RpcPacket::List { handle }; - self.socket.ok(packet).friendly(); - let list::ListErrorPacket {} = self.socket.recv().friendly(); - ClientGuard::map(self) - } - pub fn upgrade(&mut self) { - let packet = RpcPacket::Upgrade {}; - self.socket.ok(packet).friendly(); - let upgrade::UpgradePacket::Leave {} = self.socket.recv().friendly(); - } -} - -impl ClientLike for Rpc { - fn from_socket(socket: ClientSocket) -> Self { - Self { socket } - } - - fn to_socket(self) -> ClientSocket { - self.socket - } -} - -pub struct Vbase { - socket: ClientSocket, -} - -impl Vbase { - pub fn next(&mut self) -> Option { - let packet = vbase::VbasePacket::Next {}; - self.socket.ok(packet).friendly(); - let vbase::VbaseNextPacket { p } = self.socket.recv().friendly(); - p - } -} - -impl ClientGuard { - pub fn leave(mut self) -> ClientGuard { - let packet = vbase::VbasePacket::Leave {}; - self.socket.ok(packet).friendly(); - let vbase::VbaseLeavePacket {} = self.socket.recv().friendly(); - ClientGuard::map(self) - } -} - -impl ClientLike for Vbase { - fn from_socket(socket: ClientSocket) -> Self { - Self { socket } - } - - fn to_socket(self) -> ClientSocket { - self.socket - } -} - -pub struct Basic { - socket: ClientSocket, -} - -impl Basic { - pub fn next(&mut self) -> Option { - let packet = basic::BasicPacket::Next {}; - self.socket.ok(packet).friendly(); - let basic::BasicNextPacket { p } = self.socket.recv().friendly(); - p - } -} - -impl ClientGuard { - pub fn leave(mut self) -> ClientGuard { - let packet = basic::BasicPacket::Leave {}; - self.socket.ok(packet).friendly(); - let basic::BasicLeavePacket {} = self.socket.recv().friendly(); - ClientGuard::map(self) - } -} - -impl ClientLike for Basic { - fn from_socket(socket: ClientSocket) -> Self { - Self { socket } - } - - fn to_socket(self) -> ClientSocket { - self.socket - } -} - -pub struct List { - socket: ClientSocket, -} - -impl List { - pub fn next(&mut self) -> Option { - let packet = list::ListPacket::Next {}; - self.socket.ok(packet).friendly(); - let list::ListNextPacket { p } = self.socket.recv().friendly(); - p - } -} - -impl ClientGuard { - pub fn leave(mut self) -> ClientGuard { - let packet = list::ListPacket::Leave {}; - self.socket.ok(packet).friendly(); - let list::ListLeavePacket {} = self.socket.recv().friendly(); - ClientGuard::map(self) - } -} - -impl ClientLike for List { - fn from_socket(socket: ClientSocket) -> Self { - Self { socket } - } - - fn to_socket(self) -> ClientSocket { - self.socket - } -} - -static CLIENTS: PgRefCell> = unsafe { PgRefCell::new(Vec::new()) }; - -pub fn borrow_mut() -> ClientGuard { - let mut x = CLIENTS.borrow_mut(); - if let Some(socket) = x.pop() { - return ClientGuard::new(Rpc::new(socket)); - } - let socket = match TRANSPORT.get() { - Transport::unix => crate::ipc::connect_unix(), - Transport::mmap => crate::ipc::connect_mmap(), - }; - ClientGuard::new(Rpc::new(socket)) -} - -impl ClientGuard { - pub fn new(t: T) -> Self { - Self(ManuallyDrop::new(t)) - } -} - -impl Drop for ClientGuard { - fn drop(&mut self) { - let socket = unsafe { ManuallyDrop::take(&mut self.0).to_socket() }; - if !std::thread::panicking() && std::any::TypeId::of::() == std::any::TypeId::of::() - { - let mut x = CLIENTS.borrow_mut(); - x.push(socket); - } - } -} diff --git a/src/ipc/mod.rs b/src/ipc/mod.rs index 104ec9c..5c9d46c 100644 --- a/src/ipc/mod.rs +++ b/src/ipc/mod.rs @@ -1,55 +1,43 @@ -pub mod client; -mod packet; -pub mod server; pub mod transport; -use self::server::RpcHandler; +use self::transport::ClientSocket; +use self::transport::ServerSocket; +use crate::gucs::internal::{Transport, TRANSPORT}; +use crate::ipc::transport::Packet; use crate::prelude::*; +use crate::utils::cells::PgRefCell; use serde::{Deserialize, Serialize}; -use service::prelude::ServiceError; -use thiserror::Error; +use service::index::IndexOptions; +use service::index::IndexStat; +use service::index::SearchOptions; +use service::prelude::*; -#[derive(Debug, Clone, Error)] +#[derive(Debug, Clone)] pub enum ConnectionError { - #[error("\ -IPC connection is closed unexpected. -ADVICE: The error is raisen by background worker errors. \ -Please check the full PostgreSQL log to get more information. Please read `https://docs.pgvecto.rs/admin/configuration.html`.\ -")] - Unexpected, - #[error(transparent)] - Service(#[from] ServiceError), - #[error(transparent)] - Grace(#[from] GraceError), + ClosedConnection, + BadSerialization, + BadDeserialization, } -impl FriendlyError for ConnectionError {} - -#[derive(Debug, Clone, Error, Serialize, Deserialize)] -#[error("Client performs a graceful shutdown.")] -pub struct GraceError; - -impl FriendlyError for GraceError {} - -pub fn listen_unix() -> impl Iterator { +pub fn listen_unix() -> impl Iterator { std::iter::from_fn(move || { let socket = self::transport::ServerSocket::Unix(self::transport::unix::accept()); - Some(self::server::RpcHandler::new(socket)) + Some(self::ServerRpcHandler::new(socket)) }) } -pub fn listen_mmap() -> impl Iterator { +pub fn listen_mmap() -> impl Iterator { std::iter::from_fn(move || { let socket = self::transport::ServerSocket::Mmap(self::transport::mmap::accept()); - Some(self::server::RpcHandler::new(socket)) + Some(self::ServerRpcHandler::new(socket)) }) } -pub fn connect_unix() -> self::transport::ClientSocket { +pub fn connect_unix() -> ClientSocket { self::transport::ClientSocket::Unix(self::transport::unix::connect()) } -pub fn connect_mmap() -> self::transport::ClientSocket { +pub fn connect_mmap() -> ClientSocket { self::transport::ClientSocket::Mmap(self::transport::mmap::connect()) } @@ -57,3 +45,288 @@ pub fn init() { self::transport::mmap::init(); self::transport::unix::init(); } + +impl Drop for ClientRpc { + fn drop(&mut self) { + let socket = self.socket.take(); + if let Some(socket) = socket { + if !std::thread::panicking() { + let mut x = CLIENTS.borrow_mut(); + x.push(socket); + } + } + } +} + +pub struct ClientRpc { + pub socket: Option, +} + +impl ClientRpc { + fn new(socket: ClientSocket) -> Self { + Self { + socket: Some(socket), + } + } + fn _ok(&mut self, packet: U) -> Result<(), ConnectionError> { + self.socket.as_mut().unwrap().ok(packet) + } + fn _recv(&mut self) -> Result { + self.socket.as_mut().unwrap().recv() + } +} + +static CLIENTS: PgRefCell> = unsafe { PgRefCell::new(Vec::new()) }; + +pub fn client() -> Option { + if !crate::bgworker::is_started() { + return None; + } + let mut x = CLIENTS.borrow_mut(); + if let Some(socket) = x.pop() { + return Some(ClientRpc::new(socket)); + } + let socket = match TRANSPORT.get() { + Transport::unix => connect_unix(), + Transport::mmap => connect_mmap(), + }; + Some(ClientRpc::new(socket)) +} + +pub struct ServerRpcHandler { + socket: ServerSocket, +} + +impl ServerRpcHandler { + pub(super) fn new(socket: ServerSocket) -> Self { + Self { socket } + } +} + +macro_rules! define_packets { + (unary $name:ident($($p_name:ident: $p_ty:ty),*) -> $r:ty;) => { + paste::paste! { + #[derive(Debug, Serialize, Deserialize)] + pub struct [] { + pub result: Result<$r, [< $name:camel Error >]>, + } + } + }; + (stream $name:ident($($p_name:ident: $p_ty:ty),*) -> $r:ty;) => { + paste::paste! { + #[derive(Debug, Serialize, Deserialize)] + pub struct [] { + pub result: Result<(), [< $name:camel Error >]>, + } + + #[derive(Debug, Serialize, Deserialize)] + pub enum [] { + Next {}, + Leave {}, + } + + #[derive(Debug, Serialize, Deserialize)] + pub struct [] { + pub p: Option<$r>, + } + + #[derive(Debug, Serialize, Deserialize)] + pub struct [] {} + } + }; +} + +macro_rules! define_client_stuffs { + (unary $name:ident($($p_name:ident:$p_ty:ty),*) -> $r:ty;) => { + paste::paste! { + impl ClientRpc { + pub fn $name(&mut self, $($p_name:$p_ty),*) -> Result<$r, [< $name:camel Error >]> { + let packet = PacketRpc::[< $name:camel >] { $($p_name),* }; + check_connection(self._ok(packet)); + let [] { result } = check_connection(self._recv()); + result + } + } + } + }; + (stream $name:ident($($p_name:ident:$p_ty:ty),*) -> $r:ty;) => { + paste::paste! { + impl ClientRpc { + pub fn $name(mut self, $($p_name:$p_ty),*) -> Result<[], (Self, [< $name:camel Error >])> { + let packet = PacketRpc::[<$name:camel>] { $($p_name),* }; + check_connection(self._ok(packet)); + let [] { result } = check_connection(self._recv()); + if let Err(e) = result { + Err((self, e)) + } else { + Ok([] { + socket: self.socket.take() + }) + } + } + } + + pub struct [] { + socket: Option, + } + + impl [] { + fn _ok(&mut self, packet: U) -> Result<(), ConnectionError> { + self.socket.as_mut().unwrap().ok(packet) + } + fn _recv(&mut self) -> Result { + self.socket.as_mut().unwrap().recv() + } + } + + impl [] { + pub fn next(&mut self) -> Option<$r> { + let packet = []::Next {}; + check_connection(self._ok(packet)); + let [] { p } = check_connection(self._recv()); + p + } + pub fn leave(mut self) -> ClientRpc { + let packet = []::Leave {}; + check_connection(self._ok(packet)); + let [] {} = check_connection(self._recv()); + ClientRpc { socket: self.socket.take() } + } + } + } + }; +} + +macro_rules! define_server_stuffs { + (unary $name:ident($($p_name:ident:$p_ty:ty),*) -> $r:ty;) => { + paste::paste! { + pub struct [] { + socket: ServerSocket, + } + + impl [] { + pub fn leave(mut self, result: Result<$r, [<$name:camel Error>]>) -> Result { + let packet = [] { result }; + self.socket.ok(packet)?; + Ok(ServerRpcHandler { + socket: self.socket, + }) + } + } + } + }; + (stream $name:ident($($p_name:ident:$p_ty:ty),*) -> $r:ty;) => { + paste::paste! { + pub struct [] { + socket: ServerSocket, + } + + impl [] { + pub fn error_ok(mut self) -> Result<[], ConnectionError> { + self.socket.ok([] { result: Ok(()) })?; + Ok([] { + socket: self.socket, + }) + } + pub fn error_err(mut self, err: [<$name:camel Error>]) -> Result { + self.socket.ok([] { result: Err(err) })?; + Ok(ServerRpcHandler { + socket: self.socket, + }) + } + } + + pub struct [] { + socket: ServerSocket, + } + + impl [] { + pub fn handle(mut self) -> Result<[], ConnectionError> { + Ok(match self.socket.recv::<[]>()? { + []::Next {} => []::Next { + x: [] { + socket: self.socket, + }, + }, + []::Leave {} => { + self.socket.ok([] {})?; + []::Leave { + x: ServerRpcHandler { + socket: self.socket, + }, + } + } + }) + } + } + + pub enum [] { + Next { x: [] }, + Leave { x: ServerRpcHandler }, + } + + pub struct [] { + socket: ServerSocket, + } + + impl [] { + pub fn leave(mut self, p: Option<$r>) -> Result<[], ConnectionError> { + let packet = [] { p }; + self.socket.ok(packet)?; + Ok([] { + socket: self.socket, + }) + } + } + } + }; +} + +macro_rules! defines { + ( + $($kind:ident $name:ident($($p_name:ident:$p_ty:ty),*) -> $r:ty;)* + ) => { + $(define_packets!($kind $name($($p_name:$p_ty),*) -> $r;);)* + $(define_client_stuffs!($kind $name($($p_name:$p_ty),*) -> $r;);)* + $(define_server_stuffs!($kind $name($($p_name:$p_ty),*) -> $r;);)* + + paste::paste! { + #[derive(Debug, Serialize, Deserialize)] + pub enum PacketRpc { + $([<$name:camel>]{$($p_name:$p_ty),*},)* + } + + impl ServerRpcHandler { + pub fn handle(mut self) -> Result { + Ok(match self.socket.recv::()? { + $(PacketRpc::[<$name:camel>] { $($p_name),* } => ServerRpcHandle::[<$name:camel>] { + $($p_name),*, + x: [] { + socket: self.socket, + }, + },)* + }) + } + } + + pub enum ServerRpcHandle { + $([<$name:camel>] { + $($p_name:$p_ty),*, + x: [< Server $name:camel >], + }),* + } + } + }; +} + +defines! { + unary create(handle: Handle, options: IndexOptions) -> (); + unary drop(handle: Handle) -> (); + unary flush(handle: Handle) -> (); + unary insert(handle: Handle, vector: DynamicVector, pointer: Pointer) -> (); + unary delete(handle: Handle, pointer: Pointer) -> (); + stream basic(handle: Handle, vector: DynamicVector, opts: SearchOptions) -> Pointer; + stream vbase(handle: Handle, vector: DynamicVector, opts: SearchOptions) -> Pointer; + stream list(handle: Handle) -> Pointer; + unary stat(handle: Handle) -> IndexStat; +} diff --git a/src/ipc/packet/basic.rs b/src/ipc/packet/basic.rs deleted file mode 100644 index 3cb4781..0000000 --- a/src/ipc/packet/basic.rs +++ /dev/null @@ -1,19 +0,0 @@ -use serde::{Deserialize, Serialize}; -use service::prelude::*; - -#[derive(Debug, Serialize, Deserialize)] -pub struct BasicErrorPacket {} - -#[derive(Debug, Serialize, Deserialize)] -pub enum BasicPacket { - Next {}, - Leave {}, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct BasicNextPacket { - pub p: Option, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct BasicLeavePacket {} diff --git a/src/ipc/packet/create.rs b/src/ipc/packet/create.rs deleted file mode 100644 index edb1afd..0000000 --- a/src/ipc/packet/create.rs +++ /dev/null @@ -1,6 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize)] -pub enum CreatePacket { - Leave {}, -} diff --git a/src/ipc/packet/delete.rs b/src/ipc/packet/delete.rs deleted file mode 100644 index f950a4c..0000000 --- a/src/ipc/packet/delete.rs +++ /dev/null @@ -1,6 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize)] -pub enum DeletePacket { - Leave {}, -} diff --git a/src/ipc/packet/drop.rs b/src/ipc/packet/drop.rs deleted file mode 100644 index eb6ef0d..0000000 --- a/src/ipc/packet/drop.rs +++ /dev/null @@ -1,6 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize)] -pub enum DropPacket { - Leave {}, -} diff --git a/src/ipc/packet/flush.rs b/src/ipc/packet/flush.rs deleted file mode 100644 index f39543d..0000000 --- a/src/ipc/packet/flush.rs +++ /dev/null @@ -1,6 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize)] -pub enum FlushPacket { - Leave {}, -} diff --git a/src/ipc/packet/insert.rs b/src/ipc/packet/insert.rs deleted file mode 100644 index b056d6c..0000000 --- a/src/ipc/packet/insert.rs +++ /dev/null @@ -1,6 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize)] -pub enum InsertPacket { - Leave {}, -} diff --git a/src/ipc/packet/list.rs b/src/ipc/packet/list.rs deleted file mode 100644 index 093ed5c..0000000 --- a/src/ipc/packet/list.rs +++ /dev/null @@ -1,19 +0,0 @@ -use serde::{Deserialize, Serialize}; -use service::prelude::*; - -#[derive(Debug, Serialize, Deserialize)] -pub struct ListErrorPacket {} - -#[derive(Debug, Serialize, Deserialize)] -pub enum ListPacket { - Next {}, - Leave {}, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct ListNextPacket { - pub p: Option, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct ListLeavePacket {} diff --git a/src/ipc/packet/mod.rs b/src/ipc/packet/mod.rs deleted file mode 100644 index d9cb6a0..0000000 --- a/src/ipc/packet/mod.rs +++ /dev/null @@ -1,58 +0,0 @@ -pub mod basic; -pub mod create; -pub mod delete; -pub mod drop; -pub mod flush; -pub mod insert; -pub mod list; -pub mod stat; -pub mod upgrade; -pub mod vbase; - -use serde::{Deserialize, Serialize}; -use service::index::IndexOptions; -use service::index::SearchOptions; -use service::prelude::*; - -#[derive(Debug, Serialize, Deserialize)] -pub enum RpcPacket { - // transaction - Flush { - handle: Handle, - }, - Drop { - handle: Handle, - }, - Create { - handle: Handle, - options: IndexOptions, - }, - // instance - Insert { - handle: Handle, - vector: DynamicVector, - pointer: Pointer, - }, - Delete { - handle: Handle, - pointer: Pointer, - }, - Stat { - handle: Handle, - }, - Basic { - handle: Handle, - vector: DynamicVector, - opts: SearchOptions, - }, - Vbase { - handle: Handle, - vector: DynamicVector, - opts: SearchOptions, - }, - List { - handle: Handle, - }, - // admin - Upgrade {}, -} diff --git a/src/ipc/packet/stat.rs b/src/ipc/packet/stat.rs deleted file mode 100644 index 2366021..0000000 --- a/src/ipc/packet/stat.rs +++ /dev/null @@ -1,7 +0,0 @@ -use serde::{Deserialize, Serialize}; -use service::index::IndexStat; - -#[derive(Debug, Serialize, Deserialize)] -pub enum StatPacket { - Leave { result: IndexStat }, -} diff --git a/src/ipc/packet/upgrade.rs b/src/ipc/packet/upgrade.rs deleted file mode 100644 index 0a7ab99..0000000 --- a/src/ipc/packet/upgrade.rs +++ /dev/null @@ -1,6 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize)] -pub enum UpgradePacket { - Leave {}, -} diff --git a/src/ipc/packet/vbase.rs b/src/ipc/packet/vbase.rs deleted file mode 100644 index 4663a9c..0000000 --- a/src/ipc/packet/vbase.rs +++ /dev/null @@ -1,19 +0,0 @@ -use serde::{Deserialize, Serialize}; -use service::prelude::*; - -#[derive(Debug, Serialize, Deserialize)] -pub struct VbaseErrorPacket {} - -#[derive(Debug, Serialize, Deserialize)] -pub enum VbasePacket { - Next {}, - Leave {}, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct VbaseNextPacket { - pub p: Option, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct VbaseLeavePacket {} diff --git a/src/ipc/server/mod.rs b/src/ipc/server/mod.rs deleted file mode 100644 index a77d3c1..0000000 --- a/src/ipc/server/mod.rs +++ /dev/null @@ -1,449 +0,0 @@ -use super::packet::*; -use super::transport::ServerSocket; -use super::ConnectionError; -use service::index::IndexOptions; -use service::index::IndexStat; -use service::index::SearchOptions; -use service::prelude::*; - -pub struct RpcHandler { - socket: ServerSocket, -} - -impl RpcHandler { - pub(super) fn new(socket: ServerSocket) -> Self { - Self { socket } - } - pub fn handle(mut self) -> Result { - Ok(match self.socket.recv::()? { - RpcPacket::Flush { handle } => RpcHandle::Flush { - handle, - x: Flush { - socket: self.socket, - }, - }, - RpcPacket::Drop { handle } => RpcHandle::Drop { - handle, - x: Drop { - socket: self.socket, - }, - }, - RpcPacket::Create { handle, options } => RpcHandle::Create { - handle, - options, - x: Create { - socket: self.socket, - }, - }, - RpcPacket::Insert { - handle, - vector, - pointer, - } => RpcHandle::Insert { - handle, - vector, - pointer, - x: Insert { - socket: self.socket, - }, - }, - RpcPacket::Delete { handle, pointer } => RpcHandle::Delete { - handle, - pointer, - x: Delete { - socket: self.socket, - }, - }, - RpcPacket::Basic { - handle, - vector, - opts, - } => RpcHandle::Basic { - handle, - vector, - opts, - x: Basic { - socket: self.socket, - }, - }, - RpcPacket::Stat { handle } => RpcHandle::Stat { - handle, - x: Stat { - socket: self.socket, - }, - }, - RpcPacket::Vbase { - handle, - vector, - opts, - } => RpcHandle::Vbase { - handle, - vector, - opts, - x: Vbase { - socket: self.socket, - }, - }, - RpcPacket::List { handle } => RpcHandle::List { - handle, - x: List { - socket: self.socket, - }, - }, - RpcPacket::Upgrade {} => RpcHandle::Upgrade { - x: Upgrade { - socket: self.socket, - }, - }, - }) - } -} - -pub enum RpcHandle { - Flush { - handle: Handle, - x: Flush, - }, - Drop { - handle: Handle, - x: Drop, - }, - Create { - handle: Handle, - options: IndexOptions, - x: Create, - }, - Basic { - handle: Handle, - vector: DynamicVector, - opts: SearchOptions, - x: Basic, - }, - Insert { - handle: Handle, - vector: DynamicVector, - pointer: Pointer, - x: Insert, - }, - Delete { - handle: Handle, - pointer: Pointer, - x: Delete, - }, - Stat { - handle: Handle, - x: Stat, - }, - Vbase { - handle: Handle, - vector: DynamicVector, - opts: SearchOptions, - x: Vbase, - }, - List { - handle: Handle, - x: List, - }, - Upgrade { - x: Upgrade, - }, -} - -pub struct Flush { - socket: ServerSocket, -} - -impl Flush { - pub fn leave(mut self) -> Result { - let packet = flush::FlushPacket::Leave {}; - self.socket.ok(packet)?; - Ok(RpcHandler { - socket: self.socket, - }) - } - #[allow(dead_code)] - pub fn reset(mut self, err: ServiceError) -> Result { - self.socket.err(err) - } -} - -pub struct Drop { - socket: ServerSocket, -} - -impl Drop { - pub fn leave(mut self) -> Result { - let packet = drop::DropPacket::Leave {}; - self.socket.ok(packet)?; - Ok(RpcHandler { - socket: self.socket, - }) - } - #[allow(dead_code)] - pub fn reset(mut self, err: ServiceError) -> Result { - self.socket.err(err) - } -} - -pub struct Create { - socket: ServerSocket, -} - -impl Create { - pub fn leave(mut self) -> Result { - let packet = create::CreatePacket::Leave {}; - self.socket.ok(packet)?; - Ok(RpcHandler { - socket: self.socket, - }) - } - pub fn reset(mut self, err: ServiceError) -> Result { - self.socket.err(err) - } -} - -pub struct Insert { - socket: ServerSocket, -} - -impl Insert { - pub fn leave(mut self) -> Result { - let packet = insert::InsertPacket::Leave {}; - self.socket.ok(packet)?; - Ok(RpcHandler { - socket: self.socket, - }) - } - pub fn reset(mut self, err: ServiceError) -> Result { - self.socket.err(err) - } -} - -pub struct Delete { - socket: ServerSocket, -} - -impl Delete { - pub fn leave(mut self) -> Result { - let packet = delete::DeletePacket::Leave {}; - self.socket.ok(packet)?; - Ok(RpcHandler { - socket: self.socket, - }) - } - pub fn reset(mut self, err: ServiceError) -> Result { - self.socket.err(err) - } -} - -pub struct Basic { - socket: ServerSocket, -} - -impl Basic { - pub fn error(mut self) -> Result { - self.socket.ok(basic::BasicErrorPacket {})?; - Ok(BasicHandler { - socket: self.socket, - }) - } - pub fn reset(mut self, err: ServiceError) -> Result { - self.socket.err(err) - } -} - -pub struct BasicHandler { - socket: ServerSocket, -} - -impl BasicHandler { - pub fn handle(mut self) -> Result { - Ok(match self.socket.recv::()? { - basic::BasicPacket::Next {} => BasicHandle::Next { - x: BasicNext { - socket: self.socket, - }, - }, - basic::BasicPacket::Leave {} => { - self.socket.ok(basic::BasicLeavePacket {})?; - BasicHandle::Leave { - x: RpcHandler { - socket: self.socket, - }, - } - } - }) - } -} - -pub enum BasicHandle { - Next { x: BasicNext }, - Leave { x: RpcHandler }, -} - -pub struct BasicNext { - socket: ServerSocket, -} - -impl BasicNext { - pub fn leave(mut self, p: Option) -> Result { - let packet = basic::BasicNextPacket { p }; - self.socket.ok(packet)?; - Ok(BasicHandler { - socket: self.socket, - }) - } -} - -pub struct Stat { - socket: ServerSocket, -} - -impl Stat { - pub fn leave(mut self, result: IndexStat) -> Result { - let packet = stat::StatPacket::Leave { result }; - self.socket.ok(packet)?; - Ok(RpcHandler { - socket: self.socket, - }) - } - pub fn reset(mut self, err: ServiceError) -> Result { - self.socket.err(err) - } -} - -pub struct Vbase { - socket: ServerSocket, -} - -impl Vbase { - pub fn error(mut self) -> Result { - self.socket.ok(vbase::VbaseErrorPacket {})?; - Ok(VbaseHandler { - socket: self.socket, - }) - } - pub fn reset(mut self, err: ServiceError) -> Result { - self.socket.err(err) - } -} - -pub struct VbaseHandler { - socket: ServerSocket, -} - -impl VbaseHandler { - pub fn handle(mut self) -> Result { - Ok(match self.socket.recv::()? { - vbase::VbasePacket::Next {} => VbaseHandle::Next { - x: VbaseNext { - socket: self.socket, - }, - }, - vbase::VbasePacket::Leave {} => { - self.socket.ok(vbase::VbaseLeavePacket {})?; - VbaseHandle::Leave { - x: RpcHandler { - socket: self.socket, - }, - } - } - }) - } -} - -pub enum VbaseHandle { - Next { x: VbaseNext }, - Leave { x: RpcHandler }, -} - -pub struct VbaseNext { - socket: ServerSocket, -} - -impl VbaseNext { - pub fn leave(mut self, p: Option) -> Result { - let packet = vbase::VbaseNextPacket { p }; - self.socket.ok(packet)?; - Ok(VbaseHandler { - socket: self.socket, - }) - } -} - -pub struct List { - socket: ServerSocket, -} - -impl List { - pub fn error(mut self) -> Result { - self.socket.ok(list::ListErrorPacket {})?; - Ok(ListHandler { - socket: self.socket, - }) - } - pub fn reset(mut self, err: ServiceError) -> Result { - self.socket.err(err) - } -} - -pub struct ListHandler { - socket: ServerSocket, -} - -impl ListHandler { - pub fn handle(mut self) -> Result { - Ok(match self.socket.recv::()? { - list::ListPacket::Next {} => ListHandle::Next { - x: ListNext { - socket: self.socket, - }, - }, - list::ListPacket::Leave {} => { - self.socket.ok(list::ListLeavePacket {})?; - ListHandle::Leave { - x: RpcHandler { - socket: self.socket, - }, - } - } - }) - } -} - -pub enum ListHandle { - Next { x: ListNext }, - Leave { x: RpcHandler }, -} - -pub struct ListNext { - socket: ServerSocket, -} - -impl ListNext { - pub fn leave(mut self, p: Option) -> Result { - let packet = list::ListNextPacket { p }; - self.socket.ok(packet)?; - Ok(ListHandler { - socket: self.socket, - }) - } -} - -pub struct Upgrade { - socket: ServerSocket, -} - -impl Upgrade { - pub fn leave(mut self) -> Result { - let packet = upgrade::UpgradePacket::Leave {}; - self.socket.ok(packet)?; - Ok(RpcHandler { - socket: self.socket, - }) - } - #[allow(dead_code)] - pub fn reset(mut self, err: ServiceError) -> Result { - self.socket.err(err) - } -} diff --git a/src/ipc/transport/mmap.rs b/src/ipc/transport/mmap.rs index ea23462..b7b4b10 100644 --- a/src/ipc/transport/mmap.rs +++ b/src/ipc/transport/mmap.rs @@ -1,43 +1,58 @@ use super::ConnectionError; -use crate::utils::file_socket::FileSocket; -use crate::utils::os::{futex_wait, futex_wake, memfd_create, mmap_populate}; use rustix::fd::{AsFd, OwnedFd}; use rustix::fs::FlockOperation; +use send_fd::SendFd; use std::cell::UnsafeCell; use std::io::ErrorKind; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::OnceLock; +use std::time::Duration; const BUFFER_SIZE: usize = 512 * 1024; const SPIN_LIMIT: usize = 8; +const TIMEOUT: Duration = Duration::from_secs(15); -static CHANNEL: OnceLock = OnceLock::new(); +static CHANNEL: OnceLock = OnceLock::new(); pub fn init() { - CHANNEL.set(FileSocket::new().unwrap()).ok().unwrap(); + CHANNEL.set(SendFd::new().unwrap()).ok().unwrap(); } pub fn accept() -> Socket { let memfd = CHANNEL.get().unwrap().recv().unwrap(); rustix::fs::fcntl_lock(&memfd, FlockOperation::NonBlockingLockShared).unwrap(); - let addr = unsafe { mmap_populate(BUFFER_SIZE, &memfd).unwrap() }; + let memmap = unsafe { + memmap2::MmapOptions::new() + .len(BUFFER_SIZE) + .populate() + .map_mut(&memfd) + .unwrap() + }; Socket { is_server: true, - addr: addr as _, + addr: memmap.as_ptr().cast(), memfd, + _memmap: memmap, } } pub fn connect() -> Socket { - let memfd = memfd_create().unwrap(); + let memfd = memfd::memfd_create().unwrap(); rustix::fs::ftruncate(&memfd, BUFFER_SIZE as u64).unwrap(); rustix::fs::fcntl_lock(&memfd, FlockOperation::NonBlockingLockShared).unwrap(); CHANNEL.get().unwrap().send(memfd.as_fd()).unwrap(); - let addr = unsafe { mmap_populate(BUFFER_SIZE, &memfd).unwrap() }; + let memmap = unsafe { + memmap2::MmapOptions::new() + .len(BUFFER_SIZE) + .populate() + .map_mut(&memfd) + .unwrap() + }; Socket { is_server: false, - addr: addr as _, + addr: memmap.as_ptr().cast(), memfd, + _memmap: memmap, } } @@ -45,6 +60,7 @@ pub struct Socket { is_server: bool, addr: *const Channel, memfd: OwnedFd, + _memmap: memmap2::MmapMut, } unsafe impl Send for Socket {} @@ -123,17 +139,13 @@ impl Channel { { break; } - unsafe { - futex_wait(&self.futex, Y); - } + interprocess_atomic_wait::wait(&self.futex, Y, TIMEOUT); } Y => { if !test() { - return Err(ConnectionError::Unexpected); - } - unsafe { - futex_wait(&self.futex, Y); + return Err(ConnectionError::ClosedConnection); } + interprocess_atomic_wait::wait(&self.futex, Y, TIMEOUT); } _ => unsafe { std::hint::unreachable_unchecked() }, } @@ -154,9 +166,7 @@ impl Channel { (*self.bytes.get())[0..data.len()].copy_from_slice(data); } if X == self.futex.swap(T, Ordering::Release) { - unsafe { - futex_wake(&self.futex); - } + interprocess_atomic_wait::wake(&self.futex); } } unsafe fn server_recv(&self, test: impl Fn() -> bool) -> Result, ConnectionError> { @@ -182,17 +192,13 @@ impl Channel { { break; } - unsafe { - futex_wait(&self.futex, Y); - } + interprocess_atomic_wait::wait(&self.futex, Y, TIMEOUT); } Y => { if !test() { - return Err(ConnectionError::Unexpected); - } - unsafe { - futex_wait(&self.futex, Y); + return Err(ConnectionError::ClosedConnection); } + interprocess_atomic_wait::wait(&self.futex, Y, TIMEOUT); } _ => unsafe { std::hint::unreachable_unchecked() }, } @@ -213,9 +219,7 @@ impl Channel { (*self.bytes.get())[0..data.len()].copy_from_slice(data); } if X == self.futex.swap(T, Ordering::Release) { - unsafe { - futex_wake(&self.futex); - } + interprocess_atomic_wait::wake(&self.futex); } } } diff --git a/src/ipc/transport/mod.rs b/src/ipc/transport/mod.rs index d7004a1..3c6178f 100644 --- a/src/ipc/transport/mod.rs +++ b/src/ipc/transport/mod.rs @@ -1,23 +1,21 @@ pub mod mmap; pub mod unix; -use super::{ConnectionError, GraceError}; +use super::ConnectionError; use serde::{Deserialize, Serialize}; -use service::prelude::ServiceError; -use std::fmt::Debug; -pub trait Bincode: Debug { - fn serialize(&self) -> Vec; - fn deserialize(_: &[u8]) -> Self; +pub trait Packet: Sized { + fn serialize(&self) -> Option>; + fn deserialize(_: &[u8]) -> Option; } -impl Deserialize<'a>> Bincode for T { - fn serialize(&self) -> Vec { - bincode::serialize(self).unwrap() +impl Deserialize<'a>> Packet for T { + fn serialize(&self) -> Option> { + bincode::serialize(self).ok() } - fn deserialize(bytes: &[u8]) -> Self { - bincode::deserialize(bytes).unwrap() + fn deserialize(bytes: &[u8]) -> Option { + bincode::deserialize(bytes).ok() } } @@ -32,66 +30,39 @@ pub enum ClientSocket { } impl ServerSocket { - pub fn ok(&mut self, packet: T) -> Result<(), ConnectionError> { - let mut buffer = vec![0u8]; - buffer.extend(packet.serialize()); + pub fn ok(&mut self, packet: T) -> Result<(), ConnectionError> { + let buffer = packet + .serialize() + .ok_or(ConnectionError::BadSerialization)?; match self { Self::Unix(x) => x.send(&buffer), Self::Mmap(x) => x.send(&buffer), } } - pub fn err(&mut self, packet: ServiceError) -> Result { - let mut buffer = vec![1u8]; - buffer.extend(Bincode::serialize(&packet)); - match self { - Self::Unix(x) => x.send(&buffer)?, - Self::Mmap(x) => x.send(&buffer)?, - } - Err(ConnectionError::Service(packet)) - } - pub fn recv(&mut self) -> Result { + pub fn recv(&mut self) -> Result { let buffer = match self { Self::Unix(x) => x.recv()?, Self::Mmap(x) => x.recv()?, }; - let c = &buffer[1..]; - match buffer[0] { - 0u8 => Ok(T::deserialize(c)), - 1u8 => Err(ConnectionError::Grace(bincode::deserialize(c).unwrap())), - _ => unreachable!(), - } + T::deserialize(&buffer).ok_or(ConnectionError::BadDeserialization) } } impl ClientSocket { - pub fn ok(&mut self, packet: T) -> Result<(), ConnectionError> { - let mut buffer = vec![0u8]; - buffer.extend(packet.serialize()); + pub fn ok(&mut self, packet: T) -> Result<(), ConnectionError> { + let buffer = packet + .serialize() + .ok_or(ConnectionError::BadSerialization)?; match self { Self::Unix(x) => x.send(&buffer), Self::Mmap(x) => x.send(&buffer), } } - #[allow(unused)] - pub fn err(&mut self, packet: GraceError) -> Result { - let mut buffer = vec![1u8]; - buffer.extend(Bincode::serialize(&packet)); - match self { - Self::Unix(x) => x.send(&buffer)?, - Self::Mmap(x) => x.send(&buffer)?, - } - Err(ConnectionError::Grace(packet)) - } - pub fn recv(&mut self) -> Result { + pub fn recv(&mut self) -> Result { let buffer = match self { Self::Unix(x) => x.recv()?, Self::Mmap(x) => x.recv()?, }; - let c = &buffer[1..]; - match buffer[0] { - 0u8 => Ok(T::deserialize(c)), - 1u8 => Err(ConnectionError::Service(bincode::deserialize(c).unwrap())), - _ => unreachable!(), - } + T::deserialize(&buffer).ok_or(ConnectionError::BadDeserialization) } } diff --git a/src/ipc/transport/unix.rs b/src/ipc/transport/unix.rs index a15bc2c..7fd9adb 100644 --- a/src/ipc/transport/unix.rs +++ b/src/ipc/transport/unix.rs @@ -1,15 +1,15 @@ use super::ConnectionError; -use crate::utils::file_socket::FileSocket; use byteorder::{ReadBytesExt, WriteBytesExt}; use rustix::fd::AsFd; +use send_fd::SendFd; use std::io::{Read, Write}; use std::os::unix::net::UnixStream; use std::sync::OnceLock; -static CHANNEL: OnceLock = OnceLock::new(); +static CHANNEL: OnceLock = OnceLock::new(); pub fn init() { - CHANNEL.set(FileSocket::new().unwrap()).ok().unwrap(); + CHANNEL.set(SendFd::new().unwrap()).ok().unwrap(); } pub fn accept() -> Socket { @@ -32,7 +32,7 @@ macro_rules! resolve_closed { ($t: expr) => { match $t { Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { - return Err(ConnectionError::Unexpected) + return Err(ConnectionError::ClosedConnection) } Err(e) => panic!("{}", e), Ok(e) => e, diff --git a/src/lib.rs b/src/lib.rs index 56452c9..7ca3d35 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,7 @@ pgrx::extension_sql_file!("./sql/finalize.sql", finalize); unsafe extern "C" fn _PG_init() { use crate::prelude::*; if unsafe { pgrx::pg_sys::IsUnderPostmaster } { - SessionError::BadInit.friendly(); + bad_init(); } unsafe { detect::initialize(); diff --git a/src/prelude/error.rs b/src/prelude/error.rs index 325bc6a..e28e05b 100644 --- a/src/prelude/error.rs +++ b/src/prelude/error.rs @@ -1,75 +1,143 @@ -use service::prelude::ServiceError; -use std::fmt::Display; -use thiserror::Error; +use crate::ipc::{ClientRpc, ConnectionError}; +use pgrx::error; +use std::num::NonZeroU16; -pub trait FriendlyError: Display { - fn friendly(&self) -> ! { - panic!("pgvecto.rs: {}", self); - } -} - -impl FriendlyError for ServiceError {} - -pub trait Friendly { - fn friendly(self) -> T; -} - -impl Friendly for Result { - fn friendly(self) -> T { - match self { - Ok(x) => x, - Err(e) => e.friendly(), - } - } -} - -#[must_use] -#[derive(Debug, Error)] -#[rustfmt::skip] -pub enum SessionError { - #[error("\ -pgvecto.rs must be loaded via shared_preload_libraries. +pub fn bad_init() -> ! { + error!("\ +pgvecto.rs: pgvecto.rs must be loaded via shared_preload_libraries. ADVICE: If you encounter this error for your first use of pgvecto.rs, \ please read `https://docs.pgvecto.rs/getting-started/installation.html`. \ You should edit `shared_preload_libraries` in `postgresql.conf` to include `vectors.so`, \ -or simply run the command `psql -U postgres -c 'ALTER SYSTEM SET shared_preload_libraries = \"vectors.so\"'`.\ -")] - BadInit, - #[error("\ -Bad literal. -INFORMATION: hint = {hint}\ -")] - BadLiteral { - hint: String, - }, - #[error("\ -Dimensions type modifier of a vector column is needed for building the index.\ -")] - BadOption1, - #[error("\ -Indexes can only be built on built-in distance functions. -ADVICE: If you want pgvecto.rs to support more distance functions, \ -visit `https://github.com/tensorchord/pgvecto.rs/issues` and contribute your ideas.\ -")] - BadOptions2, - #[error("\ -Modifier of the type is invalid. -ADVICE: Check if modifier of the type is an integer among 1 and 65535.\ -")] - BadTypeDimensions, - #[error("\ -Dimensions of the vector is invalid. -ADVICE: Check if dimensions of the vector are among 1 and 65535.\ -")] - BadValueDimensions, - #[error("\ -Operands of the operator differs in dimensions or scalar type. -INFORMATION: left_dimensions = {left_dimensions}, right_dimensions = {right_dimensions}\ -")] - Unmatched { - left_dimensions: u16, - right_dimensions: u16, +or simply run the command `psql -U postgres -c 'ALTER SYSTEM SET shared_preload_libraries = \"vectors.so\"'`."); +} + +pub fn check_type_dimensions(dimensions: Option) -> NonZeroU16 { + match dimensions { + None => { + error!( + "\ +pgvecto.rs: Modifier of the type is invalid. +ADVICE: Check if modifier of the type is an integer among 1 and 65535." + ) + } + Some(x) => x, } } -impl FriendlyError for SessionError {} +pub fn check_value_dimensions(dimensions: usize) -> NonZeroU16 { + match u16::try_from(dimensions) + .and_then(NonZeroU16::try_from) + .ok() + { + None => { + error!( + "\ +pgvecto.rs: Dimensions of the vector is invalid. +ADVICE: Check if dimensions of the vector are among 1 and 65535." + ) + } + Some(x) => x, + } +} + +pub fn bad_literal(hint: &str) -> ! { + error!( + "\ +pgvecto.rs: Bad literal. +INFORMATION: hint = {hint}" + ); +} + +#[inline(always)] +pub fn check_matched_dimensions(left_dimensions: usize, right_dimensions: usize) -> usize { + if left_dimensions != right_dimensions { + error!( + "\ +pgvecto.rs: Operands of the operator differs in dimensions or scalar type. +INFORMATION: left_dimensions = {left_dimensions}, right_dimensions = {right_dimensions}", + ) + } + left_dimensions +} + +#[inline(always)] +pub fn check_column_dimensions(dimensions: Option) -> NonZeroU16 { + match dimensions { + None => error!( + "\ +pgvecto.rs: Dimensions type modifier of a vector column is needed for building the index.", + ), + Some(x) => x, + } +} + +pub fn bad_opclass() -> ! { + error!( + "\ +pgvecto.rs: Indexes can only be built on built-in distance functions. +ADVICE: If you want pgvecto.rs to support more distance functions, \ +visit `https://github.com/tensorchord/pgvecto.rs/issues` and contribute your ideas." + ); +} + +pub fn bad_service_not_exist() -> ! { + error!( + "\ +pgvecto.rs: The index is not existing in the background worker. +ADVICE: Drop or rebuild the index.\ + " + ); +} + +pub fn check_connection(result: Result) -> T { + match result { + Err(_) => error!( + "\ +pgvecto.rs: Indexes can only be built on built-in distance functions. +ADVICE: If you want pgvecto.rs to support more distance functions, \ +visit `https://github.com/tensorchord/pgvecto.rs/issues` and contribute your ideas." + ), + Ok(x) => x, + } +} + +pub fn check_client(option: Option) -> ClientRpc { + match option { + None => error!( + "\ +pgvecto.rs: The extension is upgraded so all index files are outdated. +ADVICE: Delete all index files. Please read `https://docs.pgvecto.rs/admin/upgrading.html`" + ), + Some(x) => x, + } +} + +pub fn bad_service_upgrade() -> ! { + error!( + "\ +pgvecto.rs: The extension is upgraded so this index is outdated. +ADVICE: Rebuild the index. Please read `https://docs.pgvecto.rs/admin/upgrading.html`." + ) +} + +pub fn bad_service_exists() -> ! { + error!( + "\ +pgvecto.rs: The index is already existing in the background worker." + ) +} + +pub fn bad_service_invalid_index_options(reason: &str) -> ! { + error!( + "\ +pgvecto.rs: The given index option is invalid. +INFORMATION: reason = {reason:?}" + ) +} + +pub fn bad_service_invalid_vector() -> ! { + error!( + "\ +pgvecto.rs: The dimension of a vector does not matched that in a vector index column." + ) +} diff --git a/src/prelude/mod.rs b/src/prelude/mod.rs index 617b2d3..af364e4 100644 --- a/src/prelude/mod.rs +++ b/src/prelude/mod.rs @@ -1,5 +1,5 @@ mod error; mod sys; -pub use error::{Friendly, FriendlyError, SessionError}; +pub use error::*; pub use sys::{FromSys, IntoSys}; diff --git a/src/sql/finalize.sql b/src/sql/finalize.sql index 552af2d..ac79f9b 100644 --- a/src/sql/finalize.sql +++ b/src/sql/finalize.sql @@ -341,10 +341,10 @@ CREATE OPERATOR <=> ( -- List of functions CREATE FUNCTION pgvectors_upgrade() RETURNS void -IMMUTABLE STRICT PARALLEL SAFE LANGUAGE c AS 'MODULE_PATHNAME', '_vectors_pgvectors_upgrade_wrapper'; +STRICT LANGUAGE c AS 'MODULE_PATHNAME', '_vectors_pgvectors_upgrade_wrapper'; -CREATE FUNCTION to_svector("dims" INT, "indices" INT[], "values" real[]) RETURNS svector -IMMUTABLE STRICT PARALLEL SAFE LANGUAGE c AS 'MODULE_PATHNAME', '_vectors_svector_from_array_wrapper'; +CREATE FUNCTION to_svector(dims INT, indices INT[], vals real[]) RETURNS svector +IMMUTABLE STRICT PARALLEL SAFE LANGUAGE c AS 'MODULE_PATHNAME', '_vectors_to_svector_wrapper'; -- List of casts diff --git a/src/utils/mod.rs b/src/utils/mod.rs index bcad5f7..9672461 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,4 +1,2 @@ pub mod cells; -pub mod file_socket; -pub mod os; pub mod parse; diff --git a/src/utils/os.rs b/src/utils/os.rs deleted file mode 100644 index 431efc0..0000000 --- a/src/utils/os.rs +++ /dev/null @@ -1,190 +0,0 @@ -use rustix::fd::{AsFd, OwnedFd}; -use rustix::mm::{MapFlags, ProtFlags}; -use std::sync::atomic::AtomicU32; - -#[cfg(target_os = "linux")] -pub unsafe fn futex_wait(futex: &AtomicU32, value: u32) { - const FUTEX_TIMEOUT: libc::timespec = libc::timespec { - tv_sec: 15, - tv_nsec: 0, - }; - unsafe { - libc::syscall( - libc::SYS_futex, - futex.as_ptr(), - libc::FUTEX_WAIT, - value, - &FUTEX_TIMEOUT, - ); - } -} - -#[cfg(target_os = "linux")] -pub unsafe fn futex_wake(futex: &AtomicU32) { - unsafe { - libc::syscall(libc::SYS_futex, futex.as_ptr(), libc::FUTEX_WAKE, i32::MAX); - } -} - -#[cfg(target_os = "linux")] -pub fn memfd_create() -> std::io::Result { - if detect::linux::detect_memfd() { - use rustix::fs::MemfdFlags; - Ok(rustix::fs::memfd_create( - format!(".memfd.VECTORS.{:x}", std::process::id()), - MemfdFlags::empty(), - )?) - } else { - use rustix::fs::Mode; - use rustix::fs::OFlags; - // POSIX fcntl locking do not support shmem, so we use a regular file here. - // reference: https://man7.org/linux/man-pages/man3/fcntl.3p.html - let name = format!( - ".shm.VECTORS.{:x}.{:x}", - std::process::id(), - rand::random::() - ); - let fd = rustix::fs::open( - &name, - OFlags::RDWR | OFlags::CREATE | OFlags::EXCL, - Mode::RUSR | Mode::WUSR, - )?; - rustix::fs::unlink(&name)?; - Ok(fd) - } -} - -#[cfg(target_os = "linux")] -pub unsafe fn mmap_populate(len: usize, fd: impl AsFd) -> std::io::Result<*mut libc::c_void> { - use std::ptr::null_mut; - unsafe { - Ok(rustix::mm::mmap( - null_mut(), - len, - ProtFlags::READ | ProtFlags::WRITE, - MapFlags::SHARED | MapFlags::POPULATE, - fd, - 0, - )?) - } -} - -#[cfg(target_os = "macos")] -pub unsafe fn futex_wait(futex: &AtomicU32, value: u32) { - const ULOCK_TIMEOUT: u32 = 15_000_000; - unsafe { - ulock_sys::__ulock_wait( - ulock_sys::darwin19::UL_COMPARE_AND_WAIT_SHARED, - futex.as_ptr().cast(), - value as _, - ULOCK_TIMEOUT, - ); - } -} - -#[cfg(target_os = "macos")] -pub unsafe fn futex_wake(futex: &AtomicU32) { - unsafe { - ulock_sys::__ulock_wake( - ulock_sys::darwin19::UL_COMPARE_AND_WAIT_SHARED, - futex.as_ptr().cast(), - 0, - ); - } -} - -#[cfg(target_os = "macos")] -pub fn memfd_create() -> std::io::Result { - use rustix::fs::Mode; - use rustix::fs::OFlags; - // POSIX fcntl locking do not support shmem, so we use a regular file here. - // reference: https://man7.org/linux/man-pages/man3/fcntl.3p.html - let name = format!( - ".shm.VECTORS.{:x}.{:x}", - std::process::id(), - rand::random::() - ); - let fd = rustix::fs::open( - &name, - OFlags::RDWR | OFlags::CREATE | OFlags::EXCL, - Mode::RUSR | Mode::WUSR, - )?; - rustix::fs::unlink(&name)?; - Ok(fd) -} - -#[cfg(target_os = "macos")] -pub unsafe fn mmap_populate(len: usize, fd: impl AsFd) -> std::io::Result<*mut libc::c_void> { - use std::ptr::null_mut; - unsafe { - Ok(rustix::mm::mmap( - null_mut(), - len, - ProtFlags::READ | ProtFlags::WRITE, - MapFlags::SHARED, - fd, - 0, - )?) - } -} - -#[cfg(target_os = "freebsd")] -pub unsafe fn futex_wait(futex: &AtomicU32, value: u32) { - let ptr: *const AtomicU32 = futex; - unsafe { - libc::_umtx_op( - ptr as *mut libc::c_void, - libc::UMTX_OP_WAIT_UINT, - value as libc::c_ulong, - core::ptr::null_mut(), - core::ptr::null_mut(), - ); - }; -} - -#[cfg(target_os = "freebsd")] -pub unsafe fn futex_wake(futex: &AtomicU32) { - let ptr: *const AtomicU32 = futex; - unsafe { - libc::_umtx_op( - ptr as *mut libc::c_void, - libc::UMTX_OP_WAKE, - i32::MAX as libc::c_ulong, - core::ptr::null_mut(), - core::ptr::null_mut(), - ); - }; -} - -#[cfg(target_os = "freebsd")] -pub fn memfd_create() -> std::io::Result { - use rustix::fs::Mode; - use rustix::fs::OFlags; - let name = format!( - ".shm.VECTORS.{:x}.{:x}", - std::process::id(), - rand::random::() - ); - let fd = rustix::fs::open( - &name, - OFlags::RDWR | OFlags::CREATE | OFlags::EXCL, - Mode::RUSR | Mode::WUSR, - )?; - rustix::fs::unlink(&name)?; - Ok(fd) -} - -#[cfg(target_os = "freebsd")] -pub unsafe fn mmap_populate(len: usize, fd: impl AsFd) -> std::io::Result<*mut libc::c_void> { - use std::ptr::null_mut; - unsafe { - Ok(rustix::mm::mmap( - null_mut(), - len, - ProtFlags::READ | ProtFlags::WRITE, - MapFlags::SHARED, - fd, - 0, - )?) - } -} diff --git a/tests/sqllogictest/error.slt b/tests/sqllogictest/error.slt index 8900283..f88cff0 100644 --- a/tests/sqllogictest/error.slt +++ b/tests/sqllogictest/error.slt @@ -7,8 +7,8 @@ CREATE TABLE t (val vector(3)); statement ok CREATE INDEX ON t USING vectors (val vector_l2_ops); -statement error The given vector is invalid for input. +statement error The dimension of a vector does not matched that in a vector index column. INSERT INTO t (val) VALUES ('[0, 1, 2, 3]'); -statement error The given vector is invalid for input. +statement error The dimension of a vector does not matched that in a vector index column. SELECT * FROM t ORDER BY val <-> '[0, 1, 2, 3]'; From f0a0d4d7bc405375b033578fb640f7ae884b65a0 Mon Sep 17 00:00:00 2001 From: Usamoi Date: Mon, 19 Feb 2024 18:12:55 +0800 Subject: [PATCH 2/6] ci: increase concurrency (#362) Signed-off-by: usamoi --- .github/workflows/check.yml | 69 ++++++++++++++++++++++++++--------- .github/workflows/release.yml | 2 +- scripts/ci_install.sh | 3 -- scripts/ci_setup.sh | 15 -------- 4 files changed, 52 insertions(+), 37 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 97f0a6b..7658e79 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -54,8 +54,8 @@ jobs: VERSION: ${{ matrix.version }} OS: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 - - uses: actions/cache/restore@v3 + - uses: actions/checkout@v4 + - uses: actions/cache/restore@v4 with: path: | ~/.cargo/registry/index/ @@ -63,10 +63,57 @@ jobs: ~/.cargo/git/db/ key: cargo-${{ matrix.os }}-pg${{ matrix.version }}-${{ hashFiles('./Cargo.lock') }} restore-keys: cargo-${{ matrix.os }}-pg${{ matrix.version }} - - uses: mozilla-actions/sccache-action@v0.0.3 + - uses: mozilla-actions/sccache-action@v0.0.4 - name: Setup shell: bash - run: ./scripts/ci_setup.sh + run: | + ./scripts/ci_setup.sh + curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash + cargo binstall sqllogictest-bin -y --force + cargo install cargo-pgrx@$(grep 'pgrx = {' Cargo.toml | cut -d '"' -f 2 | head -n 1) --debug + cargo pgrx init --pg$VERSION=$(which pg_config) + - name: Install release + run: ./scripts/ci_install.sh + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Test 2 + run: ./tests/tests.sh + - uses: actions/cache/save@v4 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + key: cargo-${{ matrix.os }}-pg${{ matrix.version }}-${{ hashFiles('./Cargo.lock') }} + debug_check: + strategy: + matrix: + include: + - { version: 14, os: "ubuntu-latest" } + - { version: 15, os: "ubuntu-latest" } + - { version: 16, os: "ubuntu-latest" } + runs-on: ${{ matrix.os }} + env: + VERSION: ${{ matrix.version }} + OS: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - uses: actions/cache/restore@v4 + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + key: cargo-${{ matrix.os }}-pg${{ matrix.version }}-${{ hashFiles('./Cargo.lock') }} + restore-keys: cargo-${{ matrix.os }}-pg${{ matrix.version }} + - uses: mozilla-actions/sccache-action@v0.0.4 + - name: Setup + shell: bash + run: | + ./scripts/ci_setup.sh + cargo install cargo-pgrx@$(grep 'pgrx = {' Cargo.toml | cut -d '"' -f 2 | head -n 1) --debug + cargo pgrx init --pg$VERSION=$(which pg_config) - name: Format check run: cargo fmt --check - name: Semantic check @@ -80,17 +127,3 @@ jobs: - name: Test run: | cargo test --all --no-fail-fast --no-default-features --features "pg${{ matrix.version }} pg_test" --target x86_64-unknown-linux-gnu -- --nocapture - - name: Install release - run: ./scripts/ci_install.sh - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - name: Test 2 - run: ./tests/tests.sh - - uses: actions/cache/save@v3 - with: - path: | - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - key: cargo-${{ matrix.os }}-pg${{ matrix.version }}-${{ hashFiles('./Cargo.lock') }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c673cdc..e1d7ab9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -55,7 +55,7 @@ jobs: run: | sed -i "s/@CARGO_VERSION@/${{ needs.semver.outputs.version }}/g" ./vectors.control cat ./vectors.control - - uses: actions/cache/restore@v3 + - uses: actions/cache/restore@v4 with: path: | ~/.cargo/registry/index/ diff --git a/scripts/ci_install.sh b/scripts/ci_install.sh index 7eacbd5..ed89363 100755 --- a/scripts/ci_install.sh +++ b/scripts/ci_install.sh @@ -10,6 +10,3 @@ if [ "$OS" == "ubuntu-latest" ]; then sudo systemctl restart postgresql pg_lsclusters fi -if [ "$OS" == "macos-latest" ]; then - brew services restart postgresql@$VERSION -fi diff --git a/scripts/ci_setup.sh b/scripts/ci_setup.sh index e6593be..03d2d31 100755 --- a/scripts/ci_setup.sh +++ b/scripts/ci_setup.sh @@ -25,21 +25,6 @@ if [ "$OS" == "ubuntu-latest" ]; then sudo -iu postgres createuser -s -r runner createdb fi -if [ "$OS" == "macos-latest" ]; then - brew uninstall postgresql - brew install postgresql@$VERSION - export PATH="$PATH:$(brew --prefix postgresql@$VERSION)/bin" - echo "$(brew --prefix postgresql@$VERSION)/bin" >> $GITHUB_PATH - brew services start postgresql@$VERSION - sleep 30 - createdb -fi sudo chmod -R 777 `pg_config --pkglibdir` sudo chmod -R 777 `pg_config --sharedir`/extension - -curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash -cargo binstall sqllogictest-bin -y --force - -cargo install cargo-pgrx@$(grep 'pgrx = {' Cargo.toml | cut -d '"' -f 2 | head -n 1) --debug -cargo pgrx init --pg$VERSION=$(which pg_config) From 10c40c052bdeb304f9fa5d0657ea7b00a6bc733f Mon Sep 17 00:00:00 2001 From: Jinjing Zhou Date: Mon, 19 Feb 2024 19:33:49 +0800 Subject: [PATCH 3/6] fix docker tag (#364) Signed-off-by: Jinjing.Zhou --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 956b34d..fca3e4a 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ docker run \ --name pgvecto-rs-demo \ -e POSTGRES_PASSWORD=mysecretpassword \ -p 5432:5432 \ - -d tensorchord/pgvecto-rs:pg16-v0.1.14-beta + -d tensorchord/pgvecto-rs:pg16-v0.2.0 ``` Then you can connect to the database using the `psql` command line tool. The default username is `postgres`, and the default password is `mysecretpassword`. From 6e6902ec5c6ca89c95d85f11db055706f2fbda49 Mon Sep 17 00:00:00 2001 From: cutecutecat Date: Tue, 20 Feb 2024 11:13:38 +0800 Subject: [PATCH 4/6] fix: python test (#365) Signed-off-by: cutecutecat --- bindings/python/tests/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/tests/__init__.py b/bindings/python/tests/__init__.py index 0a2e9ac..b4b7391 100644 --- a/bindings/python/tests/__init__.py +++ b/bindings/python/tests/__init__.py @@ -55,7 +55,7 @@ EXPECTED_SQRT_EUCLID_DIS = [14.0, 2030.4756, 3.0] OP_NEG_DOT_PROD_DIS = [1, 2, 4] EXPECTED_NEG_DOT_PROD_DIS = [-17.0, 80.64, -7.0] OP_NEG_COS_DIS = [3, 2, 1] -EXPECTED_NEG_COS_DIS = [-0.7142857, 0.5199225, -0.92582005] +EXPECTED_NEG_COS_DIS = [0.28571427, 1.5199225, 0.07417989] # ==== test_delete ==== LEN_AFT_DEL = 2 From d6e0545b93b2fb99d7df1d59d4bb60fc17eaa1eb Mon Sep 17 00:00:00 2001 From: usamoi Date: Tue, 20 Feb 2024 11:59:47 +0800 Subject: [PATCH 5/6] ci: fix "cache save failed" (#366) Signed-off-by: usamoi --- .github/workflows/check.yml | 23 ++++++++++++++++------- .github/workflows/release.yml | 8 -------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 7658e79..2182167 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -56,17 +56,17 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/cache/restore@v4 + id: cache with: path: | ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ - key: cargo-${{ matrix.os }}-pg${{ matrix.version }}-${{ hashFiles('./Cargo.lock') }} - restore-keys: cargo-${{ matrix.os }}-pg${{ matrix.version }} + key: ${{ github.job }}-${{ matrix.version }}-${{ matrix.os }}-${{ hashFiles('./Cargo.lock') }} - uses: mozilla-actions/sccache-action@v0.0.4 - name: Setup shell: bash - run: | + run: | ./scripts/ci_setup.sh curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash cargo binstall sqllogictest-bin -y --force @@ -77,15 +77,16 @@ jobs: - uses: actions/setup-python@v5 with: python-version: "3.10" - - name: Test 2 + - name: Test run: ./tests/tests.sh - uses: actions/cache/save@v4 + if: ${{ !steps.cache.outputs.cache-hit }} with: path: | ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ - key: cargo-${{ matrix.os }}-pg${{ matrix.version }}-${{ hashFiles('./Cargo.lock') }} + key: ${{ github.job }}-${{ matrix.version }}-${{ matrix.os }}-${{ hashFiles('./Cargo.lock') }} debug_check: strategy: matrix: @@ -100,13 +101,13 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/cache/restore@v4 + id: cache with: path: | ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ - key: cargo-${{ matrix.os }}-pg${{ matrix.version }}-${{ hashFiles('./Cargo.lock') }} - restore-keys: cargo-${{ matrix.os }}-pg${{ matrix.version }} + key: ${{ github.job }}-${{ matrix.version }}-${{ matrix.os }}-${{ hashFiles('./Cargo.lock') }} - uses: mozilla-actions/sccache-action@v0.0.4 - name: Setup shell: bash @@ -127,3 +128,11 @@ jobs: - name: Test run: | cargo test --all --no-fail-fast --no-default-features --features "pg${{ matrix.version }} pg_test" --target x86_64-unknown-linux-gnu -- --nocapture + - uses: actions/cache/save@v4 + if: ${{ !steps.cache.outputs.cache-hit }} + with: + path: | + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + key: ${{ github.job }}-${{ matrix.version }}-${{ matrix.os }}-${{ hashFiles('./Cargo.lock') }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e1d7ab9..c9cae53 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -55,14 +55,6 @@ jobs: run: | sed -i "s/@CARGO_VERSION@/${{ needs.semver.outputs.version }}/g" ./vectors.control cat ./vectors.control - - uses: actions/cache/restore@v4 - with: - path: | - ~/.cargo/registry/index/ - ~/.cargo/registry/cache/ - ~/.cargo/git/db/ - key: cargo-${{ runner.os }}-pg${{ matrix.version }}-${{ hashFiles('./Cargo.lock') }} - restore-keys: cargo-${{ runner.os }}-pg${{ matrix.version }} - uses: mozilla-actions/sccache-action@v0.0.3 - name: Prepare run: | From 42fa5835d1ae77b75f89dd31d4d30fe76a35599b Mon Sep 17 00:00:00 2001 From: usamoi Date: Tue, 20 Feb 2024 15:35:49 +0800 Subject: [PATCH 6/6] refactor: add crate "base" (#367) Signed-off-by: usamoi --- Cargo.lock | 32 +++++++++++++++ Cargo.toml | 1 + crates/base/Cargo.toml | 40 +++++++++++++++++++ .../src/prelude => base/src}/error.rs | 0 crates/base/src/lib.rs | 7 ++++ .../src/prelude => base/src}/scalar/f16.rs | 2 +- .../src/prelude => base/src}/scalar/f32.rs | 2 +- crates/base/src/scalar/mod.rs | 16 ++++++++ .../src/prelude => base/src}/search.rs | 2 +- .../{service/src/prelude => base/src}/sys.rs | 0 crates/base/src/vector/mod.rs | 19 +++++++++ .../scalar => base/src/vector}/sparse_f32.rs | 4 +- crates/service/Cargo.toml | 1 + .../algorithms/clustering/elkan_k_means.rs | 1 + .../src/algorithms/quantization/scalar.rs | 1 + crates/service/src/prelude/global/f16.rs | 1 + crates/service/src/prelude/global/f16_cos.rs | 4 +- crates/service/src/prelude/global/f16_dot.rs | 4 +- crates/service/src/prelude/global/f16_l2.rs | 4 +- crates/service/src/prelude/global/f32_l2.rs | 3 +- crates/service/src/prelude/global/mod.rs | 29 +------------- crates/service/src/prelude/mod.rs | 14 +++---- crates/service/src/prelude/scalar/mod.rs | 7 ---- src/datatype/casts_f32.rs | 1 + src/datatype/operators_svecf32.rs | 1 + src/datatype/operators_vecf16.rs | 1 + src/datatype/operators_vecf32.rs | 1 + 27 files changed, 143 insertions(+), 55 deletions(-) create mode 100644 crates/base/Cargo.toml rename crates/{service/src/prelude => base/src}/error.rs (100%) create mode 100644 crates/base/src/lib.rs rename crates/{service/src/prelude => base/src}/scalar/f16.rs (99%) rename crates/{service/src/prelude => base/src}/scalar/f32.rs (99%) create mode 100644 crates/base/src/scalar/mod.rs rename crates/{service/src/prelude => base/src}/search.rs (90%) rename crates/{service/src/prelude => base/src}/sys.rs (100%) create mode 100644 crates/base/src/vector/mod.rs rename crates/{service/src/prelude/scalar => base/src/vector}/sparse_f32.rs (95%) delete mode 100644 crates/service/src/prelude/scalar/mod.rs diff --git a/Cargo.lock b/Cargo.lock index a927291..609d782 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -143,6 +143,36 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "base" +version = "0.0.0" +dependencies = [ + "arc-swap", + "bincode", + "bytemuck", + "byteorder", + "c", + "crc32fast", + "crossbeam", + "dashmap", + "detect", + "half 2.3.1", + "libc", + "log", + "memmap2", + "multiversion", + "num-traits", + "parking_lot", + "rand", + "rayon", + "rustix", + "serde", + "serde_json", + "thiserror", + "uuid", + "validator", +] + [[package]] name = "base64" version = "0.21.7" @@ -1739,6 +1769,7 @@ name = "service" version = "0.0.0" dependencies = [ "arc-swap", + "base", "bincode", "bytemuck", "byteorder", @@ -2197,6 +2228,7 @@ name = "vectors" version = "0.0.0" dependencies = [ "arrayvec", + "base", "bincode", "bytemuck", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 1a932d4..5b7e589 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ serde.workspace = true serde_json.workspace = true thiserror.workspace = true validator.workspace = true +base = { path = "crates/base" } detect = { path = "crates/detect" } send_fd = { path = "crates/send_fd" } service = { path = "crates/service" } diff --git a/crates/base/Cargo.toml b/crates/base/Cargo.toml new file mode 100644 index 0000000..79c2f8d --- /dev/null +++ b/crates/base/Cargo.toml @@ -0,0 +1,40 @@ +[package] +name = "base" +version.workspace = true +edition.workspace = true + +[dependencies] +bincode.workspace = true +bytemuck.workspace = true +byteorder.workspace = true +half.workspace = true +libc.workspace = true +log.workspace = true +memmap2.workspace = true +num-traits.workspace = true +rand.workspace = true +rustix.workspace = true +serde.workspace = true +serde_json.workspace = true +thiserror.workspace = true +uuid.workspace = true +validator.workspace = true +c = { path = "../c" } +detect = { path = "../detect" } +crc32fast = "1.4.0" +crossbeam = "0.8.4" +dashmap = "5.5.3" +parking_lot = "0.12.1" +rayon = "1.8.1" +arc-swap = "1.6.0" +multiversion = "0.7.3" + +[lints] +clippy.derivable_impls = "allow" +clippy.len_without_is_empty = "allow" +clippy.needless_range_loop = "allow" +clippy.too_many_arguments = "allow" +rust.internal_features = "allow" +rust.unsafe_op_in_unsafe_fn = "forbid" +rust.unused_lifetimes = "warn" +rust.unused_qualifications = "warn" diff --git a/crates/service/src/prelude/error.rs b/crates/base/src/error.rs similarity index 100% rename from crates/service/src/prelude/error.rs rename to crates/base/src/error.rs diff --git a/crates/base/src/lib.rs b/crates/base/src/lib.rs new file mode 100644 index 0000000..653153e --- /dev/null +++ b/crates/base/src/lib.rs @@ -0,0 +1,7 @@ +#![feature(core_intrinsics)] + +pub mod error; +pub mod scalar; +pub mod search; +pub mod sys; +pub mod vector; diff --git a/crates/service/src/prelude/scalar/f16.rs b/crates/base/src/scalar/f16.rs similarity index 99% rename from crates/service/src/prelude/scalar/f16.rs rename to crates/base/src/scalar/f16.rs index 467542f..da5735b 100644 --- a/crates/service/src/prelude/scalar/f16.rs +++ b/crates/base/src/scalar/f16.rs @@ -1,4 +1,4 @@ -use crate::prelude::global::FloatCast; +use super::FloatCast; use half::f16; use serde::{Deserialize, Serialize}; use std::cmp::Ordering; diff --git a/crates/service/src/prelude/scalar/f32.rs b/crates/base/src/scalar/f32.rs similarity index 99% rename from crates/service/src/prelude/scalar/f32.rs rename to crates/base/src/scalar/f32.rs index a4e70a1..c6e431b 100644 --- a/crates/service/src/prelude/scalar/f32.rs +++ b/crates/base/src/scalar/f32.rs @@ -1,4 +1,4 @@ -use crate::prelude::global::FloatCast; +use super::FloatCast; use serde::{Deserialize, Serialize}; use std::cmp::Ordering; use std::fmt::{Debug, Display}; diff --git a/crates/base/src/scalar/mod.rs b/crates/base/src/scalar/mod.rs new file mode 100644 index 0000000..8e30d33 --- /dev/null +++ b/crates/base/src/scalar/mod.rs @@ -0,0 +1,16 @@ +mod f16; +mod f32; + +pub use f16::F16; +pub use f32::F32; + +pub trait FloatCast: Sized { + fn from_f32(x: f32) -> Self; + fn to_f32(self) -> f32; + fn from_f(x: F32) -> Self { + Self::from_f32(x.0) + } + fn to_f(self) -> F32 { + F32(Self::to_f32(self)) + } +} diff --git a/crates/service/src/prelude/search.rs b/crates/base/src/search.rs similarity index 90% rename from crates/service/src/prelude/search.rs rename to crates/base/src/search.rs index 2009730..c5e946b 100644 --- a/crates/service/src/prelude/search.rs +++ b/crates/base/src/search.rs @@ -1,4 +1,4 @@ -use crate::prelude::F32; +use crate::scalar::F32; pub type Payload = u64; diff --git a/crates/service/src/prelude/sys.rs b/crates/base/src/sys.rs similarity index 100% rename from crates/service/src/prelude/sys.rs rename to crates/base/src/sys.rs diff --git a/crates/base/src/vector/mod.rs b/crates/base/src/vector/mod.rs new file mode 100644 index 0000000..8f61177 --- /dev/null +++ b/crates/base/src/vector/mod.rs @@ -0,0 +1,19 @@ +mod sparse_f32; + +pub use sparse_f32::{SparseF32, SparseF32Ref}; + +pub trait Vector { + fn dims(&self) -> u16; +} + +impl Vector for Vec { + fn dims(&self) -> u16 { + self.len().try_into().unwrap() + } +} + +impl<'a, T> Vector for &'a [T] { + fn dims(&self) -> u16 { + self.len().try_into().unwrap() + } +} diff --git a/crates/service/src/prelude/scalar/sparse_f32.rs b/crates/base/src/vector/sparse_f32.rs similarity index 95% rename from crates/service/src/prelude/scalar/sparse_f32.rs rename to crates/base/src/vector/sparse_f32.rs index d8d28a2..d529032 100644 --- a/crates/service/src/prelude/scalar/sparse_f32.rs +++ b/crates/base/src/vector/sparse_f32.rs @@ -1,4 +1,6 @@ -use crate::prelude::*; +use super::Vector; +use crate::scalar::F32; +use num_traits::Zero; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/service/Cargo.toml b/crates/service/Cargo.toml index 9f71438..8cd06be 100644 --- a/crates/service/Cargo.toml +++ b/crates/service/Cargo.toml @@ -19,6 +19,7 @@ serde_json.workspace = true thiserror.workspace = true uuid.workspace = true validator.workspace = true +base = { path = "../base" } c = { path = "../c" } detect = { path = "../detect" } crc32fast = "1.4.0" diff --git a/crates/service/src/algorithms/clustering/elkan_k_means.rs b/crates/service/src/algorithms/clustering/elkan_k_means.rs index fa3ad4a..e746568 100644 --- a/crates/service/src/algorithms/clustering/elkan_k_means.rs +++ b/crates/service/src/algorithms/clustering/elkan_k_means.rs @@ -1,5 +1,6 @@ use crate::prelude::*; use crate::utils::vec2::Vec2; +use base::scalar::FloatCast; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use std::ops::{Index, IndexMut}; diff --git a/crates/service/src/algorithms/quantization/scalar.rs b/crates/service/src/algorithms/quantization/scalar.rs index f613df8..17467b4 100644 --- a/crates/service/src/algorithms/quantization/scalar.rs +++ b/crates/service/src/algorithms/quantization/scalar.rs @@ -5,6 +5,7 @@ use crate::index::IndexOptions; use crate::prelude::*; use crate::utils::dir_ops::sync_dir; use crate::utils::mmap_array::MmapArray; +use base::scalar::FloatCast; use serde::{Deserialize, Serialize}; use std::path::Path; use std::sync::Arc; diff --git a/crates/service/src/prelude/global/f16.rs b/crates/service/src/prelude/global/f16.rs index d877b5a..be5c560 100644 --- a/crates/service/src/prelude/global/f16.rs +++ b/crates/service/src/prelude/global/f16.rs @@ -1,4 +1,5 @@ use crate::prelude::*; +use base::scalar::FloatCast; pub fn cosine(lhs: &[F16], rhs: &[F16]) -> F32 { #[inline(always)] diff --git a/crates/service/src/prelude/global/f16_cos.rs b/crates/service/src/prelude/global/f16_cos.rs index 3e9a350..a8e7f33 100644 --- a/crates/service/src/prelude/global/f16_cos.rs +++ b/crates/service/src/prelude/global/f16_cos.rs @@ -1,6 +1,6 @@ -use std::borrow::Cow; - use crate::prelude::*; +use base::scalar::FloatCast; +use std::borrow::Cow; #[derive(Debug, Clone, Copy)] pub enum F16Cos {} diff --git a/crates/service/src/prelude/global/f16_dot.rs b/crates/service/src/prelude/global/f16_dot.rs index 4f5d2d7..353efba 100644 --- a/crates/service/src/prelude/global/f16_dot.rs +++ b/crates/service/src/prelude/global/f16_dot.rs @@ -1,6 +1,6 @@ -use std::borrow::Cow; - use crate::prelude::*; +use base::scalar::FloatCast; +use std::borrow::Cow; #[derive(Debug, Clone, Copy)] pub enum F16Dot {} diff --git a/crates/service/src/prelude/global/f16_l2.rs b/crates/service/src/prelude/global/f16_l2.rs index 5313554..46d9568 100644 --- a/crates/service/src/prelude/global/f16_l2.rs +++ b/crates/service/src/prelude/global/f16_l2.rs @@ -1,6 +1,6 @@ -use std::borrow::Cow; - use crate::prelude::*; +use base::scalar::FloatCast; +use std::borrow::Cow; #[derive(Debug, Clone, Copy)] pub enum F16L2 {} diff --git a/crates/service/src/prelude/global/f32_l2.rs b/crates/service/src/prelude/global/f32_l2.rs index fd9ea6f..9836f4b 100644 --- a/crates/service/src/prelude/global/f32_l2.rs +++ b/crates/service/src/prelude/global/f32_l2.rs @@ -1,6 +1,5 @@ -use std::borrow::Cow; - use crate::prelude::*; +use std::borrow::Cow; #[derive(Debug, Clone, Copy)] pub enum F32L2 {} diff --git a/crates/service/src/prelude/global/mod.rs b/crates/service/src/prelude/global/mod.rs index aa0cbf2..802b09a 100644 --- a/crates/service/src/prelude/global/mod.rs +++ b/crates/service/src/prelude/global/mod.rs @@ -43,7 +43,7 @@ pub trait G: Copy + Debug + 'static { + Zero + num_traits::NumOps + num_traits::NumAssignOps - + FloatCast; + + base::scalar::FloatCast; type Storage: for<'a> Storage = Self::VectorRef<'a>>; type L2: for<'a> G = &'a [Self::Scalar]>; type VectorOwned: Vector + Clone + Serialize + for<'a> Deserialize<'a>; @@ -103,33 +103,6 @@ pub trait G: Copy + Debug + 'static { ) -> F32; } -pub trait FloatCast: Sized { - fn from_f32(x: f32) -> Self; - fn to_f32(self) -> f32; - fn from_f(x: F32) -> Self { - Self::from_f32(x.0) - } - fn to_f(self) -> F32 { - F32(Self::to_f32(self)) - } -} - -pub trait Vector { - fn dims(&self) -> u16; -} - -impl Vector for Vec { - fn dims(&self) -> u16 { - self.len().try_into().unwrap() - } -} - -impl<'a, T> Vector for &'a [T] { - fn dims(&self) -> u16 { - self.len().try_into().unwrap() - } -} - #[derive(Debug, Clone, Serialize, Deserialize)] pub enum DynamicVector { F32(Vec), diff --git a/crates/service/src/prelude/mod.rs b/crates/service/src/prelude/mod.rs index 67c1b16..a559acb 100644 --- a/crates/service/src/prelude/mod.rs +++ b/crates/service/src/prelude/mod.rs @@ -1,15 +1,13 @@ -mod error; mod global; -mod scalar; -mod search; mod storage; -mod sys; -pub use self::error::*; pub use self::global::*; -pub use self::scalar::{SparseF32, SparseF32Ref, F16, F32}; -pub use self::search::{Element, Filter, Payload}; pub use self::storage::{DenseMmap, SparseMmap, Storage}; -pub use self::sys::{Handle, Pointer}; + +pub use base::error::*; +pub use base::scalar::{F16, F32}; +pub use base::search::{Element, Filter, Payload}; +pub use base::sys::{Handle, Pointer}; +pub use base::vector::{SparseF32, SparseF32Ref, Vector}; pub use num_traits::{Float, Zero}; diff --git a/crates/service/src/prelude/scalar/mod.rs b/crates/service/src/prelude/scalar/mod.rs deleted file mode 100644 index 1be763f..0000000 --- a/crates/service/src/prelude/scalar/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod f16; -mod f32; -mod sparse_f32; - -pub use f16::F16; -pub use f32::F32; -pub use sparse_f32::{SparseF32, SparseF32Ref}; diff --git a/src/datatype/casts_f32.rs b/src/datatype/casts_f32.rs index d28f7d3..123434c 100644 --- a/src/datatype/casts_f32.rs +++ b/src/datatype/casts_f32.rs @@ -2,6 +2,7 @@ use crate::datatype::svecf32::{SVecf32, SVecf32Input, SVecf32Output}; use crate::datatype::vecf16::{Vecf16, Vecf16Input, Vecf16Output}; use crate::datatype::vecf32::{Vecf32, Vecf32Input, Vecf32Output}; use crate::prelude::check_value_dimensions; +use base::scalar::FloatCast; use service::prelude::*; #[pgrx::pg_extern(immutable, parallel_safe, strict)] diff --git a/src/datatype/operators_svecf32.rs b/src/datatype/operators_svecf32.rs index 88fa790..03f3616 100644 --- a/src/datatype/operators_svecf32.rs +++ b/src/datatype/operators_svecf32.rs @@ -1,5 +1,6 @@ use crate::datatype::svecf32::{SVecf32, SVecf32Input, SVecf32Output}; use crate::prelude::*; +use base::scalar::FloatCast; use service::prelude::*; use std::ops::Deref; diff --git a/src/datatype/operators_vecf16.rs b/src/datatype/operators_vecf16.rs index c817f8e..191c0e3 100644 --- a/src/datatype/operators_vecf16.rs +++ b/src/datatype/operators_vecf16.rs @@ -1,5 +1,6 @@ use crate::datatype::vecf16::{Vecf16, Vecf16Input, Vecf16Output}; use crate::prelude::*; +use base::scalar::FloatCast; use service::prelude::*; use std::ops::Deref; diff --git a/src/datatype/operators_vecf32.rs b/src/datatype/operators_vecf32.rs index 31c6360..50649f8 100644 --- a/src/datatype/operators_vecf32.rs +++ b/src/datatype/operators_vecf32.rs @@ -1,5 +1,6 @@ use crate::datatype::vecf32::{Vecf32, Vecf32Input, Vecf32Output}; use crate::prelude::*; +use base::scalar::FloatCast; use service::prelude::*; use std::ops::Deref;