1
0
mirror of https://github.com/tensorchord/pgvecto.rs.git synced 2025-07-29 08:21:12 +03:00

feat: Add high-level API for Python (#123)

* feat: init high level api

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* feat: prettify things

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* feat: add test && filter subpackage

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* fix: dependency

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* test: fix Action

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* feat: add isort for format

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* fix: create extension with init client

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* docs: add readme

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* chore: bump version

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* feat: rename things

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* feat: delete embedder

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* feat: simplify filter

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* feat: config ruff

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* feat: clean up client.py

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* feat: modify PGVectoRs interfaces

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* chore: add docs

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* feat: delete text column

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* rename things

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* Revert "feat: delete text column"

This reverts commit df5452b9ad.

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* feat: rename insert

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* chore: delete __all__ for filters.py

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* chore: update things

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* chore: update lint config

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* prettify things

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* pdm lock -G :all -S direct_minimal_versions

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* replace relative import

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* change Record.from_text

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* make lint happy

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

* fix Record.from_text

Signed-off-by: 盐粒 Yanli <mail@yanli.one>

---------

Signed-off-by: 盐粒 Yanli <mail@yanli.one>
This commit is contained in:
盐粒 Yanli
2023-11-16 20:52:15 +08:00
committed by GitHub
parent 9ce6c3b4cb
commit f8344dd039
12 changed files with 844 additions and 174 deletions

View File

@@ -0,0 +1,74 @@
import os
from openai import OpenAI
from pgvecto_rs.sdk import PGVectoRs, Record, filters
# Connection URL for the database. Every component is read from a DB_*
# environment variable and falls back to a local default when it is unset.
URL = (
    "postgresql+psycopg://"
    f"{os.getenv('DB_USER', 'postgres')}:{os.getenv('DB_PASS', 'mysecretpassword')}"
    f"@{os.getenv('DB_HOST', 'localhost')}:{os.getenv('DB_PORT', 5432)}"
    f"/{os.getenv('DB_NAME', 'postgres')}"
)
# Embeddings interface of a default-constructed OpenAI client.
embedding = OpenAI().embeddings


def embed(text: str):
    """Return the ``text-embedding-ada-002`` embedding of *text*.

    Only the first entry of the response's ``data`` list is used.
    """
    response = embedding.create(input=text, model="text-embedding-ada-002")
    first_item = response.data[0]
    return first_item.embedding
# Sample sentences stored in the collection.
texts = [
    "Hello world",
    "Hello PostgreSQL",
    "Hello pgvecto.rs!",
]

# Embed every text exactly once and reuse the vector for both record sets.
# Each embed() call issues its own embeddings.create request, so embedding the
# same text a second time would only add latency and cost.
text_embeddings = [(text, embed(text)) for text in texts]

# Two batches holding the same texts, distinguished only by their "src" metadata.
records1 = [
    Record.from_text(text, vector, {"src": "one"}) for text, vector in text_embeddings
]
records2 = [
    Record.from_text(text, vector, {"src": "two"}) for text, vector in text_embeddings
]

# Query vector used by all searches below.
target = embed("Hello vector database!")
# Create an empty client for the "example" collection; 1536 is the vector size
# of text-embedding-ada-002 embeddings.
client = PGVectoRs(db_url=URL, collection_name="example", dimension=1536)
def _show(results):
    """Print the distance score followed by the record for every search hit."""
    for hit, distance in results:
        print(f"DISTANCE SCORE: {distance}")
        print(hit)


try:
    # Add both batches of records.
    client.insert(records1)
    client.insert(records2)

    # Query with a filter built by the filters module.
    print("#################### First Query ####################")
    _show(client.search(target, filter=filters.meta_contains({"src": "one"})))

    # Same query again, this time with a hand-written lambda filter.
    print("#################### Second Query ####################")
    _show(client.search(target, filter=lambda r: r.meta.contains({"src": "one"})))

    # A more complex filter combining a text condition and a metadata condition.
    print("#################### Third Query ####################")

    def complex_filter(r: filters.FilterInput) -> filters.FilterOutput:
        """Combine 'text does not end with "!"' with 'meta contains src=two'."""
        # NOTE(review): `== False` looks deliberate (E712 is suppressed),
        # presumably so the comparison builds a filter expression instead of a
        # plain Python bool — confirm against the filters module.
        no_exclamation = r.text.endswith("!") == False  # noqa: E712
        from_second_batch = r.meta.contains({"src": "two"})
        return no_exclamation & from_second_batch

    _show(client.search(target, filter=complex_filter))
finally:
    # Clean up, whether or not the steps above succeeded.
    client.drop()