From 0ffcb3d4aa895c1a3bfa8f9a14338e901b3de161 Mon Sep 17 00:00:00 2001
From: Dan Brown
Date: Mon, 24 Mar 2025 19:51:48 +0000
Subject: [PATCH] Vectors: Got basic LLM querying working using vector search context

---
 app/Search/SearchController.php               | 16 +++++++++
 app/Search/Vectors/EntityVectorGenerator.php  |  2 +-
 .../Services/OpenAiVectorQueryService.php     | 21 ++++++++++++
 .../Vectors/Services/VectorQueryService.php   |  9 +++++
 app/Search/Vectors/VectorSearchRunner.php     | 33 +++++++++++++++++++
 ..._24_155748_create_search_vectors_table.php |  5 ++-
 resources/views/search/query.blade.php        | 29 ++++++++++++++++
 routes/web.php                                |  1 +
 8 files changed, 114 insertions(+), 2 deletions(-)
 create mode 100644 app/Search/Vectors/VectorSearchRunner.php
 create mode 100644 resources/views/search/query.blade.php

diff --git a/app/Search/SearchController.php b/app/Search/SearchController.php
index 2fce6a3d5..a688385e7 100644
--- a/app/Search/SearchController.php
+++ b/app/Search/SearchController.php
@@ -6,6 +6,7 @@ use BookStack\Entities\Queries\PageQueries;
 use BookStack\Entities\Queries\QueryPopular;
 use BookStack\Entities\Tools\SiblingFetcher;
 use BookStack\Http\Controller;
+use BookStack\Search\Vectors\VectorSearchRunner;
 use Illuminate\Http\Request;
 
 class SearchController extends Controller
@@ -139,4 +140,19 @@ class SearchController extends Controller
 
         return view('entities.list-basic', ['entities' => $entities, 'style' => 'compact']);
     }
+
+    /**
+     * Render the 'search.query' view, passing the runner's output as 'results'
+     * when a non-blank string 'query' input is given, and null otherwise.
+     */
+    public function searchQuery(Request $request, VectorSearchRunner $runner)
+    {
+        // Only string input is forwarded; array input (?query[]=x) counts as no query.
+        $input = $request->get('query', '');
+        $query = is_string($input) ? trim($input) : '';
+        // Strict comparison so that a literal "0" is still treated as a query.
+        $results = ($query === '') ? null : $runner->run($query);
+
+        return view('search.query', ['results' => $results]);
+    }
 }
diff --git a/app/Search/Vectors/EntityVectorGenerator.php b/app/Search/Vectors/EntityVectorGenerator.php
index 8a4918773..9563694a3 100644
--- a/app/Search/Vectors/EntityVectorGenerator.php
+++ b/app/Search/Vectors/EntityVectorGenerator.php
@@ -42,7 +42,7 @@ class
EntityVectorGenerator
             $toInsert[] = [
                 'entity_id' => $entity->id,
                 'entity_type' => $entity->getMorphClass(),
-                'embedding' => DB::raw('STRING_TO_VECTOR("[' . implode(',', $embedding) . ']")'),
+                'embedding' => DB::raw('VEC_FROMTEXT("[' . implode(',', $embedding) . ']")'),
                 'text' => $text,
             ];
         }
diff --git a/app/Search/Vectors/Services/OpenAiVectorQueryService.php b/app/Search/Vectors/Services/OpenAiVectorQueryService.php
index 8d2910998..e0e145f3a 100644
--- a/app/Search/Vectors/Services/OpenAiVectorQueryService.php
+++ b/app/Search/Vectors/Services/OpenAiVectorQueryService.php
@@ -33,4 +33,25 @@ class OpenAiVectorQueryService implements VectorQueryService
 
         return $response['data'][0]['embedding'];
     }
+
+    /**
+     * Send the query and its newline-joined context to the chat completions API.
+     */
+    public function query(string $input, array $context): string
+    {
+        $contextText = implode("\n", $context);
+        $messages = [
+            [
+                'role' => 'developer',
+                'content' => 'You are a helpful assistant providing search query responses. Be specific, factual and to-the-point in response.',
+            ],
+            [
+                'role' => 'user',
+                'content' => "Provide a response to the below given QUERY using the below given CONTEXT\nQUERY: {$input}\n\nCONTEXT: {$contextText}",
+            ],
+        ];
+        $reply = $this->jsonRequest('POST', 'v1/chat/completions', ['model' => 'gpt-4o', 'messages' => $messages]);
+        // An absent message content in the reply yields an empty string.
+        return $reply['choices'][0]['message']['content'] ?? '';
+    }
 }
diff --git a/app/Search/Vectors/Services/VectorQueryService.php b/app/Search/Vectors/Services/VectorQueryService.php
index 2cc4ed017..746f95f5b 100644
--- a/app/Search/Vectors/Services/VectorQueryService.php
+++ b/app/Search/Vectors/Services/VectorQueryService.php
@@ -9,4 +9,13 @@ interface VectorQueryService
      * @return float[]
      */
     public function generateEmbeddings(string $text): array;
+
+    /**
+     * Query the LLM service using the given user input, and
+     * relevant context text retrieved locally via a vector search.
+     * Returns the response output text from the LLM.
+     *
+     * @param string[] $context
+     */
+    public function query(string $input, array $context): string;
 }
diff --git a/app/Search/Vectors/VectorSearchRunner.php b/app/Search/Vectors/VectorSearchRunner.php
new file mode 100644
index 000000000..db28779e4
--- /dev/null
+++ b/app/Search/Vectors/VectorSearchRunner.php
@@ -0,0 +1,33 @@
+vectorQueryServiceProvider->get();
+        $queryVector = $queryService->generateEmbeddings($query);
+
+        // TODO - Apply permissions
+        // TODO - Join models
+        // Pass the vector text as a bound parameter instead of concatenating it
+        // into the SQL: its contents are then never parsed as SQL, and no
+        // double-quoted string literal is needed (breaks under ANSI_QUOTES).
+        $topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id')
+            ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT(?), embedding) as distance', ['[' . implode(',', $queryVector) . ']'])
+            ->orderBy('distance', 'asc')
+            ->limit(10)
+            ->get();
+
+        $matchesText = array_values(array_map(fn (SearchVector $match) => $match->text, $topMatches->all()));
+        $llmResult = $queryService->query($query, $matchesText);
+
+        return ['llm_result' => $llmResult, 'entity_matches' => $topMatches->toArray()];
+    }
+}
diff --git a/database/migrations/2025_03_24_155748_create_search_vectors_table.php b/database/migrations/2025_03_24_155748_create_search_vectors_table.php
index d7fb0118a..1b552b22c 100644
--- a/database/migrations/2025_03_24_155748_create_search_vectors_table.php
+++ b/database/migrations/2025_03_24_155748_create_search_vectors_table.php
@@ -16,10 +16,13 @@ return new class extends Migration
             $table->string('entity_type', 100);
             $table->integer('entity_id');
             $table->text('text');
-            $table->vector('embedding');
 
             $table->index(['entity_type', 'entity_id']);
         });
+
+        $table = DB::getTablePrefix() .
'search_vectors'; + DB::statement("ALTER TABLE {$table} ADD COLUMN (embedding VECTOR(1536) NOT NULL)"); + DB::statement("ALTER TABLE {$table} ADD VECTOR INDEX (embedding) DISTANCE=cosine"); } /** diff --git a/resources/views/search/query.blade.php b/resources/views/search/query.blade.php new file mode 100644 index 000000000..e8b4c8477 --- /dev/null +++ b/resources/views/search/query.blade.php @@ -0,0 +1,29 @@ +@extends('layouts.simple') + +@section('body') +
+ +
+ + +
+ + @if($results) +

Results

+ +

LLM Output

+

{{ $results['llm_result'] }}

+ +

Entity Matches

+ @foreach($results['entity_matches'] as $match) +
+
{{ $match['entity_type'] }}:{{ $match['entity_id'] }}; Distance: {{ $match['distance'] }}
+
+ match text +
{{ $match['text'] }}
+
+
+ @endforeach + @endif +
+@stop diff --git a/routes/web.php b/routes/web.php index 818472583..15fe6d69b 100644 --- a/routes/web.php +++ b/routes/web.php @@ -187,6 +187,7 @@ Route::middleware('auth')->group(function () { // Search Route::get('/search', [SearchController::class, 'search']); + Route::get('/search/query', [SearchController::class, 'searchQuery']); Route::get('/search/book/{bookId}', [SearchController::class, 'searchBook']); Route::get('/search/chapter/{bookId}', [SearchController::class, 'searchChapter']); Route::get('/search/entity/siblings', [SearchController::class, 'searchSiblings']);