1
0
mirror of https://github.com/BookStackApp/BookStack.git synced 2025-10-20 20:12:39 +03:00

Vectors: Split out vector search and LLM query runs

Added a formal object type to carry vector search results between components.
Applied permissions to vector search results and combined them with
their entities.
Also renamed the namespace from Search\Vectors to Search\Queries.
This commit is contained in:
Dan Brown
2025-08-21 12:14:52 +01:00
parent 2c3100e401
commit 88ccd9e5b9
18 changed files with 155 additions and 78 deletions

View File

@@ -4,8 +4,8 @@ namespace BookStack\Console\Commands;
use BookStack\Entities\EntityProvider;
use BookStack\Entities\Models\Entity;
use BookStack\Search\Vectors\SearchVector;
use BookStack\Search\Vectors\StoreEntityVectorsJob;
use BookStack\Search\Queries\SearchVector;
use BookStack\Search\Queries\StoreEntityVectorsJob;
use Illuminate\Console\Command;
class RegenerateVectorsCommand extends Command

View File

@@ -1,10 +1,12 @@
<?php
namespace BookStack\Search\Vectors;
declare(strict_types=1);
namespace BookStack\Search\Queries;
use BookStack\Activity\Models\Tag;
use BookStack\Entities\Models\Entity;
use BookStack\Search\Vectors\Services\VectorQueryService;
use BookStack\Search\Queries\Services\VectorQueryService;
use Illuminate\Support\Facades\DB;
class EntityVectorGenerator

View File

@@ -0,0 +1,26 @@
<?php
namespace BookStack\Search\Queries;
use Exception;
/**
 * Sends a text query, together with the text of prior vector search
 * matches, to the vector query service obtained from the provider.
 */
class LlmQueryRunner
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider,
    ) {
    }

    /**
     * Produce a text response for the given query, supplying the match text
     * of each provided vector search result to the query service.
     *
     * @param VectorSearchResult[] $vectorResults
     * @throws Exception
     */
    public function run(string $query, array $vectorResults): string
    {
        $service = $this->vectorQueryServiceProvider->get();

        // Collect the match texts as a plain zero-indexed list, keeping
        // the order in which the results were provided.
        $contextTexts = [];
        foreach ($vectorResults as $vectorResult) {
            $contextTexts[] = $this->matchTextOf($vectorResult);
        }

        return $service->query($query, $contextTexts);
    }

    /**
     * Read the matched text from a single result. The parameter type
     * ensures only VectorSearchResult instances are accepted.
     */
    private function matchTextOf(VectorSearchResult $result): string
    {
        return $result->matchText;
    }
}

View File

@@ -1,9 +1,10 @@
<?php
namespace BookStack\Search;
namespace BookStack\Search\Queries;
use BookStack\Http\Controller;
use BookStack\Search\Vectors\VectorSearchRunner;
use BookStack\Search\SearchOptions;
use BookStack\Search\SearchRunner;
use Illuminate\Http\Request;
class QueryController extends Controller
@@ -35,19 +36,13 @@ class QueryController extends Controller
/**
* Perform a vector/LLM-based query search.
*/
public function run(Request $request, VectorSearchRunner $runner)
public function run(Request $request, VectorSearchRunner $searchRunner, LlmQueryRunner $llmRunner)
{
// TODO - Validate if query system is active
$query = $request->get('query', '');
if ($query) {
$results = $runner->run($query);
} else {
$results = null;
}
return view('search.query', [
'results' => $results,
]);
$results = $query ? $searchRunner->run($query) : [];
$llmResult = $llmRunner->run($query, $results);
dd($results, $llmResult);
}
}

View File

@@ -0,0 +1,26 @@
<?php
declare(strict_types=1);
namespace BookStack\Search\Queries;
use BookStack\Permissions\Models\JointPermission;
use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\HasMany;
/**
 * Model for a stored piece of entity text and its embedding.
 *
 * @property string $entity_type
 * @property int    $entity_id
 * @property string $text
 * @property string $embedding
 */
class SearchVector extends Model
{
    public $timestamps = false;

    /**
     * Joint permissions that share this vector's entity_id, constrained
     * to rows whose entity_type matches the search_vectors row.
     */
    public function jointPermissions(): HasMany
    {
        $permissions = $this->hasMany(JointPermission::class, 'entity_id', 'entity_id');

        // Matching on the id alone is not enough since ids repeat across
        // entity types, so the type columns are compared as well.
        $permissions->whereColumn('search_vectors.entity_type', '=', 'joint_permissions.entity_type');

        return $permissions;
    }
}

View File

@@ -1,6 +1,6 @@
<?php
namespace BookStack\Search\Vectors\Services;
namespace BookStack\Search\Queries\Services;
use BookStack\Http\HttpRequestService;

View File

@@ -1,6 +1,6 @@
<?php
namespace BookStack\Search\Vectors\Services;
namespace BookStack\Search\Queries\Services;
interface VectorQueryService
{

View File

@@ -1,6 +1,8 @@
<?php
namespace BookStack\Search\Vectors;
declare(strict_types=1);
namespace BookStack\Search\Queries;
use BookStack\Entities\Models\Entity;
use Illuminate\Contracts\Queue\ShouldQueue;

View File

@@ -1,6 +1,8 @@
<?php
namespace BookStack\Search\Vectors;
declare(strict_types=1);
namespace BookStack\Search\Queries;
use InvalidArgumentException;

View File

@@ -1,10 +1,12 @@
<?php
namespace BookStack\Search\Vectors;
declare(strict_types=1);
namespace BookStack\Search\Queries;
use BookStack\Http\HttpRequestService;
use BookStack\Search\Vectors\Services\OpenAiVectorQueryService;
use BookStack\Search\Vectors\Services\VectorQueryService;
use BookStack\Search\Queries\Services\OpenAiVectorQueryService;
use BookStack\Search\Queries\Services\VectorQueryService;
class VectorQueryServiceProvider
{

View File

@@ -0,0 +1,17 @@
<?php
declare(strict_types=1);
namespace BookStack\Search\Queries;
use BookStack\Entities\Models\Entity;
/**
 * Immutable carrier for a single vector search match.
 */
readonly class VectorSearchResult
{
    /**
     * @param Entity $entity    The entity the match belongs to.
     * @param float  $distance  The distance value reported for the match.
     * @param string $matchText The stored text that was matched.
     */
    public function __construct(
        public Entity $entity,
        public float $distance,
        public string $matchText,
    ) {
    }
}

View File

@@ -0,0 +1,54 @@
<?php
namespace BookStack\Search\Queries;
use BookStack\Entities\Tools\MixedEntityListLoader;
use BookStack\Permissions\PermissionApplicator;
use Exception;
/**
 * Runs a similarity search against stored search vectors, restricted by
 * entity permissions, and returns the matches alongside their entities.
 */
class VectorSearchRunner
{
    /**
     * Matches must have a cosine distance below this value to be included.
     */
    protected const MAX_DISTANCE = 0.6;

    /**
     * Maximum number of vector rows fetched for a single search.
     */
    protected const RESULT_LIMIT = 10;

    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider,
        protected PermissionApplicator $permissions,
        protected MixedEntityListLoader $entityLoader,
    ) {
    }

    /**
     * Run a vector search query to find results across entities.
     *
     * Embeddings are generated for the query text, compared by cosine
     * distance against stored vectors, then the closest rows are loaded
     * with their entities. Rows which end up without a loaded entity
     * relation are left out of the results.
     *
     * @return VectorSearchResult[]
     * @throws Exception
     */
    public function run(string $query): array
    {
        $queryService = $this->vectorQueryServiceProvider->get();
        $queryVector = $queryService->generateEmbeddings($query);

        // The vector is sent as a bound parameter rather than being
        // concatenated into the SQL. This avoids reliance on double-quoted
        // string literals (which break under ANSI_QUOTES) and ensures
        // nothing from the embedding response is interpreted as SQL.
        $vectorText = '[' . implode(',', array_map(static fn ($value) => (float) $value, $queryVector)) . ']';

        // TODO - Test permissions applied
        $topMatchesQuery = SearchVector::query()->select('text', 'entity_type', 'entity_id')
            ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT(?), embedding) as distance', [$vectorText])
            ->orderBy('distance', 'asc')
            ->having('distance', '<', self::MAX_DISTANCE)
            ->limit(self::RESULT_LIMIT);

        // Kept in its own variable so the $query string parameter is not
        // overwritten by a query builder instance.
        $permittedQuery = $this->permissions->restrictEntityRelationQuery($topMatchesQuery, 'search_vectors', 'entity_id', 'entity_type');
        $topMatches = $permittedQuery->get();

        $this->entityLoader->loadIntoRelations($topMatches->all(), 'entity', true);

        $results = [];
        foreach ($topMatches as $match) {
            // Skip rows where no entity relation was set by the loader.
            if (!$match->relationLoaded('entity')) {
                continue;
            }

            $results[] = new VectorSearchResult(
                $match->getRelation('entity'),
                // Cast explicitly since the database driver may hand back
                // the computed distance as a numeric string.
                (float) $match->getAttribute('distance'),
                $match->getAttribute('text'),
            );
        }

        return $results;
    }
}

View File

@@ -6,7 +6,7 @@ use BookStack\Entities\Queries\PageQueries;
use BookStack\Entities\Queries\QueryPopular;
use BookStack\Entities\Tools\SiblingFetcher;
use BookStack\Http\Controller;
use BookStack\Search\Vectors\VectorSearchRunner;
use BookStack\Search\Queries\VectorSearchRunner;
use Illuminate\Http\Request;
class SearchController extends Controller

View File

@@ -6,8 +6,8 @@ use BookStack\Activity\Models\Tag;
use BookStack\Entities\EntityProvider;
use BookStack\Entities\Models\Entity;
use BookStack\Entities\Models\Page;
use BookStack\Search\Vectors\StoreEntityVectorsJob;
use BookStack\Search\Vectors\VectorQueryServiceProvider;
use BookStack\Search\Queries\StoreEntityVectorsJob;
use BookStack\Search\Queries\VectorQueryServiceProvider;
use BookStack\Util\HtmlDocument;
use DOMNode;
use Illuminate\Database\Eloquent\Builder;

View File

@@ -1,16 +0,0 @@
<?php
namespace BookStack\Search\Vectors;
use Illuminate\Database\Eloquent\Model;
/**
 * Model holding a piece of text and its embedding for an entity,
 * referenced via the entity_type and entity_id properties.
 *
 * @property string $entity_type
 * @property int $entity_id
 * @property string $text
 * @property string $embedding
 */
class SearchVector extends Model
{
// Timestamp handling is turned off for this model.
public $timestamps = false;
}

View File

@@ -1,34 +0,0 @@
<?php
namespace BookStack\Search\Vectors;
/**
 * Runs a vector similarity search for a text query and then passes the
 * matched text to the query service to produce an LLM response.
 */
class VectorSearchRunner
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider
    ) {
    }

    /**
     * Search stored vectors for the closest matches to the query, then
     * run the query against the query service using the matched text.
     *
     * @return array Contains 'llm_result' with the query service response,
     *               and 'entity_matches' with the matched rows in array form.
     */
    public function run(string $query): array
    {
        $queryService = $this->vectorQueryServiceProvider->get();
        $queryVector = $queryService->generateEmbeddings($query);

        // The vector is sent as a bound parameter rather than being
        // concatenated into the SQL. This avoids reliance on double-quoted
        // string literals (which break under ANSI_QUOTES) and ensures
        // nothing from the embedding response is interpreted as SQL.
        $vectorText = '[' . implode(',', array_map(static fn ($value) => (float) $value, $queryVector)) . ']';

        // TODO - Apply permissions
        // TODO - Join models
        $topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id')
            ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT(?), embedding) as distance', [$vectorText])
            ->orderBy('distance', 'asc')
            ->having('distance', '<', 0.6)
            ->limit(10)
            ->get();

        // Plain zero-indexed list of the matched text, in result order.
        $matchesText = array_values(array_map(fn (SearchVector $match) => $match->text, $topMatches->all()));
        $llmResult = $queryService->query($query, $matchesText);

        return [
            'llm_result' => $llmResult,
            'entity_matches' => $topMatches->toArray()
        ];
    }
}

View File

@@ -11,7 +11,7 @@ use BookStack\Exports\Controllers as ExportControllers;
use BookStack\Http\Middleware\VerifyCsrfToken;
use BookStack\Permissions\PermissionsController;
use BookStack\References\ReferenceController;
use BookStack\Search\QueryController;
use BookStack\Search\Queries\QueryController;
use BookStack\Search\SearchController;
use BookStack\Settings as SettingControllers;
use BookStack\Sorting as SortingControllers;
@@ -199,6 +199,7 @@ Route::middleware('auth')->group(function () {
// Queries
Route::get('/query', [QueryController::class, 'show']);
Route::get('/query/run', [QueryController::class, 'run']); // TODO - Development only, remove
Route::post('/query', [QueryController::class, 'run']);
// User Search

View File

@@ -2,7 +2,7 @@
namespace Search;
use BookStack\Search\Vectors\TextChunker;
use BookStack\Search\Queries\TextChunker;
use Tests\TestCase;
class TextChunkerTest extends TestCase