mirror of
https://github.com/BookStackApp/BookStack.git
synced 2025-10-20 20:12:39 +03:00
Vectors: Split out vector search and llm query runs
Added a formal object type to carry vector search results. Added permission application to vector search results, and combined those results with their entities. Also updated the namespace from Vectors to Queries.
This commit is contained in:
@@ -4,8 +4,8 @@ namespace BookStack\Console\Commands;
|
||||
|
||||
use BookStack\Entities\EntityProvider;
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Search\Vectors\SearchVector;
|
||||
use BookStack\Search\Vectors\StoreEntityVectorsJob;
|
||||
use BookStack\Search\Queries\SearchVector;
|
||||
use BookStack\Search\Queries\StoreEntityVectorsJob;
|
||||
use Illuminate\Console\Command;
|
||||
|
||||
class RegenerateVectorsCommand extends Command
|
||||
|
@@ -1,10 +1,12 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Activity\Models\Tag;
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Search\Vectors\Services\VectorQueryService;
|
||||
use BookStack\Search\Queries\Services\VectorQueryService;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
|
||||
class EntityVectorGenerator
|
26
app/Search/Queries/LlmQueryRunner.php
Normal file
26
app/Search/Queries/LlmQueryRunner.php
Normal file
@@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use Exception;
|
||||
|
||||
class LlmQueryRunner
{
    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider,
    ) {
    }

    /**
     * Send the given query to the configured LLM query service, along with
     * the match text of each provided vector search result, and return the
     * text response produced by the service.
     *
     * @param VectorSearchResult[] $vectorResults
     * @throws Exception
     */
    public function run(string $query, array $vectorResults): string
    {
        // Resolve the service first so provider failures surface before any result handling.
        $service = $this->vectorQueryServiceProvider->get();

        // Build a zero-indexed list of match texts, regardless of the keys on the input array.
        $matchTexts = array_map(
            static fn (VectorSearchResult $result) => $result->matchText,
            array_values($vectorResults),
        );

        return $service->query($query, $matchTexts);
    }
}
|
@@ -1,9 +1,10 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search;
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Http\Controller;
|
||||
use BookStack\Search\Vectors\VectorSearchRunner;
|
||||
use BookStack\Search\SearchOptions;
|
||||
use BookStack\Search\SearchRunner;
|
||||
use Illuminate\Http\Request;
|
||||
|
||||
class QueryController extends Controller
|
||||
@@ -35,19 +36,13 @@ class QueryController extends Controller
|
||||
/**
|
||||
* Perform a vector/LLM-based query search.
|
||||
*/
|
||||
public function run(Request $request, VectorSearchRunner $runner)
|
||||
public function run(Request $request, VectorSearchRunner $searchRunner, LlmQueryRunner $llmRunner)
|
||||
{
|
||||
// TODO - Validate if query system is active
|
||||
$query = $request->get('query', '');
|
||||
|
||||
if ($query) {
|
||||
$results = $runner->run($query);
|
||||
} else {
|
||||
$results = null;
|
||||
}
|
||||
|
||||
return view('search.query', [
|
||||
'results' => $results,
|
||||
]);
|
||||
$results = $query ? $searchRunner->run($query) : [];
|
||||
$llmResult = $llmRunner->run($query, $results);
|
||||
dd($results, $llmResult);
|
||||
}
|
||||
}
|
26
app/Search/Queries/SearchVector.php
Normal file
26
app/Search/Queries/SearchVector.php
Normal file
@@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Permissions\Models\JointPermission;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
use Illuminate\Database\Eloquent\Relations\HasMany;
|
||||
|
||||
/**
 * Eloquent model for rows of the search vectors table.
 *
 * @property string $entity_type
 * @property int    $entity_id
 * @property string $text
 * @property string $embedding
 */
class SearchVector extends Model
{
    // Timestamp handling is disabled for this model.
    public $timestamps = false;

    /**
     * Joint permission rows which share this vector's entity id,
     * constrained to those with a matching entity type.
     */
    public function jointPermissions(): HasMany
    {
        $relation = $this->hasMany(JointPermission::class, 'entity_id', 'entity_id');

        // The entity id alone is not unique across entity types, so also match on type.
        $relation->whereColumn('search_vectors.entity_type', '=', 'joint_permissions.entity_type');

        return $relation;
    }
}
|
@@ -1,6 +1,6 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors\Services;
|
||||
namespace BookStack\Search\Queries\Services;
|
||||
|
||||
use BookStack\Http\HttpRequestService;
|
||||
|
@@ -1,6 +1,6 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors\Services;
|
||||
namespace BookStack\Search\Queries\Services;
|
||||
|
||||
interface VectorQueryService
|
||||
{
|
@@ -1,6 +1,8 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
@@ -1,6 +1,8 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use InvalidArgumentException;
|
||||
|
@@ -1,10 +1,12 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Http\HttpRequestService;
|
||||
use BookStack\Search\Vectors\Services\OpenAiVectorQueryService;
|
||||
use BookStack\Search\Vectors\Services\VectorQueryService;
|
||||
use BookStack\Search\Queries\Services\OpenAiVectorQueryService;
|
||||
use BookStack\Search\Queries\Services\VectorQueryService;
|
||||
|
||||
class VectorQueryServiceProvider
|
||||
{
|
17
app/Search/Queries/VectorSearchResult.php
Normal file
17
app/Search/Queries/VectorSearchResult.php
Normal file
@@ -0,0 +1,17 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Entities\Models\Entity;
|
||||
|
||||
/**
 * Immutable value object describing a single vector search match.
 */
readonly class VectorSearchResult
{
    /**
     * @param Entity $entity    The entity associated with the match.
     * @param float  $distance  The distance value reported for the match.
     * @param string $matchText The text of the match.
     */
    public function __construct(
        public Entity $entity,
        public float $distance,
        public string $matchText,
    ) {
    }
}
|
54
app/Search/Queries/VectorSearchRunner.php
Normal file
54
app/Search/Queries/VectorSearchRunner.php
Normal file
@@ -0,0 +1,54 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Queries;
|
||||
|
||||
use BookStack\Entities\Tools\MixedEntityListLoader;
|
||||
use BookStack\Permissions\PermissionApplicator;
|
||||
use Exception;
|
||||
|
||||
class VectorSearchRunner
{
    /**
     * Matches with a cosine distance at or above this value are discarded.
     */
    protected const MAX_DISTANCE = 0.6;

    /**
     * Maximum number of vector matches fetched per search.
     */
    protected const MAX_RESULTS = 10;

    public function __construct(
        protected VectorQueryServiceProvider $vectorQueryServiceProvider,
        protected PermissionApplicator $permissions,
        protected MixedEntityListLoader $entityLoader,
    ) {
    }

    /**
     * Run a vector search query to find results across entities.
     *
     * Embeddings are generated for the query via the configured query service,
     * then the closest stored vectors (by cosine distance) are fetched with
     * entity permissions applied, and each match is combined with its entity.
     * Matches whose entity relation was not loaded are omitted.
     *
     * @return VectorSearchResult[]
     * @throws Exception
     */
    public function run(string $query): array
    {
        $queryService = $this->vectorQueryServiceProvider->get();
        $queryVector = $queryService->generateEmbeddings($query);

        // Pass the vector as a bound parameter rather than concatenating it into
        // the SQL: the values originate from the query service response, and a
        // double-quoted SQL literal is also sensitive to the server's quoting mode.
        $vectorText = '[' . implode(',', $queryVector) . ']';

        // TODO - Test permissions applied
        $topMatchesQuery = SearchVector::query()->select('text', 'entity_type', 'entity_id')
            ->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT(?), embedding) as distance', [$vectorText])
            ->orderBy('distance', 'asc')
            ->having('distance', '<', self::MAX_DISTANCE)
            ->limit(self::MAX_RESULTS);

        // Kept separate from $query so the original search string is not overwritten.
        $permittedQuery = $this->permissions->restrictEntityRelationQuery(
            $topMatchesQuery,
            'search_vectors',
            'entity_id',
            'entity_type',
        );
        $topMatches = $permittedQuery->get();

        $this->entityLoader->loadIntoRelations($topMatches->all(), 'entity', true);

        $results = [];
        foreach ($topMatches as $match) {
            // Skip matches where no entity was loaded into the relation.
            if (!$match->relationLoaded('entity')) {
                continue;
            }

            $results[] = new VectorSearchResult(
                $match->getRelation('entity'),
                // The database driver may hand back the computed column as a numeric string.
                (float) $match->getAttribute('distance'),
                $match->getAttribute('text'),
            );
        }

        return $results;
    }
}
|
@@ -6,7 +6,7 @@ use BookStack\Entities\Queries\PageQueries;
|
||||
use BookStack\Entities\Queries\QueryPopular;
|
||||
use BookStack\Entities\Tools\SiblingFetcher;
|
||||
use BookStack\Http\Controller;
|
||||
use BookStack\Search\Vectors\VectorSearchRunner;
|
||||
use BookStack\Search\Queries\VectorSearchRunner;
|
||||
use Illuminate\Http\Request;
|
||||
|
||||
class SearchController extends Controller
|
||||
|
@@ -6,8 +6,8 @@ use BookStack\Activity\Models\Tag;
|
||||
use BookStack\Entities\EntityProvider;
|
||||
use BookStack\Entities\Models\Entity;
|
||||
use BookStack\Entities\Models\Page;
|
||||
use BookStack\Search\Vectors\StoreEntityVectorsJob;
|
||||
use BookStack\Search\Vectors\VectorQueryServiceProvider;
|
||||
use BookStack\Search\Queries\StoreEntityVectorsJob;
|
||||
use BookStack\Search\Queries\VectorQueryServiceProvider;
|
||||
use BookStack\Util\HtmlDocument;
|
||||
use DOMNode;
|
||||
use Illuminate\Database\Eloquent\Builder;
|
||||
|
@@ -1,16 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
|
||||
/**
|
||||
* @property string $entity_type
|
||||
* @property int $entity_id
|
||||
* @property string $text
|
||||
* @property string $embedding
|
||||
*/
|
||||
class SearchVector extends Model
|
||||
{
|
||||
public $timestamps = false;
|
||||
}
|
@@ -1,34 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace BookStack\Search\Vectors;
|
||||
|
||||
class VectorSearchRunner
|
||||
{
|
||||
public function __construct(
|
||||
protected VectorQueryServiceProvider $vectorQueryServiceProvider
|
||||
) {
|
||||
}
|
||||
|
||||
public function run(string $query): array
|
||||
{
|
||||
$queryService = $this->vectorQueryServiceProvider->get();
|
||||
$queryVector = $queryService->generateEmbeddings($query);
|
||||
|
||||
// TODO - Apply permissions
|
||||
// TODO - Join models
|
||||
$topMatches = SearchVector::query()->select('text', 'entity_type', 'entity_id')
|
||||
->selectRaw('VEC_DISTANCE_COSINE(VEC_FROMTEXT("[' . implode(',', $queryVector) . ']"), embedding) as distance')
|
||||
->orderBy('distance', 'asc')
|
||||
->having('distance', '<', 0.6)
|
||||
->limit(10)
|
||||
->get();
|
||||
|
||||
$matchesText = array_values(array_map(fn (SearchVector $match) => $match->text, $topMatches->all()));
|
||||
$llmResult = $queryService->query($query, $matchesText);
|
||||
|
||||
return [
|
||||
'llm_result' => $llmResult,
|
||||
'entity_matches' => $topMatches->toArray()
|
||||
];
|
||||
}
|
||||
}
|
@@ -11,7 +11,7 @@ use BookStack\Exports\Controllers as ExportControllers;
|
||||
use BookStack\Http\Middleware\VerifyCsrfToken;
|
||||
use BookStack\Permissions\PermissionsController;
|
||||
use BookStack\References\ReferenceController;
|
||||
use BookStack\Search\QueryController;
|
||||
use BookStack\Search\Queries\QueryController;
|
||||
use BookStack\Search\SearchController;
|
||||
use BookStack\Settings as SettingControllers;
|
||||
use BookStack\Sorting as SortingControllers;
|
||||
@@ -199,6 +199,7 @@ Route::middleware('auth')->group(function () {
|
||||
|
||||
// Queries
|
||||
Route::get('/query', [QueryController::class, 'show']);
|
||||
Route::get('/query/run', [QueryController::class, 'run']); // TODO - Development only, remove
|
||||
Route::post('/query', [QueryController::class, 'run']);
|
||||
|
||||
// User Search
|
||||
|
@@ -2,7 +2,7 @@
|
||||
|
||||
namespace Search;
|
||||
|
||||
use BookStack\Search\Vectors\TextChunker;
|
||||
use BookStack\Search\Queries\TextChunker;
|
||||
use Tests\TestCase;
|
||||
|
||||
class TextChunkerTest extends TestCase
|
||||
|
Reference in New Issue
Block a user