Răsfoiți Sursa

store embedded body

Yuki Takei 1 an în urmă
părinte
comite
235c3351ff

+ 4 - 0
apps/app/resource/search/mappings-es7.json

@@ -64,6 +64,10 @@
           }
         }
       },
+      "body_embedded": {
+        "type": "dense_vector",
+        "dims": 768
+      },
       "comments": {
         "type": "text",
         "fields": {

+ 4 - 0
apps/app/resource/search/mappings-es8.json

@@ -64,6 +64,10 @@
           }
         }
       },
+      "body_embedded": {
+        "type": "dense_vector",
+        "dims": 768
+      },
       "comments": {
         "type": "text",
         "fields": {

+ 2 - 0
apps/app/src/server/service/search-delegator/bulk-write.d.ts

@@ -21,6 +21,7 @@ export type AggregatedPage = Pick<IPageHasId,
   },
 } & {
   tagNames: string[],
+  revisionBodyEmbedded: number[],
 };
 
 export type BulkWriteCommand = {
@@ -42,6 +43,7 @@ export type BulkWriteBody = {
   created_at: Date;
   updated_at: Date;
   body: string;
+  body_embedded?: number[];
   username?: string;
   comments?: string[];
   comment_count: number;

+ 16 - 1
apps/app/src/server/service/search-delegator/elasticsearch.ts

@@ -20,7 +20,7 @@ import type { PageModel } from '../../models/page';
 import { createBatchStream } from '../../util/batch-stream';
 import { configManager } from '../config-manager';
 import type { UpdateOrInsertPagesOpts } from '../interfaces/search';
-
+import { openaiService } from '../openai';
 
 import { aggregatePipelineToIndex } from './aggregate-to-index';
 import type { AggregatedPage, BulkWriteBody, BulkWriteCommand } from './bulk-write';
@@ -380,6 +380,7 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
     const document: BulkWriteBody = {
       path: page.path,
       body: page.revision.body,
+      body_embedded: page.revisionBodyEmbedded,
       username: page.creator?.username,
       comments: page.commentsCount > 0 ? page.comments : undefined,
       comment_count: page.commentsCount,
@@ -479,6 +480,19 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
       },
     });
 
+    // Object-mode stage that fills in `revisionBodyEmbedded` on every page of a
+    // batch by calling openaiService.embed() with the page's revision body, then
+    // forwards the same batch downstream.
+    const appendEmbeddingStream = new Transform({
+      objectMode: true,
+      async transform(chunk: AggregatedPage[], encoding, callback) {
+        try {
+          // `chunk` is a plain array, so a regular for...of is sufficient;
+          // the embedding requests are still awaited one at a time.
+          for (const doc of chunk) {
+            // NOTE(review): `creator` is read with `?.` where the document body is
+            // built; confirm it can never be missing here, otherwise this throws.
+            const embeddings = await openaiService.embed(doc.creator.username, doc.revision.body);
+            doc.revisionBodyEmbedded = embeddings[0].embedding;
+          }
+
+          callback(null, chunk);
+        } catch (err) {
+          // Node does not observe the promise returned by an async transform(),
+          // so a failure has to be handed to the callback to surface as a stream
+          // error instead of an unhandled rejection.
+          callback(err instanceof Error ? err : new Error(String(err)));
+        }
+      },
+    });
+
     let count = 0;
     const writeStream = new Writable({
       objectMode: true,
@@ -532,6 +546,7 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
     readStream
       .pipe(batchStream)
       .pipe(appendTagNamesStream)
+      .pipe(appendEmbeddingStream)
       .pipe(writeStream);
 
     return streamToPromise(writeStream);