Shun Miyazawa 1 год назад
Родитель
Commit
57ba31bdcd

+ 24 - 0
apps/app/src/server/service/openai/file-upload.ts

@@ -0,0 +1,24 @@
+import { Readable } from 'stream';
+
+import type { IPageHasId } from '@growi/core';
+import { toFile } from 'openai';
+
+import { openaiClient } from './client';
+
+/**
+ * A page prepared for upload: every `IPageHasId` field except `revision`,
+ * which is replaced by an object carrying the revision body text.
+ */
+type PageToUpload = Omit<IPageHasId, 'revision'> & { revision: { body: string } };
+
+/**
+ * Uploads the revision body of each page as a `<page _id>.md` file to the
+ * OpenAI vector store identified by the `OPENAI_VECTOR_STORE_ID` environment
+ * variable, awaiting `uploadAndPoll` for the whole batch.
+ *
+ * Does nothing when the environment variable is unset or `pages` is empty.
+ *
+ * @param pages - Pages whose revision bodies should be uploaded.
+ * @returns A promise that settles once the `uploadAndPoll` call has settled.
+ */
+export const fileUpload = async(pages: PageToUpload[]): Promise<void> => {
+  const vectorStoreId = process.env.OPENAI_VECTOR_STORE_ID;
+  if (vectorStoreId == null) {
+    return;
+  }
+
+  // Nothing to upload, so skip the API round trip.
+  // NOTE(review): the SDK is believed to reject an empty `files` list — confirm.
+  if (pages.length === 0) {
+    return;
+  }
+
+  // toFile already returns a promise, so no extra async wrapper is needed.
+  const files = await Promise.all(
+    pages.map(page => toFile(Readable.from(page.revision.body), `${page._id}.md`)),
+  );
+
+  await openaiClient.beta.vectorStores.fileBatches.uploadAndPoll(vectorStoreId, { files });
+};

+ 1 - 0
apps/app/src/server/service/openai/index.ts

@@ -1,2 +1,3 @@
 export * from './embeddings';
+export * from './file-upload';
 export * from './client';

+ 11 - 1
apps/app/src/server/service/search-delegator/elasticsearch.ts

@@ -20,7 +20,7 @@ import type { PageModel } from '../../models/page';
 import { createBatchStream } from '../../util/batch-stream';
 import { configManager } from '../config-manager';
 import type { UpdateOrInsertPagesOpts } from '../interfaces/search';
-import { embed } from '../openai';
+import { embed, fileUpload } from '../openai';
 
 import { aggregatePipelineToIndex } from './aggregate-to-index';
 import type { AggregatedPage, BulkWriteBody, BulkWriteCommand } from './bulk-write';
@@ -493,6 +493,15 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
       },
     });
 
+    // Passes each chunk to fileUpload, then forwards the same chunk unchanged
+    // to the next stream in the pipeline.
+    const appendFileUploadedStream = new Transform({
+      objectMode: true,
+      async transform(chunk, encoding, callback) {
+        try {
+          await fileUpload(chunk);
+        }
+        catch (err) {
+          // Report the failure through the stream instead of leaving the
+          // promise rejection unhandled and the callback never invoked,
+          // which would stall the pipeline.
+          callback(err instanceof Error ? err : new Error(String(err)));
+          return;
+        }
+        callback(null, chunk);
+      },
+    });
+
     let count = 0;
     const writeStream = new Writable({
       objectMode: true,
@@ -547,6 +556,7 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
       .pipe(batchStream)
       .pipe(appendTagNamesStream)
       .pipe(appendEmbeddingStream)
+      .pipe(appendFileUploadedStream)
       .pipe(writeStream);
 
     return streamToPromise(writeStream);