Просмотр исходного кода

Execute fileUpload using splitMarkdownIntoChunks()

Shun Miyazawa 1 год назад
Родитель
Коммит
15e75849f3
Изменён 1 файл: 11 добавлений и 16 удалений
  1. 11 16
      apps/app/src/features/openai/server/services/openai.ts

+ 11 - 16
apps/app/src/features/openai/server/services/openai.ts

@@ -30,6 +30,8 @@ const logger = loggerFactory('growi:service:openai');
 
 let isVectorStoreForPublicScopeExist = false;
 
+type VectorStoreFileRelationsMap = Map<string, VectorStoreFileRelation>
+
 export interface IOpenaiService {
   getOrCreateThread(userId: string, vectorStoreId?: string, threadId?: string): Promise<OpenAI.Beta.Threads.Thread | undefined>;
   getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument>;
@@ -135,32 +137,27 @@ class OpenaiService implements IOpenaiService {
     return newVectorStoreDocument;
   }
 
-  private async uploadFile(pageId: Types.ObjectId, body: string): Promise<OpenAI.Files.FileObject> {
-
-    // const splitMarkdownIntoChunks = (await import('./markdown-splitter/markdown-token-splitter')).splitMarkdownIntoChunks;
+  private async uploadFileByChunks(pageId: Types.ObjectId, body: string, vectorStoreFileRelationsMap: VectorStoreFileRelationsMap) {
     const chunks = await splitMarkdownIntoChunks(body, 'gpt-4o');
-    console.log('chunks', chunks);
-
-
-    const file = await toFile(Readable.from(body), `${pageId}.md`);
-    const uploadedFile = await this.client.uploadFile(file);
-    return uploadedFile;
+    for await (const [index, chunk] of chunks.entries()) {
+      const file = await toFile(Readable.from(chunk), `${pageId}-chunk-${index}.md`);
+      const uploadedFile = await this.client.uploadFile(file);
+      prepareVectorStoreFileRelations(pageId, uploadedFile.id, vectorStoreFileRelationsMap);
+    }
   }
 
   async createVectorStoreFile(pages: Array<HydratedDocument<PageDocument>>): Promise<void> {
-    const vectorStoreFileRelationsMap: Map<string, VectorStoreFileRelation> = new Map();
+    const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();
     const processUploadFile = async(page: PageDocument) => {
       if (page._id != null && page.grant === PageGrant.GRANT_PUBLIC && page.revision != null) {
         if (isPopulated(page.revision) && page.revision.body.length > 0) {
-          const uploadedFile = await this.uploadFile(page._id, page.revision.body);
-          prepareVectorStoreFileRelations(page._id, uploadedFile.id, vectorStoreFileRelationsMap);
+          await this.uploadFileByChunks(page._id, page.revision.body, vectorStoreFileRelationsMap);
           return;
         }
 
         const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
         if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
-          const uploadedFile = await this.uploadFile(page._id, pagePopulatedToShowRevision.revision.body);
-          prepareVectorStoreFileRelations(page._id, uploadedFile.id, vectorStoreFileRelationsMap);
+          await this.uploadFileByChunks(page._id, pagePopulatedToShowRevision.revision.body, vectorStoreFileRelationsMap);
         }
       }
     };
@@ -180,8 +177,6 @@ class OpenaiService implements IOpenaiService {
     const vectorStoreFileRelations = Array.from(vectorStoreFileRelationsMap.values());
     const uploadedFileIds = vectorStoreFileRelations.map(data => data.fileIds).flat();
 
-    console.log('uploadedFileIds', uploadedFileIds);
-
     if (uploadedFileIds.length === 0) {
       return;
     }