2
0
Эх сурвалжийг харах

imprv VectorStoreRelationModel

Shun Miyazawa 1 жил өмнө
parent
commit
f7fcf09384

+ 51 - 11
apps/app/src/features/openai/server/models/vector-store-relation.ts

@@ -1,23 +1,63 @@
-import type { Model, Document } from 'mongoose';
+import type { HydratedDocument, Types } from 'mongoose';
 import type mongoose from 'mongoose';
 import type mongoose from 'mongoose';
-import {
-  Schema,
-} from 'mongoose';
+import { type Model, Schema } from 'mongoose';
 
 
 import { getOrCreateModel } from '~/server/util/mongoose-utils';
 import { getOrCreateModel } from '~/server/util/mongoose-utils';
 
 
-type VectorStoreRelation = {
+export type VectorStoreRelation = {
   pageId: mongoose.Types.ObjectId;
   pageId: mongoose.Types.ObjectId;
-  fileId: string;
+  fileIds: string[];
 }
 }
 
 
-interface VectorStoreRelationDocument extends VectorStoreRelation, Document {}
+export const prepareDocumentData = (pageId: Types.ObjectId, fileId: string, updateArray: VectorStoreRelation[]): VectorStoreRelation[] => {
+  const existingData = updateArray.find(relation => relation.pageId.equals(pageId));
 
 
-type VectorStoreRelationModel = Model<VectorStoreRelationDocument>
+  if (existingData != null) {
+    // If the data exists, add the fileId to the fileIds array
+    existingData.fileIds.push(fileId);
+  }
+  else {
+    // If the data doesn't exist, create a new one and add it to the array
+    updateArray.push({
+      pageId,
+      fileIds: [fileId],
+    });
+  }
 
 
-const schema = new Schema<VectorStoreRelationDocument, VectorStoreRelationModel>({
-  pageId: { type: Schema.Types.ObjectId, ref: 'Page', required: true },
-  fileId: { type: String, required: true },
+  return updateArray;
+};
+
+type VectorStoreRelationDocument = HydratedDocument<VectorStoreRelation>;
+
+type VectorStoreRelationModel = Model<VectorStoreRelation, undefined, undefined, undefined, VectorStoreRelationDocument> & {
+  updateOrCreateDocument(requestData: VectorStoreRelation[]): Promise<void>;
+}
+
+const schema = new Schema<VectorStoreRelation, VectorStoreRelationModel>({
+  pageId: {
+    type: Schema.Types.ObjectId,
+    ref: 'Page',
+    required: true,
+    unique: true,
+  },
+  fileIds: [{
+    type: String,
+    required: true,
+  }],
 });
 });
 
 
+/**
+ * Upsert page-to-file relations in a single bulk write.
+ *
+ * One `updateOne` operation is issued per entry: the document matching
+ * `pageId` has the entry's `fileIds` merged in with `$addToSet`/`$each`,
+ * and `upsert: true` creates the document when none matches.
+ *
+ * @param requestData relations to persist, one entry per page
+ * @returns a promise that resolves once the bulk write has completed
+ */
+schema.statics.updateOrCreateDocument = async function(requestData: VectorStoreRelation[]): Promise<void> {
+  // Nothing to persist: skip the database round trip.
+  // NOTE(review): an empty operation list is believed to be rejected by some
+  // driver versions -- confirm against the installed mongoose version.
+  if (requestData.length === 0) {
+    return;
+  }
+
+  await this.bulkWrite(
+    requestData.map((data) => {
+      return {
+        updateOne: {
+          filter: { pageId: data.pageId },
+          update: { $addToSet: { fileIds: { $each: data.fileIds } } },
+          upsert: true,
+        },
+      };
+    }),
+  );
+};
+
 export default getOrCreateModel<VectorStoreRelationDocument, VectorStoreRelationModel>('VectorStoreRelation', schema);
 export default getOrCreateModel<VectorStoreRelationDocument, VectorStoreRelationModel>('VectorStoreRelation', schema);

+ 14 - 12
apps/app/src/server/service/openai/openai.ts

@@ -6,7 +6,7 @@ import mongoose from 'mongoose';
 import type OpenAI from 'openai';
 import type OpenAI from 'openai';
 import { toFile } from 'openai';
 import { toFile } from 'openai';
 
 
-import VectorStoreRelation from '~/features/openai/server/models/vector-store-relation';
+import VectorStoreRelationModel, { type VectorStoreRelation, prepareDocumentData } from '~/features/openai/server/models/vector-store-relation';
 import { OpenaiServiceTypes } from '~/interfaces/ai';
 import { OpenaiServiceTypes } from '~/interfaces/ai';
 import type { PageDocument, PageModel } from '~/server/models/page';
 import type { PageDocument, PageModel } from '~/server/models/page';
 import { configManager } from '~/server/service/config-manager';
 import { configManager } from '~/server/service/config-manager';
@@ -36,33 +36,27 @@ class OpenaiService implements IOpenaiService {
   private async uploadFile(pageId: Types.ObjectId, body: string): Promise<OpenAI.Files.FileObject> {
   private async uploadFile(pageId: Types.ObjectId, body: string): Promise<OpenAI.Files.FileObject> {
     const file = await toFile(Readable.from(body), `${pageId}.md`);
     const file = await toFile(Readable.from(body), `${pageId}.md`);
     const uploadedFile = await this.client.uploadFile(file);
     const uploadedFile = await this.client.uploadFile(file);
-    await VectorStoreRelation.create({ pageId, fileId: uploadedFile.id });
     return uploadedFile;
     return uploadedFile;
   }
   }
 
 
   async createVectorStoreFile(pages: Array<PageDocument>): Promise<void> {
   async createVectorStoreFile(pages: Array<PageDocument>): Promise<void> {
-    const uploadedFileIds: string[] = [];
-
+    const vectorStoreFileRelationData: VectorStoreRelation[] = [];
     const processUploadFile = async(page: PageDocument) => {
     const processUploadFile = async(page: PageDocument) => {
       if (page._id != null && page.grant === PageGrant.GRANT_PUBLIC && page.revision != null) {
       if (page._id != null && page.grant === PageGrant.GRANT_PUBLIC && page.revision != null) {
         if (isPopulated(page.revision) && page.revision.body.length > 0) {
         if (isPopulated(page.revision) && page.revision.body.length > 0) {
           const uploadedFile = await this.uploadFile(page._id, page.revision.body);
           const uploadedFile = await this.uploadFile(page._id, page.revision.body);
-          uploadedFileIds.push(uploadedFile.id);
+          prepareDocumentData(page._id, uploadedFile.id, vectorStoreFileRelationData);
           return;
           return;
         }
         }
 
 
         const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
         const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
         if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
         if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
           const uploadedFile = await this.uploadFile(page._id, pagePopulatedToShowRevision.revision.body);
           const uploadedFile = await this.uploadFile(page._id, pagePopulatedToShowRevision.revision.body);
-          uploadedFileIds.push(uploadedFile.id);
+          prepareDocumentData(page._id, uploadedFile.id, vectorStoreFileRelationData);
         }
         }
       }
       }
     };
     };
 
 
-    if (uploadedFileIds.length === 0) {
-      return;
-    }
-
     // Start workers to process results
     // Start workers to process results
     const workers = pages.map(processUploadFile);
     const workers = pages.map(processUploadFile);
 
 
@@ -74,8 +68,16 @@ class OpenaiService implements IOpenaiService {
       }
       }
     });
     });
 
 
-    const res = await this.client.createVectorStoreFileBatch(uploadedFileIds);
-    logger.debug('create vector store file: ', res);
+    try {
+      const uploadedFileIds = vectorStoreFileRelationData.map(data => data.fileIds).flat();
+      const res = await this.client.createVectorStoreFileBatch(uploadedFileIds);
+      logger.debug('create vector store file: ', res);
+      await VectorStoreRelationModel.updateOrCreateDocument(vectorStoreFileRelationData);
+    }
+    catch (err) {
+      logger.error(err);
+    }
+
   }
   }
 
 
   async rebuildVectorStoreAll() {
   async rebuildVectorStoreAll() {