2
0
Эх сурвалжийг харах

Merge pull request #9947 from weseek/feat/165506-delete-associated-vectorstore-file-on-attachment-deletion

feat: Delete associated vectorstore file on attachment deletion
Yuki Takei 10 сар өмнө
parent
commit
8d4a6683ee

+ 12 - 2
apps/app/src/features/openai/server/models/vector-store-file-relation.ts

@@ -7,6 +7,7 @@ import { getOrCreateModel } from '~/server/util/mongoose-utils';
 export interface VectorStoreFileRelation {
   vectorStoreRelationId: mongoose.Types.ObjectId;
   page: mongoose.Types.ObjectId;
+  attachment?: mongoose.Types.ObjectId;
   fileIds: string[];
   isAttachedToVectorStore: boolean;
 }
@@ -19,7 +20,11 @@ interface VectorStoreFileRelationModel extends Model<VectorStoreFileRelation> {
 }
 
 export const prepareVectorStoreFileRelations = (
-    vectorStoreRelationId: Types.ObjectId, page: Types.ObjectId, fileId: string, relationsMap: Map<string, VectorStoreFileRelation>,
+    vectorStoreRelationId: Types.ObjectId,
+    page: Types.ObjectId,
+    fileId: string,
+    relationsMap: Map<string, VectorStoreFileRelation>,
+    attachment?: Types.ObjectId,
 ): Map<string, VectorStoreFileRelation> => {
   const pageIdStr = page.toHexString();
   const existingData = relationsMap.get(pageIdStr);
@@ -35,6 +40,7 @@ export const prepareVectorStoreFileRelations = (
       page,
       fileIds: [fileId],
       isAttachedToVectorStore: false,
+      attachment,
     });
   }
 
@@ -52,6 +58,10 @@ const schema = new Schema<VectorStoreFileRelationDocument, VectorStoreFileRelati
     ref: 'Page',
     required: true,
   },
+  attachment: {
+    type: Schema.Types.ObjectId,
+    ref: 'Attachment',
+  },
   fileIds: [{
     type: String,
     required: true,
@@ -64,7 +74,7 @@ const schema = new Schema<VectorStoreFileRelationDocument, VectorStoreFileRelati
 });
 
 // define unique compound index
-schema.index({ vectorStoreRelationId: 1, page: 1 }, { unique: true });
+schema.index({ vectorStoreRelationId: 1, page: 1, attachment: 1 }, { unique: true });
 
 schema.statics.upsertVectorStoreFileRelations = async function(vectorStoreFileRelations: VectorStoreFileRelation[]): Promise<void> {
   await this.bulkWrite(

+ 44 - 7
apps/app/src/features/openai/server/services/openai.ts

@@ -23,6 +23,7 @@ import VectorStoreFileRelationModel, {
   prepareVectorStoreFileRelations,
 } from '~/features/openai/server/models/vector-store-file-relation';
 import type Crowi from '~/server/crowi';
+import type { IAttachmentDocument } from '~/server/models/attachment';
 import type { PageDocument, PageModel } from '~/server/models/page';
 import UserGroupRelation from '~/server/models/user-group-relation';
 import { configManager } from '~/server/service/config-manager';
@@ -80,10 +81,12 @@ export interface IOpenaiService {
   createVectorStoreFile(vectorStoreRelation: VectorStoreDocument, pages: PageDocument[]): Promise<void>;
   createVectorStoreFileOnPageCreate(pages: PageDocument[]): Promise<void>;
   updateVectorStoreFileOnPageUpdate(page: HydratedDocument<PageDocument>): Promise<void>;
-  createVectorStoreFileOnUploadAttachment(pageId: string, file: Express.Multer.File, readable: Readable): Promise<void>;
+  createVectorStoreFileOnUploadAttachment(
+    pageId: string, attachment: HydratedDocument<IAttachmentDocument>, file: Express.Multer.File, readable: Readable): Promise<void>;
   deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId): Promise<void>;
   deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise<void>;
   deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void>; // for CronJob
+  deleteVectorStoreFileOnDeleteAttachment(attachmentId: string): Promise<void>;
   isAiAssistantUsable(aiAssistantId: string, user: IUserHasId): Promise<boolean>;
   createAiAssistant(data: UpsertAiAssistantData, user: IUserHasId): Promise<AiAssistantDocument>;
   updateAiAssistant(aiAssistantId: string, data: UpsertAiAssistantData, user: IUserHasId): Promise<AiAssistantDocument>;
@@ -95,6 +98,9 @@ class OpenaiService implements IOpenaiService {
   constructor(crowi: Crowi) {
     this.createVectorStoreFileOnUploadAttachment = this.createVectorStoreFileOnUploadAttachment.bind(this);
     crowi.attachmentService.addAttachHandler(this.createVectorStoreFileOnUploadAttachment);
+
+    this.deleteVectorStoreFileOnDeleteAttachment = this.deleteVectorStoreFileOnDeleteAttachment.bind(this);
+    crowi.attachmentService.addDetachHandler(this.deleteVectorStoreFileOnDeleteAttachment);
   }
 
   private get client() {
@@ -498,6 +504,33 @@ class OpenaiService implements IOpenaiService {
     }
   }
 
+  async deleteVectorStoreFileOnDeleteAttachment(attachmentId: string) { // Deletes the files recorded for an attachment via this.client.deleteFile, then removes the attachment's relation documents.
+    // Author's stated assumption: an Attachment has exactly one VectorStoreFile, i.e. a single file id is linked to the VectorStore per Attachment.
+    // Hence only one VectorStoreFileRelation is looked up by attachmentId, even though deleteMany below removes every relation that references it.
+    const vectorStoreFileRelation = await VectorStoreFileRelationModel.findOne({ attachment: attachmentId });
+    if (vectorStoreFileRelation == null) {
+      return; // no relation references this attachment, so there is nothing to delete
+    }
+
+    const deleteAllRelationDocument = async() => { // removes every relation document that references this attachment
+      await VectorStoreFileRelationModel.deleteMany({ attachment: attachmentId });
+    };
+
+    for await (const fileId of vectorStoreFileRelation.fileIds) { // NOTE(review): fileIds is declared as string[]; a plain for...of would presumably suffice — confirm
+      try {
+        const response = await this.client.deleteFile(fileId);
+        logger.debug('Delete vector store file', response);
+      }
+      catch (err) {
+        logger.error(err); // log first; openaiApiErrorHandler below decides what happens next
+        await openaiApiErrorHandler(err, { notFoundError: () => deleteAllRelationDocument() }); // not-found: relation documents are dropped right away (and again after the loop); NOTE(review): handling of other errors depends on openaiApiErrorHandler — confirm
+      }
+    }
+
+    await deleteAllRelationDocument(); // reached only when no error escaped the loop above
+  }
+
+
   async filterPagesByAccessScope(aiAssistant: AiAssistantDocument, pages: HydratedDocument<PageDocument>[]) {
     const isPublicPage = (page :HydratedDocument<PageDocument>) => page.grant === PageGrant.GRANT_PUBLIC;
 
@@ -595,7 +628,9 @@ class OpenaiService implements IOpenaiService {
     }
   }
 
-  async createVectorStoreFileOnUploadAttachment(pageId: string, file: Express.Multer.File, readable: Readable): Promise<void> {
+  async createVectorStoreFileOnUploadAttachment(
+      pageId: string, attachment:HydratedDocument<IAttachmentDocument>, file: Express.Multer.File, readable: Readable,
+  ): Promise<void> {
     if (!isVectorStoreCompatible(file)) {
       return;
     }
@@ -625,12 +660,14 @@ class OpenaiService implements IOpenaiService {
         continue;
       }
 
-      const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();
-      prepareVectorStoreFileRelations(vectorStoreRelation._id as Types.ObjectId, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
-      const vectorStoreFileRelations = Array.from(vectorStoreFileRelationsMap.values());
-      await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(vectorStoreFileRelations);
-
       await this.client.createVectorStoreFile(vectorStoreRelation.vectorStoreId, uploadedFile.id);
+      await VectorStoreFileRelationModel.create({
+        vectorStoreRelationId: vectorStoreRelation._id,
+        page: page._id,
+        attachment: attachment._id,
+        fileIds: [uploadedFile.id],
+        isAttachedToVectorStore: true,
+      });
     }
   }
 

+ 13 - 2
apps/app/src/server/service/attachment.js

@@ -24,6 +24,7 @@ class AttachmentService {
   /** @type {Array<(pageId: string, attachment: import('~/server/models/attachment').IAttachmentDocument, file: Express.Multer.File, readable: Readable) => Promise<void>>} */
   attachHandlers = [];
 
+  /** @type {Array<(attachmentId: string) => Promise<void>>} */
   detachHandlers = [];
 
   /** @type {import('~/server/crowi').default} Crowi instance */
@@ -60,7 +61,7 @@ class AttachmentService {
       }
 
       const attachedHandlerPromises = this.attachHandlers.map((handler) => {
-        return handler(pageId, file, fileStreamForAttachedHandler);
+        return handler(pageId, attachment, file, fileStreamForAttachedHandler);
       });
 
       // Do not await, run in background
@@ -107,6 +108,16 @@ class AttachmentService {
     await fileUploadService.deleteFile(attachment);
     await attachment.remove();
 
+    const detachedHandlerPromises = this.detachHandlers.map((handler) => {
+      return handler(attachment._id);
+    });
+
+    // Do not await, run in background
+    Promise.all(detachedHandlerPromises)
+      .catch((err) => {
+        logger.error('Error while executing detached handler', err);
+      });
+
     return;
   }
 
@@ -127,7 +138,7 @@ class AttachmentService {
 
   /**
    * Register a handler that will be called (in the background, not awaited) after an attachment has been deleted
-   * @param {(attachment: Attachment) => Promise<void>} handler
+   * @param {(attachmentId: string) => Promise<void>} handler
    */
   addDetachHandler(handler) {
     this.detachHandlers.push(handler);