Procházet zdrojové kódy

Impl sanitizeMarkdownForVectorStoreFile

Shun Miyazawa před 1 rokem
rodič
revize
b1fb5427a8

+ 4 - 1
apps/app/src/features/openai/server/services/openai.ts

@@ -19,11 +19,13 @@ import { createBatchStream } from '~/server/util/batch-stream';
 import loggerFactory from '~/utils/logger';
 
 import { OpenaiServiceTypes } from '../../interfaces/ai';
+import { sanitizeMarkdownForVectorStoreFile } from '../utils/sanitize-markdown-for-vector-store-file';
 
 import { getClient } from './client-delegator';
 // import { splitMarkdownIntoChunks } from './markdown-splitter/markdown-token-splitter';
 import { oepnaiApiErrorHandler } from './openai-api-error-handler';
 
+
 const BATCH_SIZE = 100;
 
 const logger = loggerFactory('growi:service:openai');
@@ -155,7 +157,8 @@ class OpenaiService implements IOpenaiService {
   // }
 
   private async uploadFile(pageId: Types.ObjectId, body: string): Promise<OpenAI.Files.FileObject> {
-    const file = await toFile(Readable.from(body), `${pageId}.md`);
+    const sanitizedMarkdown = sanitizeMarkdownForVectorStoreFile(body);
+    const file = await toFile(Readable.from(sanitizedMarkdown), `${pageId}.md`);
     const uploadedFile = await this.client.uploadFile(file);
     return uploadedFile;
   }

+ 9 - 0
apps/app/src/features/openai/server/utils/sanitize-markdown-for-vector-store-file.ts

@@ -0,0 +1,9 @@
/**
 * Matches one fenced draw.io block, from its opening fence through its closing fence.
 *
 * - `[ \t]*` around `drawio` accepts "```drawio", "``` drawio" and trailing whitespace.
 * - `\r?\n` accepts both LF and CRLF line endings.
 * - The body group is lazily optional (`??`) so that an empty block, whose closing
 *   fence directly follows the opening line, is matched on its own instead of
 *   running on to the next fence further down the document.
 */
const DRAWIO_BLOCK_PATTERN = /```[ \t]*drawio[ \t]*\r?\n(?:[\s\S]*?\r?\n)??```/g;

/**
 * Removes fenced draw.io blocks from a markdown string.
 *
 * Everything else in the document, including the line breaks that surrounded a
 * removed block, is left untouched. A block without a closing fence is not removed.
 *
 * NOTE(review): presumably the diagram payload is stripped because it is not useful
 * text for the vector store — confirm against the caller.
 *
 * @param markdown - Markdown source of a page.
 * @returns The markdown with every fenced draw.io block removed.
 */
export const sanitizeMarkdownForVectorStoreFile = (markdown: string): string => {
  return markdown.replace(DRAWIO_BLOCK_PATTERN, '');
};