Просмотр исходного кода

Refactor path condition creation to use convertPathPatternsToRegExp for improved pattern handling

Shun Miyazawa 1 год назад
Родитель
Коммит
f38352ce54
1 изменённый файл: 20 добавлений и 13 удалений
  1. 20 13
      apps/app/src/features/openai/server/services/openai.ts

+ 20 - 13
apps/app/src/features/openai/server/services/openai.ts

@@ -3,7 +3,7 @@ import { Readable, Transform } from 'stream';
 import { pipeline } from 'stream/promises';
 
 import { PageGrant, isPopulated } from '@growi/core';
-import { addTrailingSlash, normalizePath } from '@growi/core/dist/utils/path-utils';
+import { isGrobPatternPath } from '@growi/core/dist/utils/page-path-utils';
 import escapeStringRegexp from 'escape-string-regexp';
 import type { HydratedDocument, Types } from 'mongoose';
 import mongoose from 'mongoose';
@@ -39,6 +39,20 @@ const logger = loggerFactory('growi:service:openai');
 
 type VectorStoreFileRelationsMap = Map<string, VectorStoreFileRelation>
 
+
+/**
+ * Convert page path patterns into values usable in a `{ path: { $in: [...] } }` query condition.
+ *
+ * A pattern accepted by `isGrobPatternPath` (presumably a path ending in `/*`, e.g. `/Sandbox/*`;
+ * verify against that helper) becomes a RegExp that matches the base path itself and every path
+ * below it. Any other pattern is returned unchanged and therefore matches exactly.
+ *
+ * @param pagePathPatterns page path patterns; each is either an exact path or a glob pattern
+ * @returns one string or RegExp per input pattern, in the same order as the input
+ */
+const convertPathPatternsToRegExp = (pagePathPatterns: string[]): Array<string | RegExp> => {
+  return pagePathPatterns.map((pagePathPattern) => {
+    if (!isGrobPatternPath(pagePathPattern)) {
+      return pagePathPattern;
+    }
+
+    // Strip only the trailing '/*'; an un-anchored replace would remove the first '/*' found anywhere in the path.
+    const basePath = pagePathPattern.replace(/\/\*$/, '');
+    const escapedBasePath = escapeStringRegexp(basePath);
+
+    // Require a path-segment boundary after the base path, so '/Sandbox/*' matches '/Sandbox' and
+    // '/Sandbox/child' but not the sibling '/Sandbox2'. For the root pattern '/*' the base path is
+    // empty and the expression matches every path that starts with '/'.
+    return new RegExp(`^${escapedBasePath}(?:/|$)`);
+  });
+};
+
+
 export interface IOpenaiService {
   getOrCreateThread(userId: string, vectorStoreId?: string, threadId?: string): Promise<OpenAI.Beta.Threads.Thread | undefined>;
   // getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument>;
@@ -380,21 +394,14 @@ class OpenaiService implements IOpenaiService {
   // }
 
   async createAiAssistant(data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument> {
-    // 1. Get pages stream based on path patterns
-    const conditions: Array<{path: string | RegExp}> = data.pagePathPatterns.map((path) => {
-      if (path.endsWith('/*')) {
-        const basePathWithoutGlob = path.slice(0, -2); // remove '/*'
-        const pathWithTrailingSlash = addTrailingSlash(basePathWithoutGlob);
-        const startsPattern = escapeStringRegexp(pathWithTrailingSlash);
-
-        return { path: new RegExp(`^${startsPattern}`) };
-      }
-      return { path: normalizePath(path) };
-    });
+    // 1. Create conditions
+    const conditions = {
+      path: { $in: convertPathPatternsToRegExp(data.pagePathPatterns) },
+    };
 
     // 2. Create vector store file transform stream
     const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');
-    const pagesStream = Page.find({ $or: conditions })
+    const pagesStream = Page.find({ ...conditions })
       .populate('revision')
       .cursor({ batchSize: BATCH_SIZE });
     const batchStream = createBatchStream(BATCH_SIZE);