Просмотр исходного кода

Merge branch 'feat/openai-vector-searching' into imprv/155101-move-open-ai-related-files-to-features-directly

Shun Miyazawa 1 год назад
Родитель
Коммит
9c8980f925

+ 34 - 0
apps/app/src/features/openai/server/models/vector-store.ts

@@ -0,0 +1,34 @@
+import { type Model, type Document, Schema } from 'mongoose';
+
+import { getOrCreateModel } from '~/server/util/mongoose-utils';
+
+export const VectorStoreScopeType = { // Scopes a vector store can be registered under; only `public` is defined here.
+  PUBLIC: 'public',
+} as const;
+
+export type VectorStoreScopeType = typeof VectorStoreScopeType[keyof typeof VectorStoreScopeType]; // Union of the values of the object above (currently just 'public').
+
+const VectorStoreScopeTypes = Object.values(VectorStoreScopeType); // Value list used as the `enum` constraint of the scope field in the schema.
+interface VectorStore { // Plain shape of a persisted vector-store record.
+  vectorStoreId: string // ID of the vector store; populated from the `id` of the store returned by the OpenAI client.
+  scorpeType: VectorStoreScopeType // NOTE(review): looks like a misspelling of `scopeType`; it is also the persisted field name, so a rename needs a data migration — confirm.
+}
+
+export interface VectorStoreDocument extends VectorStore, Document {} // Mongoose document carrying the VectorStore fields.
+
+type VectorStoreModel = Model<VectorStore> // NOTE(review): typed over VectorStore while the schema is typed over VectorStoreDocument — confirm this is intended.
+
+const schema = new Schema<VectorStoreDocument, VectorStoreModel>({ // Mongoose schema for the vector-store record.
+  vectorStoreId: {
+    type: String,
+    required: true,
+    unique: true, // requests a unique index, i.e. at most one document per vector store ID
+  },
+  scorpeType: {
+    enum: VectorStoreScopeTypes, // restricted to the VectorStoreScopeType values
+    type: String,
+    required: true,
+  },
+});
+
+export default getOrCreateModel<VectorStoreDocument, VectorStoreModel>('VectorStore', schema); // Model exported under the name 'VectorStore', obtained through the project's getOrCreateModel helper.

+ 6 - 5
apps/app/src/features/openai/server/routes/thread.ts

@@ -8,6 +8,7 @@ import type { ApiV3Response } from '~/server/routes/apiv3/interfaces/apiv3-respo
 import loggerFactory from '~/utils/logger';
 
 import { openaiClient } from '../services';
+import { getOpenaiService } from '../services/openai';
 
 import { certifyAiService } from './middlewares/certify-ai-service';
 
@@ -31,19 +32,19 @@ export const createThreadHandlersFactory: CreateThreadFactory = (crowi) => {
   return [
     accessTokenParser, loginRequiredStrictly, certifyAiService, validator, apiV3FormValidator,
     async(req: CreateThreadReq, res: ApiV3Response) => {
-
-      const vectorStoreId = process.env.OPENAI_VECTOR_STORE_ID;
-      if (vectorStoreId == null) {
-        return res.apiv3Err('OPENAI_VECTOR_STORE_ID is not setup', 503);
+      const openaiService = getOpenaiService();
+      if (openaiService == null) {
+        return res.apiv3Err('OpenaiService is not available', 503);
       }
 
       try {
+        const vectorStore = await openaiService.getOrCreateVectorStoreForPublicScope();
         const threadId = req.body.threadId;
         const thread = threadId == null
           ? await openaiClient.beta.threads.create({
             tool_resources: {
               file_search: {
-                vector_store_ids: [vectorStoreId],
+                vector_store_ids: [vectorStore.vectorStoreId],
               },
             },
           })

+ 12 - 16
apps/app/src/features/openai/server/services/client-delegator/azure-openai-client-delegator.ts

@@ -3,6 +3,8 @@ import type OpenAI from 'openai';
 import { AzureOpenAI } from 'openai';
 import { type Uploadable } from 'openai/uploads';
 
+import type { VectorStoreScopeType } from '~/features/openai/server/models/vector-store';
+
 import type { IOpenaiClientDelegator } from './interfaces';
 
 
@@ -10,8 +12,6 @@ export class AzureOpenaiClientDelegator implements IOpenaiClientDelegator {
 
   private client: AzureOpenAI;
 
-  private openaiVectorStoreId: string;
-
   constructor() {
     // Retrieve Azure OpenAI related values from environment variables
     const credential = new DefaultAzureCredential();
@@ -22,32 +22,28 @@ export class AzureOpenaiClientDelegator implements IOpenaiClientDelegator {
     // TODO: initialize openaiVectorStoreId property
   }
 
-  async uploadFile(file: Uploadable): Promise<OpenAI.Files.FileObject> {
-    return this.client.files.create({ file, purpose: 'assistants' });
-  }
-
-  async createVectorStoreFileBatch(fileIds: string[]): Promise<OpenAI.Beta.VectorStores.FileBatches.VectorStoreFileBatch> {
-    return this.client.beta.vectorStores.fileBatches.create(this.openaiVectorStoreId, { file_ids: fileIds });
+  /**
+   * Create a vector store for the given scope through the Azure OpenAI client.
+   * The store is named `growi-vector-store-<scopeType>`, the same naming the
+   * plain OpenAI delegator uses (the previous template literal contained a
+   * stray `{`, which produced names such as `growi-vector-store-{public`).
+   * @param scopeType scope the vector store is created for
+   * @returns the vector store object returned by the API
+   */
+  async createVectorStore(scopeType: VectorStoreScopeType): Promise<OpenAI.Beta.VectorStores.VectorStore> {
+    return this.client.beta.vectorStores.create({ name: `growi-vector-store-${scopeType}` });
   }
 
-  async getFileList(): Promise<OpenAI.Files.FileObjectsPage> {
-    return this.client.files.list();
+  async retrieveVectorStore(vectorStoreId: string): Promise<OpenAI.Beta.VectorStores.VectorStore> { // Fetch the vector store with the given ID through the Azure client.
+    return this.client.beta.vectorStores.retrieve(vectorStoreId);
   }
 
-  async getVectorStoreFiles(): Promise<OpenAI.Beta.VectorStores.Files.VectorStoreFilesPage> {
-    return this.client.beta.vectorStores.files.list(this.openaiVectorStoreId);
+  async uploadFile(file: Uploadable): Promise<OpenAI.Files.FileObject> { // Upload a single file with purpose 'assistants'.
+    return this.client.files.create({ file, purpose: 'assistants' });
   }
 
-  async deleteVectorStoreFiles(fileId: string): Promise<OpenAI.Beta.VectorStores.Files.VectorStoreFileDeleted> {
-    return this.client.beta.vectorStores.files.del(this.openaiVectorStoreId, fileId);
+  async createVectorStoreFileBatch(vectorStoreId: string, fileIds: string[]): Promise<OpenAI.Beta.VectorStores.FileBatches.VectorStoreFileBatch> { // Create a file batch in the given vector store from the given file IDs.
+    return this.client.beta.vectorStores.fileBatches.create(vectorStoreId, { file_ids: fileIds });
   }
 
   async deleteFile(fileId: string): Promise<OpenAI.Files.FileDeleted> {
     return this.client.files.del(fileId);
   }
 
-  async uploadAndPoll(files: Uploadable[]): Promise<OpenAI.Beta.VectorStores.FileBatches.VectorStoreFileBatch> {
-    return this.client.beta.vectorStores.fileBatches.uploadAndPoll(this.openaiVectorStoreId, { files });
+  async uploadAndPoll(vectorStoreId: string, files: Uploadable[]): Promise<OpenAI.Beta.VectorStores.FileBatches.VectorStoreFileBatch> { // Delegates to fileBatches.uploadAndPoll for the given store. NOTE(review): uploadAndPoll was removed from IOpenaiClientDelegator in this commit — confirm this method is still needed.
+    return this.client.beta.vectorStores.fileBatches.uploadAndPoll(vectorStoreId, { files });
   }
 
 }

+ 5 - 5
apps/app/src/features/openai/server/services/client-delegator/interfaces.ts

@@ -1,12 +1,12 @@
 import type OpenAI from 'openai';
 import type { Uploadable } from 'openai/uploads';
 
+import type { VectorStoreScopeType } from '~/features/openai/server/models/vector-store';
+
 export interface IOpenaiClientDelegator {
+  retrieveVectorStore(vectorStoreId: string): Promise<OpenAI.Beta.VectorStores.VectorStore> // look up an existing vector store by its ID
+  createVectorStore(scopeType:VectorStoreScopeType): Promise<OpenAI.Beta.VectorStores.VectorStore> // create a vector store for the given scope
   uploadFile(file: Uploadable): Promise<OpenAI.Files.FileObject>
-  createVectorStoreFileBatch(fileIds: string[]): Promise<OpenAI.Beta.VectorStores.FileBatches.VectorStoreFileBatch>
-  getVectorStoreFiles(): Promise<OpenAI.Beta.VectorStores.Files.VectorStoreFilesPage>;
-  deleteVectorStoreFiles(fileId: string): Promise<OpenAI.Beta.VectorStores.Files.VectorStoreFileDeleted>;
-  getFileList(): Promise<OpenAI.Files.FileObjectsPage>;
+  createVectorStoreFileBatch(vectorStoreId: string, fileIds: string[]): Promise<OpenAI.Beta.VectorStores.FileBatches.VectorStoreFileBatch> // the target vector store is now passed per call instead of being held by the delegator
   deleteFile(fileId: string): Promise<OpenAI.Files.FileDeleted>;
-  uploadAndPoll(files: Uploadable[]): Promise<OpenAI.Beta.VectorStores.FileBatches.VectorStoreFileBatch>;
 }

+ 12 - 20
apps/app/src/features/openai/server/services/client-delegator/openai-client-delegator.ts

@@ -1,6 +1,7 @@
 import OpenAI from 'openai';
 import { type Uploadable } from 'openai/uploads';
 
+import type { VectorStoreScopeType } from '~/features/openai/server/models/vector-store';
 import { configManager } from '~/server/service/config-manager';
 
 import type { IOpenaiClientDelegator } from './interfaces';
@@ -10,50 +11,41 @@ export class OpenaiClientDelegator implements IOpenaiClientDelegator {
 
   private client: OpenAI;
 
-  private openaiVectorStoreId: string;
-
   constructor() {
     // Retrieve OpenAI related values from environment variables
     const apiKey = configManager.getConfig('crowi', 'openai:apiKey');
-    const vectorStoreId = configManager.getConfig('crowi', 'openai:vectorStoreId');
 
-    const isValid = [apiKey, vectorStoreId].every(value => value != null);
+    const isValid = [apiKey].every(value => value != null); // only the API key is required now; the vector store ID is no longer read from config
     if (!isValid) {
       throw new Error("Environment variables required to use OpenAI's API are not set");
     }
 
-    this.openaiVectorStoreId = vectorStoreId;
-
     // initialize client
     this.client = new OpenAI({ apiKey });
   }
 
-  async uploadFile(file: Uploadable): Promise<OpenAI.Files.FileObject> {
-    return this.client.files.create({ file, purpose: 'assistants' });
-  }
-
-  async createVectorStoreFileBatch(fileIds: string[]): Promise<OpenAI.Beta.VectorStores.FileBatches.VectorStoreFileBatch> {
-    return this.client.beta.vectorStores.fileBatches.create(this.openaiVectorStoreId, { file_ids: fileIds });
+  async createVectorStore(scopeType:VectorStoreScopeType): Promise<OpenAI.Beta.VectorStores.VectorStore> { // Create a vector store named `growi-vector-store-<scopeType>`.
+    return this.client.beta.vectorStores.create({ name: `growi-vector-store-${scopeType}` });
   }
 
-  async getVectorStoreFiles(): Promise<OpenAI.Beta.VectorStores.Files.VectorStoreFilesPage> {
-    return this.client.beta.vectorStores.files.list(this.openaiVectorStoreId);
+  async retrieveVectorStore(vectorStoreId: string): Promise<OpenAI.Beta.VectorStores.VectorStore> { // Fetch the vector store with the given ID.
+    return this.client.beta.vectorStores.retrieve(vectorStoreId);
   }
 
-  async deleteVectorStoreFiles(fileId: string): Promise<OpenAI.Beta.VectorStores.Files.VectorStoreFileDeleted> {
-    return this.client.beta.vectorStores.files.del(this.openaiVectorStoreId, fileId);
+  async uploadFile(file: Uploadable): Promise<OpenAI.Files.FileObject> { // Upload a single file with purpose 'assistants'.
+    return this.client.files.create({ file, purpose: 'assistants' });
   }
 
-  async getFileList(): Promise<OpenAI.Files.FileObjectsPage> {
-    return this.client.files.list();
+  async createVectorStoreFileBatch(vectorStoreId: string, fileIds: string[]): Promise<OpenAI.Beta.VectorStores.FileBatches.VectorStoreFileBatch> { // Create a file batch in the given vector store from the given file IDs.
+    return this.client.beta.vectorStores.fileBatches.create(vectorStoreId, { file_ids: fileIds });
   }
 
   async deleteFile(fileId: string): Promise<OpenAI.Files.FileDeleted> {
     return this.client.files.del(fileId);
   }
 
-  async uploadAndPoll(files: Uploadable[]): Promise<OpenAI.Beta.VectorStores.FileBatches.VectorStoreFileBatch> {
-    return this.client.beta.vectorStores.fileBatches.uploadAndPoll(this.openaiVectorStoreId, { files });
+  async uploadAndPoll(vectorStoreId: string, files: Uploadable[]): Promise<OpenAI.Beta.VectorStores.FileBatches.VectorStoreFileBatch> { // Delegates to fileBatches.uploadAndPoll for the given store. NOTE(review): uploadAndPoll was removed from IOpenaiClientDelegator in this commit — confirm this method is still needed.
+    return this.client.beta.vectorStores.fileBatches.uploadAndPoll(vectorStoreId, { files });
   }
 
 }

+ 35 - 5
apps/app/src/features/openai/server/services/openai.ts

@@ -7,16 +7,17 @@ import mongoose from 'mongoose';
 import type OpenAI from 'openai';
 import { toFile } from 'openai';
 
+import VectorStoreModel, { VectorStoreScopeType, type VectorStoreDocument } from '~/features/openai/server/models/vector-store';
+import VectorStoreFileRelationModel, {
+  type VectorStoreFileRelation,
+  prepareVectorStoreFileRelations,
+} from '~/features/openai/server/models/vector-store-file-relation';
 import type { PageDocument, PageModel } from '~/server/models/page';
 import { configManager } from '~/server/service/config-manager';
 import { createBatchStream } from '~/server/util/batch-stream';
 import loggerFactory from '~/utils/logger';
 
 import { OpenaiServiceTypes } from '../../interfaces/ai';
-import VectorStoreFileRelationModel, {
-  type VectorStoreFileRelation,
-  prepareVectorStoreFileRelations,
-} from '../models/vector-store-file-relation';
 
 
 import { getClient } from './client-delegator';
@@ -25,8 +26,10 @@ const BATCH_SIZE = 100;
 
 const logger = loggerFactory('growi:service:openai');
 
+let isVectorStoreForPublicScopeExist = false; // Module-level flag; set to true once the public-scope vector store has been retrieved through the client or newly created.
 
 export interface IOpenaiService {
+  getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument>;
   createVectorStoreFile(pages: PageDocument[]): Promise<void>;
   deleteVectorStoreFile(pageId: Types.ObjectId): Promise<void>;
   rebuildVectorStoreAll(): Promise<void>;
@@ -39,6 +42,32 @@ class OpenaiService implements IOpenaiService {
     return getClient({ openaiServiceType });
   }
 
+  /**
+   * Return the VectorStore document for the public scope, creating the
+   * underlying vector store when needed.
+   *
+   * - A document exists and the store was already confirmed: the document is
+   *   returned without calling the API.
+   * - A document exists but is not confirmed yet: the store is retrieved once.
+   *   The OpenAI SDK rejects with a 404 error (it does not resolve to null)
+   *   when the store is gone, so that case is caught and the stale document is
+   *   re-pointed to a freshly created store.
+   * - No document exists: a new store is created and recorded.
+   *
+   * @returns the document holding the ID of the public-scope vector store
+   * @throws any non-404 error raised by the client, and any persistence error
+   */
+  public async getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument> {
+    const vectorStoreDocument = await VectorStoreModel.findOne({ scorpeType: VectorStoreScopeType.PUBLIC });
+
+    if (vectorStoreDocument != null) {
+      if (isVectorStoreForPublicScopeExist) {
+        return vectorStoreDocument;
+      }
+
+      try {
+        await this.client.retrieveVectorStore(vectorStoreDocument.vectorStoreId);
+        isVectorStoreForPublicScopeExist = true;
+        return vectorStoreDocument;
+      }
+      catch (err: unknown) {
+        // Only a missing store may be replaced; auth, network or rate-limit failures must surface to the caller
+        const isNotFound = typeof err === 'object' && err != null && 'status' in err && err.status === 404;
+        if (!isNotFound) {
+          throw err;
+        }
+        logger.warn(`Vector store ${vectorStoreDocument.vectorStoreId} was not found; creating a new one`);
+      }
+    }
+
+    const newVectorStore = await this.client.createVectorStore(VectorStoreScopeType.PUBLIC);
+
+    // Reuse the stale document instead of inserting a second public-scope document;
+    // otherwise the findOne() above could keep returning the dead vector store ID
+    if (vectorStoreDocument != null) {
+      vectorStoreDocument.vectorStoreId = newVectorStore.id;
+      await vectorStoreDocument.save();
+      isVectorStoreForPublicScopeExist = true;
+      return vectorStoreDocument;
+    }
+
+    const newVectorStoreDocument = await VectorStoreModel.create({
+      vectorStoreId: newVectorStore.id,
+      scorpeType: VectorStoreScopeType.PUBLIC,
+    });
+
+    isVectorStoreForPublicScopeExist = true;
+
+    return newVectorStoreDocument;
+  }
+
   private async uploadFile(pageId: Types.ObjectId, body: string): Promise<OpenAI.Files.FileObject> {
     const file = await toFile(Readable.from(body), `${pageId}.md`);
     const uploadedFile = await this.client.uploadFile(file);
@@ -84,7 +113,8 @@ class OpenaiService implements IOpenaiService {
 
     try {
       // Create vector store file
-      const createVectorStoreFileBatchResponse = await this.client.createVectorStoreFileBatch(uploadedFileIds);
+      const vectorStore = await this.getOrCreateVectorStoreForPublicScope();
+      const createVectorStoreFileBatchResponse = await this.client.createVectorStoreFileBatch(vectorStore.vectorStoreId, uploadedFileIds);
       logger.debug('Create vector store file', createVectorStoreFileBatchResponse);
 
       // Save vector store file relation

+ 0 - 6
apps/app/src/server/service/config-loader.ts

@@ -806,12 +806,6 @@ const ENV_VAR_NAME_TO_CONFIG_INFO: Record<string, EnvConfig> = {
     type: ValueType.STRING,
     default: null,
   },
-  OPENAI_VECTOR_STORE_ID: {
-    ns: 'crowi',
-    key: 'openai:vectorStoreId',
-    type: ValueType.STRING,
-    default: null,
-  },
 };