openai.ts 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885
  1. import assert from 'node:assert';
  2. import { Readable, Transform } from 'stream';
  3. import { pipeline } from 'stream/promises';
  4. import type { Lang } from '@growi/core';
  5. import {
  6. PageGrant, getIdForRef, getIdStringForRef, isPopulated, type IUserHasId,
  7. } from '@growi/core';
  8. import { deepEquals } from '@growi/core/dist/utils';
  9. import { isGlobPatternPath } from '@growi/core/dist/utils/page-path-utils';
  10. import escapeStringRegexp from 'escape-string-regexp';
  11. import createError from 'http-errors';
  12. import mongoose, { type HydratedDocument, type Types } from 'mongoose';
  13. import { type OpenAI, toFile } from 'openai';
  14. import ExternalUserGroupRelation from '~/features/external-user-group/server/models/external-user-group-relation';
  15. import ThreadRelationModel, { type ThreadRelationDocument } from '~/features/openai/server/models/thread-relation';
  16. import VectorStoreModel, { type VectorStoreDocument } from '~/features/openai/server/models/vector-store';
  17. import VectorStoreFileRelationModel, {
  18. type VectorStoreFileRelation,
  19. prepareVectorStoreFileRelations,
  20. } from '~/features/openai/server/models/vector-store-file-relation';
  21. import type { PageDocument, PageModel } from '~/server/models/page';
  22. import UserGroupRelation from '~/server/models/user-group-relation';
  23. import { configManager } from '~/server/service/config-manager';
  24. import { createBatchStream } from '~/server/util/batch-stream';
  25. import loggerFactory from '~/utils/logger';
  26. import { OpenaiServiceTypes } from '../../interfaces/ai';
  27. import {
  28. type AccessibleAiAssistants, type AiAssistant, AiAssistantAccessScope, AiAssistantShareScope,
  29. } from '../../interfaces/ai-assistant';
  30. import AiAssistantModel, { type AiAssistantDocument } from '../models/ai-assistant';
  31. import { convertMarkdownToHtml } from '../utils/convert-markdown-to-html';
  32. import { getClient } from './client-delegator';
  33. // import { splitMarkdownIntoChunks } from './markdown-splitter/markdown-token-splitter';
  34. import { openaiApiErrorHandler } from './openai-api-error-handler';
  35. import { replaceAnnotationWithPageLink } from './replace-annotation-with-page-link';
  36. const { isDeepEquals } = deepEquals;
  37. const BATCH_SIZE = 100;
  38. const logger = loggerFactory('growi:service:openai');
  39. // const isVectorStoreForPublicScopeExist = false;
  40. type VectorStoreFileRelationsMap = Map<string, VectorStoreFileRelation>
  41. const convertPathPatternsToRegExp = (pagePathPatterns: string[]): Array<string | RegExp> => {
  42. return pagePathPatterns.map((pagePathPattern) => {
  43. if (isGlobPatternPath(pagePathPattern)) {
  44. const trimedPagePathPattern = pagePathPattern.replace('/*', '');
  45. const escapedPagePathPattern = escapeStringRegexp(trimedPagePathPattern);
  46. // https://regex101.com/r/x5KIZL/1
  47. return new RegExp(`^${escapedPagePathPattern}($|/)`);
  48. }
  49. return pagePathPattern;
  50. });
  51. };
  52. export interface IOpenaiService {
  53. getOrCreateThread(
  54. userId: string, vectorStoreRelation: VectorStoreDocument, threadId?: string, initialUserMessage?: string
  55. ): Promise<OpenAI.Beta.Threads.Thread | undefined>;
  56. getThreads(vectorStoreRelationId: string): Promise<ThreadRelationDocument[]>
  57. // getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument>;
  58. deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void>; // for CronJob
  59. deleteObsolatedVectorStoreRelations(): Promise<void> // for CronJob
  60. getMessageData(
  61. threadId: string, lang?: Lang, options?: { before?: string, after?: string, limit?: number }
  62. ): Promise<OpenAI.Beta.Threads.Messages.MessagesPage>;
  63. getVectorStoreRelation(aiAssistantId: string): Promise<VectorStoreDocument>
  64. getVectorStoreRelationsByPageIds(pageId: Types.ObjectId[]): Promise<VectorStoreDocument[]>;
  65. createVectorStoreFile(vectorStoreRelation: VectorStoreDocument, pages: PageDocument[]): Promise<void>;
  66. deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId): Promise<void>;
  67. deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise<void>;
  68. deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void>; // for CronJob
  69. // rebuildVectorStoreAll(): Promise<void>;
  70. // rebuildVectorStore(page: HydratedDocument<PageDocument>): Promise<void>;
  71. isAiAssistantUsable(aiAssistantId: string, user: IUserHasId): Promise<boolean>;
  72. updateVectorStore(page: HydratedDocument<PageDocument>): Promise<void>;
  73. createAiAssistant(data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument>;
  74. updateAiAssistant(aiAssistantId: string, data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument>;
  75. getAccessibleAiAssistants(user: IUserHasId): Promise<AccessibleAiAssistants>
  76. deleteAiAssistant(ownerId: string, aiAssistantId: string): Promise<AiAssistantDocument>
  77. }
  78. class OpenaiService implements IOpenaiService {
  /**
   * OpenAI client for the service type currently stored under the `openai:serviceType` config key.
   * The config is read on every access.
   */
  private get client() {
    return getClient({
      openaiServiceType: configManager.getConfig('openai:serviceType'),
    });
  }
  /**
   * Asks the chat model configured under `openai:assistantModel:chat` to produce a short
   * title for a thread from the user's first message.
   *
   * @param message the user's message the title is derived from
   * @returns the content of the first completion choice, or `null` when the completion
   *          carries no choice or no content
   */
  async generateThreadTitle(message: string): Promise<string | null> {
    const model = configManager.getConfig('openai:assistantModel:chat');

    // Join with a space so the instructions stay separate sentences in the prompt
    // (joining with '' glued them together as "...message.Respond in...").
    const systemMessage = [
      'Create a brief title (max 5 words) from your message.',
      'Respond in the same language the user uses in their input.',
      'Response should only contain the title.',
    ].join(' ');

    const threadTitleCompletion = await this.client.chatCompletion({
      model,
      messages: [
        { role: 'system', content: systemMessage },
        { role: 'user', content: message },
      ],
    });

    // Guard against an empty `choices` array instead of throwing a TypeError.
    return threadTitleCompletion.choices[0]?.message.content ?? null;
  }
  /**
   * Returns an existing thread or creates a new one.
   *
   * - With `threadId`: looks up the ThreadRelation document, retrieves the thread through the
   *   client and extends the relation's expiration. When retrieval fails, the relation is removed
   *   if the error handler classifies the failure as "not found"; the error is then rethrown.
   * - Without `threadId`: creates a thread for the vector store and stores a ThreadRelation
   *   document. When `initialUserMessage` is given, a title is generated from it first; a failure
   *   to generate the title is logged and does not prevent thread creation.
   *
   * @throws Error when the ThreadRelation document is missing or a client / DB call fails
   */
  async getOrCreateThread(
      userId: string, vectorStoreRelation: VectorStoreDocument, threadId?: string, initialUserMessage?: string,
  ): Promise<OpenAI.Beta.Threads.Thread> {
    if (threadId != null) {
      const existingRelation = await ThreadRelationModel.findOne({ threadId });
      if (existingRelation == null) {
        throw new Error('ThreadRelation document is not exists');
      }

      // Check if a thread entity exists
      // If the thread entity does not exist, the thread-relation document is deleted
      try {
        const existingThread = await this.client.retrieveThread(existingRelation.threadId);

        // Update expiration date if thread entity exists
        await existingRelation.updateThreadExpiration();

        return existingThread;
      }
      catch (err) {
        await openaiApiErrorHandler(err, { notFoundError: async() => { await existingRelation.remove() } });
        throw new Error(err);
      }
    }

    // No thread yet: optionally derive a title from the first message (best effort)
    let title: string | null = null;
    if (initialUserMessage != null) {
      try {
        title = await this.generateThreadTitle(initialUserMessage);
      }
      catch (err) {
        logger.error(err);
      }
    }

    try {
      const newThread = await this.client.createThread(vectorStoreRelation.vectorStoreId);
      await ThreadRelationModel.create({
        userId,
        threadId: newThread.id,
        vectorStore: vectorStoreRelation._id,
        title,
      });
      return newThread;
    }
    catch (err) {
      throw new Error(err);
    }
  }
  /**
   * Finds every ThreadRelation document whose `vectorStore` is the given vector store relation.
   *
   * @param vectorStoreRelationId id of the VectorStore document
   */
  async getThreads(vectorStoreRelationId: string): Promise<ThreadRelationDocument[]> {
    return ThreadRelationModel.find({ vectorStore: vectorStoreRelationId }).exec();
  }
  /**
   * Deletes expired threads through the client and then removes their ThreadRelation documents.
   * Intended to be run from a CronJob.
   *
   * A failure on one thread is logged and does not stop the loop. A thread the error handler
   * classifies as "not found" is treated as already deleted, so its relation is cleaned up too
   * instead of being retried on every run.
   *
   * @param limit maximum number of expired thread relations fetched for this run
   * @param apiCallInterval milliseconds to wait after each successful delete call
   */
  public async deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void> {
    const expiredThreadRelations = await ThreadRelationModel.getExpiredThreadRelations(limit);
    if (expiredThreadRelations == null) {
      return;
    }

    const deletedThreadIds: string[] = [];
    for await (const expiredThreadRelation of expiredThreadRelations) {
      try {
        const deleteThreadResponse = await this.client.deleteThread(expiredThreadRelation.threadId);
        logger.debug('Delete thread', deleteThreadResponse);
        deletedThreadIds.push(expiredThreadRelation.threadId);

        // sleep
        await new Promise(resolve => setTimeout(resolve, apiCallInterval));
      }
      catch (err) {
        // Same convention as deleteVectorStoreFile(): an entity that no longer exists counts as deleted
        await openaiApiErrorHandler(err, { notFoundError: async() => { deletedThreadIds.push(expiredThreadRelation.threadId) } });
        logger.error(err);
      }
    }

    await ThreadRelationModel.deleteMany({ threadId: { $in: deletedThreadIds } });
  }
  /**
   * Fetches the messages of a thread and passes every text content through
   * `replaceAnnotationWithPageLink` before returning the page.
   *
   * The `options` fields are optional, matching the signature declared in `IOpenaiService`.
   *
   * @param threadId id of the thread to read
   * @param lang language forwarded to `replaceAnnotationWithPageLink`
   * @param options paging options forwarded to the client as-is
   * @returns the messages page returned by the client
   *          (NOTE(review): presumably with annotations rewritten in place — confirm against the helper)
   */
  async getMessageData(
      threadId: string, lang?: Lang, options?: { before?: string, after?: string, limit?: number },
  ): Promise<OpenAI.Beta.Threads.Messages.MessagesPage> {
    const messages = await this.client.getMessages(threadId, options);

    // Plain arrays: a regular for...of is sufficient, the awaits stay sequential
    for (const message of messages.data) {
      for (const content of message.content) {
        if (content.type === 'text') {
          await replaceAnnotationWithPageLink(content, lang);
        }
      }
    }

    return messages;
  }
  187. // TODO: https://redmine.weseek.co.jp/issues/160332
  188. // public async getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument> {
  189. // const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ scopeType: VectorStoreScopeType.PUBLIC, isDeleted: false });
  190. // if (vectorStoreDocument != null && isVectorStoreForPublicScopeExist) {
  191. // return vectorStoreDocument;
  192. // }
  193. // if (vectorStoreDocument != null && !isVectorStoreForPublicScopeExist) {
  194. // try {
  195. // // Check if vector store entity exists
  196. // // If the vector store entity does not exist, the vector store document is deleted
  197. // await this.client.retrieveVectorStore(vectorStoreDocument.vectorStoreId);
  198. // isVectorStoreForPublicScopeExist = true;
  199. // return vectorStoreDocument;
  200. // }
  201. // catch (err) {
  202. // await oepnaiApiErrorHandler(err, { notFoundError: vectorStoreDocument.markAsDeleted });
  203. // throw new Error(err);
  204. // }
  205. // }
  206. // const newVectorStore = await this.client.createVectorStore(VectorStoreScopeType.PUBLIC);
  207. // const newVectorStoreDocument = await VectorStoreModel.create({
  208. // vectorStoreId: newVectorStore.id,
  209. // scopeType: VectorStoreScopeType.PUBLIC,
  210. // }) as VectorStoreDocument;
  211. // isVectorStoreForPublicScopeExist = true;
  212. // return newVectorStoreDocument;
  213. // }
  /**
   * Loads an AI assistant with its `vectorStore` populated and returns that vector store.
   *
   * @param aiAssistantId id of the AiAssistant document
   * @throws 404 http-error when the AiAssistant document does not exist
   */
  async getVectorStoreRelation(aiAssistantId: string): Promise<VectorStoreDocument> {
    // Pass the id itself (not a filter object) to findById, as isAiAssistantUsable() does
    const aiAssistant = await AiAssistantModel.findById(aiAssistantId).populate('vectorStore');
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    return aiAssistant.vectorStore as VectorStoreDocument;
  }
  /**
   * Returns the distinct, non-deleted VectorStore documents that are referenced by
   * VectorStoreFileRelation documents of the given pages.
   *
   * @param pageIds ids of the pages to look up
   */
  async getVectorStoreRelationsByPageIds(pageIds: Types.ObjectId[]): Promise<VectorStoreDocument[]> {
    // Keep only the file relations of the requested pages
    const matchRequestedPages = { $match: { page: { $in: pageIds } } };

    // Join each file relation with its VectorStore document
    const joinVectorStore = {
      $lookup: {
        from: 'vectorstores',
        localField: 'vectorStoreRelationId',
        foreignField: '_id',
        as: 'vectorStore',
      },
    };

    // One row per joined VectorStore document
    const flattenVectorStore = { $unwind: '$vectorStore' };

    // Drop vector stores that are marked as deleted
    const excludeDeleted = { $match: { 'vectorStore.isDeleted': false } };

    // Continue with the VectorStore document itself
    const promoteVectorStore = { $replaceRoot: { newRoot: '$vectorStore' } };

    // De-duplicate by _id, then restore the original document shape
    const groupById = { $group: { _id: '$_id', doc: { $first: '$$ROOT' } } };
    const restoreDocument = { $replaceRoot: { newRoot: '$doc' } };

    return VectorStoreFileRelationModel.aggregate<VectorStoreDocument>([
      matchRequestedPages,
      joinVectorStore,
      flattenVectorStore,
      excludeDeleted,
      promoteVectorStore,
      groupById,
      restoreDocument,
    ]).exec();
  }
  /**
   * Creates a vector store through the client and stores a VectorStore document holding its id.
   *
   * @param name name passed to the client when creating the vector store
   * @throws Error wrapping any failure of the client call or the document creation
   */
  private async createVectorStore(name: string): Promise<VectorStoreDocument> {
    try {
      const { id: vectorStoreId } = await this.client.createVectorStore(name);
      const createdDocument = await VectorStoreModel.create({ vectorStoreId });
      return createdDocument as VectorStoreDocument;
    }
    catch (err) {
      throw new Error(err);
    }
  }
  285. // TODO: https://redmine.weseek.co.jp/issues/160332
  286. // TODO: https://redmine.weseek.co.jp/issues/156643
  287. // private async uploadFileByChunks(pageId: Types.ObjectId, body: string, vectorStoreFileRelationsMap: VectorStoreFileRelationsMap) {
  288. // const chunks = await splitMarkdownIntoChunks(body, 'gpt-4o');
  289. // for await (const [index, chunk] of chunks.entries()) {
  290. // try {
  291. // const file = await toFile(Readable.from(chunk), `${pageId}-chunk-${index}.md`);
  292. // const uploadedFile = await this.client.uploadFile(file);
  293. // prepareVectorStoreFileRelations(pageId, uploadedFile.id, vectorStoreFileRelationsMap);
  294. // }
  295. // catch (err) {
  296. // logger.error(err);
  297. // }
  298. // }
  299. // }
  /**
   * Converts a page's markdown to HTML and uploads it through the client as `<pageId>.html`.
   *
   * @param pageId id of the page; used for the uploaded file name
   * @param pagePath path of the page, forwarded to the markdown converter
   * @param revisionBody markdown body to convert
   * @returns the file object returned by the client
   */
  private async uploadFile(pageId: Types.ObjectId, pagePath: string, revisionBody: string): Promise<OpenAI.Files.FileObject> {
    const html = await convertMarkdownToHtml({ pagePath, revisionBody });
    const htmlFile = await toFile(Readable.from(html), `${pageId}.html`);
    return this.client.uploadFile(htmlFile);
  }
  /**
   * Deletes the vector store entity through the client and marks its VectorStore document as deleted.
   * Does nothing when no non-deleted VectorStore document with the given id exists.
   *
   * When the client call fails, the document is still marked as deleted if the error handler
   * classifies the failure as "not found"; the error is rethrown in every case.
   *
   * @param vectorStoreRelationId id of the VectorStore document
   * @throws Error wrapping any failure of the delete call or of marking the document
   */
  private async deleteVectorStore(vectorStoreRelationId: string): Promise<void> {
    const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ _id: vectorStoreRelationId, isDeleted: false });
    if (vectorStoreDocument == null) {
      return;
    }

    try {
      await this.client.deleteVectorStore(vectorStoreDocument.vectorStoreId);
      await vectorStoreDocument.markAsDeleted();
    }
    catch (err) {
      // Call the method on the document: a bare `vectorStoreDocument.markAsDeleted` reference loses its `this`
      await openaiApiErrorHandler(err, { notFoundError: async() => { await vectorStoreDocument.markAsDeleted() } });
      throw new Error(err);
    }
  }
  /**
   * Uploads the given pages as files and attaches the uploaded files to the vector store.
   *
   * Only pages with `grant === PageGrant.GRANT_PUBLIC` and a non-empty revision body are uploaded.
   * Upload failures of individual pages are logged and do not abort the others. When saving the
   * relations or attaching the files fails, the files of all given pages are deleted again.
   *
   * @param vectorStoreRelation VectorStore document the files are attached to
   * @param pages pages to upload; at most BATCH_SIZE entries
   * @throws AssertionError when more than BATCH_SIZE pages are given
   */
  async createVectorStoreFile(vectorStoreRelation: VectorStoreDocument, pages: Array<HydratedDocument<PageDocument>>): Promise<void> {
    // Validate the batch size BEFORE any upload is started, so a violation cannot leave
    // in-flight uploads (and their rejections) behind.
    assert(pages.length <= BATCH_SIZE, 'workers.length must be less than or equal to BATCH_SIZE');

    // const vectorStore = await this.getOrCreateVectorStoreForPublicScope();
    const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();

    const processUploadFile = async(page: HydratedDocument<PageDocument>): Promise<void> => {
      if (page._id == null || page.grant !== PageGrant.GRANT_PUBLIC || page.revision == null) {
        return;
      }

      // Use the already populated revision when it has a body; otherwise populate it on demand
      let revisionBody: string | undefined;
      if (isPopulated(page.revision) && page.revision.body.length > 0) {
        revisionBody = page.revision.body;
      }
      else {
        const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
        if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
          revisionBody = pagePopulatedToShowRevision.revision.body;
        }
      }

      if (revisionBody == null) {
        return;
      }

      const uploadedFile = await this.uploadFile(page._id, page.path, revisionBody);
      prepareVectorStoreFileRelations(vectorStoreRelation._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
    };

    // Upload all pages concurrently and wait for every upload to settle
    const fileUploadResult = await Promise.allSettled(pages.map(processUploadFile));
    fileUploadResult.forEach((result) => {
      if (result.status === 'rejected') {
        logger.error(result.reason);
      }
    });

    const vectorStoreFileRelations = Array.from(vectorStoreFileRelationsMap.values());
    const uploadedFileIds = vectorStoreFileRelations.flatMap(data => data.fileIds);

    if (uploadedFileIds.length === 0) {
      return;
    }

    const pageIds = pages.map(page => page._id);

    try {
      // Save vector store file relation
      await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(vectorStoreFileRelations);

      // Create vector store file
      const createVectorStoreFileBatchResponse = await this.client.createVectorStoreFileBatch(vectorStoreRelation.vectorStoreId, uploadedFileIds);
      logger.debug('Create vector store file', createVectorStoreFileBatchResponse);

      // Set isAttachedToVectorStore: true when the uploaded file is attached to VectorStore
      await VectorStoreFileRelationModel.markAsAttachedToVectorStore(pageIds);
    }
    catch (err) {
      logger.error(err);

      // Delete all uploaded files if createVectorStoreFileBatch fails
      for (const pageId of pageIds) {
        await this.deleteVectorStoreFile(vectorStoreRelation._id, pageId);
      }
    }
  }
  370. // Deletes all VectorStore documents that are marked as deleted (isDeleted: true) and have no associated VectorStoreFileRelation documents
  /**
   * Removes VectorStore documents that are marked as deleted and are no longer referenced by
   * any VectorStoreFileRelation document. Intended to be run from a CronJob.
   *
   * Deleted vector stores that still have file relations are kept.
   */
  async deleteObsolatedVectorStoreRelations(): Promise<void> {
    const deletedVectorStoreRelations = await VectorStoreModel.find({ isDeleted: true });
    if (deletedVectorStoreRelations.length === 0) {
      return;
    }
    const deletedVectorStoreRelationIds = deletedVectorStoreRelations.map(deletedVectorStoreRelation => deletedVectorStoreRelation._id);

    // Which of the deleted vector stores are still referenced by at least one file relation?
    const referencedVectorStoreRelations = await VectorStoreFileRelationModel.aggregate<{ _id: Types.ObjectId }>([
      { $match: { vectorStoreRelationId: { $in: deletedVectorStoreRelationIds } } },
      { $group: { _id: '$vectorStoreRelationId' } },
    ]);
    // The aggregation yields `{ _id }` rows, not bare ObjectIds
    const referencedVectorStoreRelationIds = referencedVectorStoreRelations.map(row => row._id);

    // No early return when nothing is referenced: that is exactly the case in which every
    // deleted vector store can be removed. `$in` limits the removal to the documents checked above.
    await VectorStoreModel.deleteMany({
      _id: { $in: deletedVectorStoreRelationIds, $nin: referencedVectorStoreRelationIds },
      isDeleted: true,
    });
  }
  /**
   * Deletes the uploaded files of one page in one vector store and updates the
   * VectorStoreFileRelation document accordingly.
   *
   * Every file id of the relation is deleted through the client. A file the error handler
   * classifies as "not found" counts as deleted. When all files are gone the relation document
   * is removed; otherwise it is saved with the file ids that could not be deleted.
   *
   * @param vectorStoreRelationId id of the VectorStore document
   * @param pageId id of the page whose files are deleted
   * @param apiCallInterval optional wait in milliseconds after each successful delete call
   */
  async deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId, apiCallInterval?: number): Promise<void> {
    const fileRelation = await VectorStoreFileRelationModel.findOne({ vectorStoreRelationId, page: pageId });
    if (fileRelation == null) {
      return;
    }

    const removedFileIds = new Set<string>();
    for await (const fileId of fileRelation.fileIds) {
      try {
        const response = await this.client.deleteFile(fileId);
        logger.debug('Delete vector store file', response);
        removedFileIds.add(fileId);

        if (apiCallInterval != null) {
          // sleep
          await new Promise(resolve => setTimeout(resolve, apiCallInterval));
        }
      }
      catch (err) {
        await openaiApiErrorHandler(err, { notFoundError: async() => { removedFileIds.add(fileId) } });
        logger.error(err);
      }
    }

    const remainingFileIds = fileRelation.fileIds.filter(fileId => !removedFileIds.has(fileId));

    if (remainingFileIds.length > 0) {
      // Keep track of the files that still have to be deleted
      fileRelation.fileIds = remainingFileIds;
      await fileRelation.save();
      return;
    }

    await fileRelation.remove();
  }
  /**
   * Deletes the vector store files of the given pages from every vector store that
   * references at least one of them.
   *
   * Pages are processed one after another; for each page the vector stores are processed
   * concurrently. A failed deletion is logged and does not stop the remaining ones.
   *
   * @param pageIds ids of the pages whose files are deleted
   */
  async deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise<void> {
    const vectorStoreRelations = await this.getVectorStoreRelationsByPageIds(pageIds);
    if (vectorStoreRelations == null || vectorStoreRelations.length === 0) {
      return;
    }

    for (const pageId of pageIds) {
      const deleteResults = await Promise.allSettled(
        vectorStoreRelations.map(vectorStoreRelation => this.deleteVectorStoreFile(vectorStoreRelation._id, pageId)),
      );

      // Do not swallow failures silently
      deleteResults.forEach((result) => {
        if (result.status === 'rejected') {
          logger.error(result.reason);
        }
      });
    }
  }
  /**
   * Deletes files that still belong to vector stores marked as deleted.
   * Intended to be run from a CronJob.
   *
   * @param limit maximum number of VectorStoreFileRelation documents processed in this run
   * @param apiCallInterval wait in milliseconds forwarded to deleteVectorStoreFile()
   */
  async deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void> {
    // Vector stores that are marked as deleted
    const deletedVectorStores = await VectorStoreModel.find({ isDeleted: true });
    if (deletedVectorStores.length === 0) {
      return;
    }

    // File relations that still point to one of those vector stores
    const deletedVectorStoreIds = deletedVectorStores.map(vectorStore => vectorStore._id);
    const obsoleteFileRelations = await VectorStoreFileRelationModel
      .find({ vectorStoreRelationId: { $in: deletedVectorStoreIds } })
      .limit(limit);
    if (obsoleteFileRelations.length === 0) {
      return;
    }

    // Delete them one by one; a failure is logged and the next relation is processed
    for await (const fileRelation of obsoleteFileRelations) {
      try {
        await this.deleteVectorStoreFile(fileRelation.vectorStoreRelationId, fileRelation.page, apiCallInterval);
      }
      catch (err) {
        logger.error(err);
      }
    }
  }
  453. // TODO: https://redmine.weseek.co.jp/issues/160332
  454. // async rebuildVectorStoreAll() {
  455. // await this.deleteVectorStore(VectorStoreScopeType.PUBLIC);
  456. // // Create all public pages VectorStoreFile
  457. // const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');
  458. // const pagesStream = Page.find({ grant: PageGrant.GRANT_PUBLIC }).populate('revision').cursor({ batch_size: BATCH_SIZE });
  459. // const batchStrem = createBatchStream(BATCH_SIZE);
  460. // const createVectorStoreFile = this.createVectorStoreFile.bind(this);
  461. // const createVectorStoreFileStream = new Transform({
  462. // objectMode: true,
  463. // async transform(chunk: HydratedDocument<PageDocument>[], encoding, callback) {
  464. // await createVectorStoreFile(chunk);
  465. // this.push(chunk);
  466. // callback();
  467. // },
  468. // });
  469. // await pipeline(pagesStream, batchStrem, createVectorStoreFileStream);
  470. // }
  /**
   * Refreshes the vector store files of a page: for every vector store that references the
   * page, the existing files are deleted and the page is uploaded again.
   *
   * The vector stores are processed concurrently. The returned promise settles only after all
   * of them are done; a failure for one vector store is logged and does not affect the others.
   *
   * @param page the updated page
   */
  async updateVectorStore(page: HydratedDocument<PageDocument>): Promise<void> {
    const vectorStoreRelations = await this.getVectorStoreRelationsByPageIds([page._id]);

    // Array.prototype.forEach does not await async callbacks, which left the work unawaited
    // and its rejections unhandled. Collect the promises and wait for them instead.
    const results = await Promise.allSettled(vectorStoreRelations.map(async(vectorStoreRelation) => {
      await this.deleteVectorStoreFile(vectorStoreRelation._id, page._id);
      await this.createVectorStoreFile(vectorStoreRelation, [page]);
    }));

    results.forEach((result) => {
      if (result.status === 'rejected') {
        logger.error(result.reason);
      }
    });
  }
  /**
   * Streams the pages matching `conditions` (with `revision` populated) in batches of
   * BATCH_SIZE and creates vector store files for each batch.
   *
   * @param vectorStoreRelation VectorStore document the files are attached to
   * @param conditions filter used to select the pages
   * @throws rejects when the cursor fails or a batch makes createVectorStoreFile() throw
   */
  private async createVectorStoreFileWithStream(vectorStoreRelation: VectorStoreDocument, conditions: mongoose.FilterQuery<PageDocument>): Promise<void> {
    const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');

    const pagesStream = Page.find({ ...conditions })
      .populate('revision')
      .cursor({ batchSize: BATCH_SIZE });
    const batchStream = createBatchStream(BATCH_SIZE);

    const createVectorStoreFile = this.createVectorStoreFile.bind(this);
    const createVectorStoreFileStream = new Transform({
      objectMode: true,
      async transform(chunk: HydratedDocument<PageDocument>[], encoding, callback) {
        try {
          logger.debug('Search results of page paths', chunk.map(page => page.path));
          await createVectorStoreFile(vectorStoreRelation, chunk);
          // Deliberately nothing is pushed downstream: this is the last stream of the pipeline
          // and nobody reads its readable side. Pushing every batch would fill the readable
          // buffer and stall the pipeline once the high-water mark is reached.
          callback();
        }
        catch (error) {
          callback(error);
        }
      },
    });

    await pipeline(pagesStream, batchStream, createVectorStoreFileStream);
  }
  /**
   * Builds the page filter that selects the pages an AI assistant's vector store is created from.
   *
   * In every scope, pages with `GRANT_RESTRICTED` are included only when their exact path is
   * listed (not via a glob pattern). In addition, depending on `accessScope`:
   * - PUBLIC_ONLY: public pages matching the path patterns
   * - GROUPS: public pages, and user-group pages granted to one of `grantedGroupsForAccessScope`
   * - OWNER: public pages, user-group pages of the owner's groups, and pages granted to the owner
   *
   * @throws Error when accessScope is GROUPS without granted groups, or when accessScope is unknown
   */
  private async createConditionForCreateVectorStoreFile(
      owner: AiAssistant['owner'],
      accessScope: AiAssistant['accessScope'],
      grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
      pagePathPatterns: AiAssistant['pagePathPatterns'],
  ): Promise<mongoose.FilterQuery<PageDocument>> {
    const pathMatchers = convertPathPatternsToRegExp(pagePathPatterns);

    // Include pages in search targets when their paths with 'Anyone with the link' permission are directly specified instead of using glob pattern
    const directlySpecifiedPaths = pagePathPatterns.filter(pattern => !isGlobPatternPath(pattern));
    const restrictedPagesCondition: mongoose.FilterQuery<PageDocument> = {
      grant: PageGrant.GRANT_RESTRICTED,
      path: { $in: directlySpecifiedPaths },
    };

    switch (accessScope) {
      case AiAssistantAccessScope.PUBLIC_ONLY: {
        return {
          $or: [
            restrictedPagesCondition,
            {
              grant: PageGrant.GRANT_PUBLIC,
              path: { $in: pathMatchers },
            },
          ],
        };
      }

      case AiAssistantAccessScope.GROUPS: {
        if (grantedGroupsForAccessScope == null || grantedGroupsForAccessScope.length === 0) {
          throw new Error('grantedGroups is required when accessScope is GROUPS');
        }

        const grantedGroupIds = grantedGroupsForAccessScope.map(group => getIdForRef(group.item).toString());

        return {
          $or: [
            restrictedPagesCondition,
            {
              grant: { $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP] },
              path: { $in: pathMatchers },
              $or: [
                { 'grantedGroups.item': { $in: grantedGroupIds } },
                { grant: PageGrant.GRANT_PUBLIC },
              ],
            },
          ],
        };
      }

      case AiAssistantAccessScope.OWNER: {
        const internalGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
        const externalGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
        const ownerGroupIds = [...internalGroupIds, ...externalGroupIds].map(group => group.toString());

        return {
          $or: [
            restrictedPagesCondition,
            {
              grant: { $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP, PageGrant.GRANT_OWNER] },
              path: { $in: pathMatchers },
              $or: [
                { 'grantedGroups.item': { $in: ownerGroupIds } },
                { grantedUsers: { $in: [getIdForRef(owner)] } },
                { grant: PageGrant.GRANT_PUBLIC },
              ],
            },
          ],
        };
      }

      default:
        throw new Error('Invalid accessScope value');
    }
  }
  /**
   * Validates the group settings of an AI assistant.
   *
   * Rules enforced:
   * - `grantedGroupsForShareScope` may only be given when `shareScope` is GROUPS
   * - `grantedGroupsForAccessScope` may only be given when `accessScope` is GROUPS
   * - every granted group (share and access) must be a group the owner belongs to
   *
   * @throws Error describing the first rule that is violated
   */
  private async validateGrantedUserGroupsForAiAssistant(
      owner: AiAssistant['owner'],
      shareScope: AiAssistant['shareScope'],
      accessScope: AiAssistant['accessScope'],
      grantedGroupsForShareScope: AiAssistant['grantedGroupsForShareScope'],
      grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
  ) {
    // Groups must not be given for a scope that is not group based
    const hasUnexpectedShareGroups = shareScope !== AiAssistantShareScope.GROUPS && grantedGroupsForShareScope != null;
    if (hasUnexpectedShareGroups) {
      throw new Error('grantedGroupsForShareScope is specified when shareScope is not “groups”.');
    }

    const hasUnexpectedAccessGroups = accessScope !== AiAssistantAccessScope.GROUPS && grantedGroupsForAccessScope != null;
    if (hasUnexpectedAccessGroups) {
      throw new Error('grantedGroupsForAccessScope is specified when accsessScope is not “groups”.');
    }

    // All groups (internal and external) the owner belongs to
    const internalGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
    const externalGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
    const ownerUserGroupIds = [...internalGroupIds, ...externalGroupIds].map(group => group.toString());

    // The owner must belong to every group given for the share scope
    if (grantedGroupsForShareScope != null && grantedGroupsForShareScope.length > 0) {
      const includesForeignGroup = grantedGroupsForShareScope
        .map(group => getIdForRef(group.item).toString())
        .some(groupId => !ownerUserGroupIds.includes(groupId));
      if (includesForeignGroup) {
        throw new Error('A userGroup to which the owner does not belong is specified in grantedGroupsForShareScope');
      }
    }

    // The owner must belong to every group given for the access scope
    if (grantedGroupsForAccessScope != null && grantedGroupsForAccessScope.length > 0) {
      const includesForeignGroup = grantedGroupsForAccessScope
        .map(group => getIdForRef(group.item).toString())
        .some(groupId => !ownerUserGroupIds.includes(groupId));
      if (includesForeignGroup) {
        throw new Error('A userGroup to which the owner does not belong is specified in grantedGroupsForAccessScope');
      }
    }
  }
  /**
   * Tells whether the given user may use the given AI assistant.
   *
   * - shareScope PUBLIC_ONLY: everyone
   * - shareScope OWNER, or SAME_AS_ACCESS_SCOPE with accessScope OWNER: the owner only
   * - shareScope GROUPS: members of a group in `grantedGroupsForShareScope`
   * - SAME_AS_ACCESS_SCOPE with accessScope GROUPS: members of a group in `grantedGroupsForAccessScope`
   * - anything else: not usable
   *
   * @param aiAssistantId id of the AiAssistant document
   * @param user the user to check
   * @throws 404 http-error when the AiAssistant document does not exist
   */
  async isAiAssistantUsable(aiAssistantId: string, user: IUserHasId): Promise<boolean> {
    const aiAssistant = await AiAssistantModel.findById(aiAssistantId);

    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    const isOwner = getIdStringForRef(aiAssistant.owner) === getIdStringForRef(user._id);

    if (aiAssistant.shareScope === AiAssistantShareScope.PUBLIC_ONLY) {
      return true;
    }

    if ((aiAssistant.shareScope === AiAssistantShareScope.OWNER) && isOwner) {
      return true;
    }

    const isSameAsAccessScope = aiAssistant.shareScope === AiAssistantShareScope.SAME_AS_ACCESS_SCOPE;

    if (isSameAsAccessScope && (aiAssistant.accessScope === AiAssistantAccessScope.OWNER) && isOwner) {
      return true;
    }

    const isSharedWithGroups = aiAssistant.shareScope === AiAssistantShareScope.GROUPS;
    const isSameAsGroupsAccessScope = isSameAsAccessScope && (aiAssistant.accessScope === AiAssistantAccessScope.GROUPS);

    if (isSharedWithGroups || isSameAsGroupsAccessScope) {
      const userGroupIds = [
        ...(await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
        ...(await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
      ].map(group => group.toString());

      // With SAME_AS_ACCESS_SCOPE the groups live in grantedGroupsForAccessScope:
      // validateGrantedUserGroupsForAiAssistant() rejects grantedGroupsForShareScope unless
      // shareScope is GROUPS, so reading the share-scope groups here would always deny access.
      const grantedGroupIds = isSharedWithGroups
        ? (aiAssistant.grantedGroupsForShareScope?.map(group => getIdStringForRef(group.item)) ?? [])
        : (aiAssistant.grantedGroupsForAccessScope?.map(group => getIdStringForRef(group.item)) ?? []);

      return userGroupIds.some(userGroupId => grantedGroupIds.includes(userGroupId));
    }

    // NOTE(review): SAME_AS_ACCESS_SCOPE combined with accessScope PUBLIC_ONLY ends up here and is
    // denied; confirm whether that combination should be usable by everyone.
    return false;
  }
  /**
   * Create an AiAssistant document together with a new VectorStore.
   *
   * Steps, in order:
   * 1. validate the granted user groups against the owner
   * 2. build the conditions used to create the VectorStore files
   * 3. create the VectorStore and the AiAssistant document referring to it
   * 4. start creating the VectorStore files without waiting for completion
   *
   * @param data AiAssistant attributes except `vectorStore`
   * @returns the created AiAssistant document
   */
  async createAiAssistant(data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument> {
    const {
      owner,
      name,
      shareScope,
      accessScope,
      grantedGroupsForShareScope,
      grantedGroupsForAccessScope,
      pagePathPatterns,
    } = data;

    await this.validateGrantedUserGroupsForAiAssistant(
      owner, shareScope, accessScope, grantedGroupsForShareScope, grantedGroupsForAccessScope,
    );

    const vectorStoreFileConditions = await this.createConditionForCreateVectorStoreFile(
      owner, accessScope, grantedGroupsForAccessScope, pagePathPatterns,
    );

    const vectorStore = await this.createVectorStore(name);
    const createdAiAssistant = await AiAssistantModel.create({ ...data, vectorStore });

    // Intentionally not awaited: the files are created after this method returns
    this.createVectorStoreFileWithStream(vectorStore, vectorStoreFileConditions);

    return createdAiAssistant;
  }
  /**
   * Update an AiAssistant owned by `data.owner`.
   *
   * The VectorStore is rebuilt (old one deleted, new one created, file creation
   * started without awaiting) only when `accessScope`, `pagePathPatterns` or the
   * ids in `grantedGroupsForAccessScope` differ from the stored document.
   * Otherwise the existing `vectorStore` reference is kept.
   *
   * @param aiAssistantId id of the AiAssistant document to update
   * @param data new AiAssistant attributes except `vectorStore`
   * @returns the saved AiAssistant document
   * @throws the error built by `createError(404, ...)` when no document matches the owner and id
   */
  async updateAiAssistant(aiAssistantId: string, data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument> {
    const targetAiAssistant = await AiAssistantModel.findOne({ owner: data.owner, _id: aiAssistantId });
    if (targetAiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    await this.validateGrantedUserGroupsForAiAssistant(
      data.owner,
      data.shareScope,
      data.accessScope,
      data.grantedGroupsForShareScope,
      data.grantedGroupsForAccessScope,
    );

    // Compare granted groups by their id strings (requested vs. stored)
    const requestedAccessGroupIds = data.grantedGroupsForAccessScope?.map(group => getIdStringForRef(group.item)) ?? [];
    const storedAccessGroupIds = targetAiAssistant.grantedGroupsForAccessScope?.map(group => getIdStringForRef(group.item)) ?? [];

    // The VectorStore can be reused only when nothing that determines its contents has changed
    const canReuseVectorStore = data.accessScope === targetAiAssistant.accessScope
      && isDeepEquals(data.pagePathPatterns, targetAiAssistant.pagePathPatterns)
      && isDeepEquals(requestedAccessGroupIds, storedAccessGroupIds);

    let rebuiltVectorStore: VectorStoreDocument | undefined;
    if (!canReuseVectorStore) {
      const vectorStoreFileConditions = await this.createConditionForCreateVectorStoreFile(
        data.owner,
        data.accessScope,
        data.grantedGroupsForAccessScope,
        data.pagePathPatterns,
      );

      // Drop the obsolete VectorStore, then create its replacement
      await this.deleteVectorStore(getIdStringForRef(targetAiAssistant.vectorStore));
      rebuiltVectorStore = await this.createVectorStore(data.name);

      // Intentionally not awaited: the files are created after this method returns
      this.createVectorStoreFileWithStream(rebuiltVectorStore, vectorStoreFileConditions);
    }

    targetAiAssistant.set({
      ...data,
      vectorStore: rebuiltVectorStore ?? targetAiAssistant.vectorStore,
    });

    return targetAiAssistant.save();
  }
  /**
   * List the AiAssistants the given user can access, split into the user's own
   * assistants and assistants shared by other owners.
   *
   * An assistant owned by someone else is included when its effective share
   * scope is public, or is group-restricted and the user is related to one of
   * the granted groups. When `shareScope` is SAME_AS_ACCESS_SCOPE the effective
   * scope and groups are taken from `accessScope` / `grantedGroupsForAccessScope`.
   *
   * `grantedGroupsForShareScope.item` and `grantedGroupsForAccessScope.item`
   * are populated on the returned documents.
   *
   * @param user user whose accessible assistants are listed
   * @returns `myAiAssistants` (owned by the user) and `teamAiAssistants` (owned by others)
   */
  async getAccessibleAiAssistants(user: IUserHasId): Promise<AccessibleAiAssistants> {
    const userGroupIds = [
      ...(await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
      ...(await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
    ];

    const assistants = await AiAssistantModel.find({
      $or: [
        // Case 1: Assistants owned by the user
        { owner: user },

        // Case 2: Public assistants owned by others
        {
          $and: [
            { owner: { $ne: user } },
            {
              $or: [
                { shareScope: AiAssistantShareScope.PUBLIC_ONLY },
                {
                  shareScope: AiAssistantShareScope.SAME_AS_ACCESS_SCOPE,
                  accessScope: AiAssistantAccessScope.PUBLIC_ONLY,
                },
              ],
            },
          ],
        },

        // Case 3: Group-restricted assistants where user is in granted groups
        {
          $and: [
            { owner: { $ne: user } },
            {
              $or: [
                {
                  shareScope: AiAssistantShareScope.GROUPS,
                  'grantedGroupsForShareScope.item': { $in: userGroupIds },
                },
                {
                  shareScope: AiAssistantShareScope.SAME_AS_ACCESS_SCOPE,
                  accessScope: AiAssistantAccessScope.GROUPS,
                  'grantedGroupsForAccessScope.item': { $in: userGroupIds },
                },
              ],
            },
          ],
        },
      ],
    })
      .populate('grantedGroupsForShareScope.item')
      .populate('grantedGroupsForAccessScope.item');

    // Split the result by ownership in a single pass
    const userId = getIdStringForRef(user._id);
    const myAiAssistants: typeof assistants = [];
    const teamAiAssistants: typeof assistants = [];
    for (const assistant of assistants) {
      const isMine = getIdStringForRef(assistant.owner) === userId;
      (isMine ? myAiAssistants : teamAiAssistants).push(assistant);
    }

    return { myAiAssistants, teamAiAssistants };
  }
  /**
   * Delete an AiAssistant owned by the given owner.
   *
   * `deleteVectorStore` is called with the id of the assistant's `vectorStore`
   * reference before the AiAssistant document itself is removed.
   *
   * @param ownerId id of the owner; the document must match both owner and id
   * @param aiAssistantId id of the AiAssistant document to delete
   * @returns the removed AiAssistant document
   * @throws the error built by `createError(404, ...)` when no document matches the owner and id
   */
  async deleteAiAssistant(ownerId: string, aiAssistantId: string): Promise<AiAssistantDocument> {
    const targetAiAssistant = await AiAssistantModel.findOne({ owner: ownerId, _id: aiAssistantId });
    if (targetAiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    await this.deleteVectorStore(getIdStringForRef(targetAiAssistant.vectorStore));

    return targetAiAssistant.remove();
  }
  736. }
  // Lazily created, module-wide OpenaiService instance
  let instance: OpenaiService;

  /**
   * Return the shared OpenaiService, creating it on first use.
   *
   * The service is created only when the 'app:aiEnabled' config is truthy and
   * the 'openai:serviceType' config is one of `OpenaiServiceTypes`.
   *
   * @returns the shared service, or undefined when no instance exists and the
   *          config does not allow creating one
   */
  export const getOpenaiService = (): IOpenaiService | undefined => {
    if (instance == null) {
      const isAiEnabled = configManager.getConfig('app:aiEnabled');
      const serviceType = configManager.getConfig('openai:serviceType');

      const isSupportedServiceType = serviceType != null && OpenaiServiceTypes.includes(serviceType);
      if (!isAiEnabled || !isSupportedServiceType) {
        return undefined;
      }

      instance = new OpenaiService();
    }

    return instance;
  };