openai.ts 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918
  1. import assert from 'node:assert';
  2. import { Readable, Transform } from 'stream';
  3. import { pipeline } from 'stream/promises';
  4. import type { IUser, Ref, Lang } from '@growi/core';
  5. import {
  6. PageGrant, getIdForRef, getIdStringForRef, isPopulated, type IUserHasId,
  7. } from '@growi/core';
  8. import { deepEquals } from '@growi/core/dist/utils';
  9. import { isGlobPatternPath } from '@growi/core/dist/utils/page-path-utils';
  10. import escapeStringRegexp from 'escape-string-regexp';
  11. import createError from 'http-errors';
  12. import mongoose, { type HydratedDocument, type Types } from 'mongoose';
  13. import { type OpenAI, toFile } from 'openai';
  14. import ExternalUserGroupRelation from '~/features/external-user-group/server/models/external-user-group-relation';
  15. import ThreadRelationModel, { type ThreadRelationDocument } from '~/features/openai/server/models/thread-relation';
  16. import VectorStoreModel, { type VectorStoreDocument } from '~/features/openai/server/models/vector-store';
  17. import VectorStoreFileRelationModel, {
  18. type VectorStoreFileRelation,
  19. prepareVectorStoreFileRelations,
  20. } from '~/features/openai/server/models/vector-store-file-relation';
  21. import type { PageDocument, PageModel } from '~/server/models/page';
  22. import UserGroupRelation from '~/server/models/user-group-relation';
  23. import { configManager } from '~/server/service/config-manager';
  24. import { createBatchStream } from '~/server/util/batch-stream';
  25. import loggerFactory from '~/utils/logger';
  26. import { OpenaiServiceTypes } from '../../interfaces/ai';
  27. import {
  28. type AccessibleAiAssistants, type AiAssistant, AiAssistantAccessScope, AiAssistantShareScope,
  29. } from '../../interfaces/ai-assistant';
  30. import AiAssistantModel, { type AiAssistantDocument } from '../models/ai-assistant';
  31. import { convertMarkdownToHtml } from '../utils/convert-markdown-to-html';
  32. import { getClient } from './client-delegator';
  33. // import { splitMarkdownIntoChunks } from './markdown-splitter/markdown-token-splitter';
  34. import { openaiApiErrorHandler } from './openai-api-error-handler';
  35. import { replaceAnnotationWithPageLink } from './replace-annotation-with-page-link';
  // Pull the `isDeepEquals` helper out of the `deepEquals` utility object exported by @growi/core.
  const { isDeepEquals } = deepEquals;

  /**
   * Number of pages handled per batch.
   * Used as the Mongo cursor batch size, as the batch stream size, and as the upper bound
   * asserted by `createVectorStoreFile`.
   */
  const BATCH_SIZE = 100;

  // Logger scoped to this service
  const logger = loggerFactory('growi:service:openai');

  // const isVectorStoreForPublicScopeExist = false;

  /** Accumulator filled by `prepareVectorStoreFileRelations` while files are being uploaded. */
  type VectorStoreFileRelationsMap = Map<string, VectorStoreFileRelation>;
  /**
   * Converts page path patterns into values that can be used in a MongoDB `$in` condition on `path`.
   *
   * - A pattern recognized by `isGlobPatternPath` is turned into a RegExp that matches the
   *   base path itself as well as every path below it.
   * - Any other pattern is returned unchanged and therefore matches by exact string equality.
   *
   * @param pagePathPatterns page paths, optionally ending with the glob suffix
   * @returns one entry per input pattern, in the same order
   */
  const convertPathPatternsToRegExp = (pagePathPatterns: string[]): Array<string | RegExp> => {
    return pagePathPatterns.map((pagePathPattern) => {
      if (!isGlobPatternPath(pagePathPattern)) {
        return pagePathPattern;
      }

      // Strip only the TRAILING glob suffix.
      // `String.prototype.replace` with a string pattern removes the first occurrence, which would
      // corrupt a base path that itself contains the two characters "/*".
      // NOTE(review): assumes a glob pattern always ends with the suffix — confirm against isGlobPatternPath
      const basePath = pagePathPattern.replace(/\/\*$/, '');
      const escapedBasePath = escapeStringRegexp(basePath);

      // https://regex101.com/r/x5KIZL/1
      return new RegExp(`^${escapedBasePath}($|/)`);
    });
  };
  /**
   * Contract of the service that connects GROWI pages and AI assistants with the
   * OpenAI thread, file and vector store APIs.
   */
  export interface IOpenaiService {
    /** Returns the thread identified by `threadId`; when `threadId` is omitted a new thread is created for `vectorStoreRelation`. */
    getOrCreateThread(userId: string, vectorStoreRelation: VectorStoreDocument, threadId?: string): Promise<OpenAI.Beta.Threads.Thread | undefined>;
    /** Lists the thread relations that belong to the given vector store relation. */
    getThreads(vectorStoreRelationId: string): Promise<ThreadRelationDocument[]>
    // getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument>;
    /** Deletes up to `limit` expired threads, waiting `apiCallInterval` ms between API calls. */
    deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void>; // for CronJob
    /** Removes vector store documents that are marked as deleted and no longer referenced by any file relation. */
    deleteObsolatedVectorStoreRelations(): Promise<void> // for CronJob
    /** Fetches the messages of a thread; annotations in text contents are replaced with page links. */
    getMessageData(
      threadId: string, lang?: Lang, options?: { before?: string, after?: string, limit?: number }
    ): Promise<OpenAI.Beta.Threads.Messages.MessagesPage>;
    /** Returns the vector store attached to the AI assistant. */
    getVectorStoreRelation(aiAssistantId: string): Promise<VectorStoreDocument>
    /** Returns the non-deleted vector stores that hold files for any of the given pages. */
    getVectorStoreRelationsByPageIds(pageId: Types.ObjectId[]): Promise<VectorStoreDocument[]>;
    /** Uploads the given pages and attaches the uploaded files to the vector store. */
    createVectorStoreFile(vectorStoreRelation: VectorStoreDocument, pages: PageDocument[]): Promise<void>;
    /** Vectorizes newly created pages for every AI assistant whose page path patterns cover them. */
    createVectorStoreFileOnPageCreate(pages: PageDocument[]): Promise<void>;
    /** Refreshes the vector store file of an updated page for every AI assistant whose page path patterns cover it. */
    updateVectorStoreFileOnPageUpdate(page: HydratedDocument<PageDocument>): Promise<void>;
    /**
     * Deletes the files uploaded for a page in a vector store, together with the relation document.
     * `apiCallInterval` (ms) is optional; when given, the implementation waits that long after each deletion.
     */
    deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId, apiCallInterval?: number): Promise<void>;
    /** Deletes the files of the given pages from every vector store that references them. */
    deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise<void>;
    /** Deletes files that still belong to vector stores marked as deleted (at most `limit` relations per call). */
    deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void>; // for CronJob
    // rebuildVectorStoreAll(): Promise<void>;
    // rebuildVectorStore(page: HydratedDocument<PageDocument>): Promise<void>;
    isAiAssistantUsable(aiAssistantId: string, user: IUserHasId): Promise<boolean>;
    createAiAssistant(data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument>;
    updateAiAssistant(aiAssistantId: string, data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument>;
    getAccessibleAiAssistants(user: IUserHasId): Promise<AccessibleAiAssistants>
    deleteAiAssistant(ownerId: string, aiAssistantId: string): Promise<AiAssistantDocument>
  }
  77. class OpenaiService implements IOpenaiService {
  /**
   * OpenAI client matching the currently configured `openai:serviceType`.
   * The config value is read on every access.
   */
  private get client() {
    return getClient({
      openaiServiceType: configManager.getConfig('openai:serviceType'),
    });
  }
  /**
   * Returns an OpenAI thread for the user.
   *
   * - Without `threadId`: creates a thread for the vector store and records a ThreadRelation document.
   * - With `threadId`: looks up the ThreadRelation, retrieves the thread entity and extends its expiration.
   *   When the API reports the thread as not found, the ThreadRelation document is removed.
   *
   * @throws Error when the ThreadRelation does not exist or when any API / DB call fails
   */
  public async getOrCreateThread(userId: string, vectorStoreRelation: VectorStoreDocument, threadId?: string): Promise<OpenAI.Beta.Threads.Thread> {
    // No thread specified: create a fresh one and remember it
    if (threadId == null) {
      try {
        const createdThread = await this.client.createThread(vectorStoreRelation.vectorStoreId);
        await ThreadRelationModel.create({
          userId,
          threadId: createdThread.id,
          vectorStore: vectorStoreRelation._id,
        });
        return createdThread;
      }
      catch (err) {
        throw new Error(err);
      }
    }

    const existingRelation = await ThreadRelationModel.findOne({ threadId });
    if (existingRelation == null) {
      throw new Error('ThreadRelation document is not exists');
    }

    // Verify that the thread entity still exists on the OpenAI side
    try {
      const retrievedThread = await this.client.retrieveThread(existingRelation.threadId);

      // The entity exists, so push back its expiration date
      await existingRelation.updateThreadExpiration();

      return retrievedThread;
    }
    catch (err) {
      // The entity is gone: drop the dangling thread-relation document
      const removeDanglingRelation = async() => { await existingRelation.remove() };
      await openaiApiErrorHandler(err, { notFoundError: removeDanglingRelation });
      throw new Error(err);
    }
  }
  /**
   * Finds every ThreadRelation document whose `vectorStore` is the given vector store relation.
   */
  async getThreads(vectorStoreRelationId: string): Promise<ThreadRelationDocument[]> {
    const condition = { vectorStore: vectorStoreRelationId };
    return await ThreadRelationModel.find(condition);
  }
  /**
   * Deletes expired threads on the OpenAI side and then removes their ThreadRelation documents.
   *
   * A thread that the API reports as not found is treated as already deleted, so its
   * ThreadRelation document is cleaned up as well instead of being retried on every run
   * (same policy as `deleteVectorStoreFile`).
   *
   * @param limit maximum number of expired thread relations fetched per call
   * @param apiCallInterval wait time in milliseconds after each successful API deletion
   */
  public async deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void> {
    const expiredThreadRelations = await ThreadRelationModel.getExpiredThreadRelations(limit);
    if (expiredThreadRelations == null) {
      return;
    }

    const deletedThreadIds: string[] = [];
    for await (const expiredThreadRelation of expiredThreadRelations) {
      const { threadId } = expiredThreadRelation;
      try {
        const deleteThreadResponse = await this.client.deleteThread(threadId);
        logger.debug('Delete thread', deleteThreadResponse);
        deletedThreadIds.push(threadId);

        // sleep
        await new Promise(resolve => setTimeout(resolve, apiCallInterval));
      }
      catch (err) {
        // The thread entity no longer exists, so the relation document is stale and can be removed too
        await openaiApiErrorHandler(err, { notFoundError: async() => { deletedThreadIds.push(threadId) } });
        logger.error(err);
      }
    }

    // Nothing was deleted: avoid a pointless DB round trip
    if (deletedThreadIds.length === 0) {
      return;
    }

    await ThreadRelationModel.deleteMany({ threadId: { $in: deletedThreadIds } });
  }
  /**
   * Fetches the messages of a thread and rewrites annotations in every text content
   * via `replaceAnnotationWithPageLink`.
   *
   * @param threadId id of the thread to read
   * @param lang language forwarded to `replaceAnnotationWithPageLink`
   * @param options paging options forwarded to the client
   * @returns the messages page returned by the client, after the in-place rewrite
   */
  async getMessageData(
      threadId: string, lang?: Lang, options?: { limit: number, before: string, after: string },
  ): Promise<OpenAI.Beta.Threads.Messages.MessagesPage> {
    const messagesPage = await this.client.getMessages(threadId, options);

    for (const message of messagesPage.data) {
      for (const content of message.content) {
        // Only text contents are rewritten
        if (content.type !== 'text') {
          continue;
        }
        await replaceAnnotationWithPageLink(content, lang);
      }
    }

    return messagesPage;
  }
  147. // TODO: https://redmine.weseek.co.jp/issues/160332
  148. // public async getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument> {
  149. // const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ scopeType: VectorStoreScopeType.PUBLIC, isDeleted: false });
  150. // if (vectorStoreDocument != null && isVectorStoreForPublicScopeExist) {
  151. // return vectorStoreDocument;
  152. // }
  153. // if (vectorStoreDocument != null && !isVectorStoreForPublicScopeExist) {
  154. // try {
  155. // // Check if vector store entity exists
  156. // // If the vector store entity does not exist, the vector store document is deleted
  157. // await this.client.retrieveVectorStore(vectorStoreDocument.vectorStoreId);
  158. // isVectorStoreForPublicScopeExist = true;
  159. // return vectorStoreDocument;
  160. // }
  161. // catch (err) {
  162. // await oepnaiApiErrorHandler(err, { notFoundError: vectorStoreDocument.markAsDeleted });
  163. // throw new Error(err);
  164. // }
  165. // }
  166. // const newVectorStore = await this.client.createVectorStore(VectorStoreScopeType.PUBLIC);
  167. // const newVectorStoreDocument = await VectorStoreModel.create({
  168. // vectorStoreId: newVectorStore.id,
  169. // scopeType: VectorStoreScopeType.PUBLIC,
  170. // }) as VectorStoreDocument;
  171. // isVectorStoreForPublicScopeExist = true;
  172. // return newVectorStoreDocument;
  173. // }
  /**
   * Returns the vector store attached to the given AI assistant.
   *
   * @param aiAssistantId id of the AiAssistant document
   * @returns the populated `vectorStore` of the assistant
   * @throws 404 http-error when the AiAssistant document does not exist
   */
  async getVectorStoreRelation(aiAssistantId: string): Promise<VectorStoreDocument> {
    // `findById` expects the id value itself, not a filter object
    const aiAssistant = await AiAssistantModel.findById(aiAssistantId).populate('vectorStore');
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    return aiAssistant.vectorStore as VectorStoreDocument;
  }
  /**
   * Collects the distinct, non-deleted vector stores that have a file relation
   * for at least one of the given pages.
   *
   * @param pageIds ids of the pages to look up
   */
  async getVectorStoreRelationsByPageIds(pageIds: Types.ObjectId[]): Promise<VectorStoreDocument[]> {
    // Stage 1: keep the file relations of the requested pages
    const matchPages = { $match: { page: { $in: pageIds } } };

    // Stage 2: join the VectorStore document of each relation
    const lookupVectorStore = {
      $lookup: {
        from: 'vectorstores',
        localField: 'vectorStoreRelationId',
        foreignField: '_id',
        as: 'vectorStore',
      },
    };

    // Stage 3: flatten the joined array
    const unwindVectorStore = { $unwind: '$vectorStore' };

    // Stage 4: ignore vector stores that are marked as deleted
    const matchAlive = { $match: { 'vectorStore.isDeleted': false } };

    // Stage 5: continue with the VectorStore document as the root
    const promoteVectorStore = { $replaceRoot: { newRoot: '$vectorStore' } };

    // Stage 6: de-duplicate by _id
    const groupById = {
      $group: {
        _id: '$_id',
        doc: { $first: '$$ROOT' },
      },
    };

    // Stage 7: unwrap the grouped document again
    const restoreDocument = { $replaceRoot: { newRoot: '$doc' } };

    const aggregationStages = [
      matchPages,
      lookupVectorStore,
      unwindVectorStore,
      matchAlive,
      promoteVectorStore,
      groupById,
      restoreDocument,
    ];

    return await VectorStoreFileRelationModel.aggregate<VectorStoreDocument>(aggregationStages);
  }
  /**
   * Creates a vector store through the client and stores a VectorStore document holding its id.
   *
   * @param name name passed to the client when creating the vector store
   * @throws Error wrapping whatever the API call or the DB insert threw
   */
  private async createVectorStore(name: string): Promise<VectorStoreDocument> {
    try {
      const { id: vectorStoreId } = await this.client.createVectorStore(name);
      const createdDocument = await VectorStoreModel.create({ vectorStoreId });
      return createdDocument as VectorStoreDocument;
    }
    catch (err) {
      throw new Error(err);
    }
  }
  245. // TODO: https://redmine.weseek.co.jp/issues/160332
  246. // TODO: https://redmine.weseek.co.jp/issues/156643
  247. // private async uploadFileByChunks(pageId: Types.ObjectId, body: string, vectorStoreFileRelationsMap: VectorStoreFileRelationsMap) {
  248. // const chunks = await splitMarkdownIntoChunks(body, 'gpt-4o');
  249. // for await (const [index, chunk] of chunks.entries()) {
  250. // try {
  251. // const file = await toFile(Readable.from(chunk), `${pageId}-chunk-${index}.md`);
  252. // const uploadedFile = await this.client.uploadFile(file);
  253. // prepareVectorStoreFileRelations(pageId, uploadedFile.id, vectorStoreFileRelationsMap);
  254. // }
  255. // catch (err) {
  256. // logger.error(err);
  257. // }
  258. // }
  259. // }
  /**
   * Converts the revision body from Markdown to HTML and uploads it as `<pageId>.html`.
   *
   * @param pageId id of the page; used as the uploaded file name
   * @param pagePath path of the page, forwarded to the Markdown converter
   * @param revisionBody Markdown body of the page revision
   * @returns the file object returned by the client
   */
  private async uploadFile(pageId: Types.ObjectId, pagePath: string, revisionBody: string): Promise<OpenAI.Files.FileObject> {
    const html = await convertMarkdownToHtml({ pagePath, revisionBody });
    const fileName = `${pageId}.html`;
    const uploadable = await toFile(Readable.from(html), fileName);
    return await this.client.uploadFile(uploadable);
  }
  /**
   * Deletes the vector store entity on the OpenAI side and marks the VectorStore document as deleted.
   * Does nothing when no non-deleted VectorStore document with the given id exists.
   *
   * When the API reports the entity as not found, the document is still marked as deleted
   * before the error is re-thrown.
   *
   * @param vectorStoreRelationId `_id` of the VectorStore document
   * @throws Error wrapping the failure of the API call or of `markAsDeleted`
   */
  private async deleteVectorStore(vectorStoreRelationId: string): Promise<void> {
    const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ _id: vectorStoreRelationId, isDeleted: false });
    if (vectorStoreDocument == null) {
      return;
    }

    try {
      await this.client.deleteVectorStore(vectorStoreDocument.vectorStoreId);
      await vectorStoreDocument.markAsDeleted();
    }
    catch (err) {
      // Call the method ON the document: passing `vectorStoreDocument.markAsDeleted` as a bare
      // reference would detach it from its `this` when the handler invokes it.
      await openaiApiErrorHandler(err, { notFoundError: async() => { await vectorStoreDocument.markAsDeleted() } });
      throw new Error(err);
    }
  }
  /**
   * Uploads the given pages as files and attaches them to the vector store.
   *
   * Flow:
   * 1. every page with a non-empty revision body is uploaded in parallel (failures are only logged)
   * 2. the page/file relations are upserted
   * 3. the uploaded files are attached to the vector store in one batch
   * 4. the relations of the pages are marked as attached
   *
   * When step 2-4 fails, the files of all given pages are deleted again for this vector store.
   *
   * @param vectorStoreRelation vector store that receives the files
   * @param pages pages to vectorize; must not contain more than BATCH_SIZE entries
   * @throws AssertionError when more than BATCH_SIZE pages are given
   */
  async createVectorStoreFile(vectorStoreRelation: VectorStoreDocument, pages: Array<HydratedDocument<PageDocument>>): Promise<void> {
    // Validate the batch size BEFORE any upload is started. Asserting after the workers were
    // launched would leave already running uploads behind with unhandled rejections.
    assert(pages.length <= BATCH_SIZE, 'workers.length must be less than or equal to BATCH_SIZE');

    // const vectorStore = await this.getOrCreateVectorStoreForPublicScope();
    const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();

    const processUploadFile = async(page: HydratedDocument<PageDocument>) => {
      if (page._id == null || page.revision == null) {
        return;
      }

      // The revision is already populated and has content: upload it as is
      if (isPopulated(page.revision) && page.revision.body.length > 0) {
        const uploadedFile = await this.uploadFile(page._id, page.path, page.revision.body);
        prepareVectorStoreFileRelations(vectorStoreRelation._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
        return;
      }

      // Otherwise populate the revision first
      const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
      if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
        const uploadedFile = await this.uploadFile(page._id, page.path, pagePopulatedToShowRevision.revision.body);
        prepareVectorStoreFileRelations(vectorStoreRelation._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
      }
    };

    // Start all uploads and wait until every one of them has settled
    const fileUploadResult = await Promise.allSettled(pages.map(processUploadFile));
    fileUploadResult.forEach((result) => {
      if (result.status === 'rejected') {
        logger.error(result.reason);
      }
    });

    const vectorStoreFileRelations = Array.from(vectorStoreFileRelationsMap.values());
    const uploadedFileIds = vectorStoreFileRelations.flatMap(data => data.fileIds);
    if (uploadedFileIds.length === 0) {
      return;
    }

    const pageIds = pages.map(page => page._id);

    try {
      // Save vector store file relation
      await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(vectorStoreFileRelations);

      // Create vector store file
      const createVectorStoreFileBatchResponse = await this.client.createVectorStoreFileBatch(vectorStoreRelation.vectorStoreId, uploadedFileIds);
      logger.debug('Create vector store file', createVectorStoreFileBatchResponse);

      // Set isAttachedToVectorStore: true when the uploaded file is attached to VectorStore
      await VectorStoreFileRelationModel.markAsAttachedToVectorStore(pageIds);
    }
    catch (err) {
      logger.error(err);

      // Delete all uploaded files if createVectorStoreFileBatch fails
      for (const pageId of pageIds) {
        await this.deleteVectorStoreFile(vectorStoreRelation._id, pageId);
      }
    }
  }
  /**
   * Deletes all VectorStore documents that are marked as deleted (isDeleted: true)
   * and have no associated VectorStoreFileRelation documents.
   *
   * When no VectorStoreFileRelation document exists at all, every deleted VectorStore
   * is unreferenced and therefore removed.
   */
  async deleteObsolatedVectorStoreRelations(): Promise<void> {
    const deletedVectorStoreRelations = await VectorStoreModel.find({ isDeleted: true });
    if (deletedVectorStoreRelations.length === 0) {
      return;
    }

    // One group per vector store that is still referenced by at least one file relation
    const referencedVectorStores = await VectorStoreFileRelationModel.aggregate<{ _id: Types.ObjectId | null }>([
      { $group: { _id: '$vectorStoreRelationId' } },
    ]);

    // The aggregation yields `{ _id }` objects; extract the plain ids for the `$nin` condition
    const currentVectorStoreRelationIds = referencedVectorStores
      .map(group => group._id)
      .filter((id): id is Types.ObjectId => id != null);

    // No early return on an empty id list: `$nin: []` matches every deleted vector store,
    // which is exactly the set that has no remaining file relation.
    await VectorStoreModel.deleteMany({ _id: { $nin: currentVectorStoreRelationIds }, isDeleted: true });
  }
  /**
   * Deletes the files uploaded for a page in a vector store and updates the relation document.
   *
   * Files reported as not found by the API count as deleted. When every file is gone the
   * relation document is removed; otherwise it is saved with the remaining file ids.
   *
   * @param vectorStoreRelationId `_id` of the VectorStore document
   * @param pageId id of the page whose files are deleted
   * @param apiCallInterval optional wait time in milliseconds after each successful deletion
   */
  async deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId, apiCallInterval?: number): Promise<void> {
    const fileRelation = await VectorStoreFileRelationModel.findOne({ vectorStoreRelationId, page: pageId });
    if (fileRelation == null) {
      return;
    }

    const removedFileIds = new Set<string>();
    for (const fileId of fileRelation.fileIds) {
      try {
        const deleteFileResponse = await this.client.deleteFile(fileId);
        logger.debug('Delete vector store file', deleteFileResponse);
        removedFileIds.add(fileId);

        if (apiCallInterval != null) {
          // sleep
          await new Promise(resolve => setTimeout(resolve, apiCallInterval));
        }
      }
      catch (err) {
        // A file that no longer exists is as good as deleted
        await openaiApiErrorHandler(err, { notFoundError: async() => { removedFileIds.add(fileId) } });
        logger.error(err);
      }
    }

    const remainingFileIds = fileRelation.fileIds.filter(fileId => !removedFileIds.has(fileId));

    if (remainingFileIds.length > 0) {
      // Keep track of the files that could not be deleted
      fileRelation.fileIds = remainingFileIds;
      await fileRelation.save();
      return;
    }

    await fileRelation.remove();
  }
  /**
   * Deletes the files of the given pages from every vector store returned by
   * `getVectorStoreRelationsByPageIds`. Pages are handled one after another; for each page
   * the vector stores are processed in parallel and individual failures are ignored.
   */
  async deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise<void> {
    const vectorStoreRelations = await this.getVectorStoreRelationsByPageIds(pageIds);
    if (vectorStoreRelations == null || vectorStoreRelations.length === 0) {
      return;
    }

    for (const pageId of pageIds) {
      const deletions = vectorStoreRelations.map((vectorStoreRelation) => {
        return this.deleteVectorStoreFile(vectorStoreRelation._id, pageId);
      });
      await Promise.allSettled(deletions);
    }
  }
  /**
   * Deletes files that still belong to vector stores marked as deleted.
   *
   * @param limit maximum number of file relations processed per call
   * @param apiCallInterval wait time in milliseconds forwarded to `deleteVectorStoreFile`
   */
  async deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void> {
    // Vector stores that are marked as deleted
    const deletedVectorStores = await VectorStoreModel.find({ isDeleted: true });
    if (deletedVectorStores.length === 0) {
      return;
    }

    // File relations that still point to one of those vector stores
    const deletedVectorStoreIds = deletedVectorStores.map(vectorStore => vectorStore._id);
    const obsoleteFileRelations = await VectorStoreFileRelationModel
      .find({ vectorStoreRelationId: { $in: deletedVectorStoreIds } })
      .limit(limit);
    if (obsoleteFileRelations.length === 0) {
      return;
    }

    // Delete them one by one; a failure is logged and does not stop the remaining ones
    for (const fileRelation of obsoleteFileRelations) {
      try {
        await this.deleteVectorStoreFile(fileRelation.vectorStoreRelationId, fileRelation.page, apiCallInterval);
      }
      catch (err) {
        logger.error(err);
      }
    }
  }
  413. // TODO: https://redmine.weseek.co.jp/issues/160332
  414. // async rebuildVectorStoreAll() {
  415. // await this.deleteVectorStore(VectorStoreScopeType.PUBLIC);
  416. // // Create all public pages VectorStoreFile
  417. // const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');
  418. // const pagesStream = Page.find({ grant: PageGrant.GRANT_PUBLIC }).populate('revision').cursor({ batch_size: BATCH_SIZE });
  419. // const batchStrem = createBatchStream(BATCH_SIZE);
  420. // const createVectorStoreFile = this.createVectorStoreFile.bind(this);
  421. // const createVectorStoreFileStream = new Transform({
  422. // objectMode: true,
  423. // async transform(chunk: HydratedDocument<PageDocument>[], encoding, callback) {
  424. // await createVectorStoreFile(chunk);
  425. // this.push(chunk);
  426. // callback();
  427. // },
  428. // });
  429. // await pipeline(pagesStream, batchStrem, createVectorStoreFileStream);
  430. // }
  /**
   * Keeps only the pages the AI assistant is allowed to read according to its access scope.
   *
   * - PUBLIC_ONLY: public pages
   * - GROUPS: public pages and group pages granted to one of the assistant's
   *   `grantedGroupsForAccessScope` (the same groups `createConditionForCreateVectorStoreFile` uses)
   * - OWNER: public pages, pages owned by the assistant's owner and group pages of any group
   *   the owner belongs to
   *
   * @param aiAssistant assistant whose access scope is applied
   * @param pages candidate pages
   * @returns the accessible subset of `pages`; an empty array for an unknown access scope
   */
  async filterPagesByAccessScope(aiAssistant: AiAssistantDocument, pages: HydratedDocument<PageDocument>[]) {
    const isPublicPage = (page: HydratedDocument<PageDocument>) => page.grant === PageGrant.GRANT_PUBLIC;

    const isUserGroupAccessible = (page: HydratedDocument<PageDocument>, accessibleUserGroupIds: string[]) => {
      if (page.grant !== PageGrant.GRANT_USER_GROUP) return false;
      return page.grantedGroups.some(group => accessibleUserGroupIds.includes(getIdStringForRef(group.item)));
    };

    const isOwnerAccessible = (page: HydratedDocument<PageDocument>, ownerId: Ref<IUser>) => {
      if (page.grant !== PageGrant.GRANT_OWNER) return false;
      return page.grantedUsers.some(user => getIdStringForRef(user) === getIdStringForRef(ownerId));
    };

    const getOwnerUserGroupIds = async(owner: Ref<IUser>) => {
      const userGroups = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
      const externalGroups = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
      return [...userGroups, ...externalGroups].map(group => getIdStringForRef(group));
    };

    switch (aiAssistant.accessScope) {
      case AiAssistantAccessScope.PUBLIC_ONLY:
        return pages.filter(isPublicPage);

      case AiAssistantAccessScope.GROUPS: {
        // Restrict to the groups selected for this assistant. Using every group of the owner
        // would vectorize pages of groups that were never granted to the assistant.
        const grantedGroupIds = (aiAssistant.grantedGroupsForAccessScope ?? []).map(group => getIdStringForRef(group.item));
        return pages.filter(page => isPublicPage(page) || isUserGroupAccessible(page, grantedGroupIds));
      }

      case AiAssistantAccessScope.OWNER: {
        const ownerUserGroupIds = await getOwnerUserGroupIds(aiAssistant.owner);
        return pages.filter(page => isPublicPage(page) || isOwnerAccessible(page, aiAssistant.owner) || isUserGroupAccessible(page, ownerUserGroupIds));
      }

      default:
        return [];
    }
  }
  /**
   * Vectorizes newly created pages for every AI assistant found by the page paths.
   * Assistants whose `vectorStore` is missing or not populated are skipped.
   */
  async createVectorStoreFileOnPageCreate(pages: HydratedDocument<PageDocument>[]): Promise<void> {
    const aiAssistants = await AiAssistantModel.findByPagePaths(pages.map(page => page.path));
    if (aiAssistants.length === 0) {
      return;
    }

    for (const aiAssistant of aiAssistants) {
      // Narrow the pages down to the ones this assistant may read
      const accessiblePages = await this.filterPagesByAccessScope(aiAssistant, pages);

      const { vectorStore } = aiAssistant;
      const hasPopulatedVectorStore = vectorStore != null && isPopulated(vectorStore);
      if (!hasPopulatedVectorStore) {
        continue;
      }

      logger.debug('--------- createVectorStoreFileOnPageCreate ---------');
      logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
      logger.debug('VectorStoreFile pagePath to be created: ', accessiblePages.map(page => page.path));
      logger.debug('-----------------------------------------------------');

      await this.createVectorStoreFile(vectorStore as VectorStoreDocument, accessiblePages);
    }
  }
  /**
   * Refreshes the vector store file of an updated page for every AI assistant found by the page path.
   *
   * The files previously uploaded for the page are deleted FIRST and the new file is created
   * afterwards. `deleteVectorStoreFile` removes every file registered for the page in the
   * vector store, so running it after the creation would delete the freshly uploaded file too.
   *
   * When the page is no longer accessible for an assistant, only the deletion takes effect.
   */
  async updateVectorStoreFileOnPageUpdate(page: HydratedDocument<PageDocument>): Promise<void> {
    const aiAssistants = await AiAssistantModel.findByPagePaths([page.path]);
    if (aiAssistants.length === 0) {
      return;
    }

    for (const aiAssistant of aiAssistants) {
      const pagesToVectorize = await this.filterPagesByAccessScope(aiAssistant, [page]);

      const vectorStoreRelation = aiAssistant.vectorStore;
      if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
        continue;
      }

      logger.debug('---------- updateVectorStoreOnPageUpdate ------------');
      logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
      logger.debug('PagePath of VectorStoreFile to be deleted: ', page.path);
      logger.debug('pagePath of VectorStoreFile to be created: ', pagesToVectorize.map(page => page.path));
      logger.debug('-----------------------------------------------------');

      // Remove the outdated file(s) before uploading the new content
      await this.deleteVectorStoreFile((vectorStoreRelation as VectorStoreDocument)._id, page._id);

      // Do not create a new VectorStoreFile if page is changed to a permission that AiAssistant does not have access to
      // (pagesToVectorize is empty in that case)
      await this.createVectorStoreFile(vectorStoreRelation as VectorStoreDocument, pagesToVectorize);
    }
  }
  /**
   * Streams every page matching `conditions` (with populated revision) in batches of BATCH_SIZE
   * and vectorizes each batch with `createVectorStoreFile`.
   *
   * @param vectorStoreRelation vector store that receives the files
   * @param conditions Mongo filter selecting the pages to vectorize
   * @throws whatever the page cursor or `createVectorStoreFile` throws (propagated by `pipeline`)
   */
  private async createVectorStoreFileWithStream(vectorStoreRelation: VectorStoreDocument, conditions: mongoose.FilterQuery<PageDocument>): Promise<void> {
    const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');

    const pagesStream = Page.find({ ...conditions })
      .populate('revision')
      .cursor({ batchSize: BATCH_SIZE });
    const batchStream = createBatchStream(BATCH_SIZE);

    const createVectorStoreFile = this.createVectorStoreFile.bind(this);
    const createVectorStoreFileStream = new Transform({
      objectMode: true,
      async transform(chunk: HydratedDocument<PageDocument>[], encoding, callback) {
        try {
          logger.debug('Search results of page paths', chunk.map(page => page.path));
          await createVectorStoreFile(vectorStoreRelation, chunk);

          // This stream is the LAST stage of the pipeline, so nothing reads its output.
          // Pushing the chunk would fill the readable buffer and, once the high water mark
          // is reached, stall the whole pipeline. Therefore the chunk is not forwarded.
          callback();
        }
        catch (error) {
          callback(error);
        }
      },
    });

    await pipeline(pagesStream, batchStream, createVectorStoreFileStream);
  }
  /**
   * Builds the Mongo filter that selects the pages to vectorize for an AI assistant.
   *
   * Every variant is an `$or` of
   * - restricted ('Anyone with the link') pages whose path is listed literally (not as glob), and
   * - pages under the path patterns whose grant fits the access scope.
   *
   * @param owner owner of the assistant; only consulted for the OWNER scope
   * @param accessScope access scope of the assistant
   * @param grantedGroupsForAccessScope groups granted to the assistant; required for the GROUPS scope
   * @param pagePathPatterns literal page paths and/or glob patterns
   * @throws Error when the GROUPS scope has no granted groups or when the scope is unknown
   */
  private async createConditionForCreateVectorStoreFile(
      owner: AiAssistant['owner'],
      accessScope: AiAssistant['accessScope'],
      grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
      pagePathPatterns: AiAssistant['pagePathPatterns'],
  ): Promise<mongoose.FilterQuery<PageDocument>> {
    const pathConditions = convertPathPatternsToRegExp(pagePathPatterns);

    // Include pages in search targets when their paths with 'Anyone with the link' permission are directly specified instead of using glob pattern
    const literalPagePaths = pagePathPatterns.filter(pagePathPattern => !isGlobPatternPath(pagePathPattern));
    const restrictedPageCondition: mongoose.FilterQuery<PageDocument> = {
      grant: PageGrant.GRANT_RESTRICTED,
      path: { $in: literalPagePaths },
    };

    switch (accessScope) {
      case AiAssistantAccessScope.PUBLIC_ONLY:
        return {
          $or: [
            restrictedPageCondition,
            {
              grant: PageGrant.GRANT_PUBLIC,
              path: { $in: pathConditions },
            },
          ],
        };

      case AiAssistantAccessScope.GROUPS: {
        const hasGrantedGroups = grantedGroupsForAccessScope != null && grantedGroupsForAccessScope.length > 0;
        if (!hasGrantedGroups) {
          throw new Error('grantedGroups is required when accessScope is GROUPS');
        }

        const grantedGroupIds = grantedGroupsForAccessScope.map(group => getIdForRef(group.item).toString());

        return {
          $or: [
            restrictedPageCondition,
            {
              grant: { $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP] },
              path: { $in: pathConditions },
              $or: [
                { 'grantedGroups.item': { $in: grantedGroupIds } },
                { grant: PageGrant.GRANT_PUBLIC },
              ],
            },
          ],
        };
      }

      case AiAssistantAccessScope.OWNER: {
        // Groups the owner belongs to (internal groups first, then external ones)
        const internalGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
        const externalGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
        const ownerUserGroups = [...internalGroupIds, ...externalGroupIds].map(group => group.toString());

        return {
          $or: [
            restrictedPageCondition,
            {
              grant: { $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP, PageGrant.GRANT_OWNER] },
              path: { $in: pathConditions },
              $or: [
                { 'grantedGroups.item': { $in: ownerUserGroups } },
                { grantedUsers: { $in: [getIdForRef(owner)] } },
                { grant: PageGrant.GRANT_PUBLIC },
              ],
            },
          ],
        };
      }

      default:
        throw new Error('Invalid accessScope value');
    }
  }
  /**
   * Validates the group settings of an AI assistant.
   *
   * Rules enforced:
   * - grantedGroupsForShareScope may only be specified when shareScope is GROUPS
   * - grantedGroupsForAccessScope may only be specified when accessScope is GROUPS
   * - every specified group must be among the group IDs returned for the owner by
   *   UserGroupRelation / ExternalUserGroupRelation `findAllUserGroupIdsRelatedToUser`
   *
   * @param owner owner of the AI assistant
   * @param shareScope share scope requested for the assistant
   * @param accessScope access scope requested for the assistant
   * @param grantedGroupsForShareScope groups the assistant is shared with
   * @param grantedGroupsForAccessScope groups whose pages the assistant may read
   * @throws Error when any of the rules above is violated
   */
  private async validateGrantedUserGroupsForAiAssistant(
      owner: AiAssistant['owner'],
      shareScope: AiAssistant['shareScope'],
      accessScope: AiAssistant['accessScope'],
      grantedGroupsForShareScope: AiAssistant['grantedGroupsForShareScope'],
      grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
  ): Promise<void> {
    // Group lists must not be specified unless the corresponding scope is "groups"
    if (shareScope !== AiAssistantShareScope.GROUPS && grantedGroupsForShareScope != null) {
      throw new Error('grantedGroupsForShareScope is specified when shareScope is not “groups”.');
    }
    if (accessScope !== AiAssistantAccessScope.GROUPS && grantedGroupsForAccessScope != null) {
      throw new Error('grantedGroupsForAccessScope is specified when accessScope is not “groups”.');
    }

    const groupIdsForShareScope = grantedGroupsForShareScope?.map(group => getIdForRef(group.item).toString()) ?? [];
    const groupIdsForAccessScope = grantedGroupsForAccessScope?.map(group => getIdForRef(group.item).toString()) ?? [];

    // No group is specified: there is nothing to verify, so skip the group lookups
    if (groupIdsForShareScope.length === 0 && groupIdsForAccessScope.length === 0) {
      return;
    }

    // The two lookups are independent of each other, so run them concurrently
    const [userGroupIds, externalUserGroupIds] = await Promise.all([
      UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner),
      ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner),
    ]);
    const ownerUserGroupIds = new Set([...userGroupIds, ...externalUserGroupIds].map(group => group.toString()));

    // `every` is true for an empty list, so an unspecified list always passes
    const ownerBelongsToAll = (groupIds: string[]): boolean => groupIds.every(groupId => ownerUserGroupIds.has(groupId));

    if (!ownerBelongsToAll(groupIdsForShareScope)) {
      throw new Error('A userGroup to which the owner does not belong is specified in grantedGroupsForShareScope');
    }
    if (!ownerBelongsToAll(groupIdsForAccessScope)) {
      throw new Error('A userGroup to which the owner does not belong is specified in grantedGroupsForAccessScope');
    }
  }
  /**
   * Determines whether the given user may use the specified AI assistant.
   *
   * - shareScope PUBLIC_ONLY: usable by anyone
   * - shareScope OWNER, or SAME_AS_ACCESS_SCOPE with accessScope OWNER: usable by the owner only
   * - shareScope GROUPS: usable by users related to one of grantedGroupsForShareScope
   * - SAME_AS_ACCESS_SCOPE with accessScope GROUPS: usable by users related to one of
   *   grantedGroupsForAccessScope (grantedGroupsForShareScope is rejected at validation time
   *   unless shareScope is GROUPS, so it cannot be used for this case)
   *
   * @param aiAssistantId id of the AiAssistant document
   * @param user user who wants to use the assistant
   * @returns true when the user may use the assistant
   * @throws a 404 http error when the AiAssistant document does not exist
   */
  async isAiAssistantUsable(aiAssistantId: string, user: IUserHasId): Promise<boolean> {
    const aiAssistant = await AiAssistantModel.findById(aiAssistantId);
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    const shareScope = aiAssistant.shareScope;
    const accessScope = aiAssistant.accessScope;
    const isSameAsAccessScope = shareScope === AiAssistantShareScope.SAME_AS_ACCESS_SCOPE;

    if (shareScope === AiAssistantShareScope.PUBLIC_ONLY) {
      return true;
    }

    const isOwnerOnly = shareScope === AiAssistantShareScope.OWNER
      || (isSameAsAccessScope && accessScope === AiAssistantAccessScope.OWNER);
    if (isOwnerOnly) {
      return getIdStringForRef(aiAssistant.owner) === getIdStringForRef(user._id);
    }

    const isSharedWithShareScopeGroups = shareScope === AiAssistantShareScope.GROUPS;
    const isSharedWithAccessScopeGroups = isSameAsAccessScope && accessScope === AiAssistantAccessScope.GROUPS;
    if (isSharedWithShareScopeGroups || isSharedWithAccessScopeGroups) {
      // When the share scope follows the access scope, the groups live in grantedGroupsForAccessScope
      const grantedGroups = isSharedWithShareScopeGroups
        ? aiAssistant.grantedGroupsForShareScope
        : aiAssistant.grantedGroupsForAccessScope;
      const grantedGroupIds = new Set(grantedGroups?.map(group => getIdStringForRef(group.item)) ?? []);

      // No granted group means nobody can match; skip the group lookups
      if (grantedGroupIds.size === 0) {
        return false;
      }

      const [userGroupIds, externalUserGroupIds] = await Promise.all([
        UserGroupRelation.findAllUserGroupIdsRelatedToUser(user),
        ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(user),
      ]);

      return [...userGroupIds, ...externalUserGroupIds].some(groupId => grantedGroupIds.has(groupId.toString()));
    }

    // NOTE(review): SAME_AS_ACCESS_SCOPE combined with accessScope PUBLIC_ONLY reaches this point
    // and is treated as not usable (even for the owner) -- confirm whether that is intended.
    return false;
  }
  /**
   * Creates a new AI assistant together with its vector store.
   *
   * The group settings are validated and the vector store file condition is built
   * before the vector store and the AiAssistant document are created.
   * The vector store files are then generated without being awaited.
   *
   * @param data assistant attributes (everything except the vector store)
   * @returns the created AiAssistant document
   */
  async createAiAssistant(data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument> {
    const {
      owner,
      name,
      shareScope,
      accessScope,
      pagePathPatterns,
      grantedGroupsForShareScope,
      grantedGroupsForAccessScope,
    } = data;

    await this.validateGrantedUserGroupsForAiAssistant(owner, shareScope, accessScope, grantedGroupsForShareScope, grantedGroupsForAccessScope);

    const vectorStoreFileConditions = await this.createConditionForCreateVectorStoreFile(owner, accessScope, grantedGroupsForAccessScope, pagePathPatterns);

    const vectorStore = await this.createVectorStore(name);
    const createdAiAssistant = await AiAssistantModel.create({ ...data, vectorStore });

    // Intentionally not awaited: the files are generated after this method returns
    this.createVectorStoreFileWithStream(vectorStore, vectorStoreFileConditions);

    return createdAiAssistant;
  }
  /**
   * Updates an AI assistant owned by `data.owner`.
   *
   * The vector store is rebuilt only when accessScope, pagePathPatterns or the set of
   * grantedGroupsForAccessScope has changed. The granted groups are compared regardless of
   * their order, so merely reordering the same groups does not trigger a rebuild.
   *
   * @param aiAssistantId id of the AiAssistant document to update
   * @param data new assistant attributes (everything except the vector store)
   * @returns the saved AiAssistant document
   * @throws a 404 http error when no assistant with the given id is owned by `data.owner`
   * @throws Error when the group settings are invalid
   */
  async updateAiAssistant(aiAssistantId: string, data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument> {
    const aiAssistant = await AiAssistantModel.findOne({ owner: data.owner, _id: aiAssistantId });
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    await this.validateGrantedUserGroupsForAiAssistant(
      data.owner,
      data.shareScope,
      data.accessScope,
      data.grantedGroupsForShareScope,
      data.grantedGroupsForAccessScope,
    );

    // ObjectId[] -> sorted string[] so that the comparison below ignores the order of the groups
    const toSortedGroupIds = (groups: AiAssistant['grantedGroupsForAccessScope']): string[] => {
      const groupIds = groups?.map(group => getIdStringForRef(group.item)) ?? [];
      return groupIds.sort();
    };
    const grantedGroupIdsForAccessScopeFromReq = toSortedGroupIds(data.grantedGroupsForAccessScope);
    const grantedGroupIdsForAccessScopeFromDb = toSortedGroupIds(aiAssistant.grantedGroupsForAccessScope);

    // If accessScope, pagePathPatterns, grantedGroupsForAccessScope have not changed, do not build VectorStore
    const shouldRebuildVectorStore = data.accessScope !== aiAssistant.accessScope
      || !isDeepEquals(data.pagePathPatterns, aiAssistant.pagePathPatterns)
      || !isDeepEquals(grantedGroupIdsForAccessScopeFromReq, grantedGroupIdsForAccessScopeFromDb);

    let newVectorStoreRelation: VectorStoreDocument | undefined;
    if (shouldRebuildVectorStore) {
      const conditions = await this.createConditionForCreateVectorStoreFile(
        data.owner,
        data.accessScope,
        data.grantedGroupsForAccessScope,
        data.pagePathPatterns,
      );

      // Delete obsoleted VectorStore
      // NOTE(review): the obsolete store is deleted before its replacement exists; if createVectorStore
      // throws, the stored document still references the deleted store -- consider creating first.
      const obsoletedVectorStoreRelationId = getIdStringForRef(aiAssistant.vectorStore);
      await this.deleteVectorStore(obsoletedVectorStoreRelationId);

      newVectorStoreRelation = await this.createVectorStore(data.name);

      // VectorStore creation process does not await
      this.createVectorStoreFileWithStream(newVectorStoreRelation, conditions);
    }

    aiAssistant.set({
      ...data,
      vectorStore: newVectorStoreRelation ?? aiAssistant.vectorStore,
    });

    const updatedAiAssistant = await aiAssistant.save();
    return updatedAiAssistant;
  }
  /**
   * Collects the AI assistants that the given user can see.
   *
   * The result contains the assistants owned by the user, the assistants of other owners
   * whose shareScope is PUBLIC_ONLY, and the assistants of other owners whose shareScope is
   * GROUPS and whose grantedGroupsForShareScope includes a group related to the user.
   *
   * @param user user whose accessible assistants are looked up
   * @returns the assistants split into the user's own ones and the ones owned by others
   */
  async getAccessibleAiAssistants(user: IUserHasId): Promise<AccessibleAiAssistants> {
    const relatedUserGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user);
    const relatedExternalUserGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(user);
    const userGroupIds = [...relatedUserGroupIds, ...relatedExternalUserGroupIds];

    const assistants = await AiAssistantModel
      .find({
        $or: [
          // Case 1: Assistants owned by the user
          { owner: user },

          // Case 2: Public assistants owned by others
          {
            $and: [
              { owner: { $ne: user } },
              { shareScope: AiAssistantShareScope.PUBLIC_ONLY },
            ],
          },

          // Case 3: Group-restricted assistants where user is in granted groups
          {
            $and: [
              { owner: { $ne: user } },
              { shareScope: AiAssistantShareScope.GROUPS },
              { 'grantedGroupsForShareScope.item': { $in: userGroupIds } },
            ],
          },
        ],
      })
      .populate('grantedGroupsForShareScope.item')
      .populate('grantedGroupsForAccessScope.item');

    // Split the result in one pass: the user's own assistants vs. those owned by someone else
    const userId = user._id.toString();
    const myAiAssistants: typeof assistants = [];
    const teamAiAssistants: typeof assistants = [];
    for (const assistant of assistants) {
      if (assistant.owner.toString() === userId) {
        myAiAssistants.push(assistant);
      }
      else {
        teamAiAssistants.push(assistant);
      }
    }

    return { myAiAssistants, teamAiAssistants };
  }
  /**
   * Deletes an AI assistant and the vector store it refers to.
   *
   * @param ownerId id of the owner; only an assistant with this owner is deleted
   * @param aiAssistantId id of the AiAssistant document to delete
   * @returns the removed AiAssistant document
   * @throws a 404 http error when no assistant matches the owner and the id
   */
  async deleteAiAssistant(ownerId: string, aiAssistantId: string): Promise<AiAssistantDocument> {
    const targetAiAssistant = await AiAssistantModel.findOne({ owner: ownerId, _id: aiAssistantId });
    if (targetAiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    // Remove the vector store first, then the assistant document itself
    await this.deleteVectorStore(getIdStringForRef(targetAiAssistant.vectorStore));

    const removedAiAssistant = await targetAiAssistant.remove();
    return removedAiAssistant;
  }
  758. }
  // Service instance shared by every caller; created on the first successful getOpenaiService() call
  let instance: OpenaiService;

  /**
   * Returns the shared OpenaiService instance, creating it on first use.
   *
   * An instance is created only when 'app:aiEnabled' is truthy and 'openai:serviceType'
   * is one of OpenaiServiceTypes.
   *
   * @returns the service instance, or undefined when the configuration does not allow one
   */
  export const getOpenaiService = (): IOpenaiService | undefined => {
    if (instance == null) {
      const isAiEnabled = configManager.getConfig('app:aiEnabled');
      const serviceType = configManager.getConfig('openai:serviceType');

      const canInstantiate = isAiEnabled && serviceType != null && OpenaiServiceTypes.includes(serviceType);
      if (!canInstantiate) {
        return undefined;
      }

      instance = new OpenaiService();
    }

    return instance;
  };