openai.ts 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949
  1. import assert from 'node:assert';
  2. import { Readable, Transform } from 'stream';
  3. import { pipeline } from 'stream/promises';
  4. import type {
  5. IUser, Ref, Lang, IPage,
  6. } from '@growi/core';
  7. import {
  8. PageGrant, getIdForRef, getIdStringForRef, isPopulated, type IUserHasId,
  9. } from '@growi/core';
  10. import { deepEquals } from '@growi/core/dist/utils';
  11. import { isGlobPatternPath } from '@growi/core/dist/utils/page-path-utils';
  12. import escapeStringRegexp from 'escape-string-regexp';
  13. import createError from 'http-errors';
  14. import mongoose, { type HydratedDocument, type Types } from 'mongoose';
  15. import { type OpenAI, toFile } from 'openai';
  16. import ExternalUserGroupRelation from '~/features/external-user-group/server/models/external-user-group-relation';
  17. import ThreadRelationModel, { type ThreadRelationDocument } from '~/features/openai/server/models/thread-relation';
  18. import VectorStoreModel, { type VectorStoreDocument } from '~/features/openai/server/models/vector-store';
  19. import VectorStoreFileRelationModel, {
  20. type VectorStoreFileRelation,
  21. prepareVectorStoreFileRelations,
  22. } from '~/features/openai/server/models/vector-store-file-relation';
  23. import type { PageDocument, PageModel } from '~/server/models/page';
  24. import UserGroupRelation from '~/server/models/user-group-relation';
  25. import { configManager } from '~/server/service/config-manager';
  26. import { createBatchStream } from '~/server/util/batch-stream';
  27. import loggerFactory from '~/utils/logger';
  28. import { OpenaiServiceTypes } from '../../interfaces/ai';
  29. import type { UpsertAiAssistantData } from '../../interfaces/ai-assistant';
  30. import {
  31. type AccessibleAiAssistants, type AiAssistant, AiAssistantAccessScope, AiAssistantShareScope,
  32. } from '../../interfaces/ai-assistant';
  33. import type { MessageListParams } from '../../interfaces/message';
  34. import { removeGlobPath } from '../../utils/remove-glob-path';
  35. import AiAssistantModel, { type AiAssistantDocument } from '../models/ai-assistant';
  36. import { convertMarkdownToHtml } from '../utils/convert-markdown-to-html';
  37. import { generateGlobPatterns } from '../utils/generate-glob-patterns';
  38. import { getClient } from './client-delegator';
  39. import { openaiApiErrorHandler } from './openai-api-error-handler';
  40. import { replaceAnnotationWithPageLink } from './replace-annotation-with-page-link';
  // Deep-equality helper exposed by @growi/core utilities
  const isDeepEquals = deepEquals.isDeepEquals;

  // Upper bound on the number of pages handled per batch (cursor batch size and upload fan-out)
  const BATCH_SIZE = 100;

  const logger = loggerFactory('growi:service:openai');

  // Map of string keys to the VectorStoreFileRelation payload being assembled for them
  type VectorStoreFileRelationsMap = Map<string, VectorStoreFileRelation>;
  /**
   * Converts page path patterns into values usable as `$in` operands for a page `path` condition.
   *
   * - A glob pattern (as judged by `isGlobPatternPath`, e.g. `/Sandbox/*`) becomes a RegExp that
   *   matches the base path itself and every descendant path.
   * - Any other pattern is returned unchanged as a literal path string.
   *
   * @param pagePathPatterns page paths, optionally ending with the glob suffix
   * @returns one string or RegExp per input pattern, in the same order
   */
  const convertPathPatternsToRegExp = (pagePathPatterns: string[]): Array<string | RegExp> => {
    return pagePathPatterns.map((pagePathPattern) => {
      if (!isGlobPatternPath(pagePathPattern)) {
        return pagePathPattern;
      }

      // Strip only the TRAILING glob suffix. A plain string pattern passed to String#replace
      // removes the first occurrence of "/*", which would corrupt a path that contains "/*" earlier.
      const basePath = pagePathPattern.replace(/\/\*$/, '');
      const escapedBasePath = escapeStringRegexp(basePath);

      // https://regex101.com/r/x5KIZL/1
      return new RegExp(`^${escapedBasePath}($|/)`);
    });
  };
  /**
   * Public contract of the OpenAI integration service:
   * thread management, vector store / vector store file management and AiAssistant operations.
   */
  export interface IOpenaiService {
    // ----- threads -----
    createThread(userId: string, aiAssistantId: string, initialUserMessage: string): Promise<ThreadRelationDocument>;
    getThreadsByAiAssistantId(aiAssistantId: string): Promise<ThreadRelationDocument[]>;
    deleteThread(threadRelationId: string): Promise<ThreadRelationDocument>;
    deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void>; // for CronJob

    // ----- vector stores -----
    deleteObsoletedVectorStoreRelations(): Promise<void>; // for CronJob
    deleteVectorStore(vectorStoreRelationId: string): Promise<void>;

    // ----- messages -----
    getMessageData(threadId: string, lang?: Lang, options?: MessageListParams): Promise<OpenAI.Beta.Threads.Messages.MessagesPage>;

    // ----- vector store files -----
    createVectorStoreFile(vectorStoreRelation: VectorStoreDocument, pages: PageDocument[]): Promise<void>;
    createVectorStoreFileOnPageCreate(pages: PageDocument[]): Promise<void>;
    updateVectorStoreFileOnPageUpdate(page: HydratedDocument<PageDocument>): Promise<void>;
    deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId): Promise<void>;
    deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise<void>;
    deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void>; // for CronJob

    // ----- AI assistants -----
    isAiAssistantUsable(aiAssistantId: string, user: IUserHasId): Promise<boolean>;
    createAiAssistant(data: UpsertAiAssistantData, user: IUserHasId): Promise<AiAssistantDocument>;
    updateAiAssistant(aiAssistantId: string, data: UpsertAiAssistantData, user: IUserHasId): Promise<AiAssistantDocument>;
    getAccessibleAiAssistants(user: IUserHasId): Promise<AccessibleAiAssistants>;
    isLearnablePageLimitExceeded(user: IUserHasId, pagePathPatterns: string[]): Promise<boolean>;
  }
  76. class OpenaiService implements IOpenaiService {
  /**
   * Client delegator for the service type currently configured under `openai:serviceType`.
   * The config is read on every access.
   */
  private get client() {
    return getClient({ openaiServiceType: configManager.getConfig('openai:serviceType') });
  }
  /**
   * Asks the configured chat model (`openai:assistantModel:chat`) for a short title summarizing a message.
   *
   * @param message the user message the title is derived from
   * @returns the title text returned by the model, or null when the completion carries no content
   */
  async generateThreadTitle(message: string): Promise<string | null> {
    const model = configManager.getConfig('openai:assistantModel:chat');

    // Join with a space so the instructions stay separate sentences
    // (joining with '' produced "...your message.Respond in...").
    const systemMessage = [
      'Create a brief title (max 5 words) from your message.',
      'Respond in the same language the user uses in their input.',
      'Response should only contain the title.',
    ].join(' ');

    const threadTitleCompletion = await this.client.chatCompletion({
      model,
      messages: [
        { role: 'system', content: systemMessage },
        { role: 'user', content: message },
      ],
    });

    // Guard against an empty `choices` list instead of throwing a TypeError
    return threadTitleCompletion.choices[0]?.message.content ?? null;
  }
  /**
   * Creates an OpenAI thread bound to the vector store of the given AiAssistant
   * and persists a ThreadRelation document for it.
   *
   * @param userId id stored as the owner of the ThreadRelation
   * @param aiAssistantId id of the AiAssistant whose vector store the thread uses
   * @param initialUserMessage first user message; used only to generate the thread title
   * @returns the created ThreadRelation document
   * @throws whatever the vector store lookup, the OpenAI client or the model creation throws
   */
  async createThread(userId: string, aiAssistantId: string, initialUserMessage: string): Promise<ThreadRelationDocument> {
    const vectorStoreRelation = await this.getVectorStoreRelationByAiAssistantId(aiAssistantId);

    // Title generation is best-effort: a failure is logged and the thread is created without a title
    let threadTitle: string | null = null;
    if (initialUserMessage != null) {
      try {
        threadTitle = await this.generateThreadTitle(initialUserMessage);
      }
      catch (err) {
        logger.error(err);
      }
    }

    // Errors propagate unchanged to the caller (the former catch block only re-threw)
    const thread = await this.client.createThread(vectorStoreRelation.vectorStoreId);
    const threadRelation = await ThreadRelationModel.create({
      userId,
      aiAssistant: aiAssistantId,
      threadId: thread.id,
      title: threadTitle,
    });
    return threadRelation;
  }
  /**
   * Points every thread of the given AiAssistant at the given vector store.
   * Threads are updated one at a time; a failure on one thread is logged and the remaining ones are still processed.
   *
   * @param aiAssistantId id of the AiAssistant whose threads are updated
   * @param vectorStoreId vector store id passed to the client for each thread
   */
  async updateThreads(aiAssistantId: string, vectorStoreId: string): Promise<void> {
    const relations = await this.getThreadsByAiAssistantId(aiAssistantId);

    for await (const { threadId } of relations) {
      try {
        const response = await this.client.updateThread(threadId, vectorStoreId);
        logger.debug('Update thread', response);
      }
      catch (err) {
        logger.error(err);
      }
    }
  }
  /**
   * Finds all ThreadRelation documents whose `aiAssistant` field equals the given id.
   *
   * @param aiAssistantId id of the AiAssistant
   * @returns the matching ThreadRelation documents (possibly empty)
   */
  async getThreadsByAiAssistantId(aiAssistantId: string): Promise<ThreadRelationDocument[]> {
    return ThreadRelationModel.find({ aiAssistant: aiAssistantId });
  }
  /**
   * Deletes the OpenAI thread behind a ThreadRelation document and then removes the document itself.
   *
   * When the client call fails, the error is handed to `openaiApiErrorHandler` with a not-found handler
   * that removes the document, and the original error is re-thrown afterwards.
   *
   * @param threadRelationId id of the ThreadRelation document
   * @returns the ThreadRelation document that was targeted
   * @throws a 404 http-error when the document does not exist; otherwise the client error
   */
  async deleteThread(threadRelationId: string): Promise<ThreadRelationDocument> {
    const targetRelation = await ThreadRelationModel.findById(threadRelationId);
    if (targetRelation == null) {
      throw createError(404, 'ThreadRelation document does not exist');
    }

    const removeRelation = async(): Promise<void> => { await targetRelation.remove() };

    try {
      const response = await this.client.deleteThread(targetRelation.threadId);
      logger.debug('Delete thread', response);
      await removeRelation();
    }
    catch (err) {
      await openaiApiErrorHandler(err, { notFoundError: removeRelation });
      throw err;
    }

    return targetRelation;
  }
  /**
   * Deletes expired threads on the OpenAI side and then removes the ThreadRelation documents
   * of the threads that were deleted successfully. Intended for the cron job.
   *
   * @param limit maximum number of expired ThreadRelation documents to fetch
   * @param apiCallInterval milliseconds to wait after each successful delete call
   */
  public async deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void> {
    const expiredRelations = await ThreadRelationModel.getExpiredThreadRelations(limit);
    if (expiredRelations == null) {
      return;
    }

    const pause = (): Promise<void> => new Promise<void>(resolve => setTimeout(resolve, apiCallInterval));

    const removedThreadIds: string[] = [];
    for await (const relation of expiredRelations) {
      try {
        const response = await this.client.deleteThread(relation.threadId);
        logger.debug('Delete thread', response);
        removedThreadIds.push(relation.threadId);

        // throttle consecutive API calls
        await pause();
      }
      catch (err) {
        // a failed thread stays in the DB and is not added to the removal list
        logger.error(err);
      }
    }

    await ThreadRelationModel.deleteMany({ threadId: { $in: removedThreadIds } });
  }
  /**
   * Fetches the messages of a thread and passes every text content block through
   * `replaceAnnotationWithPageLink` before returning the page.
   *
   * @param threadId id of the OpenAI thread
   * @param lang language forwarded to `replaceAnnotationWithPageLink`
   * @param options list parameters forwarded to the client
   * @returns the messages page returned by the client, after the text blocks were processed
   */
  async getMessageData(threadId: string, lang?: Lang, options?: MessageListParams): Promise<OpenAI.Beta.Threads.Messages.MessagesPage> {
    const messagesPage = await this.client.getMessages(threadId, options);

    for await (const message of messagesPage.data) {
      for await (const contentBlock of message.content) {
        // only text blocks are processed
        if (contentBlock.type !== 'text') {
          continue;
        }
        await replaceAnnotationWithPageLink(contentBlock, lang);
      }
    }

    return messagesPage;
  }
  /**
   * Loads an AiAssistant with its `vectorStore` populated and returns that vector store document.
   *
   * @param aiAssistantId id of the AiAssistant
   * @returns the populated `vectorStore` of the AiAssistant (cast, not validated)
   * @throws a 404 http-error when the AiAssistant does not exist
   */
  async getVectorStoreRelationByAiAssistantId(aiAssistantId: string): Promise<VectorStoreDocument> {
    // findById expects the id itself, not a filter object
    const aiAssistant = await AiAssistantModel.findById(aiAssistantId).populate('vectorStore');
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    return aiAssistant.vectorStore as VectorStoreDocument;
  }
  /**
   * Resolves the distinct, non-deleted VectorStore documents that have a
   * VectorStoreFileRelation for at least one of the given pages.
   *
   * @param pageIds ids of the pages to look up
   * @returns plain aggregation results shaped like VectorStore documents, one per vector store
   */
  async getVectorStoreRelationsByPageIds(pageIds: Types.ObjectId[]): Promise<VectorStoreDocument[]> {
    // Named `stages` so it does not shadow the `pipeline` imported from 'stream/promises'
    const stages = [
      // 1. keep file relations that belong to the given pages
      { $match: { page: { $in: pageIds } } },
      // 2. join the VectorStore document of each relation
      {
        $lookup: {
          from: 'vectorstores',
          localField: 'vectorStoreRelationId',
          foreignField: '_id',
          as: 'vectorStore',
        },
      },
      // 3. flatten the joined array
      { $unwind: '$vectorStore' },
      // 4. drop vector stores flagged as deleted
      { $match: { 'vectorStore.isDeleted': false } },
      // 5. promote the vector store to the root document
      { $replaceRoot: { newRoot: '$vectorStore' } },
      // 6. de-duplicate by _id
      { $group: { _id: '$_id', doc: { $first: '$$ROOT' } } },
      // 7. restore the vector store document shape
      { $replaceRoot: { newRoot: '$doc' } },
    ];

    return VectorStoreFileRelationModel.aggregate<VectorStoreDocument>(stages);
  }
  /**
   * Creates a vector store through the client and stores a VectorStore document holding its id.
   *
   * @param name name passed to the client when creating the vector store
   * @returns the created VectorStore document
   * @throws a new Error built from whatever the client or the model threw
   */
  private async createVectorStore(name: string): Promise<VectorStoreDocument> {
    try {
      const { id: vectorStoreId } = await this.client.createVectorStore(name);
      const createdDocument = await VectorStoreModel.create({ vectorStoreId }) as VectorStoreDocument;
      return createdDocument;
    }
    catch (err) {
      // NOTE(review): wrapping discards the original error object; kept as-is for callers
      throw new Error(err);
    }
  }
  /**
   * Converts a page revision from markdown to HTML and uploads it as `<pageId>.html`.
   *
   * @param pageId id of the page; used for the uploaded file name
   * @param pagePath path of the page, forwarded to the markdown converter
   * @param revisionBody markdown body of the revision
   * @returns the file object returned by the client
   */
  private async uploadFile(pageId: Types.ObjectId, pagePath: string, revisionBody: string): Promise<OpenAI.Files.FileObject> {
    const html = await convertMarkdownToHtml({ pagePath, revisionBody });
    const htmlFile = await toFile(Readable.from(html), `${pageId}.html`);
    return this.client.uploadFile(htmlFile);
  }
  /**
   * Deletes the OpenAI vector store behind a non-deleted VectorStore document and marks the document as deleted.
   * Does nothing when no non-deleted document with the given id exists.
   *
   * @param vectorStoreRelationId id of the VectorStore document
   * @throws a new Error built from the client error when the delete call fails
   */
  async deleteVectorStore(vectorStoreRelationId: string): Promise<void> {
    const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ _id: vectorStoreRelationId, isDeleted: false });
    if (vectorStoreDocument == null) {
      return;
    }

    try {
      const deleteVectorStoreResponse = await this.client.deleteVectorStore(vectorStoreDocument.vectorStoreId);
      logger.debug('Delete vector store', deleteVectorStoreResponse);
      await vectorStoreDocument.markAsDeleted();
    }
    catch (err) {
      // Wrap the call in a closure: passing `vectorStoreDocument.markAsDeleted` directly detaches
      // the method from its document, so `this` would be lost when the handler invokes it.
      await openaiApiErrorHandler(err, { notFoundError: async() => { await vectorStoreDocument.markAsDeleted() } });
      throw new Error(err);
    }
  }
  /**
   * Uploads the given pages as files and attaches them to a vector store.
   *
   * Steps:
   * 1. upload every page that has a non-empty revision body (in parallel; individual failures are logged),
   * 2. upsert the VectorStoreFileRelation documents for the uploaded files,
   * 3. create a vector store file batch for them and mark the relations as attached.
   * If step 2 or 3 fails, the error is logged and the files of all given pages are deleted again.
   *
   * @param vectorStoreRelation VectorStore document the files are attached to
   * @param pages pages to upload; at most BATCH_SIZE entries
   * @throws AssertionError when more than BATCH_SIZE pages are passed
   */
  async createVectorStoreFile(vectorStoreRelation: VectorStoreDocument, pages: Array<HydratedDocument<PageDocument>>): Promise<void> {
    // Check the batch size BEFORE any upload starts; asserting after the workers were launched
    // would leave their uploads running (and their rejections unhandled) when the assertion fails.
    assert(pages.length <= BATCH_SIZE, 'workers.length must be less than or equal to BATCH_SIZE');

    const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();

    // Uploads one revision body and records the resulting file id for the page
    const uploadAndRegister = async(page: HydratedDocument<PageDocument>, revisionBody: string): Promise<void> => {
      const uploadedFile = await this.uploadFile(page._id, page.path, revisionBody);
      prepareVectorStoreFileRelations(vectorStoreRelation._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
    };

    const processUploadFile = async(page: HydratedDocument<PageDocument>): Promise<void> => {
      if (page._id == null || page.revision == null) {
        return;
      }

      // Use the already populated revision when it has content
      if (isPopulated(page.revision) && page.revision.body.length > 0) {
        await uploadAndRegister(page, page.revision.body);
        return;
      }

      // Otherwise populate the revision and upload it when it has content
      const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
      if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
        await uploadAndRegister(page, pagePopulatedToShowRevision.revision.body);
      }
    };

    // Start all uploads and wait for every one of them to settle
    const fileUploadResult = await Promise.allSettled(pages.map(processUploadFile));
    fileUploadResult.forEach((result) => {
      if (result.status === 'rejected') {
        logger.error(result.reason);
      }
    });

    const vectorStoreFileRelations = Array.from(vectorStoreFileRelationsMap.values());
    const uploadedFileIds = vectorStoreFileRelations.flatMap(data => data.fileIds);
    if (uploadedFileIds.length === 0) {
      return;
    }

    const pageIds = pages.map(page => page._id);

    try {
      // Save vector store file relation
      await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(vectorStoreFileRelations);

      // Create vector store file
      const createVectorStoreFileBatchResponse = await this.client.createVectorStoreFileBatch(vectorStoreRelation.vectorStoreId, uploadedFileIds);
      logger.debug('Create vector store file', createVectorStoreFileBatchResponse);

      // Set isAttachedToVectorStore: true when the uploaded file is attached to VectorStore
      await VectorStoreFileRelationModel.markAsAttachedToVectorStore(pageIds);
    }
    catch (err) {
      logger.error(err);

      // Delete all uploaded files if createVectorStoreFileBatch fails
      for await (const pageId of pageIds) {
        await this.deleteVectorStoreFile(vectorStoreRelation._id, pageId);
      }
    }
  }
  /**
   * Deletes all VectorStore documents that are marked as deleted (isDeleted: true)
   * and have no associated VectorStoreFileRelation documents. Intended for the cron job.
   */
  async deleteObsoletedVectorStoreRelations(): Promise<void> {
    const deletedVectorStoreRelations = await VectorStoreModel.find({ isDeleted: true });
    if (deletedVectorStoreRelations.length === 0) {
      return;
    }

    // Every vector store that is still referenced by at least one file relation.
    // $group yields one `{ _id }` row per distinct vectorStoreRelationId.
    const referencedVectorStores = await VectorStoreFileRelationModel.aggregate<{ _id: Types.ObjectId }>([
      { $group: { _id: '$vectorStoreRelationId' } },
    ]);
    const referencedVectorStoreRelationIds = referencedVectorStores.map(row => row._id);

    // No early return when nothing is referenced: in that case every deleted vector store
    // has no file relation left and must be purged ($nin with an empty list matches all).
    await VectorStoreModel.deleteMany({ _id: { $nin: referencedVectorStoreRelationIds }, isDeleted: true });
  }
  /**
   * Deletes the uploaded files recorded for a page in a vector store and updates the
   * VectorStoreFileRelation document accordingly.
   *
   * The relation document is removed when every file was deleted (or reported by the not-found
   * handler); otherwise it is saved with only the file ids that could not be deleted.
   *
   * @param vectorStoreRelationId id of the VectorStore document
   * @param pageId id of the page
   * @param apiCallInterval optional milliseconds to wait after each successful delete call
   */
  async deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId, apiCallInterval?: number): Promise<void> {
    const relation = await VectorStoreFileRelationModel.findOne({ vectorStoreRelationId, page: pageId });
    if (relation == null) {
      return;
    }

    const removedFileIds = new Set<string>();

    for await (const fileId of relation.fileIds) {
      try {
        const response = await this.client.deleteFile(fileId);
        logger.debug('Delete vector store file', response);
        removedFileIds.add(fileId);

        if (apiCallInterval != null) {
          // throttle consecutive API calls
          await new Promise<void>(resolve => setTimeout(resolve, apiCallInterval));
        }
      }
      catch (err) {
        // the not-found handler counts the file as deleted
        await openaiApiErrorHandler(err, { notFoundError: async() => { removedFileIds.add(fileId) } });
        logger.error(err);
      }
    }

    const remainingFileIds = relation.fileIds.filter(fileId => !removedFileIds.has(fileId));

    if (remainingFileIds.length > 0) {
      // keep the relation so the leftover files can be retried later
      relation.fileIds = remainingFileIds;
      await relation.save();
      return;
    }

    await relation.remove();
  }
  /**
   * Deletes the vector store files of the given pages from every non-deleted vector store
   * that holds a file relation for any of them.
   * Pages are processed one after another; for each page all vector stores are handled in parallel
   * and individual failures are ignored (`Promise.allSettled`).
   *
   * @param pageIds ids of the pages whose files are deleted
   */
  async deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise<void> {
    const vectorStoreRelations = await this.getVectorStoreRelationsByPageIds(pageIds);
    if (vectorStoreRelations == null || vectorStoreRelations.length === 0) {
      return;
    }

    for await (const pageId of pageIds) {
      await Promise.allSettled(
        vectorStoreRelations.map(relation => this.deleteVectorStoreFile(relation._id, pageId)),
      );
    }
  }
  /**
   * Deletes files that still belong to vector stores marked as deleted. Intended for the cron job.
   *
   * @param limit maximum number of VectorStoreFileRelation documents processed per call
   * @param apiCallInterval milliseconds forwarded to `deleteVectorStoreFile` as the wait between API calls
   */
  async deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void> {
    // vector stores flagged as deleted
    const deletedVectorStores = await VectorStoreModel.find({ isDeleted: true });
    if (deletedVectorStores.length === 0) {
      return;
    }

    // file relations that still point at one of those vector stores
    const deletedVectorStoreIds = deletedVectorStores.map(vectorStore => vectorStore._id);
    const obsoleteRelations = await VectorStoreFileRelationModel
      .find({ vectorStoreRelationId: { $in: deletedVectorStoreIds } })
      .limit(limit);
    if (obsoleteRelations.length === 0) {
      return;
    }

    // delete them one by one; a failure is logged and does not stop the loop
    for await (const relation of obsoleteRelations) {
      try {
        await this.deleteVectorStoreFile(relation.vectorStoreRelationId, relation.page, apiCallInterval);
      }
      catch (err) {
        logger.error(err);
      }
    }
  }
  /**
   * Narrows pages down to those the AiAssistant may read under its access scope.
   *
   * - PUBLIC_ONLY: public pages only
   * - GROUPS: public pages, plus user-group pages granted to a group related to the owner
   * - OWNER: the above, plus owner-only pages granted to the owner
   * - any other scope: no pages
   *
   * NOTE(review): for GROUPS the owner's groups are used, not `grantedGroupsForAccessScope` — confirm this is intended.
   *
   * @param aiAssistant assistant whose `accessScope` and `owner` drive the filter
   * @param pages candidate pages
   * @returns the subset of `pages` that passes the filter
   */
  async filterPagesByAccessScope(aiAssistant: AiAssistantDocument, pages: HydratedDocument<PageDocument>[]) {
    type TargetPage = HydratedDocument<PageDocument>;

    const isPublic = (page: TargetPage): boolean => page.grant === PageGrant.GRANT_PUBLIC;

    const isGrantedToAnyGroup = (page: TargetPage, groupIds: string[]): boolean => {
      return page.grant === PageGrant.GRANT_USER_GROUP
        && page.grantedGroups.some(group => groupIds.includes(getIdStringForRef(group.item)));
    };

    const isGrantedToOwner = (page: TargetPage, ownerRef: Ref<IUser>): boolean => {
      return page.grant === PageGrant.GRANT_OWNER
        && page.grantedUsers.some(user => getIdStringForRef(user) === getIdStringForRef(ownerRef));
    };

    // ids of all (internal and external) user groups related to the owner
    const fetchOwnerGroupIds = async(ownerRef: Ref<IUser>) => {
      const internalGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(ownerRef);
      const externalGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(ownerRef);
      return [...internalGroupIds, ...externalGroupIds].map(groupId => getIdStringForRef(groupId));
    };

    if (aiAssistant.accessScope === AiAssistantAccessScope.PUBLIC_ONLY) {
      return pages.filter(isPublic);
    }

    if (aiAssistant.accessScope === AiAssistantAccessScope.GROUPS) {
      const groupIds = await fetchOwnerGroupIds(aiAssistant.owner);
      return pages.filter(page => isPublic(page) || isGrantedToAnyGroup(page, groupIds));
    }

    if (aiAssistant.accessScope === AiAssistantAccessScope.OWNER) {
      const groupIds = await fetchOwnerGroupIds(aiAssistant.owner);
      return pages.filter((page) => {
        return isPublic(page) || isGrantedToOwner(page, aiAssistant.owner) || isGrantedToAnyGroup(page, groupIds);
      });
    }

    return [];
  }
  /**
   * Adds newly created pages to the vector store of every AiAssistant whose path patterns cover them.
   *
   * An assistant is skipped when its owner is not populated, when its learnable page limit is exceeded,
   * or when its vector store is missing or not populated.
   *
   * @param pages the created pages
   */
  async createVectorStoreFileOnPageCreate(pages: HydratedDocument<PageDocument>[]): Promise<void> {
    const matchedAssistants = await this.findAiAssistantByPagePath(
      pages.map(page => page.path),
      { shouldPopulateOwner: true, shouldPopulateVectorStore: true },
    );
    if (matchedAssistants.length === 0) {
      return;
    }

    for await (const aiAssistant of matchedAssistants) {
      if (!isPopulated(aiAssistant.owner)) {
        continue;
      }

      const limitExceeded = await this.isLearnablePageLimitExceeded(aiAssistant.owner, aiAssistant.pagePathPatterns);
      if (limitExceeded) {
        continue;
      }

      const pagesToVectorize = await this.filterPagesByAccessScope(aiAssistant, pages);

      const vectorStoreRelation = aiAssistant.vectorStore;
      if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
        continue;
      }

      logger.debug('--------- createVectorStoreFileOnPageCreate ---------');
      logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
      logger.debug('VectorStoreFile pagePath to be created: ', pagesToVectorize.map(target => target.path));
      logger.debug('-----------------------------------------------------');

      await this.createVectorStoreFile(vectorStoreRelation as VectorStoreDocument, pagesToVectorize);
    }
  }
  /**
   * Refreshes the vector store file of an updated page for every AiAssistant whose path patterns cover it.
   *
   * For each assistant with a populated vector store, the files currently recorded for the page are
   * deleted first and a new file is then created, provided the page is still readable under the
   * assistant's access scope.
   *
   * @param page the updated page
   */
  async updateVectorStoreFileOnPageUpdate(page: HydratedDocument<PageDocument>) {
    const aiAssistants = await this.findAiAssistantByPagePath([page.path], { shouldPopulateVectorStore: true });
    if (aiAssistants.length === 0) {
      return;
    }

    for await (const aiAssistant of aiAssistants) {
      const pagesToVectorize = await this.filterPagesByAccessScope(aiAssistant, [page]);

      const vectorStoreRelation = aiAssistant.vectorStore;
      if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
        continue;
      }

      logger.debug('---------- updateVectorStoreOnPageUpdate ------------');
      logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
      logger.debug('PagePath of VectorStoreFile to be deleted: ', page.path);
      logger.debug('pagePath of VectorStoreFile to be created: ', pagesToVectorize.map(target => target.path));
      logger.debug('-----------------------------------------------------');

      // Delete the stale file(s) BEFORE creating the new one: deleteVectorStoreFile removes every
      // file id recorded for the page, so running it after the upload could also delete the new file.
      await this.deleteVectorStoreFile((vectorStoreRelation as VectorStoreDocument)._id, page._id);

      // Do not create a new VectorStoreFile if page is changed to a permission that AiAssistant does not have access to
      // (pagesToVectorize is empty in that case)
      await this.createVectorStoreFile(vectorStoreRelation as VectorStoreDocument, pagesToVectorize);
    }
  }
  /**
   * Streams all pages matching `conditions` (with `revision` populated) in batches of BATCH_SIZE
   * and creates vector store files for each batch.
   *
   * @param vectorStoreRelation VectorStore document the files are attached to
   * @param conditions Mongo filter selecting the pages to vectorize
   * @throws when the stream pipeline fails, including errors thrown by `createVectorStoreFile`
   */
  private async createVectorStoreFileWithStream(vectorStoreRelation: VectorStoreDocument, conditions: mongoose.FilterQuery<PageDocument>): Promise<void> {
    const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');

    const pageCursor = Page.find({ ...conditions })
      .populate('revision')
      .cursor({ batchSize: BATCH_SIZE });

    // groups single pages into arrays of up to BATCH_SIZE
    const batcher = createBatchStream(BATCH_SIZE);

    const uploadBatch = (batch: HydratedDocument<PageDocument>[]): Promise<void> => {
      return this.createVectorStoreFile(vectorStoreRelation, batch);
    };

    const uploader = new Transform({
      objectMode: true,
      async transform(batch: HydratedDocument<PageDocument>[], _encoding, done) {
        try {
          logger.debug('Search results of page paths', batch.map(page => page.path));
          await uploadBatch(batch);
          // forward the batch downstream and signal completion in one call
          done(null, batch);
        }
        catch (error) {
          done(error);
        }
      },
    });

    await pipeline(pageCursor, batcher, uploader);
  }
  /**
   * Builds the Mongo filter that selects the pages an AiAssistant should learn from.
   *
   * Every result is an `$or` of:
   * - restricted ("Anyone with the link") pages whose path is listed literally (not via a glob), and
   * - pages under the path patterns whose grant fits the access scope.
   *
   * @param owner owner of the assistant (used for the OWNER scope)
   * @param accessScope access scope of the assistant
   * @param grantedGroupsForAccessScope groups used for the GROUPS scope
   * @param pagePathPatterns literal paths and/or glob patterns
   * @returns the filter query for the Page model
   * @throws Error when the scope is GROUPS without groups, or when the scope is unknown
   */
  private async createConditionForCreateVectorStoreFile(
      owner: AiAssistant['owner'],
      accessScope: AiAssistant['accessScope'],
      grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
      pagePathPatterns: AiAssistant['pagePathPatterns'],
  ): Promise<mongoose.FilterQuery<PageDocument>> {
    const pathMatchers = convertPathPatternsToRegExp(pagePathPatterns);

    // Include pages in search targets when their paths with 'Anyone with the link' permission are directly specified instead of using glob pattern
    const literalPagePaths = pagePathPatterns.filter(pattern => !isGlobPatternPath(pattern));
    const restrictedLiteralCondition: mongoose.FilterQuery<PageDocument> = {
      grant: PageGrant.GRANT_RESTRICTED,
      path: { $in: literalPagePaths },
    };

    switch (accessScope) {
      case AiAssistantAccessScope.PUBLIC_ONLY:
        return {
          $or: [
            restrictedLiteralCondition,
            {
              grant: PageGrant.GRANT_PUBLIC,
              path: { $in: pathMatchers },
            },
          ],
        };

      case AiAssistantAccessScope.GROUPS: {
        if (grantedGroupsForAccessScope == null || grantedGroupsForAccessScope.length === 0) {
          throw new Error('grantedGroups is required when accessScope is GROUPS');
        }

        const accessGroupIds = grantedGroupsForAccessScope.map(group => getIdForRef(group.item).toString());

        return {
          $or: [
            restrictedLiteralCondition,
            {
              grant: { $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP] },
              path: { $in: pathMatchers },
              $or: [
                { 'grantedGroups.item': { $in: accessGroupIds } },
                { grant: PageGrant.GRANT_PUBLIC },
              ],
            },
          ],
        };
      }

      case AiAssistantAccessScope.OWNER: {
        // internal groups are looked up first, then external groups
        const internalGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
        const externalGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
        const ownerGroupIds = [...internalGroupIds, ...externalGroupIds].map(groupId => groupId.toString());

        return {
          $or: [
            restrictedLiteralCondition,
            {
              grant: { $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP, PageGrant.GRANT_OWNER] },
              path: { $in: pathMatchers },
              $or: [
                { 'grantedGroups.item': { $in: ownerGroupIds } },
                { grantedUsers: { $in: [getIdForRef(owner)] } },
                { grant: PageGrant.GRANT_PUBLIC },
              ],
            },
          ],
        };
      }

      default:
        throw new Error('Invalid accessScope value');
    }
  }
  /**
   * Validates the group settings of an AiAssistant.
   *
   * Rejects group lists given for a scope that is not "groups", and rejects any listed group
   * that is not among the (internal or external) user groups related to the owner.
   *
   * @param owner owner of the assistant
   * @param shareScope share scope of the assistant
   * @param accessScope access scope of the assistant
   * @param grantedGroupsForShareScope groups given for the share scope
   * @param grantedGroupsForAccessScope groups given for the access scope
   * @throws Error describing the first violated rule
   */
  private async validateGrantedUserGroupsForAiAssistant(
      owner: AiAssistant['owner'],
      shareScope: AiAssistant['shareScope'],
      accessScope: AiAssistant['accessScope'],
      grantedGroupsForShareScope: AiAssistant['grantedGroupsForShareScope'],
      grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
  ) {
    // Group lists are only allowed together with the matching "groups" scope
    const hasStrayShareGroups = shareScope !== AiAssistantShareScope.GROUPS && grantedGroupsForShareScope != null;
    if (hasStrayShareGroups) {
      throw new Error('grantedGroupsForShareScope is specified when shareScope is not “groups”.');
    }

    const hasStrayAccessGroups = accessScope !== AiAssistantAccessScope.GROUPS && grantedGroupsForAccessScope != null;
    if (hasStrayAccessGroups) {
      throw new Error('grantedGroupsForAccessScope is specified when accsessScope is not “groups”.');
    }

    // internal groups are looked up first, then external groups
    const internalGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
    const externalGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
    const ownerUserGroupIds = [...internalGroupIds, ...externalGroupIds].map(groupId => groupId.toString());

    // Throws `errorMessage` unless every given group id is one of the owner's groups
    const assertOwnerBelongsToAll = (groupIds: string[], errorMessage: string): void => {
      const ownerBelongsToAll = groupIds.every(groupId => ownerUserGroupIds.includes(groupId));
      if (!ownerBelongsToAll) {
        throw new Error(errorMessage);
      }
    };

    if (grantedGroupsForShareScope != null && grantedGroupsForShareScope.length > 0) {
      assertOwnerBelongsToAll(
        grantedGroupsForShareScope.map(group => getIdForRef(group.item).toString()),
        'A userGroup to which the owner does not belong is specified in grantedGroupsForShareScope',
      );
    }

    if (grantedGroupsForAccessScope != null && grantedGroupsForAccessScope.length > 0) {
      assertOwnerBelongsToAll(
        grantedGroupsForAccessScope.map(group => getIdForRef(group.item).toString()),
        'A userGroup to which the owner does not belong is specified in grantedGroupsForAccessScope',
      );
    }
  }
  /**
   * Tells whether the given user may use the AiAssistant with the given id.
   *
   * - PUBLIC_ONLY share scope: usable by anyone.
   * - OWNER share scope, or SAME_AS_ACCESS_SCOPE with an OWNER access scope: usable by the owner only.
   * - GROUPS share scope, or SAME_AS_ACCESS_SCOPE with a GROUPS access scope: usable when the user is
   *   related to at least one group listed in grantedGroupsForShareScope.
   * - Anything else: not usable.
   *
   * @param aiAssistantId id of the AiAssistant document
   * @param user user requesting to use the assistant
   * @returns true when the user may use the assistant
   * @throws 404 http error when the AiAssistant document does not exist
   */
  async isAiAssistantUsable(aiAssistantId: string, user: IUserHasId): Promise<boolean> {
    const aiAssistant = await AiAssistantModel.findById(aiAssistantId);
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    const requesterIsOwner = getIdStringForRef(aiAssistant.owner) === getIdStringForRef(user._id);
    const { shareScope, accessScope } = aiAssistant;

    if (shareScope === AiAssistantShareScope.PUBLIC_ONLY) {
      return true;
    }

    const followsAccessScope = shareScope === AiAssistantShareScope.SAME_AS_ACCESS_SCOPE;

    const isOwnerRestricted = shareScope === AiAssistantShareScope.OWNER
      || (followsAccessScope && accessScope === AiAssistantAccessScope.OWNER);
    if (isOwnerRestricted && requesterIsOwner) {
      return true;
    }

    const isGroupRestricted = shareScope === AiAssistantShareScope.GROUPS
      || (followsAccessScope && accessScope === AiAssistantAccessScope.GROUPS);
    if (!isGroupRestricted) {
      return false;
    }

    // NOTE(review): the SAME_AS_ACCESS_SCOPE + GROUPS case also lands here, yet only
    // grantedGroupsForShareScope is consulted — confirm this is intended.
    const relatedGroupIds = [
      ...(await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
      ...(await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
    ].map(groupId => groupId.toString());

    const sharedGroupIds = aiAssistant.grantedGroupsForShareScope?.map(group => getIdStringForRef(group.item)) ?? [];

    return relatedGroupIds.some(groupId => sharedGroupIds.includes(groupId));
  }
  /**
   * Creates an AiAssistant owned by the given user together with a new VectorStore.
   *
   * The granted groups are validated first, then the page query conditions are built,
   * the VectorStore is created and the AiAssistant document is stored. Populating the
   * VectorStore with files is started in the background and is NOT awaited.
   *
   * @param data assistant attributes (scopes, granted groups, page path patterns, name, ...)
   * @param user user who becomes the owner of the assistant
   * @returns the created AiAssistant document
   * @throws when the granted groups are invalid or any awaited step fails
   */
  async createAiAssistant(data: UpsertAiAssistantData, user: IUserHasId): Promise<AiAssistantDocument> {
    await this.validateGrantedUserGroupsForAiAssistant(
      user,
      data.shareScope,
      data.accessScope,
      data.grantedGroupsForShareScope,
      data.grantedGroupsForAccessScope,
    );

    const conditions = await this.createConditionForCreateVectorStoreFile(
      user,
      data.accessScope,
      data.grantedGroupsForAccessScope,
      data.pagePathPatterns,
    );

    const vectorStoreRelation = await this.createVectorStore(data.name);

    const aiAssistant = await AiAssistantModel.create({
      ...data, owner: user, vectorStore: vectorStoreRelation,
    });

    // VectorStore file creation is intentionally not awaited.
    // Attach a rejection handler so that a background failure is logged
    // instead of surfacing as an unhandled promise rejection.
    Promise.resolve(this.createVectorStoreFileWithStream(vectorStoreRelation, conditions))
      .catch((err: unknown) => {
        logger.error('Failed to create VectorStore files for the created AiAssistant', err);
      });

    return aiAssistant;
  }
  /**
   * Updates an AiAssistant owned by the given user.
   *
   * When accessScope, pagePathPatterns or the set of groups granted for the access scope
   * changed, the current VectorStore is deleted, a new one is created, the threads of the
   * assistant are pointed at it and its files are built in the background (not awaited).
   * Otherwise the existing VectorStore is kept.
   *
   * If the assistant is the default one and the new shareScope is not PUBLIC_ONLY,
   * its default flag is turned off.
   *
   * @param aiAssistantId id of the AiAssistant document to update
   * @param data new assistant attributes
   * @param user user who must be the owner of the assistant
   * @returns the updated AiAssistant document
   * @throws 404 http error when no assistant with this id is owned by the user
   * @throws when the granted groups are invalid or any awaited step fails
   */
  async updateAiAssistant(aiAssistantId: string, data: UpsertAiAssistantData, user: IUserHasId): Promise<AiAssistantDocument> {
    const aiAssistant = await AiAssistantModel.findOne({ owner: user, _id: aiAssistantId });
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    await this.validateGrantedUserGroupsForAiAssistant(
      user,
      data.shareScope,
      data.accessScope,
      data.grantedGroupsForShareScope,
      data.grantedGroupsForAccessScope,
    );

    // ObjectId[] -> string[], sorted so that the comparison below ignores the order of the groups
    const grantedGroupIdsForAccessScopeFromReq = (data.grantedGroupsForAccessScope?.map(group => getIdStringForRef(group.item)) ?? []).sort();
    const grantedGroupIdsForAccessScopeFromDb = (aiAssistant.grantedGroupsForAccessScope?.map(group => getIdStringForRef(group.item)) ?? []).sort();

    // If accessScope, pagePathPatterns, grantedGroupsForAccessScope have not changed, do not build VectorStore
    const shouldRebuildVectorStore = data.accessScope !== aiAssistant.accessScope
      || !isDeepEquals(data.pagePathPatterns, aiAssistant.pagePathPatterns)
      || !isDeepEquals(grantedGroupIdsForAccessScopeFromReq, grantedGroupIdsForAccessScopeFromDb);

    let newVectorStoreRelation: VectorStoreDocument | undefined;
    if (shouldRebuildVectorStore) {
      const conditions = await this.createConditionForCreateVectorStoreFile(
        user,
        data.accessScope,
        data.grantedGroupsForAccessScope,
        data.pagePathPatterns,
      );

      // Delete obsoleted VectorStore
      const obsoletedVectorStoreRelationId = getIdStringForRef(aiAssistant.vectorStore);
      await this.deleteVectorStore(obsoletedVectorStoreRelationId);

      newVectorStoreRelation = await this.createVectorStore(data.name);

      // Neither background task is awaited; rejection handlers are attached so that
      // a failure is logged instead of surfacing as an unhandled promise rejection.
      Promise.resolve(this.updateThreads(aiAssistantId, newVectorStoreRelation.vectorStoreId))
        .catch((err: unknown) => {
          logger.error('Failed to update threads of the updated AiAssistant', err);
        });

      Promise.resolve(this.createVectorStoreFileWithStream(newVectorStoreRelation, conditions))
        .catch((err: unknown) => {
          logger.error('Failed to create VectorStore files for the updated AiAssistant', err);
        });
    }

    aiAssistant.set({
      ...data,
      vectorStore: newVectorStoreRelation ?? aiAssistant.vectorStore,
    });

    let updatedAiAssistant: AiAssistantDocument = await aiAssistant.save();

    // The default flag is dropped once the assistant is no longer shared as PUBLIC_ONLY
    if (data.shareScope !== AiAssistantShareScope.PUBLIC_ONLY && aiAssistant.isDefault) {
      updatedAiAssistant = await AiAssistantModel.setDefault(aiAssistant._id, false);
    }

    return updatedAiAssistant;
  }
  /**
   * Lists the AiAssistants the given user can access, split by ownership.
   *
   * An assistant is selected when one of the following holds:
   * - it is owned by the user
   * - it is owned by someone else and its shareScope is PUBLIC_ONLY
   * - it is owned by someone else, its shareScope is GROUPS and one of its
   *   grantedGroupsForShareScope is a group related to the user
   *
   * The granted groups of both scopes are populated on the returned documents.
   *
   * @param user user whose accessible assistants are listed
   * @returns assistants owned by the user (myAiAssistants) and all the others (teamAiAssistants)
   */
  async getAccessibleAiAssistants(user: IUserHasId): Promise<AccessibleAiAssistants> {
    const relatedGroupIds = [
      ...(await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
      ...(await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
    ];

    const ownedByUser = { owner: user };

    const publicOwnedByOthers = {
      $and: [
        { owner: { $ne: user } },
        { shareScope: AiAssistantShareScope.PUBLIC_ONLY },
      ],
    };

    const groupSharedOwnedByOthers = {
      $and: [
        { owner: { $ne: user } },
        { shareScope: AiAssistantShareScope.GROUPS },
        { 'grantedGroupsForShareScope.item': { $in: relatedGroupIds } },
      ],
    };

    const assistants = await AiAssistantModel.find({
      $or: [ownedByUser, publicOwnedByOthers, groupSharedOwnedByOthers],
    })
      .populate('grantedGroupsForShareScope.item')
      .populate('grantedGroupsForAccessScope.item');

    // Partition the result by ownership in a single pass
    const myAiAssistants: typeof assistants = [];
    const teamAiAssistants: typeof assistants = [];
    for (const assistant of assistants) {
      if (assistant.owner.toString() === user._id.toString()) {
        myAiAssistants.push(assistant);
      }
      else {
        teamAiAssistants.push(assistant);
      }
    }

    return { myAiAssistants, teamAiAssistants };
  }
  /**
   * Checks whether the pages matched by the given patterns exceed the configured
   * 'openai:limitLearnablePageCountPerAssistant' limit.
   *
   * Each path returned by PageModel.descendantCountByPaths counts as one page plus its
   * descendant count, except for paths that appear verbatim in pagePathPatterns,
   * which count as a single page.
   *
   * @param user user passed through to the descendant count lookup
   * @param pagePathPatterns page paths, possibly with glob suffixes (removed via removeGlobPath before the lookup)
   * @returns true when the total page count is strictly greater than the limit
   */
  async isLearnablePageLimitExceeded(user: IUserHasId, pagePathPatterns: string[]): Promise<boolean> {
    const pathsWithoutGlob = removeGlobPath(pagePathPatterns);

    const Page = mongoose.model<IPage, PageModel>('Page');
    const descendantCounts = await Page.descendantCountByPaths(pathsWithoutGlob, user, null, true, true);

    let totalPageCount = 0;
    for (const entry of descendantCounts) {
      // A path listed verbatim in "pagePathPatterns" is treated as a single page
      const isSinglePage = pagePathPatterns.includes(entry.path);
      totalPageCount += isSinglePage ? 1 : entry.descendantCount + 1;
    }

    logger.debug('TotalPageCount: ', totalPageCount);

    const learnablePageLimit = configManager.getConfig('openai:limitLearnablePageCountPerAssistant');
    return totalPageCount > learnablePageLimit;
  }
  /**
   * Finds the AiAssistants whose pagePathPatterns contain one of the given page paths,
   * either verbatim or as one of the glob patterns produced by generateGlobPatterns.
   *
   * @param pagePaths page paths to look up
   * @param options.shouldPopulateOwner populate the 'owner' field when truthy
   * @param options.shouldPopulateVectorStore populate the 'vectorStore' field when truthy
   * @returns the matching AiAssistant documents
   */
  async findAiAssistantByPagePath(
    pagePaths: string[], options?: { shouldPopulateOwner?: boolean, shouldPopulateVectorStore?: boolean },
  ): Promise<AiAssistantDocument[]> {
    const { shouldPopulateOwner, shouldPopulateVectorStore } = options ?? {};

    const globPatterns = pagePaths.flatMap(pagePath => generateGlobPatterns(pagePath));

    const exactMatch = { pagePathPatterns: { $in: pagePaths } };
    const globPatternMatch = { pagePathPatterns: { $in: globPatterns } };

    const query = AiAssistantModel.find({ $or: [exactMatch, globPatternMatch] });

    if (shouldPopulateOwner) {
      query.populate('owner');
    }

    if (shouldPopulateVectorStore) {
      query.populate('vectorStore');
    }

    return query.exec();
  }
  781. }
  // Lazily created singleton; stays unassigned until the service is first enabled and requested
  let instance: OpenaiService;

  /**
   * Returns the shared OpenaiService instance.
   *
   * The instance is created on the first call for which 'app:aiEnabled' is truthy and
   * 'openai:serviceType' is one of OpenaiServiceTypes; it is reused afterwards without
   * consulting the config again.
   *
   * @returns the service, or undefined when no instance exists and the config does not allow creating one
   */
  export const getOpenaiService = (): IOpenaiService | undefined => {
    if (instance == null) {
      const isAiEnabled = configManager.getConfig('app:aiEnabled');
      const serviceType = configManager.getConfig('openai:serviceType');

      const canCreate = isAiEnabled && serviceType != null && OpenaiServiceTypes.includes(serviceType);
      if (!canCreate) {
        return undefined;
      }

      instance = new OpenaiService();
    }

    return instance;
  };