openai.ts 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042
  1. import assert from 'node:assert';
  2. import { Readable, Transform } from 'stream';
  3. import { pipeline } from 'stream/promises';
  4. import type {
  5. IUser, Ref, Lang, IPage,
  6. } from '@growi/core';
  7. import {
  8. PageGrant, getIdForRef, getIdStringForRef, isPopulated, type IUserHasId,
  9. } from '@growi/core';
  10. import { deepEquals } from '@growi/core/dist/utils';
  11. import { isGlobPatternPath } from '@growi/core/dist/utils/page-path-utils';
  12. import escapeStringRegexp from 'escape-string-regexp';
  13. import createError from 'http-errors';
  14. import mongoose, { type HydratedDocument, type Types } from 'mongoose';
  15. import { type OpenAI, toFile } from 'openai';
  16. import ExternalUserGroupRelation from '~/features/external-user-group/server/models/external-user-group-relation';
  17. import ThreadRelationModel, { type ThreadRelationDocument } from '~/features/openai/server/models/thread-relation';
  18. import VectorStoreModel, { type VectorStoreDocument } from '~/features/openai/server/models/vector-store';
  19. import VectorStoreFileRelationModel, {
  20. type VectorStoreFileRelation,
  21. prepareVectorStoreFileRelations,
  22. } from '~/features/openai/server/models/vector-store-file-relation';
  23. import type Crowi from '~/server/crowi';
  24. import type { IAttachmentDocument } from '~/server/models/attachment';
  25. import type { PageDocument, PageModel } from '~/server/models/page';
  26. import UserGroupRelation from '~/server/models/user-group-relation';
  27. import { configManager } from '~/server/service/config-manager';
  28. import { createBatchStream } from '~/server/util/batch-stream';
  29. import loggerFactory from '~/utils/logger';
  30. import { OpenaiServiceTypes } from '../../interfaces/ai';
  31. import type { UpsertAiAssistantData } from '../../interfaces/ai-assistant';
  32. import {
  33. type AccessibleAiAssistants, type AiAssistant, AiAssistantAccessScope, AiAssistantShareScope,
  34. } from '../../interfaces/ai-assistant';
  35. import type { MessageListParams } from '../../interfaces/message';
  36. import { ThreadType } from '../../interfaces/thread-relation';
  37. import type { IVectorStore } from '../../interfaces/vector-store';
  38. import { removeGlobPath } from '../../utils/remove-glob-path';
  39. import AiAssistantModel, { type AiAssistantDocument } from '../models/ai-assistant';
  40. import { convertMarkdownToHtml } from '../utils/convert-markdown-to-html';
  41. import { generateGlobPatterns } from '../utils/generate-glob-patterns';
  42. import { isVectorStoreCompatible } from '../utils/is-vector-store-compatible';
  43. import { getClient } from './client-delegator';
  44. import { openaiApiErrorHandler } from './openai-api-error-handler';
  45. import { replaceAnnotationWithPageLink } from './replace-annotation-with-page-link';
  /** Structural equality helper taken from the core `deepEquals` utilities. */
  const isDeepEquals = deepEquals.isDeepEquals;

  /** Upper bound on the number of pages handled per batch (cursor batch size and upload batch). */
  const BATCH_SIZE = 100;

  const logger = loggerFactory('growi:service:openai');

  /** String-keyed lookup of the vector store file relations collected while uploading files. */
  type VectorStoreFileRelationsMap = Map<string, VectorStoreFileRelation>;
  /**
   * Converts page path patterns into values usable in a `path: { $in: [...] }` condition.
   *
   * A glob pattern becomes an anchored RegExp that matches the base path itself and
   * every path below it; any other pattern is passed through unchanged.
   */
  const convertPathPatternsToRegExp = (pagePathPatterns: string[]): Array<string | RegExp> => {
    const toPathCondition = (pattern: string): string | RegExp => {
      if (!isGlobPatternPath(pattern)) {
        return pattern;
      }

      const escapedBasePath = escapeStringRegexp(pattern.replace('/*', ''));
      // https://regex101.com/r/x5KIZL/1
      return new RegExp(`^${escapedBasePath}($|/)`);
    };

    return pagePathPatterns.map(pattern => toPathCondition(pattern));
  };
  /**
   * Public contract of the OpenAI integration service: thread lifecycle,
   * vector store / vector store file maintenance and AI assistant management.
   */
  export interface IOpenaiService {
    // ----- Threads -----
    createThread(userId: string, type: ThreadType, aiAssistantId?: string, initialUserMessage?: string): Promise<ThreadRelationDocument>;
    getThreadsByAiAssistantId(aiAssistantId: string): Promise<ThreadRelationDocument[]>;
    deleteThread(threadRelationId: string): Promise<ThreadRelationDocument>;
    deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void>; // for CronJob
    getMessageData(threadId: string, lang?: Lang, options?: MessageListParams): Promise<OpenAI.Beta.Threads.Messages.MessagesPage>;

    // ----- Vector stores -----
    deleteObsoletedVectorStoreRelations(): Promise<void>; // for CronJob
    deleteVectorStore(vectorStoreRelationId: string): Promise<void>;

    // ----- Vector store files -----
    createVectorStoreFile(vectorStoreRelation: VectorStoreDocument, pages: PageDocument[]): Promise<void>;
    createVectorStoreFileOnPageCreate(pages: PageDocument[]): Promise<void>;
    updateVectorStoreFileOnPageUpdate(page: HydratedDocument<PageDocument>): Promise<void>;
    createVectorStoreFileOnUploadAttachment(
      pageId: string,
      attachment: HydratedDocument<IAttachmentDocument>,
      file: Express.Multer.File,
      readable: Readable,
    ): Promise<void>;
    deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId): Promise<void>;
    deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise<void>;
    deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void>; // for CronJob
    deleteVectorStoreFileOnDeleteAttachment(attachmentId: string): Promise<void>;

    // ----- AI assistants -----
    isAiAssistantUsable(aiAssistantId: string, user: IUserHasId): Promise<boolean>;
    createAiAssistant(data: UpsertAiAssistantData, user: IUserHasId): Promise<AiAssistantDocument>;
    updateAiAssistant(aiAssistantId: string, data: UpsertAiAssistantData, user: IUserHasId): Promise<AiAssistantDocument>;
    getAccessibleAiAssistants(user: IUserHasId): Promise<AccessibleAiAssistants>;
    isLearnablePageLimitExceeded(user: IUserHasId, pagePathPatterns: string[]): Promise<boolean>;
  }
  84. class OpenaiService implements IOpenaiService {
  /**
   * Binds the attachment hooks to this instance and registers them with the
   * attachment service as attach / detach handlers.
   */
  constructor(crowi: Crowi) {
    const { attachmentService } = crowi;

    // Bind first: the handlers are handed over as bare function references.
    this.createVectorStoreFileOnUploadAttachment = this.createVectorStoreFileOnUploadAttachment.bind(this);
    this.deleteVectorStoreFileOnDeleteAttachment = this.deleteVectorStoreFileOnDeleteAttachment.bind(this);

    attachmentService.addAttachHandler(this.createVectorStoreFileOnUploadAttachment);
    attachmentService.addDetachHandler(this.deleteVectorStoreFileOnDeleteAttachment);
  }
  /** Resolves the API client for the service type currently configured under `openai:serviceType`. */
  private get client() {
    return getClient({
      openaiServiceType: configManager.getConfig('openai:serviceType'),
    });
  }
  /**
   * Asks the chat completion API for a short title derived from the given message.
   *
   * @param message user message the title is generated from
   * @returns the generated title, or null when the completion carries no choice or no content
   */
  async generateThreadTitle(message: string): Promise<string | null> {
    const systemMessage = [
      'Create a brief title (max 5 words) from your message.',
      'Respond in the same language the user uses in their input.',
      'Response should only contain the title.',
    ].join('');

    const threadTitleCompletion = await this.client.chatCompletion({
      model: 'gpt-4.1-nano',
      messages: [
        {
          role: 'system',
          content: systemMessage,
        },
        {
          role: 'user',
          content: message,
        },
      ],
    });

    // A completion without any choice is reported as "no title" instead of
    // failing with a TypeError on `choices[0].message`.
    return threadTitleCompletion.choices[0]?.message.content ?? null;
  }
  /**
   * Creates a thread on the API side and stores the matching ThreadRelation document.
   *
   * When `aiAssistantId` is given, the assistant is loaded with its vector store populated and
   * that vector store id is passed to the client when creating the thread. When
   * `initialUserMessage` is given, a title is generated in the background and saved
   * on the relation once available; a failure there is only logged.
   *
   * @returns the created ThreadRelation document (its title is still null at this point)
   * @throws whatever the model lookup, the API client or the document creation throws
   */
  async createThread(userId: string, type: ThreadType, aiAssistantId?: string, initialUserMessage?: string): Promise<ThreadRelationDocument> {
    const aiAssistant = aiAssistantId != null
      ? await AiAssistantModel.findOne({ _id: { $eq: aiAssistantId } }).populate<{ vectorStore: IVectorStore }>('vectorStore')
      : null;

    const thread = await this.client.createThread(aiAssistant?.vectorStore?.vectorStoreId);
    const threadRelation = await ThreadRelationModel.create({
      userId,
      type,
      aiAssistant: aiAssistantId,
      threadId: thread.id,
      title: null, // Initialize title as null
    });

    if (initialUserMessage != null) {
      // Do not await, run in background
      this.generateThreadTitle(initialUserMessage)
        .then(async(generatedTitle) => {
          if (generatedTitle != null) {
            threadRelation.title = generatedTitle;
            await threadRelation.save();
          }
        })
        .catch((err) => {
          logger.error(`Failed to generate thread title for threadId ${thread.id}:`, err);
        });
    }

    return threadRelation;
  }
  /**
   * Calls the client's `updateThread` with the given vector store id for every thread
   * returned by `getThreadsByAiAssistantId`.
   * A failure on one thread is logged and does not stop the remaining updates.
   */
  async updateThreads(aiAssistantId: string, vectorStoreId: string): Promise<void> {
    const relations = await this.getThreadsByAiAssistantId(aiAssistantId);

    for await (const relation of relations) {
      try {
        const response = await this.client.updateThread(relation.threadId, vectorStoreId);
        logger.debug('Update thread', response);
      }
      catch (err) {
        logger.error(err);
      }
    }
  }
  /**
   * Finds the ThreadRelation documents of the given assistant, restricted to one thread type.
   *
   * @param type thread type to look for; defaults to `ThreadType.KNOWLEDGE`
   */
  async getThreadsByAiAssistantId(aiAssistantId: string, type: ThreadType = ThreadType.KNOWLEDGE): Promise<ThreadRelationDocument[]> {
    const relations = await ThreadRelationModel.find({
      aiAssistant: aiAssistantId,
      type,
    });
    return relations;
  }
  /**
   * Deletes the thread on the API side and removes its ThreadRelation document.
   *
   * When the API call fails, the error handler is given a not-found callback that removes
   * the relation; the error is rethrown in every failure case.
   *
   * @returns the ThreadRelation document that was removed
   * @throws a 404 http-error when no ThreadRelation exists for `threadRelationId`
   */
  async deleteThread(threadRelationId: string): Promise<ThreadRelationDocument> {
    const threadRelation = await ThreadRelationModel.findById(threadRelationId);
    if (threadRelation == null) {
      throw createError(404, 'ThreadRelation document does not exist');
    }

    const removeRelation = async() => { await threadRelation.remove() };

    try {
      const response = await this.client.deleteThread(threadRelation.threadId);
      logger.debug('Delete thread', response);
      await removeRelation();
    }
    catch (err) {
      await openaiApiErrorHandler(err, { notFoundError: removeRelation });
      throw err;
    }

    return threadRelation;
  }
  /**
   * Deletes expired threads on the API side and then removes their ThreadRelation documents.
   * Intended to be driven by a cron job.
   *
   * @param limit passed to `ThreadRelationModel.getExpiredThreadRelations`
   * @param apiCallInterval milliseconds to wait after each successful API deletion
   */
  public async deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void> {
    const expiredThreadRelations = await ThreadRelationModel.getExpiredThreadRelations(limit);
    if (expiredThreadRelations == null) {
      return;
    }

    const deletedThreadIds: string[] = [];
    for await (const expiredThreadRelation of expiredThreadRelations) {
      const { threadId } = expiredThreadRelation;
      try {
        const deleteThreadResponse = await this.client.deleteThread(threadId);
        logger.debug('Delete thread', deleteThreadResponse);
        deletedThreadIds.push(threadId);

        // sleep
        await new Promise(resolve => setTimeout(resolve, apiCallInterval));
      }
      catch (err) {
        // A thread that is already gone on the API side counts as deleted; otherwise its
        // relation would stay behind and be retried on every run.
        await openaiApiErrorHandler(err, { notFoundError: async() => { deletedThreadIds.push(threadId) } });
        logger.error(err);
      }
    }

    // Nothing was deleted: no need to hit the database
    if (deletedThreadIds.length === 0) {
      return;
    }

    await ThreadRelationModel.deleteMany({ threadId: { $in: deletedThreadIds } });
  }
  /**
   * Fetches the messages of a thread and passes every text content part to
   * `replaceAnnotationWithPageLink` before returning the page of messages.
   */
  async getMessageData(threadId: string, lang?: Lang, options?: MessageListParams): Promise<OpenAI.Beta.Threads.Messages.MessagesPage> {
    const messagesPage = await this.client.getMessages(threadId, options);

    for await (const message of messagesPage.data) {
      for await (const contentPart of message.content) {
        // Only text parts are passed on
        if (contentPart.type !== 'text') {
          continue;
        }
        await replaceAnnotationWithPageLink(contentPart, lang);
      }
    }

    return messagesPage;
  }
  /**
   * Returns the distinct, non-deleted vector stores that hold a file relation
   * for at least one of the given pages.
   */
  async getVectorStoreRelationsByPageIds(pageIds: Types.ObjectId[]): Promise<VectorStoreDocument[]> {
    // Named "stages" so it does not shadow the `pipeline` import from 'stream/promises'
    const stages = [
      // 1. Keep the file relations that belong to the given pages
      { $match: { page: { $in: pageIds } } },
      // 2. Join the VectorStore document of each relation
      {
        $lookup: {
          from: 'vectorstores',
          localField: 'vectorStoreRelationId',
          foreignField: '_id',
          as: 'vectorStore',
        },
      },
      // 3. Flatten the joined array
      { $unwind: '$vectorStore' },
      // 4. Drop vector stores that are marked as deleted
      { $match: { 'vectorStore.isDeleted': false } },
      // 5. Continue with the vector store document as the root
      { $replaceRoot: { newRoot: '$vectorStore' } },
      // 6. De-duplicate by _id
      { $group: { _id: '$_id', doc: { $first: '$$ROOT' } } },
      // 7. Restore the document structure
      { $replaceRoot: { newRoot: '$doc' } },
    ];

    return VectorStoreFileRelationModel.aggregate<VectorStoreDocument>(stages);
  }
  /**
   * Creates a vector store through the API client and stores its id in a new VectorStore document.
   *
   * @throws Error wrapping any failure of the API call or of the document creation
   */
  private async createVectorStore(name: string): Promise<VectorStoreDocument> {
    try {
      const { id: vectorStoreId } = await this.client.createVectorStore(name);
      const createdDocument = await VectorStoreModel.create({ vectorStoreId });
      return createdDocument as VectorStoreDocument;
    }
    catch (err) {
      throw new Error(err);
    }
  }
  /**
   * Converts a revision body from markdown to HTML and uploads it as `<pageId>.html`.
   */
  private async uploadFile(revisionBody: string, page: HydratedDocument<PageDocument>): Promise<OpenAI.Files.FileObject> {
    const html = await convertMarkdownToHtml(revisionBody, {
      page,
      siteUrl: configManager.getConfig('app:siteUrl'),
    });

    const htmlFile = await toFile(Readable.from(html), `${page._id}.html`);
    return this.client.uploadFile(htmlFile);
  }
  /**
   * Uploads the content of an attachment stream under the given file name.
   */
  private async uploadFileForAttachment(readable: Readable, fileName: string): Promise<OpenAI.Files.FileObject> {
    const attachmentFile = await toFile(Readable.from(readable), fileName);
    return this.client.uploadFile(attachmentFile);
  }
  /**
   * Deletes the vector store on the API side and marks its VectorStore document as deleted.
   * Does nothing when no non-deleted VectorStore document exists for the given id.
   *
   * @throws Error wrapping the API failure; thrown even when the not-found handler
   *         has marked the document as deleted
   */
  async deleteVectorStore(vectorStoreRelationId: string): Promise<void> {
    const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ _id: vectorStoreRelationId, isDeleted: false });
    if (vectorStoreDocument == null) {
      return;
    }

    try {
      const deleteVectorStoreResponse = await this.client.deleteVectorStore(vectorStoreDocument.vectorStoreId);
      logger.debug('Delete vector store', deleteVectorStoreResponse);
      await vectorStoreDocument.markAsDeleted();
    }
    catch (err) {
      // Wrap the call in an arrow function: handing over `vectorStoreDocument.markAsDeleted`
      // as a bare reference would detach the method from its document.
      await openaiApiErrorHandler(err, { notFoundError: async() => { await vectorStoreDocument.markAsDeleted() } });
      throw new Error(err);
    }
  }
  /**
   * Uploads the given pages as files and attaches them to the vector store.
   *
   * Steps: upload every page that has a non-empty revision body, upsert the resulting
   * file relations, attach the uploaded files to the vector store as one batch, then
   * mark the relations as attached. If any of the steps after the upload fails, the
   * files and relations of all given pages are deleted again.
   *
   * @param vectorStoreRelation vector store the files are attached to
   * @param pages pages to upload; at most BATCH_SIZE entries
   * @throws AssertionError when more than BATCH_SIZE pages are given
   */
  async createVectorStoreFile(vectorStoreRelation: VectorStoreDocument, pages: Array<HydratedDocument<PageDocument>>): Promise<void> {
    // Check the batch size BEFORE any upload is started, so an oversized batch
    // fails without leaving running uploads behind.
    assert(pages.length <= BATCH_SIZE, 'workers.length must be less than or equal to BATCH_SIZE');

    const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();

    const processUploadFile = async(page: HydratedDocument<PageDocument>) => {
      if (page._id != null && page.revision != null) {
        if (isPopulated(page.revision) && page.revision.body.length > 0) {
          const uploadedFile = await this.uploadFile(page.revision.body, page);
          prepareVectorStoreFileRelations(vectorStoreRelation._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
          return;
        }

        // The revision is not populated (or its body is empty): populate it and retry
        const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
        if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
          const uploadedFile = await this.uploadFile(pagePopulatedToShowRevision.revision.body, page);
          prepareVectorStoreFileRelations(vectorStoreRelation._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
        }
      }
    };

    // Run all uploads concurrently; a failed upload is logged and does not abort the others
    const fileUploadResult = await Promise.allSettled(pages.map(processUploadFile));
    fileUploadResult.forEach((result) => {
      if (result.status === 'rejected') {
        logger.error(result.reason);
      }
    });

    const vectorStoreFileRelations = Array.from(vectorStoreFileRelationsMap.values());
    const uploadedFileIds = vectorStoreFileRelations.flatMap(data => data.fileIds);

    if (uploadedFileIds.length === 0) {
      return;
    }

    const pageIds = pages.map(page => page._id);

    try {
      // Save vector store file relation
      await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(vectorStoreFileRelations);

      // Create vector store file
      const createVectorStoreFileBatchResponse = await this.client.createVectorStoreFileBatch(vectorStoreRelation.vectorStoreId, uploadedFileIds);
      logger.debug('Create vector store file', createVectorStoreFileBatchResponse);

      // Set isAttachedToVectorStore: true when the uploaded file is attached to VectorStore
      await VectorStoreFileRelationModel.markAsAttachedToVectorStore(pageIds);
    }
    catch (err) {
      logger.error(err);

      // Delete all uploaded files if createVectorStoreFileBatch fails
      for await (const pageId of pageIds) {
        await this.deleteVectorStoreFile(vectorStoreRelation._id, pageId);
      }
    }
  }
  /**
   * Deletes all VectorStore documents that are marked as deleted (isDeleted: true)
   * and have no associated VectorStoreFileRelation documents.
   * Intended to be driven by a cron job.
   */
  async deleteObsoletedVectorStoreRelations(): Promise<void> {
    const deletedVectorStoreRelations = await VectorStoreModel.find({ isDeleted: true });
    if (deletedVectorStoreRelations.length === 0) {
      return;
    }
    const deletedVectorStoreRelationIds = deletedVectorStoreRelations.map(vectorStoreRelation => vectorStoreRelation._id);

    // Ids of deleted vector stores that are still referenced by at least one file relation.
    // The aggregation yields `{ _id }` objects, so the ids are extracted explicitly.
    const referencedVectorStoreRelations = await VectorStoreFileRelationModel.aggregate<{ _id: Types.ObjectId }>([
      { $match: { vectorStoreRelationId: { $in: deletedVectorStoreRelationIds } } },
      { $group: { _id: '$vectorStoreRelationId' } },
    ]);
    const referencedVectorStoreRelationIds = referencedVectorStoreRelations.map(relation => relation._id);

    // No early return when nothing is referenced any more: in that case every
    // deleted vector store is obsolete and must be removed.
    await VectorStoreModel.deleteMany({
      _id: { $in: deletedVectorStoreRelationIds, $nin: referencedVectorStoreRelationIds },
      isDeleted: true,
    });
  }
  /**
   * Deletes the uploaded files of one page within one vector store, then removes the
   * file relation, or keeps it with only the file ids that could not be deleted.
   *
   * A file the API reports through the not-found handler is counted as deleted.
   *
   * @param apiCallInterval optional milliseconds to wait after each successful deletion
   */
  async deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId, apiCallInterval?: number): Promise<void> {
    const relation = await VectorStoreFileRelationModel.findOne({ vectorStoreRelationId, page: pageId });
    if (relation == null) {
      return;
    }

    const deletedFileIdSet = new Set<string>();
    for await (const fileId of relation.fileIds) {
      try {
        const response = await this.client.deleteFile(fileId);
        logger.debug('Delete vector store file', response);
        deletedFileIdSet.add(fileId);

        if (apiCallInterval != null) {
          // sleep
          await new Promise(resolve => setTimeout(resolve, apiCallInterval));
        }
      }
      catch (err) {
        await openaiApiErrorHandler(err, { notFoundError: async() => { deletedFileIdSet.add(fileId) } });
        logger.error(err);
      }
    }

    const remainingFileIds = relation.fileIds.filter(fileId => !deletedFileIdSet.has(fileId));
    if (remainingFileIds.length > 0) {
      // Keep the relation so the leftover files can be retried later
      relation.fileIds = remainingFileIds;
      await relation.save();
      return;
    }

    await relation.remove();
  }
  /**
   * Deletes the vector store files of the given pages from every non-deleted vector store
   * that references them. Pages are handled one after another; the vector stores of a
   * page are handled concurrently and individual failures are ignored.
   */
  async deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise<void> {
    const vectorStoreRelations = await this.getVectorStoreRelationsByPageIds(pageIds);
    if (vectorStoreRelations == null || vectorStoreRelations.length === 0) {
      return;
    }

    for await (const pageId of pageIds) {
      await Promise.allSettled(
        vectorStoreRelations.map(vectorStoreRelation => this.deleteVectorStoreFile(vectorStoreRelation._id, pageId)),
      );
    }
  }
  /**
   * Deletes the files that still belong to vector stores marked as deleted.
   * Intended to be driven by a cron job.
   *
   * @param limit maximum number of file relations handled per call
   * @param apiCallInterval milliseconds to wait between API deletions
   */
  async deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void> {
    // Vector stores that are marked as deleted
    const deletedVectorStoreRelations = await VectorStoreModel.find({ isDeleted: true });
    if (deletedVectorStoreRelations.length === 0) {
      return;
    }

    // File relations that still point to one of those vector stores
    const deletedVectorStoreRelationIds = deletedVectorStoreRelations.map(deletedVectorStoreRelation => deletedVectorStoreRelation._id);
    const obsoleteFileRelations = await VectorStoreFileRelationModel
      .find({ vectorStoreRelationId: { $in: deletedVectorStoreRelationIds } })
      .limit(limit);
    if (obsoleteFileRelations.length === 0) {
      return;
    }

    // Delete them one by one; a failure is logged and the loop goes on
    for await (const fileRelation of obsoleteFileRelations) {
      try {
        await this.deleteVectorStoreFile(fileRelation.vectorStoreRelationId, fileRelation.page, apiCallInterval);
      }
      catch (err) {
        logger.error(err);
      }
    }
  }
  /**
   * Detach hook: deletes the uploaded file(s) of an attachment and removes every
   * file relation that references the attachment.
   */
  async deleteVectorStoreFileOnDeleteAttachment(attachmentId: string) {
    // An Attachment has only one VectorStoreFile, so looking at a single relation
    // with the target attachmentId is enough to find the file id(s).
    const relation = await VectorStoreFileRelationModel.findOne({ attachment: attachmentId });
    if (relation == null) {
      return;
    }

    const removeAllRelations = async() => {
      await VectorStoreFileRelationModel.deleteMany({ attachment: attachmentId });
    };

    for await (const fileId of relation.fileIds) {
      try {
        const response = await this.client.deleteFile(fileId);
        logger.debug('Delete vector store file', response);
      }
      catch (err) {
        logger.error(err);
        await openaiApiErrorHandler(err, { notFoundError: () => removeAllRelations() });
      }
    }

    // Relations are removed regardless of the outcome of the file deletions
    await removeAllRelations();
  }
  /**
   * Keeps only the pages the assistant may read according to its access scope.
   *
   * - PUBLIC_ONLY: public pages
   * - GROUPS: public pages and user-group pages granted to a group related to the owner
   * - OWNER: the above plus owner-only pages granted to the owner
   * - any other scope: no page
   */
  async filterPagesByAccessScope(aiAssistant: AiAssistantDocument, pages: HydratedDocument<PageDocument>[]) {
    type Page = HydratedDocument<PageDocument>;

    const isPublicPage = (page: Page) => page.grant === PageGrant.GRANT_PUBLIC;

    const isUserGroupAccessible = (page: Page, ownerUserGroupIds: string[]) => {
      return page.grant === PageGrant.GRANT_USER_GROUP
        && page.grantedGroups.some(group => ownerUserGroupIds.includes(getIdStringForRef(group.item)));
    };

    const isOwnerAccessible = (page: Page, ownerId: Ref<IUser>) => {
      return page.grant === PageGrant.GRANT_OWNER
        && page.grantedUsers.some(user => getIdStringForRef(user) === getIdStringForRef(ownerId));
    };

    // Ids of both the regular and the external user groups related to the owner
    const getOwnerUserGroupIds = async(owner: Ref<IUser>) => {
      const userGroups = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
      const externalGroups = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
      return [...userGroups, ...externalGroups].map(group => getIdStringForRef(group));
    };

    const { accessScope } = aiAssistant;

    if (accessScope === AiAssistantAccessScope.PUBLIC_ONLY) {
      return pages.filter(isPublicPage);
    }

    if (accessScope === AiAssistantAccessScope.GROUPS) {
      const ownerUserGroupIds = await getOwnerUserGroupIds(aiAssistant.owner);
      return pages.filter(page => isPublicPage(page) || isUserGroupAccessible(page, ownerUserGroupIds));
    }

    if (accessScope === AiAssistantAccessScope.OWNER) {
      const ownerUserGroupIds = await getOwnerUserGroupIds(aiAssistant.owner);
      return pages.filter(page => isPublicPage(page) || isOwnerAccessible(page, aiAssistant.owner) || isUserGroupAccessible(page, ownerUserGroupIds));
    }

    return [];
  }
  /**
   * Page-create hook: adds the created pages to the vector store of every assistant found
   * for their paths, skipping assistants whose owner is not populated, whose learnable
   * page limit is exceeded, or whose vector store is missing / not populated.
   */
  async createVectorStoreFileOnPageCreate(pages: HydratedDocument<PageDocument>[]): Promise<void> {
    const aiAssistants = await this.findAiAssistantByPagePath(
      pages.map(page => page.path),
      { shouldPopulateOwner: true, shouldPopulateVectorStore: true },
    );
    if (aiAssistants.length === 0) {
      return;
    }

    for await (const aiAssistant of aiAssistants) {
      if (!isPopulated(aiAssistant.owner)) {
        continue;
      }

      const limitExceeded = await this.isLearnablePageLimitExceeded(aiAssistant.owner, aiAssistant.pagePathPatterns);
      if (limitExceeded) {
        continue;
      }

      const pagesToVectorize = await this.filterPagesByAccessScope(aiAssistant, pages);

      const vectorStoreRelation = aiAssistant.vectorStore;
      if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
        continue;
      }

      logger.debug('--------- createVectorStoreFileOnPageCreate ---------');
      logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
      logger.debug('VectorStoreFile pagePath to be created: ', pagesToVectorize.map(targetPage => targetPage.path));
      logger.debug('-----------------------------------------------------');

      await this.createVectorStoreFile(vectorStoreRelation as VectorStoreDocument, pagesToVectorize);
    }
  }
  /**
   * Page-update hook: for every assistant found for the page path, deletes the existing
   * vector store file of the page and creates a new one if the assistant may still read the page.
   */
  async updateVectorStoreFileOnPageUpdate(page: HydratedDocument<PageDocument>) {
    const aiAssistants = await this.findAiAssistantByPagePath([page.path], { shouldPopulateVectorStore: true });
    if (aiAssistants.length === 0) {
      return;
    }

    for await (const aiAssistant of aiAssistants) {
      const pagesToVectorize = await this.filterPagesByAccessScope(aiAssistant, [page]);

      const vectorStoreRelation = aiAssistant.vectorStore;
      if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
        continue;
      }

      logger.debug('---------- updateVectorStoreOnPageUpdate ------------');
      logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
      logger.debug('PagePath of VectorStoreFile to be deleted: ', page.path);
      logger.debug('pagePath of VectorStoreFile to be created: ', pagesToVectorize.map(targetPage => targetPage.path));
      logger.debug('-----------------------------------------------------');

      // The old file is always deleted. No new VectorStoreFile is created when the page was
      // changed to a permission the AiAssistant cannot access (pagesToVectorize is empty then).
      const vectorStoreDocument = vectorStoreRelation as VectorStoreDocument;
      await this.deleteVectorStoreFile(vectorStoreDocument._id, page._id);
      await this.createVectorStoreFile(vectorStoreDocument, pagesToVectorize);
    }
  }
  /**
   * Attach hook: uploads a vector-store-compatible attachment once and attaches it to the
   * vector store of every assistant that covers the page and may read it.
   *
   * The upload is deferred until the first eligible assistant is found, so no file is
   * left behind on the API side when no assistant ends up using it.
   *
   * @param pageId id of the page the attachment belongs to
   * @param attachment attachment document recorded on the created file relations
   * @param file uploaded file metadata (compatibility check and file name)
   * @param readable content of the attachment
   */
  async createVectorStoreFileOnUploadAttachment(
      pageId: string, attachment: HydratedDocument<IAttachmentDocument>, file: Express.Multer.File, readable: Readable,
  ): Promise<void> {
    if (!isVectorStoreCompatible(file)) {
      return;
    }

    const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');
    const page = await Page.findById(pageId);
    if (page == null) {
      return;
    }

    const aiAssistants = await this.findAiAssistantByPagePath([page.path], { shouldPopulateVectorStore: true });
    if (aiAssistants.length === 0) {
      return;
    }

    let uploadedFile: OpenAI.Files.FileObject | null = null;

    for await (const aiAssistant of aiAssistants) {
      const pagesToVectorize = await this.filterPagesByAccessScope(aiAssistant, [page]);
      if (pagesToVectorize.length === 0) {
        continue;
      }

      const vectorStoreRelation = aiAssistant.vectorStore;
      if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
        continue;
      }

      // Upload lazily and only once: the same file id is shared by all vector stores
      if (uploadedFile == null) {
        uploadedFile = await this.uploadFileForAttachment(readable, file.originalname);
        logger.debug('Uploaded file', uploadedFile);
      }

      await this.client.createVectorStoreFile(vectorStoreRelation.vectorStoreId, uploadedFile.id);

      await VectorStoreFileRelationModel.create({
        vectorStoreRelationId: vectorStoreRelation._id,
        page: page._id,
        attachment: attachment._id,
        fileIds: [uploadedFile.id],
        isAttachedToVectorStore: true,
      });
    }
  }
  /**
   * Streams the pages matching `conditions` (revision populated) in batches of BATCH_SIZE
   * and creates vector store files for each batch.
   *
   * @param vectorStoreRelation vector store the files are attached to
   * @param conditions page query conditions
   * @throws when reading the pages or processing a batch fails (the pipeline is rejected)
   */
  private async createVectorStoreFileWithStream(vectorStoreRelation: VectorStoreDocument, conditions: mongoose.FilterQuery<PageDocument>): Promise<void> {
    const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');

    const pagesStream = Page.find({ ...conditions })
      .populate('revision')
      .cursor({ batchSize: BATCH_SIZE });
    const batchStream = createBatchStream(BATCH_SIZE);

    const createVectorStoreFile = this.createVectorStoreFile.bind(this);
    const createVectorStoreFileStream = new Transform({
      objectMode: true,
      async transform(chunk: HydratedDocument<PageDocument>[], _encoding, callback) {
        try {
          logger.debug('Search results of page paths', chunk.map(page => page.path));
          await createVectorStoreFile(vectorStoreRelation, chunk);

          // This is the last stream of the pipeline and nothing reads from it. Pushing the
          // batches would only fill its readable buffer, and once that buffer reaches the
          // highWaterMark the stream stops accepting further batches. So nothing is pushed.
          callback();
        }
        catch (error) {
          callback(error);
        }
      },
    });

    await pipeline(pagesStream, batchStream, createVectorStoreFileStream);
  }
  /**
   * Builds the mongoose filter that selects the pages an assistant may learn from.
   *
   * Every returned filter is an `$or` of two parts:
   *  - pages with the 'Anyone with the link' grant (GRANT_RESTRICTED) whose path
   *    was specified directly, i.e. not through a glob pattern
   *  - pages matching the converted path patterns whose grant fits `accessScope`
   *
   * @param owner owner of the assistant; its user groups are looked up for the OWNER scope
   * @param accessScope access scope of the assistant
   * @param grantedGroupsForAccessScope groups to match against; required for the GROUPS scope
   * @param pagePathPatterns page paths and glob patterns configured for the assistant
   * @returns the filter to pass to the Page model
   * @throws Error when the GROUPS scope has no granted groups, or `accessScope` is not a known value
   */
  private async createConditionForCreateVectorStoreFile(
      owner: AiAssistant['owner'],
      accessScope: AiAssistant['accessScope'],
      grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
      pagePathPatterns: AiAssistant['pagePathPatterns'],
  ): Promise<mongoose.FilterQuery<PageDocument>> {
    const pathMatchers = convertPathPatternsToRegExp(pagePathPatterns);

    // 'Anyone with the link' pages are only targeted when their path is given literally
    const literalPagePaths = pagePathPatterns.filter(pattern => !isGlobPatternPath(pattern));
    const linkSharedPagesCondition: mongoose.FilterQuery<PageDocument> = {
      grant: PageGrant.GRANT_RESTRICTED,
      path: { $in: literalPagePaths },
    };

    switch (accessScope) {
      case AiAssistantAccessScope.PUBLIC_ONLY: {
        const publicPagesCondition = {
          grant: PageGrant.GRANT_PUBLIC,
          path: { $in: pathMatchers },
        };
        return { $or: [linkSharedPagesCondition, publicPagesCondition] };
      }

      case AiAssistantAccessScope.GROUPS: {
        if (grantedGroupsForAccessScope == null || grantedGroupsForAccessScope.length === 0) {
          throw new Error('grantedGroups is required when accessScope is GROUPS');
        }

        const grantedGroupIds = grantedGroupsForAccessScope.map(group => getIdForRef(group.item).toString());
        const groupPagesCondition = {
          grant: { $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP] },
          path: { $in: pathMatchers },
          $or: [
            { 'grantedGroups.item': { $in: grantedGroupIds } },
            { grant: PageGrant.GRANT_PUBLIC },
          ],
        };
        return { $or: [linkSharedPagesCondition, groupPagesCondition] };
      }

      case AiAssistantAccessScope.OWNER: {
        const userGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
        const externalUserGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
        const ownerUserGroups = [...userGroupIds, ...externalUserGroupIds].map(group => group.toString());

        const ownerPagesCondition = {
          grant: { $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP, PageGrant.GRANT_OWNER] },
          path: { $in: pathMatchers },
          $or: [
            { 'grantedGroups.item': { $in: ownerUserGroups } },
            { grantedUsers: { $in: [getIdForRef(owner)] } },
            { grant: PageGrant.GRANT_PUBLIC },
          ],
        };
        return { $or: [linkSharedPagesCondition, ownerPagesCondition] };
      }

      default:
        throw new Error('Invalid accessScope value');
    }
  }
  /**
   * Validates the group settings of an assistant against its scopes and its owner.
   *
   * Rules enforced:
   *  - granted groups may only be supplied for a scope whose value is GROUPS
   *  - every supplied group must be one the owner is related to
   *    (regular or external user groups)
   *
   * @param owner user who owns the assistant
   * @param shareScope share scope of the assistant
   * @param accessScope access scope of the assistant
   * @param grantedGroupsForShareScope groups supplied for the share scope
   * @param grantedGroupsForAccessScope groups supplied for the access scope
   * @throws Error when one of the rules above is violated
   */
  private async validateGrantedUserGroupsForAiAssistant(
      owner: AiAssistant['owner'],
      shareScope: AiAssistant['shareScope'],
      accessScope: AiAssistant['accessScope'],
      grantedGroupsForShareScope: AiAssistant['grantedGroupsForShareScope'],
      grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
  ) {
    // Groups must not be supplied unless the corresponding scope is GROUPS
    if (grantedGroupsForShareScope != null && shareScope !== AiAssistantShareScope.GROUPS) {
      throw new Error('grantedGroupsForShareScope is specified when shareScope is not “groups”.');
    }
    if (grantedGroupsForAccessScope != null && accessScope !== AiAssistantAccessScope.GROUPS) {
      throw new Error('grantedGroupsForAccessScope is specified when accsessScope is not “groups”.');
    }

    const relatedGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
    const relatedExternalGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
    const ownerUserGroupIds = new Set([...relatedGroupIds, ...relatedExternalGroupIds].map(group => group.toString()));

    // The share-scope groups are checked first, then the access-scope groups
    const membershipChecks = [
      {
        groups: grantedGroupsForShareScope,
        errorMessage: 'A userGroup to which the owner does not belong is specified in grantedGroupsForShareScope',
      },
      {
        groups: grantedGroupsForAccessScope,
        errorMessage: 'A userGroup to which the owner does not belong is specified in grantedGroupsForAccessScope',
      },
    ];

    for (const { groups, errorMessage } of membershipChecks) {
      if (groups == null || groups.length === 0) {
        continue;
      }

      const grantedGroupIds = groups.map(group => getIdForRef(group.item).toString());
      const ownerBelongsToAll = grantedGroupIds.every(groupId => ownerUserGroupIds.has(groupId));
      if (!ownerBelongsToAll) {
        throw new Error(errorMessage);
      }
    }
  }
  /**
   * Tells whether `user` is allowed to use the assistant with the given id.
   *
   * - share scope PUBLIC_ONLY: usable by anyone
   * - share scope OWNER, or SAME_AS_ACCESS_SCOPE with access scope OWNER: usable by the owner only
   * - share scope GROUPS, or SAME_AS_ACCESS_SCOPE with access scope GROUPS: usable when the
   *   user is related to at least one group in `grantedGroupsForShareScope`
   * - anything else: not usable
   *
   * @param aiAssistantId id of the assistant document
   * @param user user whose permission is checked
   * @returns true when the user may use the assistant
   * @throws a 404 http error when no assistant with that id exists
   */
  async isAiAssistantUsable(aiAssistantId: string, user: IUserHasId): Promise<boolean> {
    const aiAssistant = await AiAssistantModel.findOne({ _id: { $eq: aiAssistantId } });
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    const isOwner = getIdStringForRef(aiAssistant.owner) === getIdStringForRef(user._id);
    const shareScope = aiAssistant.shareScope;
    const followsAccessScope = shareScope === AiAssistantShareScope.SAME_AS_ACCESS_SCOPE;

    if (shareScope === AiAssistantShareScope.PUBLIC_ONLY) {
      return true;
    }

    const isOwnerOnly = shareScope === AiAssistantShareScope.OWNER
      || (followsAccessScope && aiAssistant.accessScope === AiAssistantAccessScope.OWNER);
    if (isOwnerOnly) {
      return isOwner;
    }

    const isGroupShared = shareScope === AiAssistantShareScope.GROUPS
      || (followsAccessScope && aiAssistant.accessScope === AiAssistantAccessScope.GROUPS);
    if (!isGroupShared) {
      return false;
    }

    const relatedGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user);
    const relatedExternalGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(user);
    const userGroupIds = [...relatedGroupIds, ...relatedExternalGroupIds].map(group => group.toString());

    // NOTE(review): for SAME_AS_ACCESS_SCOPE + GROUPS this still compares against
    // grantedGroupsForShareScope rather than grantedGroupsForAccessScope — confirm this is intended.
    const sharedGroupIds = new Set(aiAssistant.grantedGroupsForShareScope?.map(group => getIdStringForRef(group.item)) ?? []);

    return userGroupIds.some(userGroupId => sharedGroupIds.has(userGroupId));
  }
  /**
   * Creates an assistant owned by `user` together with a new vector store.
   *
   * The group settings are validated and the page filter is built before anything
   * is created. Filling the vector store with page files is started in the
   * background and is not awaited, so the returned document may reference a vector
   * store that is still being populated.
   *
   * @param data assistant settings received from the caller
   * @param user user who becomes the owner of the assistant
   * @returns the created assistant document
   * @throws Error when the group settings are invalid or the page filter cannot be built
   */
  async createAiAssistant(data: UpsertAiAssistantData, user: IUserHasId): Promise<AiAssistantDocument> {
    await this.validateGrantedUserGroupsForAiAssistant(
      user,
      data.shareScope,
      data.accessScope,
      data.grantedGroupsForShareScope,
      data.grantedGroupsForAccessScope,
    );

    const conditions = await this.createConditionForCreateVectorStoreFile(
      user,
      data.accessScope,
      data.grantedGroupsForAccessScope,
      data.pagePathPatterns,
    );

    const vectorStoreRelation = await this.createVectorStore(data.name);
    const aiAssistant = await AiAssistantModel.create({
      ...data, owner: user, vectorStore: vectorStoreRelation,
    });

    // VectorStore creation process does not await.
    // A rejection handler is attached so that a background failure is logged
    // instead of surfacing as an unhandled promise rejection.
    void this.createVectorStoreFileWithStream(vectorStoreRelation, conditions)
      .catch((err) => {
        logger.error('Failed to create vector store files', err);
      });

    return aiAssistant;
  }
  /**
   * Updates an assistant owned by `user`.
   *
   * When the access scope, the page path patterns or the access-scope groups
   * change, the current vector store is deleted, a new one is created, the threads
   * of the assistant are pointed at it and the page files are re-created in the
   * background (not awaited). If the assistant is the default one and its new share
   * scope is not PUBLIC_ONLY, the default flag is cleared.
   *
   * @param aiAssistantId id of the assistant to update
   * @param data new assistant settings
   * @param user user performing the update; must be the owner
   * @returns the updated assistant document
   * @throws a 404 http error when the user owns no assistant with that id
   * @throws Error when the group settings are invalid or the page filter cannot be built
   */
  async updateAiAssistant(aiAssistantId: string, data: UpsertAiAssistantData, user: IUserHasId): Promise<AiAssistantDocument> {
    // `$eq` keeps the id a plain value match, consistent with isAiAssistantUsable()
    const aiAssistant = await AiAssistantModel.findOne({ owner: user, _id: { $eq: aiAssistantId } });
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    await this.validateGrantedUserGroupsForAiAssistant(
      user,
      data.shareScope,
      data.accessScope,
      data.grantedGroupsForShareScope,
      data.grantedGroupsForAccessScope,
    );

    const grantedGroupIdsForAccessScopeFromReq = data.grantedGroupsForAccessScope?.map(group => getIdStringForRef(group.item)) ?? []; // ObjectId[] -> string[]
    const grantedGroupIdsForAccessScopeFromDb = aiAssistant.grantedGroupsForAccessScope?.map(group => getIdStringForRef(group.item)) ?? []; // ObjectId[] -> string[]

    // If accessScope, pagePathPatterns, grantedGroupsForAccessScope have not changed, do not build VectorStore
    const shouldRebuildVectorStore = data.accessScope !== aiAssistant.accessScope
      || !isDeepEquals(data.pagePathPatterns, aiAssistant.pagePathPatterns)
      || !isDeepEquals(grantedGroupIdsForAccessScopeFromReq, grantedGroupIdsForAccessScopeFromDb);

    let newVectorStoreRelation: VectorStoreDocument | undefined;
    if (shouldRebuildVectorStore) {
      const conditions = await this.createConditionForCreateVectorStoreFile(
        user,
        data.accessScope,
        data.grantedGroupsForAccessScope,
        data.pagePathPatterns,
      );

      // Delete obsoleted VectorStore
      const obsoletedVectorStoreRelationId = getIdStringForRef(aiAssistant.vectorStore);
      await this.deleteVectorStore(obsoletedVectorStoreRelationId);

      newVectorStoreRelation = await this.createVectorStore(data.name);

      // NOTE(review): updateThreads() is not awaited here; if it returns a promise its
      // rejection is not handled at this call site — confirm against its definition.
      this.updateThreads(aiAssistantId, newVectorStoreRelation.vectorStoreId);

      // VectorStore creation process does not await.
      // A rejection handler is attached so that a background failure is logged
      // instead of surfacing as an unhandled promise rejection.
      void this.createVectorStoreFileWithStream(newVectorStoreRelation, conditions)
        .catch((err) => {
          logger.error('Failed to create vector store files', err);
        });
    }

    const newData = {
      ...data,
      vectorStore: newVectorStoreRelation ?? aiAssistant.vectorStore,
    };

    aiAssistant.set({ ...newData });
    let updatedAiAssistant: AiAssistantDocument = await aiAssistant.save();

    // Once the share scope is no longer PUBLIC_ONLY, the default flag is cleared
    if (data.shareScope !== AiAssistantShareScope.PUBLIC_ONLY && aiAssistant.isDefault) {
      updatedAiAssistant = await AiAssistantModel.setDefault(aiAssistant._id, false);
    }

    return updatedAiAssistant;
  }
  /**
   * Lists the assistants `user` can access, split by ownership.
   *
   * An assistant is included when the user owns it, when someone else owns it and
   * its share scope is PUBLIC_ONLY, or when someone else owns it, its share scope is
   * GROUPS and one of its share-scope groups is related to the user.
   *
   * @param user user the list is built for
   * @returns `myAiAssistants` (owned by the user) and `teamAiAssistants` (owned by others),
   *          both with the share-scope and access-scope group items populated
   */
  async getAccessibleAiAssistants(user: IUserHasId): Promise<AccessibleAiAssistants> {
    const relatedGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user);
    const relatedExternalGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(user);
    const userGroupIds = [...relatedGroupIds, ...relatedExternalGroupIds];

    const assistants = await AiAssistantModel
      .find({
        $or: [
          // Owned by the user
          { owner: user },
          // Owned by someone else and shared publicly
          {
            $and: [
              { owner: { $ne: user } },
              { shareScope: AiAssistantShareScope.PUBLIC_ONLY },
            ],
          },
          // Owned by someone else and shared with a group related to the user
          {
            $and: [
              { owner: { $ne: user } },
              { shareScope: AiAssistantShareScope.GROUPS },
              { 'grantedGroupsForShareScope.item': { $in: userGroupIds } },
            ],
          },
        ],
      })
      .populate('grantedGroupsForShareScope.item')
      .populate('grantedGroupsForAccessScope.item');

    // Partition the result in one pass, keeping the original order in both lists
    const userId = user._id.toString();
    const myAiAssistants: typeof assistants = [];
    const teamAiAssistants: typeof assistants = [];
    for (const assistant of assistants) {
      if (assistant.owner.toString() === userId) {
        myAiAssistants.push(assistant);
      }
      else {
        teamAiAssistants.push(assistant);
      }
    }

    return { myAiAssistants, teamAiAssistants };
  }
  /**
   * Checks whether the pages selected by `pagePathPatterns` exceed the configured
   * `openai:limitLearnablePageCountPerAssistant` limit.
   *
   * Each path returned by `descendantCountByPaths` counts as one page plus its
   * descendants, except that a path listed literally in `pagePathPatterns` counts
   * as a single page.
   *
   * @param user user passed to the descendant count lookup
   * @param pagePathPatterns page paths and glob patterns configured for the assistant
   * @returns true when the total page count is greater than the limit
   */
  async isLearnablePageLimitExceeded(user: IUserHasId, pagePathPatterns: string[]): Promise<boolean> {
    const pathsWithoutGlob = removeGlobPath(pagePathPatterns);

    const Page = mongoose.model<IPage, PageModel>('Page');
    const descendantCounts = await Page.descendantCountByPaths(pathsWithoutGlob, user, null, true, true);

    let totalPageCount = 0;
    for (const entry of descendantCounts) {
      // Treat as single page when included in "pagePathPatterns"
      const isSpecifiedAsSinglePage = pagePathPatterns.includes(entry.path);
      const descendants = isSpecifiedAsSinglePage ? 0 : entry.descendantCount;
      totalPageCount += descendants + 1;
    }

    logger.debug('TotalPageCount: ', totalPageCount);

    const pageCountLimit = configManager.getConfig('openai:limitLearnablePageCountPerAssistant');
    return totalPageCount > pageCountLimit;
  }
  /**
   * Finds the assistants whose `pagePathPatterns` contain one of the given page
   * paths, either literally or as one of the glob patterns generated from them.
   *
   * @param pagePaths page paths to look up
   * @param options set `shouldPopulateOwner` / `shouldPopulateVectorStore` to populate
   *                the `owner` / `vectorStore` references of the result
   * @returns the matching assistant documents
   */
  async findAiAssistantByPagePath(
      pagePaths: string[], options?: { shouldPopulateOwner?: boolean, shouldPopulateVectorStore?: boolean },
  ): Promise<AiAssistantDocument[]> {
    const { shouldPopulateOwner = false, shouldPopulateVectorStore = false } = options ?? {};

    const globPatterns = pagePaths.flatMap(pagePath => generateGlobPatterns(pagePath));

    const query = AiAssistantModel.find({
      $or: [
        // Patterns that are exactly one of the page paths
        { pagePathPatterns: { $in: pagePaths } },
        // Patterns that are a glob covering one of the page paths
        { pagePathPatterns: { $in: globPatterns } },
      ],
    });

    if (shouldPopulateOwner) {
      query.populate('owner');
    }
    if (shouldPopulateVectorStore) {
      query.populate('vectorStore');
    }

    return query.exec();
  }
  858. }
  // Module-level service instance; stays unassigned until initializeOpenaiService() creates it
  let instance: OpenaiService;

  /**
   * Creates the module-level OpenaiService instance.
   *
   * Nothing is created unless `app:aiEnabled` is truthy and `openai:serviceType`
   * is set to one of the values in `OpenaiServiceTypes`.
   *
   * @param crowi application object handed to the OpenaiService constructor
   */
  export const initializeOpenaiService = (crowi: Crowi): void => {
    const aiEnabled = configManager.getConfig('app:aiEnabled');
    const openaiServiceType = configManager.getConfig('openai:serviceType');

    if (!aiEnabled) {
      return;
    }
    if (openaiServiceType == null || !OpenaiServiceTypes.includes(openaiServiceType)) {
      return;
    }

    instance = new OpenaiService(crowi);
  };
  /**
   * Returns the module-level OpenaiService instance.
   *
   * @returns the instance, or undefined when it has not been created
   */
  export const getOpenaiService = (): IOpenaiService | undefined => instance ?? undefined;