openai.ts 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639
  1. /// <reference types="multer" />
  2. import assert from 'node:assert';
  3. import fs from 'node:fs';
  4. import { Readable, Transform, Writable } from 'node:stream';
  5. import { pipeline } from 'node:stream/promises';
  6. import type { IPage, IUser, Lang, Nullable, Ref } from '@growi/core';
  7. import {
  8. getIdForRef,
  9. getIdStringForRef,
  10. type IUserHasId,
  11. isPopulated,
  12. PageGrant,
  13. } from '@growi/core';
  14. import { deepEquals } from '@growi/core/dist/utils';
  15. import { isGlobPatternPath } from '@growi/core/dist/utils/page-path-utils';
  16. import escapeStringRegexp from 'escape-string-regexp';
  17. import createError from 'http-errors';
  18. import mongoose, { type HydratedDocument, type Types } from 'mongoose';
  19. import type { OpenAI } from 'openai';
  20. import { toFile } from 'openai';
  21. import type { ChatCompletionChunk } from 'openai/resources/chat/completions';
  22. import ExternalUserGroupRelation from '~/features/external-user-group/server/models/external-user-group-relation';
  23. import ThreadRelationModel, {
  24. type ThreadRelationDocument,
  25. } from '~/features/openai/server/models/thread-relation';
  26. import VectorStoreModel, {
  27. type VectorStoreDocument,
  28. } from '~/features/openai/server/models/vector-store';
  29. import VectorStoreFileRelationModel, {
  30. prepareVectorStoreFileRelations,
  31. type VectorStoreFileRelation,
  32. } from '~/features/openai/server/models/vector-store-file-relation';
  33. import type Crowi from '~/server/crowi';
  34. import type {
  35. IAttachmentDocument,
  36. IAttachmentModel,
  37. } from '~/server/models/attachment';
  38. import type { PageDocument, PageModel } from '~/server/models/page';
  39. import UserGroupRelation from '~/server/models/user-group-relation';
  40. import { configManager } from '~/server/service/config-manager';
  41. import { createBatchStream } from '~/server/util/batch-stream';
  42. import loggerFactory from '~/utils/logger';
  43. import { OpenaiServiceTypes } from '../../interfaces/ai';
  44. import type { UpsertAiAssistantData } from '../../interfaces/ai-assistant';
  45. import {
  46. type AccessibleAiAssistants,
  47. type AiAssistant,
  48. AiAssistantAccessScope,
  49. AiAssistantShareScope,
  50. } from '../../interfaces/ai-assistant';
  51. import type { MessageListParams } from '../../interfaces/message';
  52. import { ThreadType } from '../../interfaces/thread-relation';
  53. import type { IVectorStore } from '../../interfaces/vector-store';
  54. import { removeGlobPath } from '../../utils/remove-glob-path';
  55. import AiAssistantModel, {
  56. type AiAssistantDocument,
  57. } from '../models/ai-assistant';
  58. import { convertMarkdownToHtml } from '../utils/convert-markdown-to-html';
  59. import { generateGlobPatterns } from '../utils/generate-glob-patterns';
  60. import { isVectorStoreCompatible } from '../utils/is-vector-store-compatible';
  61. import { getClient, isStreamResponse } from './client-delegator';
  62. import { openaiApiErrorHandler } from './openai-api-error-handler';
  63. import { replaceAnnotationWithPageLink } from './replace-annotation-with-page-link';
  /** Deep-equality helper exposed by the `deepEquals` utility bundle of `@growi/core`. */
  const isDeepEquals = deepEquals.isDeepEquals;

  /**
   * Size passed to `createBatchStream`; it is also the upper bound asserted on
   * the number of pages uploaded concurrently by `createVectorStoreFile`.
   */
  const BATCH_SIZE = 100;

  /** Logger registered under the `growi:service:openai` namespace. */
  const logger = loggerFactory('growi:service:openai');

  /** String-keyed collection of vector store file relation records built up during uploads. */
  type VectorStoreFileRelationsMap = Map<string, VectorStoreFileRelation>;
  /**
   * Translates page path patterns into values usable for path matching.
   *
   * A pattern accepted by `isGlobPatternPath` has its first "/*" occurrence
   * removed, is regex-escaped, and becomes a RegExp matching that base path
   * itself or anything below it. Every other pattern is passed through as-is.
   *
   * @param pagePathPatterns page paths, optionally written as glob patterns
   * @returns one entry per input pattern, in input order
   */
  const convertPathPatternsToRegExp = (
    pagePathPatterns: string[],
  ): Array<string | RegExp> => {
    const converted: Array<string | RegExp> = [];

    for (const pattern of pagePathPatterns) {
      if (!isGlobPatternPath(pattern)) {
        converted.push(pattern);
        continue;
      }

      const escapedBasePath = escapeStringRegexp(pattern.replace('/*', ''));
      // https://regex101.com/r/x5KIZL/1
      converted.push(new RegExp(`^${escapedBasePath}($|/)`));
    }

    return converted;
  };
  /**
   * Public contract of the OpenAI integration service.
   *
   * Members without a description are implemented outside the part of the file
   * this documentation was written against.
   */
  export interface IOpenaiService {
    /**
     * Requests a streamed preliminary reply for `message` and hands every
     * received chunk to `chunkProcessor`.
     */
    generateAndProcessPreMessage(
      message: string,
      chunkProcessor: (chunk: ChatCompletionChunk) => void,
    ): Promise<void>;

    /**
     * Creates a remote thread together with the relation document tracking it.
     * When `initialUserMessage` is given, a title is generated from it in the
     * background.
     */
    createThread(
      userId: string,
      type: ThreadType,
      aiAssistantId?: string,
      initialUserMessage?: string,
    ): Promise<ThreadRelationDocument>;

    /** Lists the thread relations of an assistant, most recently updated first. */
    getThreadsByAiAssistantId(
      aiAssistantId: string,
    ): Promise<ThreadRelationDocument[]>;

    /** Deletes the remote thread and its relation document; rejects with a 404 error when the relation is unknown. */
    deleteThread(threadRelationId: string): Promise<ThreadRelationDocument>;

    /** Deletes up to `limit` expired threads, pausing `apiCallInterval` ms between API calls. For CronJob. */
    deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void>;

    /** Removes vector store documents that are flagged as deleted and no longer referenced. For CronJob. */
    deleteObsoletedVectorStoreRelations(): Promise<void>;

    /** Deletes the remote vector store and flags its document as deleted. */
    deleteVectorStore(vectorStoreRelationId: string): Promise<void>;

    /** Fetches the messages of a thread and rewrites annotations in text contents into page links. */
    getMessageData(
      threadId: string,
      lang?: Lang,
      options?: MessageListParams,
    ): Promise<OpenAI.Beta.Threads.Messages.MessagesPage>;

    createVectorStoreFileOnPageCreate(pages: PageDocument[]): Promise<void>;

    updateVectorStoreFileOnPageUpdate(
      page: HydratedDocument<PageDocument>,
    ): Promise<void>;

    /** Deletes the uploaded files of the given pages from every non-deleted vector store referencing them. */
    deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise<void>;

    /** Deletes files that still belong to vector stores flagged as deleted. For CronJob. */
    deleteObsoleteVectorStoreFile(
      limit: number,
      apiCallInterval: number,
    ): Promise<void>;

    isAiAssistantUsable(
      aiAssistantId: string,
      user: IUserHasId,
    ): Promise<boolean>;

    createAiAssistant(
      data: UpsertAiAssistantData,
      user: IUserHasId,
    ): Promise<AiAssistantDocument>;

    updateAiAssistant(
      aiAssistantId: string,
      data: UpsertAiAssistantData,
      user: IUserHasId,
    ): Promise<AiAssistantDocument>;

    getAccessibleAiAssistants(user: IUserHasId): Promise<AccessibleAiAssistants>;

    isLearnablePageLimitExceeded(
      user: IUserHasId,
      pagePathPatterns: string[],
    ): Promise<boolean>;
  }
  132. class OpenaiService implements IOpenaiService {
  133. private crowi: Crowi;
  /**
   * Stores the Crowi instance and hooks this service into the attachment
   * service so attachment uploads and deletions are mirrored to the vector store.
   *
   * @param crowi application instance providing `attachmentService`
   */
  constructor(crowi: Crowi) {
    this.crowi = crowi;

    // Bind the handlers so they keep `this` when invoked by the attachment service.
    const onAttach = this.createVectorStoreFileOnUploadAttachment.bind(this);
    this.createVectorStoreFileOnUploadAttachment = onAttach;
    crowi.attachmentService.addAttachHandler(onAttach);

    const onDetach = this.deleteVectorStoreFileOnDeleteAttachment.bind(this);
    this.deleteVectorStoreFileOnDeleteAttachment = onDetach;
    crowi.attachmentService.addDetachHandler(onDetach);
  }
  /**
   * API client for the service type currently configured under
   * `openai:serviceType`. The config value is read on every access.
   */
  private get client() {
    return getClient({
      openaiServiceType: configManager.getConfig('openai:serviceType'),
    });
  }
  /**
   * Streams a short acknowledgement of the user's question.
   *
   * Requests a streaming chat completion from `gpt-4.1-nano` and forwards each
   * received chunk to `chunkProcessor`. Nothing is forwarded when the client
   * does not return a stream response.
   *
   * @param message the user's message
   * @param chunkProcessor callback invoked once per completion chunk, in arrival order
   */
  async generateAndProcessPreMessage(
    message: string,
    chunkProcessor: (delta: ChatCompletionChunk) => void,
  ): Promise<void> {
    // Join with a space so the two instructions do not run together in the prompt.
    const systemMessage = [
      "Generate a message briefly confirming the user's question.",
      'Please generate up to 20 characters',
    ].join(' ');

    const preMessageCompletion = await this.client.chatCompletion({
      stream: true,
      model: 'gpt-4.1-nano',
      messages: [
        { role: 'system', content: systemMessage },
        { role: 'user', content: message },
      ],
    });

    if (!isStreamResponse(preMessageCompletion)) {
      return;
    }

    for await (const chunk of preMessageCompletion) {
      chunkProcessor(chunk);
    }
  }
  /**
   * Asks `gpt-4.1-nano` for a brief title summarising `message`.
   *
   * @param message the user's message the title is derived from
   * @returns the generated title, or a nullish value when the client returned a
   *          stream response or the completion carried no usable choice
   */
  private async generateThreadTitle(
    message: string,
  ): Promise<Nullable<string>> {
    // Join with a space so the instructions stay separate sentences in the prompt.
    const systemMessage = [
      'Create a brief title (max 5 words) from your message.',
      'Respond in the same language the user uses in their input.',
      'Response should only contain the title.',
    ].join(' ');

    const threadTitleCompletion = await this.client.chatCompletion({
      model: 'gpt-4.1-nano',
      messages: [
        { role: 'system', content: systemMessage },
        { role: 'user', content: message },
      ],
    });

    if (isStreamResponse(threadTitleCompletion)) {
      return null;
    }

    // An empty `choices` array yields `undefined` instead of throwing.
    return threadTitleCompletion.choices[0]?.message.content;
  }
  /**
   * Creates a remote thread and the `ThreadRelation` document that tracks it.
   *
   * When `aiAssistantId` is given, the assistant is looked up and the id of its
   * vector store (if any) is passed to the thread creation call. The relation is
   * stored with a `null` title; when `initialUserMessage` is given, a title is
   * generated in the background and saved once available. Failures of that
   * background step are logged and do not affect the returned relation.
   *
   * @param userId owner of the thread
   * @param type thread type stored on the relation
   * @param aiAssistantId assistant the thread belongs to, if any
   * @param initialUserMessage first user message, used only for title generation
   * @returns the newly created relation document
   */
  async createThread(
    userId: string,
    type: ThreadType,
    aiAssistantId?: string,
    initialUserMessage?: string,
  ): Promise<ThreadRelationDocument> {
    const aiAssistant =
      aiAssistantId != null
        ? await AiAssistantModel.findOne({
            _id: { $eq: aiAssistantId },
          }).populate<{ vectorStore: IVectorStore }>('vectorStore')
        : null;

    const thread = await this.client.createThread(
      aiAssistant?.vectorStore?.vectorStoreId,
    );

    const threadRelation = await ThreadRelationModel.create({
      userId,
      type,
      aiAssistant: aiAssistantId,
      threadId: thread.id,
      title: null, // Initialize title as null
    });

    if (initialUserMessage != null) {
      // Intentionally not awaited: title generation runs in the background.
      void this.generateThreadTitle(initialUserMessage)
        .then(async (generatedTitle) => {
          if (generatedTitle != null) {
            threadRelation.title = generatedTitle;
            await threadRelation.save();
          }
        })
        .catch((err) => {
          logger.error(
            `Failed to generate thread title for threadId ${thread.id}:`,
            err,
          );
        });
    }

    return threadRelation;
  }
  /**
   * Points every thread of an assistant at the given vector store.
   *
   * Threads are updated one after another; a failed update is logged and the
   * remaining threads are still processed.
   *
   * @param aiAssistantId assistant whose threads are updated
   * @param vectorStoreId id passed to the thread update call
   */
  private async updateThreads(
    aiAssistantId: string,
    vectorStoreId: string,
  ): Promise<void> {
    const relations = await this.getThreadsByAiAssistantId(aiAssistantId);

    for (const { threadId } of relations) {
      try {
        const response = await this.client.updateThread(threadId, vectorStoreId);
        logger.debug('Update thread', response);
      } catch (err) {
        logger.error(err);
      }
    }
  }
  /**
   * Finds the thread relations of an assistant.
   *
   * @param aiAssistantId assistant to look up
   * @param type thread type to filter by; defaults to `ThreadType.KNOWLEDGE`
   * @returns matching relations sorted by `updatedAt`, newest first
   */
  async getThreadsByAiAssistantId(
    aiAssistantId: string,
    type: ThreadType = ThreadType.KNOWLEDGE,
  ): Promise<ThreadRelationDocument[]> {
    const filter = { aiAssistant: aiAssistantId, type };
    return await ThreadRelationModel.find(filter).sort({ updatedAt: -1 });
  }
  /**
   * Deletes a thread remotely and removes its relation document.
   *
   * If the remote call fails, the error is passed to `openaiApiErrorHandler`,
   * whose not-found callback removes the relation document; the error is then
   * rethrown in every case.
   *
   * @param threadRelationId id of the `ThreadRelation` document
   * @returns the relation document that was looked up
   * @throws a 404 http-error when no relation with that id exists
   */
  async deleteThread(
    threadRelationId: string,
  ): Promise<ThreadRelationDocument> {
    const relation = await ThreadRelationModel.findById(threadRelationId);
    if (relation == null) {
      throw createError(404, 'ThreadRelation document does not exist');
    }

    const removeRelation = async () => {
      await relation.remove();
    };

    try {
      const response = await this.client.deleteThread(relation.threadId);
      logger.debug('Delete thread', response);
      await removeRelation();
    } catch (err) {
      await openaiApiErrorHandler(err, { notFoundError: removeRelation });
      throw err;
    }

    return relation;
  }
  /**
   * Deletes expired threads remotely and then removes their relation documents.
   * Intended for the cron job.
   *
   * A thread the API reports as not found is treated as already deleted, so its
   * relation document is cleaned up too instead of being retried on every run
   * (the same handling `deleteVectorStoreFile` applies to missing files). Other
   * failures are logged and the relation is kept for a later run.
   *
   * @param limit maximum number of expired relations fetched
   * @param apiCallInterval pause in milliseconds after each successful API call
   */
  public async deleteExpiredThreads(
    limit: number,
    apiCallInterval: number,
  ): Promise<void> {
    const expiredThreadRelations =
      await ThreadRelationModel.getExpiredThreadRelations(limit);
    if (expiredThreadRelations == null) {
      return;
    }

    const deletedThreadIds: string[] = [];
    for await (const expiredThreadRelation of expiredThreadRelations) {
      const { threadId } = expiredThreadRelation;
      try {
        const deleteThreadResponse = await this.client.deleteThread(threadId);
        logger.debug('Delete thread', deleteThreadResponse);
        deletedThreadIds.push(threadId);

        // sleep
        await new Promise((resolve) => setTimeout(resolve, apiCallInterval));
      } catch (err) {
        await openaiApiErrorHandler(err, {
          // Already gone remotely: the relation document is obsolete as well.
          notFoundError: async () => {
            deletedThreadIds.push(threadId);
          },
        });
        logger.error(err);
      }
    }

    if (deletedThreadIds.length === 0) {
      return;
    }

    await ThreadRelationModel.deleteMany({
      threadId: { $in: deletedThreadIds },
    });
  }
  /**
   * Fetches the messages of a thread and post-processes their text contents.
   *
   * Every content entry of type `text` is passed, one at a time and in order,
   * to `replaceAnnotationWithPageLink`; other content types are left untouched.
   *
   * @param threadId remote thread id
   * @param lang language forwarded to the annotation replacement
   * @param options listing parameters forwarded to the client
   * @returns the message page returned by the client, after post-processing
   */
  async getMessageData(
    threadId: string,
    lang?: Lang,
    options?: MessageListParams,
  ): Promise<OpenAI.Beta.Threads.Messages.MessagesPage> {
    const messagesPage = await this.client.getMessages(threadId, options);

    for (const message of messagesPage.data) {
      for (const content of message.content) {
        if (content.type !== 'text') {
          continue;
        }
        await replaceAnnotationWithPageLink(content, lang);
      }
    }

    return messagesPage;
  }
  /**
   * Resolves the non-deleted vector stores that hold files for the given pages.
   *
   * Runs an aggregation over the vector store file relations: relations of the
   * given pages are joined with the `vectorstores` collection, stores flagged
   * `isDeleted` are dropped, and the result is de-duplicated by store `_id`.
   *
   * @param pageIds pages to look up
   * @returns the distinct vector store documents, as plain aggregation results
   */
  private async getVectorStoreRelationsByPageIds(
    pageIds: Types.ObjectId[],
  ): Promise<VectorStoreDocument[]> {
    const stages = [
      // Stage 1: Keep relations that belong to one of the given pages
      { $match: { page: { $in: pageIds } } },
      // Stage 2: Join the referenced VectorStore documents
      {
        $lookup: {
          from: 'vectorstores',
          localField: 'vectorStoreRelationId',
          foreignField: '_id',
          as: 'vectorStore',
        },
      },
      // Stage 3: Flatten the joined array
      { $unwind: '$vectorStore' },
      // Stage 4: Drop vector stores flagged as deleted
      { $match: { 'vectorStore.isDeleted': false } },
      // Stage 5: Promote the vector store to the document root
      { $replaceRoot: { newRoot: '$vectorStore' } },
      // Stage 6: Collapse duplicates by _id
      { $group: { _id: '$_id', doc: { $first: '$$ROOT' } } },
      // Stage 7: Restore the vector store document shape
      { $replaceRoot: { newRoot: '$doc' } },
    ];

    return await VectorStoreFileRelationModel.aggregate<VectorStoreDocument>(
      stages,
    );
  }
  /**
   * Creates a remote vector store and the document that records its id.
   *
   * Errors from the API call or the database write propagate unchanged, so the
   * caller sees the original error type, message and stack.
   *
   * @param name name given to the remote vector store
   * @returns the newly created vector store document
   */
  private async createVectorStore(name: string): Promise<VectorStoreDocument> {
    const newVectorStore = await this.client.createVectorStore(name);

    const newVectorStoreDocument = (await VectorStoreModel.create({
      vectorStoreId: newVectorStore.id,
    })) as VectorStoreDocument;

    return newVectorStoreDocument;
  }
  /**
   * Converts a revision body to HTML and uploads it as `<page id>.html`.
   *
   * @param revisionBody markdown body of the page revision
   * @param page page the body belongs to; also passed to the converter
   * @returns the file object returned by the upload call
   */
  private async uploadFile(
    revisionBody: string,
    page: HydratedDocument<PageDocument>,
  ): Promise<OpenAI.Files.FileObject> {
    const siteUrl = configManager.getConfig('app:siteUrl');
    const html = await convertMarkdownToHtml(revisionBody, { page, siteUrl });

    const htmlFile = await toFile(Readable.from(html), `${page._id}.html`);
    return await this.client.uploadFile(htmlFile);
  }
  /**
   * Uploads an attachment from a stream or, failing that, from a file path.
   *
   * @param fileName name given to the uploaded file
   * @param readStream content stream; takes precedence over `filePath`
   * @param filePath path read with `fs.createReadStream` when no stream is given
   * @returns the file object returned by the upload call
   * @throws Error when neither `readStream` nor `filePath` is provided
   */
  private async uploadFileForAttachment(
    fileName: string,
    readStream?: NodeJS.ReadableStream,
    filePath?: string,
  ): Promise<OpenAI.Files.FileObject> {
    let source: NodeJS.ReadableStream;
    if (readStream != null) {
      source = readStream;
    } else if (filePath != null) {
      source = fs.createReadStream(filePath);
    } else {
      throw new Error('readStream and filePath are both null');
    }

    const uploadable = await toFile(source, fileName);
    return await this.client.uploadFile(uploadable);
  }
  /**
   * Deletes a remote vector store and flags its document as deleted.
   *
   * Does nothing when no non-deleted document with the given id exists. If the
   * remote call fails, the error is passed to `openaiApiErrorHandler`, whose
   * not-found callback still flags the document as deleted; the error is then
   * rethrown.
   *
   * @param vectorStoreRelationId `_id` of the vector store document
   * @throws the original error when it is an `Error`; otherwise an `Error`
   *         carrying its string form
   */
  async deleteVectorStore(vectorStoreRelationId: string): Promise<void> {
    const vectorStoreDocument: VectorStoreDocument | null =
      await VectorStoreModel.findOne({
        _id: vectorStoreRelationId,
        isDeleted: false,
      });

    if (vectorStoreDocument == null) {
      return;
    }

    try {
      const deleteVectorStoreResponse = await this.client.deleteVectorStore(
        vectorStoreDocument.vectorStoreId,
      );
      logger.debug('Delete vector store', deleteVectorStoreResponse);
      await vectorStoreDocument.markAsDeleted();
    } catch (err) {
      await openaiApiErrorHandler(err, {
        // Call through the document: passing the bare method reference would
        // detach it from its `this`.
        notFoundError: async () => {
          await vectorStoreDocument.markAsDeleted();
        },
      });
      // Rethrow without stringifying so the error type and stack are kept.
      throw err instanceof Error ? err : new Error(String(err));
    }
  }
  /**
   * Uploads the compatible attachments of a page and records them in the map.
   *
   * Attachments are streamed from the database in batches of `BATCH_SIZE` and
   * handled one at a time. An attachment rejected by `isVectorStoreCompatible`
   * is skipped; a failure on one attachment is logged and does not stop the
   * others.
   *
   * @param pageId page whose attachments are uploaded
   * @param vectorStoreRelationId vector store document the files are recorded for
   * @param vectorStoreFileRelationsMap accumulator updated through `prepareVectorStoreFileRelations`
   */
  private async createVectorStoreFileWithStreamForAttachment(
    pageId: Types.ObjectId,
    vectorStoreRelationId: Types.ObjectId,
    vectorStoreFileRelationsMap: VectorStoreFileRelationsMap,
  ): Promise<void> {
    const Attachment = mongoose.model<
      HydratedDocument<IAttachmentDocument>,
      IAttachmentModel
    >('Attachment');

    // Uploads a single attachment unless its name/format is not supported.
    const uploadAttachment = async (
      attachment: HydratedDocument<IAttachmentDocument>,
    ): Promise<void> => {
      const compatible = isVectorStoreCompatible(
        attachment.originalName,
        attachment.fileFormat,
      );
      if (!compatible) {
        return;
      }

      const readStream =
        await this.crowi.fileUploadService.findDeliveryFile(attachment);
      const uploaded = await this.uploadFileForAttachment(
        attachment.originalName,
        readStream,
      );
      prepareVectorStoreFileRelations(
        vectorStoreRelationId,
        pageId,
        uploaded.id,
        vectorStoreFileRelationsMap,
        attachment._id,
      );
    };

    const uploadFileStreamForAttachment = new Writable({
      objectMode: true,
      write: async (
        attachments: HydratedDocument<IAttachmentDocument>[],
        _encoding,
        callback,
      ) => {
        for (const attachment of attachments) {
          try {
            await uploadAttachment(attachment);
          } catch (err) {
            logger.error(err);
          }
        }
        callback();
      },
      final: (callback) => {
        logger.debug('Finished uploading attachments');
        callback();
      },
    });

    await pipeline(
      Attachment.find({ page: pageId }).cursor(),
      createBatchStream(BATCH_SIZE),
      uploadFileStreamForAttachment,
    );
  }
  /**
   * Uploads the given pages (and optionally their attachments) and attaches the
   * uploaded files to a vector store.
   *
   * Pages are uploaded concurrently; a page without a revision or with an empty
   * body is skipped, and a failed upload is logged without stopping the others.
   * The relations are then saved, the files are attached as one batch, and the
   * relations are marked as attached. If any of those three steps fails, the
   * error is logged and the files of every given page are deleted again.
   *
   * @param vectorStoreRelation vector store document the files are attached to
   * @param pages pages to upload; at most `BATCH_SIZE` entries
   * @param ignoreAttachments when true, page attachments are not uploaded
   * @throws AssertionError when more than `BATCH_SIZE` pages are given; this is
   *         checked before any upload starts
   */
  private async createVectorStoreFile(
    vectorStoreRelation: VectorStoreDocument,
    pages: Array<HydratedDocument<PageDocument>>,
    ignoreAttachments = false,
  ): Promise<void> {
    // Validate first so an oversized batch never triggers uploads.
    assert(
      pages.length <= BATCH_SIZE,
      'workers.length must be less than or equal to BATCH_SIZE',
    );

    const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();

    const processUploadFile = async (
      page: HydratedDocument<PageDocument>,
    ): Promise<void> => {
      if (page._id == null || page.revision == null) {
        return;
      }
      const pageId = page._id;

      // Use the already-populated body when there is one, otherwise populate it.
      let revisionBody: string | undefined;
      if (isPopulated(page.revision) && page.revision.body.length > 0) {
        revisionBody = page.revision.body;
      } else {
        const pagePopulatedToShowRevision =
          await page.populateDataToShowRevision();
        const populatedRevision = pagePopulatedToShowRevision.revision;
        if (populatedRevision != null && populatedRevision.body.length > 0) {
          revisionBody = populatedRevision.body;
        }
      }

      if (revisionBody == null) {
        return;
      }

      const uploadedFile = await this.uploadFile(revisionBody, page);
      prepareVectorStoreFileRelations(
        vectorStoreRelation._id,
        pageId,
        uploadedFile.id,
        vectorStoreFileRelationsMap,
      );

      if (!ignoreAttachments) {
        await this.createVectorStoreFileWithStreamForAttachment(
          pageId,
          vectorStoreRelation._id,
          vectorStoreFileRelationsMap,
        );
      }
    };

    // Start one worker per page and wait for all of them to settle.
    const fileUploadResult = await Promise.allSettled(
      pages.map(processUploadFile),
    );
    fileUploadResult.forEach((result) => {
      if (result.status === 'rejected') {
        logger.error(result.reason);
      }
    });

    const vectorStoreFileRelations = Array.from(
      vectorStoreFileRelationsMap.values(),
    );
    const uploadedFileIds = vectorStoreFileRelations.flatMap(
      (data) => data.fileIds,
    );

    if (uploadedFileIds.length === 0) {
      return;
    }

    const pageIds = pages.map((page) => page._id);

    try {
      // Save vector store file relation
      await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(
        vectorStoreFileRelations,
      );

      // Create vector store file
      const createVectorStoreFileBatchResponse =
        await this.client.createVectorStoreFileBatch(
          vectorStoreRelation.vectorStoreId,
          uploadedFileIds,
        );
      logger.debug(
        'Create vector store file',
        createVectorStoreFileBatchResponse,
      );

      // Set isAttachedToVectorStore: true when the uploaded file is attached to VectorStore
      await VectorStoreFileRelationModel.markAsAttachedToVectorStore(pageIds);
    } catch (err) {
      logger.error(err);

      // Delete all uploaded files if createVectorStoreFileBatch fails
      for (const pageId of pageIds) {
        await this.deleteVectorStoreFile(vectorStoreRelation._id, pageId);
      }
    }
  }
  /**
   * Deletes all VectorStore documents that are marked as deleted
   * (`isDeleted: true`) and have no associated VectorStoreFileRelation
   * documents. Intended for the cron job.
   *
   * When no VectorStoreFileRelation documents exist at all, every VectorStore
   * marked as deleted is unreferenced and is therefore removed.
   */
  async deleteObsoletedVectorStoreRelations(): Promise<void> {
    const deletedVectorStoreRelations = await VectorStoreModel.find({
      isDeleted: true,
    });
    if (deletedVectorStoreRelations.length === 0) {
      return;
    }

    // One result per vector store that still has at least one file relation.
    const referencedVectorStores = await VectorStoreFileRelationModel.aggregate<{
      _id: Types.ObjectId;
    }>([{ $group: { _id: '$vectorStoreRelationId' } }]);
    const referencedVectorStoreIds = referencedVectorStores.map(
      (entry) => entry._id,
    );

    await VectorStoreModel.deleteMany({
      _id: { $nin: referencedVectorStoreIds },
      isDeleted: true,
    });
  }
  /**
   * Deletes the uploaded file of an attachment and its relation documents.
   *
   * Relations without an `attachment` are ignored. Only the first entry of
   * `fileIds` is deleted remotely; afterwards every relation document of that
   * attachment is removed. If the remote deletion fails, the error is logged and
   * the documents are removed only through the not-found callback of
   * `openaiApiErrorHandler`. Errors are not rethrown.
   *
   * @param vectorStoreFileRelation relation describing the attachment's file
   */
  private async deleteVectorStoreFileForAttachment(
    vectorStoreFileRelation: VectorStoreFileRelation,
  ): Promise<void> {
    const attachmentId = vectorStoreFileRelation.attachment;
    if (attachmentId == null) {
      return;
    }

    const removeRelationsOfAttachment = async () => {
      await VectorStoreFileRelationModel.deleteMany({
        attachment: attachmentId,
      });
    };

    try {
      // Delete entities in VectorStoreFile
      const [fileId] = vectorStoreFileRelation.fileIds;
      const response = await this.client.deleteFile(fileId);
      logger.debug('Delete vector store file (attachment) ', response);

      // Delete related VectorStoreFileRelation document
      await removeRelationsOfAttachment();
    } catch (err) {
      logger.error(err);
      await openaiApiErrorHandler(err, {
        notFoundError: removeRelationsOfAttachment,
      });
    }
  }
  /**
   * Deletes the uploaded files recorded for a page in one vector store.
   *
   * Unless `ignoreAttachments` is set, the relations carrying an `attachment`
   * field are handled first through `deleteVectorStoreFileForAttachment`. Then
   * the first relation found for the store/page pair has each of its files
   * deleted remotely; a file the API reports as not found counts as deleted.
   * The relation document is removed when no file id is left, otherwise it is
   * saved with the ids that could not be deleted.
   *
   * NOTE(review): the second lookup does not exclude attachment relations —
   * confirm that is intended.
   *
   * @param vectorStoreRelationId `_id` of the vector store document
   * @param pageId page whose files are deleted
   * @param ignoreAttachments when true, attachment relations are not processed
   * @param apiCallInterval optional pause in milliseconds after each successful file deletion
   */
  private async deleteVectorStoreFile(
    vectorStoreRelationId: Types.ObjectId,
    pageId: Types.ObjectId,
    ignoreAttachments = false,
    apiCallInterval?: number,
  ): Promise<void> {
    if (!ignoreAttachments) {
      // Get all VectorStoreFileRelation documents (attachments) associated with the page
      const attachmentRelations = await VectorStoreFileRelationModel.find({
        vectorStoreRelationId,
        page: pageId,
        attachment: { $exists: true },
      });

      for (const attachmentRelation of attachmentRelations) {
        try {
          await this.deleteVectorStoreFileForAttachment(attachmentRelation);
        } catch (err) {
          logger.error(err);
        }
      }
    }

    // Delete vector store file and delete vector store file relation
    const pageRelation = await VectorStoreFileRelationModel.findOne({
      vectorStoreRelationId,
      page: pageId,
    });
    if (pageRelation == null) {
      return;
    }

    const deletedFileIds = new Set<string>();
    for (const fileId of pageRelation.fileIds) {
      try {
        const response = await this.client.deleteFile(fileId);
        logger.debug('Delete vector store file', response);
        deletedFileIds.add(fileId);

        if (apiCallInterval != null) {
          // sleep
          await new Promise((resolve) => setTimeout(resolve, apiCallInterval));
        }
      } catch (err) {
        await openaiApiErrorHandler(err, {
          notFoundError: async () => {
            deletedFileIds.add(fileId);
          },
        });
        logger.error(err);
      }
    }

    const remainingFileIds = pageRelation.fileIds.filter(
      (fileId) => !deletedFileIds.has(fileId),
    );

    if (remainingFileIds.length > 0) {
      pageRelation.fileIds = remainingFileIds;
      await pageRelation.save();
      return;
    }

    await pageRelation.remove();
  }
  /**
   * Deletes the uploaded files of the given pages from every non-deleted
   * vector store that references them.
   *
   * Pages are handled one after another; for each page the deletions across
   * the vector stores run concurrently and individual failures are ignored.
   *
   * @param pageIds pages whose files are deleted
   */
  async deleteVectorStoreFilesByPageIds(
    pageIds: Types.ObjectId[],
  ): Promise<void> {
    const vectorStoreRelations =
      await this.getVectorStoreRelationsByPageIds(pageIds);
    if (vectorStoreRelations == null || vectorStoreRelations.length === 0) {
      return;
    }

    for (const pageId of pageIds) {
      await Promise.allSettled(
        vectorStoreRelations.map((vectorStoreRelation) =>
          this.deleteVectorStoreFile(vectorStoreRelation._id, pageId),
        ),
      );
    }
  }
  /**
   * Deletes files that still belong to vector stores flagged as deleted.
   * Intended for the cron job.
   *
   * Up to `limit` file relations of deleted vector stores are fetched and
   * handed, one at a time, to `deleteVectorStoreFile` (attachments included).
   * A failure on one relation is logged and the next one is processed.
   *
   * @param limit maximum number of file relations fetched
   * @param apiCallInterval pause in milliseconds forwarded to `deleteVectorStoreFile`
   */
  async deleteObsoleteVectorStoreFile(
    limit: number,
    apiCallInterval: number,
  ): Promise<void> {
    // Retrieves all VectorStore documents that are marked as deleted
    const deletedVectorStores = await VectorStoreModel.find({
      isDeleted: true,
    });
    if (deletedVectorStores.length === 0) {
      return;
    }

    // Retrieves VectorStoreFileRelation documents associated with deleted VectorStore documents
    const deletedVectorStoreIds = deletedVectorStores.map(
      (deletedVectorStore) => deletedVectorStore._id,
    );
    const obsoleteFileRelations = await VectorStoreFileRelationModel.find({
      vectorStoreRelationId: { $in: deletedVectorStoreIds },
    }).limit(limit);
    if (obsoleteFileRelations.length === 0) {
      return;
    }

    // Delete obsolete VectorStoreFile
    for (const fileRelation of obsoleteFileRelations) {
      try {
        await this.deleteVectorStoreFile(
          fileRelation.vectorStoreRelationId,
          fileRelation.page,
          false,
          apiCallInterval,
        );
      } catch (err) {
        logger.error(err);
      }
    }
  }
  /**
   * Removes the vector store file linked to a deleted attachment, if a
   * VectorStoreFileRelation exists for it. Errors are logged, never rethrown.
   *
   * @param attachmentId - ID of the attachment that was deleted.
   */
  private async deleteVectorStoreFileOnDeleteAttachment(attachmentId: string) {
    const relation = await VectorStoreFileRelationModel.findOne({
      attachment: attachmentId,
    });

    if (relation != null) {
      try {
        await this.deleteVectorStoreFileForAttachment(relation);
      } catch (err) {
        logger.error(err);
      }
    }
  }
  /**
   * Narrows `pages` down to the ones the given AI assistant is allowed to learn
   * from, according to the assistant's `accessScope`:
   *
   * - PUBLIC_ONLY: public pages only.
   * - GROUPS: public pages, plus user-group pages granted to at least one of the
   *   assistant's `grantedGroupsForAccessScope`.
   * - OWNER: public pages, owner-only pages granted to the owner, plus user-group
   *   pages granted to any group the owner belongs to.
   * - any other value: no pages.
   *
   * @param aiAssistant - Assistant whose access scope is applied.
   * @param pages - Candidate pages.
   * @returns The subset of `pages` the assistant may vectorize.
   */
  private async filterPagesByAccessScope(
    aiAssistant: AiAssistantDocument,
    pages: HydratedDocument<PageDocument>[],
  ) {
    const isPublicPage = (page: HydratedDocument<PageDocument>) =>
      page.grant === PageGrant.GRANT_PUBLIC;

    const isUserGroupAccessible = (
      page: HydratedDocument<PageDocument>,
      allowedUserGroupIds: string[],
    ) => {
      if (page.grant !== PageGrant.GRANT_USER_GROUP) return false;
      return page.grantedGroups.some((group) =>
        allowedUserGroupIds.includes(getIdStringForRef(group.item)),
      );
    };

    const isOwnerAccessible = (
      page: HydratedDocument<PageDocument>,
      ownerId: Ref<IUser>,
    ) => {
      if (page.grant !== PageGrant.GRANT_OWNER) return false;
      return page.grantedUsers.some(
        (user) => getIdStringForRef(user) === getIdStringForRef(ownerId),
      );
    };

    const getOwnerUserGroupIds = async (owner: Ref<IUser>) => {
      const userGroups =
        await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
      const externalGroups =
        await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
      return [...userGroups, ...externalGroups].map((group) =>
        getIdStringForRef(group),
      );
    };

    switch (aiAssistant.accessScope) {
      case AiAssistantAccessScope.PUBLIC_ONLY:
        return pages.filter(isPublicPage);

      case AiAssistantAccessScope.GROUPS: {
        // Restrict to the groups explicitly granted to this assistant, the same
        // set createConditionForCreateVectorStoreFile uses for the initial build.
        // Using every group of the owner here would let incremental updates add
        // pages from groups that were never granted to the assistant.
        const grantedGroupIds =
          aiAssistant.grantedGroupsForAccessScope?.map((group) =>
            getIdStringForRef(group.item),
          ) ?? [];
        return pages.filter(
          (page) =>
            isPublicPage(page) || isUserGroupAccessible(page, grantedGroupIds),
        );
      }

      case AiAssistantAccessScope.OWNER: {
        const ownerUserGroupIds = await getOwnerUserGroupIds(aiAssistant.owner);
        return pages.filter(
          (page) =>
            isPublicPage(page) ||
            isOwnerAccessible(page, aiAssistant.owner) ||
            isUserGroupAccessible(page, ownerUserGroupIds),
        );
      }

      default:
        return [];
    }
  }
  /**
   * Adds newly created pages to the vector store of every AI assistant whose
   * page path patterns match them.
   *
   * An assistant is skipped when its owner is not populated, when
   * `isLearnablePageLimitExceeded` reports true for it, or when its vector
   * store is missing or not populated.
   *
   * @param pages - The pages that were created.
   */
  async createVectorStoreFileOnPageCreate(
    pages: HydratedDocument<PageDocument>[],
  ): Promise<void> {
    const matchedAssistants = await this.findAiAssistantByPagePath(
      pages.map((page) => page.path),
      {
        shouldPopulateOwner: true,
        shouldPopulateVectorStore: true,
      },
    );
    if (matchedAssistants.length === 0) {
      return;
    }

    for await (const aiAssistant of matchedAssistants) {
      if (!isPopulated(aiAssistant.owner)) {
        continue;
      }

      const limitExceeded = await this.isLearnablePageLimitExceeded(
        aiAssistant.owner,
        aiAssistant.pagePathPatterns,
      );
      if (limitExceeded) {
        continue;
      }

      // Only pages within the assistant's access scope are vectorized
      const targetPages = await this.filterPagesByAccessScope(
        aiAssistant,
        pages,
      );

      const vectorStore = aiAssistant.vectorStore;
      if (vectorStore == null || !isPopulated(vectorStore)) {
        continue;
      }

      logger.debug('--------- createVectorStoreFileOnPageCreate ---------');
      logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
      logger.debug(
        'VectorStoreFile pagePath to be created: ',
        targetPages.map((targetPage) => targetPage.path),
      );
      logger.debug('-----------------------------------------------------');

      await this.createVectorStoreFile(
        vectorStore as VectorStoreDocument,
        targetPages,
      );
    }
  }
  /**
   * Refreshes the vector store file of an updated page for every AI assistant
   * whose page path patterns match it: the existing file is deleted, then a new
   * one is created if the page is still within the assistant's access scope.
   *
   * @param page - The page that was updated.
   */
  async updateVectorStoreFileOnPageUpdate(
    page: HydratedDocument<PageDocument>,
  ) {
    const matchedAssistants = await this.findAiAssistantByPagePath(
      [page.path],
      { shouldPopulateVectorStore: true },
    );
    if (matchedAssistants.length === 0) {
      return;
    }

    for await (const aiAssistant of matchedAssistants) {
      const targetPages = await this.filterPagesByAccessScope(aiAssistant, [
        page,
      ]);

      const vectorStore = aiAssistant.vectorStore;
      if (vectorStore == null || !isPopulated(vectorStore)) {
        continue;
      }

      logger.debug('---------- updateVectorStoreOnPageUpdate ------------');
      logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
      logger.debug('PagePath of VectorStoreFile to be deleted: ', page.path);
      logger.debug(
        'pagePath of VectorStoreFile to be created: ',
        targetPages.map((targetPage) => targetPage.path),
      );
      logger.debug('-----------------------------------------------------');

      // The old file is always removed. When the page's permission changed to
      // one the assistant cannot access, targetPages is empty and nothing is
      // re-created.
      await this.deleteVectorStoreFile(
        (vectorStore as VectorStoreDocument)._id,
        page._id,
        true, // ignoreAttachments = true
      );
      await this.createVectorStoreFile(
        vectorStore as VectorStoreDocument,
        targetPages,
        true, // ignoreAttachments = true
      );
    }
  }
  /**
   * Registers a freshly uploaded attachment in the vector store of every AI
   * assistant that covers the attachment's page.
   *
   * The target vector stores are resolved BEFORE the file is uploaded to
   * OpenAI. If no assistant accepts the page (access scope filter, missing or
   * unpopulated vector store), nothing is uploaded, so no orphaned file is left
   * behind that no VectorStoreFileRelation tracks.
   *
   * @param pageId - ID of the page the attachment belongs to.
   * @param attachment - The attachment document that was created.
   * @param file - The uploaded file as provided by multer.
   */
  private async createVectorStoreFileOnUploadAttachment(
    pageId: string,
    attachment: HydratedDocument<IAttachmentDocument>,
    file: Express.Multer.File,
  ): Promise<void> {
    if (!isVectorStoreCompatible(file.originalname, file.mimetype)) {
      return;
    }

    const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>(
      'Page',
    );
    const page = await Page.findById(pageId);
    if (page == null) {
      return;
    }

    const aiAssistants = await this.findAiAssistantByPagePath([page.path], {
      shouldPopulateVectorStore: true,
    });
    if (aiAssistants.length === 0) {
      return;
    }

    // Pass 1: collect the vector stores that will actually receive the file.
    const targetVectorStores: VectorStoreDocument[] = [];
    for (const aiAssistant of aiAssistants) {
      const pagesToVectorize = await this.filterPagesByAccessScope(
        aiAssistant,
        [page],
      );
      if (pagesToVectorize.length === 0) {
        continue;
      }

      const vectorStoreRelation = aiAssistant.vectorStore;
      if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
        continue;
      }

      targetVectorStores.push(vectorStoreRelation as VectorStoreDocument);
    }

    if (targetVectorStores.length === 0) {
      return;
    }

    // Upload once; the same OpenAI file is attached to every target store.
    const uploadedFile = await this.uploadFileForAttachment(
      file.originalname,
      undefined,
      file.path,
    );
    logger.debug('Uploaded file', uploadedFile);

    // Pass 2: attach the file and persist the relation for each store.
    for (const vectorStoreRelation of targetVectorStores) {
      await this.client.createVectorStoreFile(
        vectorStoreRelation.vectorStoreId,
        uploadedFile.id,
      );

      const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap =
        new Map();
      prepareVectorStoreFileRelations(
        vectorStoreRelation._id as Types.ObjectId,
        page._id,
        uploadedFile.id,
        vectorStoreFileRelationsMap,
        attachment._id,
      );

      const vectorStoreFileRelations = Array.from(
        vectorStoreFileRelationsMap.values(),
      );
      await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(
        vectorStoreFileRelations,
      );
    }
  }
  /**
   * Streams every page matching `conditions` (with `revision` populated) out of
   * the database in batches of BATCH_SIZE and creates vector store files for
   * each batch.
   *
   * @param vectorStoreRelation - Vector store that receives the files.
   * @param conditions - Mongo filter selecting the pages to vectorize.
   * @returns Resolves when the whole pipeline has finished; rejects if any
   *          stage fails.
   */
  private async createVectorStoreFileWithStream(
    vectorStoreRelation: VectorStoreDocument,
    conditions: mongoose.FilterQuery<PageDocument>,
  ): Promise<void> {
    const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>(
      'Page',
    );

    const pagesStream = Page.find({ ...conditions })
      .populate('revision')
      .cursor({ batchSize: BATCH_SIZE });
    const batchStream = createBatchStream(BATCH_SIZE);

    // `this` inside transform() is the stream, so bind the service method here.
    const createVectorStoreFile = this.createVectorStoreFile.bind(this);
    const createVectorStoreFileStream = new Transform({
      objectMode: true,
      async transform(
        chunk: HydratedDocument<PageDocument>[],
        _encoding,
        callback,
      ) {
        try {
          logger.debug(
            'Target page path for VectorStoreFile generation: ',
            chunk.map((page) => page.path),
          );
          await createVectorStoreFile(vectorStoreRelation, chunk);
          // Deliberately nothing is pushed downstream: this is the last stage
          // of the pipeline and nobody reads its output. Pushing would fill the
          // readable buffer; once it reaches the highWaterMark, Transform holds
          // back the write callback until someone reads, stalling the pipeline.
          callback();
        } catch (error) {
          callback(error);
        }
      },
    });

    await pipeline(pagesStream, batchStream, createVectorStoreFileStream);
  }
  /**
   * Builds the Mongo filter selecting the pages an AI assistant should learn
   * from when its vector store is (re)built.
   *
   * Every returned filter is an `$or` of two parts:
   * 1. GRANT_RESTRICTED pages ('Anyone with the link') whose path was listed
   *    literally, i.e. not through a glob pattern;
   * 2. pages matching the path patterns and permitted by `accessScope`.
   *
   * @param owner - Owner of the assistant (used for the OWNER scope).
   * @param accessScope - Access scope of the assistant.
   * @param grantedGroupsForAccessScope - Groups granted for the GROUPS scope.
   * @param pagePathPatterns - Literal paths and glob patterns of target pages.
   * @throws Error when the scope is GROUPS but no groups are given, or when
   *         `accessScope` is none of the known values.
   */
  private async createConditionForCreateVectorStoreFile(
    owner: AiAssistant['owner'],
    accessScope: AiAssistant['accessScope'],
    grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
    pagePathPatterns: AiAssistant['pagePathPatterns'],
  ): Promise<mongoose.FilterQuery<PageDocument>> {
    const pathRegExps = convertPathPatternsToRegExp(pagePathPatterns);

    // Restricted pages are only picked up when their path is specified
    // directly rather than through a glob pattern.
    const literalPagePaths = pagePathPatterns.filter(
      (pattern) => !isGlobPatternPath(pattern),
    );
    const restrictedPageCondition: mongoose.FilterQuery<PageDocument> = {
      grant: PageGrant.GRANT_RESTRICTED,
      path: { $in: literalPagePaths },
    };

    switch (accessScope) {
      case AiAssistantAccessScope.PUBLIC_ONLY:
        return {
          $or: [
            restrictedPageCondition,
            {
              grant: PageGrant.GRANT_PUBLIC,
              path: { $in: pathRegExps },
            },
          ],
        };

      case AiAssistantAccessScope.GROUPS: {
        const hasGrantedGroups =
          grantedGroupsForAccessScope != null &&
          grantedGroupsForAccessScope.length > 0;
        if (!hasGrantedGroups) {
          throw new Error('grantedGroups is required when accessScope is GROUPS');
        }

        const grantedGroupIds = grantedGroupsForAccessScope.map((group) =>
          getIdForRef(group.item).toString(),
        );

        return {
          $or: [
            restrictedPageCondition,
            {
              grant: {
                $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP],
              },
              path: { $in: pathRegExps },
              $or: [
                { 'grantedGroups.item': { $in: grantedGroupIds } },
                { grant: PageGrant.GRANT_PUBLIC },
              ],
            },
          ],
        };
      }

      case AiAssistantAccessScope.OWNER: {
        const userGroupIds =
          await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
        const externalUserGroupIds =
          await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(
            owner,
          );
        const ownerGroupIds = [...userGroupIds, ...externalUserGroupIds].map(
          (groupId) => groupId.toString(),
        );

        return {
          $or: [
            restrictedPageCondition,
            {
              grant: {
                $in: [
                  PageGrant.GRANT_PUBLIC,
                  PageGrant.GRANT_USER_GROUP,
                  PageGrant.GRANT_OWNER,
                ],
              },
              path: { $in: pathRegExps },
              $or: [
                { 'grantedGroups.item': { $in: ownerGroupIds } },
                { grantedUsers: { $in: [getIdForRef(owner)] } },
                { grant: PageGrant.GRANT_PUBLIC },
              ],
            },
          ],
        };
      }

      default:
        throw new Error('Invalid accessScope value');
    }
  }
  /**
   * Validates the group settings of an AI assistant before it is stored.
   *
   * Rules enforced:
   * - `grantedGroupsForShareScope` may only be given when `shareScope` is GROUPS;
   * - `grantedGroupsForAccessScope` may only be given when `accessScope` is GROUPS;
   * - the owner must belong to every group listed in either of them.
   *
   * @throws Error describing the first rule that is violated.
   */
  private async validateGrantedUserGroupsForAiAssistant(
    owner: AiAssistant['owner'],
    shareScope: AiAssistant['shareScope'],
    accessScope: AiAssistant['accessScope'],
    grantedGroupsForShareScope: AiAssistant['grantedGroupsForShareScope'],
    grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
  ) {
    // Share-scope groups only make sense for the GROUPS share scope
    const isShareScopeGroups = shareScope === AiAssistantShareScope.GROUPS;
    if (!isShareScopeGroups && grantedGroupsForShareScope != null) {
      throw new Error(
        'grantedGroupsForShareScope is specified when shareScope is not “groups”.',
      );
    }

    // Access-scope groups only make sense for the GROUPS access scope
    const isAccessScopeGroups = accessScope === AiAssistantAccessScope.GROUPS;
    if (!isAccessScopeGroups && grantedGroupsForAccessScope != null) {
      throw new Error(
        'grantedGroupsForAccessScope is specified when accsessScope is not “groups”.',
      );
    }

    const userGroupIds =
      await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
    const externalUserGroupIds =
      await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
    const ownerGroupIds = new Set(
      [...userGroupIds, ...externalUserGroupIds].map((groupId) =>
        groupId.toString(),
      ),
    );

    // Throws `message` if any of the given groups is one the owner is not in.
    // An empty list passes trivially.
    const assertOwnerBelongsToAll = (
      groupIds: string[],
      message: string,
    ): void => {
      if (groupIds.some((groupId) => !ownerGroupIds.has(groupId))) {
        throw new Error(message);
      }
    };

    const shareScopeGroupIds =
      grantedGroupsForShareScope?.map((group) =>
        getIdForRef(group.item).toString(),
      ) ?? [];
    assertOwnerBelongsToAll(
      shareScopeGroupIds,
      'A userGroup to which the owner does not belong is specified in grantedGroupsForShareScope',
    );

    const accessScopeGroupIds =
      grantedGroupsForAccessScope?.map((group) =>
        getIdForRef(group.item).toString(),
      ) ?? [];
    assertOwnerBelongsToAll(
      accessScopeGroupIds,
      'A userGroup to which the owner does not belong is specified in grantedGroupsForAccessScope',
    );
  }
  /**
   * Tells whether `user` may use the AI assistant with the given ID.
   *
   * - shareScope PUBLIC_ONLY: everyone.
   * - shareScope OWNER: the owner only.
   * - shareScope SAME_AS_ACCESS_SCOPE with accessScope OWNER: the owner only.
   * - shareScope GROUPS: members of any group in `grantedGroupsForShareScope`.
   * - shareScope SAME_AS_ACCESS_SCOPE with accessScope GROUPS: members of any
   *   group in `grantedGroupsForAccessScope`.
   * - everything else: not usable.
   *
   * @param aiAssistantId - ID of the assistant to check.
   * @param user - The user who wants to use the assistant.
   * @throws A 404 http-error when the assistant does not exist.
   */
  async isAiAssistantUsable(
    aiAssistantId: string,
    user: IUserHasId,
  ): Promise<boolean> {
    const aiAssistant = await AiAssistantModel.findOne({
      _id: { $eq: aiAssistantId },
    });
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    const isOwner =
      getIdStringForRef(aiAssistant.owner) === getIdStringForRef(user._id);

    if (aiAssistant.shareScope === AiAssistantShareScope.PUBLIC_ONLY) {
      return true;
    }

    if (aiAssistant.shareScope === AiAssistantShareScope.OWNER && isOwner) {
      return true;
    }

    if (
      aiAssistant.shareScope === AiAssistantShareScope.SAME_AS_ACCESS_SCOPE &&
      aiAssistant.accessScope === AiAssistantAccessScope.OWNER &&
      isOwner
    ) {
      return true;
    }

    // NOTE(review): SAME_AS_ACCESS_SCOPE combined with accessScope PUBLIC_ONLY
    // matches none of the branches and ends in `return false`, even for the
    // owner. Confirm whether that combination is meant to be usable.

    const isSharedWithGroups =
      aiAssistant.shareScope === AiAssistantShareScope.GROUPS;
    const followsGroupAccessScope =
      aiAssistant.shareScope === AiAssistantShareScope.SAME_AS_ACCESS_SCOPE &&
      aiAssistant.accessScope === AiAssistantAccessScope.GROUPS;

    if (isSharedWithGroups || followsGroupAccessScope) {
      const userGroupIds = [
        ...(await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
        ...(await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(
          user,
        )),
      ].map((group) => group.toString());

      // grantedGroupsForShareScope is only allowed to be set when shareScope is
      // GROUPS (see validateGrantedUserGroupsForAiAssistant). When sharing
      // follows the access scope, the effective groups are the access-scope
      // groups; checking the share-scope list there could never grant access.
      const grantedGroupIds = isSharedWithGroups
        ? (aiAssistant.grantedGroupsForShareScope?.map((group) =>
            getIdStringForRef(group.item),
          ) ?? [])
        : (aiAssistant.grantedGroupsForAccessScope?.map((group) =>
            getIdStringForRef(group.item),
          ) ?? []);

      return userGroupIds.some((userGroupId) =>
        grantedGroupIds.includes(userGroupId),
      );
    }

    return false;
  }
  /**
   * Creates an AI assistant owned by `user` together with a new vector store,
   * then starts filling that vector store in the background.
   *
   * @param data - Assistant settings supplied by the caller.
   * @param user - The user who becomes the owner.
   * @returns The created assistant document; the vector store files may still
   *          be in the process of being generated when this resolves.
   * @throws Whatever the group validation or the page-condition builder throws
   *         for invalid settings.
   */
  async createAiAssistant(
    data: UpsertAiAssistantData,
    user: IUserHasId,
  ): Promise<AiAssistantDocument> {
    await this.validateGrantedUserGroupsForAiAssistant(
      user,
      data.shareScope,
      data.accessScope,
      data.grantedGroupsForShareScope,
      data.grantedGroupsForAccessScope,
    );

    const conditions = await this.createConditionForCreateVectorStoreFile(
      user,
      data.accessScope,
      data.grantedGroupsForAccessScope,
      data.pagePathPatterns,
    );

    const vectorStoreRelation = await this.createVectorStore(data.name);
    const aiAssistant = await AiAssistantModel.create({
      ...data,
      owner: user,
      vectorStore: vectorStoreRelation,
    });

    // VectorStore creation process does not await.
    // The rejection handler is required: without it a failure in the
    // background job becomes an unhandled promise rejection.
    this.createVectorStoreFileWithStream(vectorStoreRelation, conditions).catch(
      (err) => {
        logger.error(err);
      },
    );

    return aiAssistant;
  }
  /**
   * Updates an AI assistant owned by `user`.
   *
   * When `accessScope`, `pagePathPatterns` or `grantedGroupsForAccessScope`
   * changed, the old vector store is deleted, a new one is created and filled
   * in the background, and existing threads are pointed at the new store.
   * If the assistant is the default one and the new share scope is not
   * PUBLIC_ONLY, its default flag is cleared.
   *
   * @param aiAssistantId - ID of the assistant to update.
   * @param data - New assistant settings.
   * @param user - The user performing the update; must be the owner.
   * @returns The updated assistant document.
   * @throws A 404 http-error when no assistant with that ID is owned by `user`.
   */
  async updateAiAssistant(
    aiAssistantId: string,
    data: UpsertAiAssistantData,
    user: IUserHasId,
  ): Promise<AiAssistantDocument> {
    const aiAssistant = await AiAssistantModel.findOne({
      owner: user,
      _id: aiAssistantId,
    });
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    await this.validateGrantedUserGroupsForAiAssistant(
      user,
      data.shareScope,
      data.accessScope,
      data.grantedGroupsForShareScope,
      data.grantedGroupsForAccessScope,
    );

    const grantedGroupIdsForAccessScopeFromReq =
      data.grantedGroupsForAccessScope?.map((group) =>
        getIdStringForRef(group.item),
      ) ?? []; // ObjectId[] -> string[]
    const grantedGroupIdsForAccessScopeFromDb =
      aiAssistant.grantedGroupsForAccessScope?.map((group) =>
        getIdStringForRef(group.item),
      ) ?? []; // ObjectId[] -> string[]

    // If accessScope, pagePathPatterns, grantedGroupsForAccessScope have not changed, do not build VectorStore
    const shouldRebuildVectorStore =
      data.accessScope !== aiAssistant.accessScope ||
      !isDeepEquals(data.pagePathPatterns, aiAssistant.pagePathPatterns) ||
      !isDeepEquals(
        grantedGroupIdsForAccessScopeFromReq,
        grantedGroupIdsForAccessScopeFromDb,
      );

    let newVectorStoreRelation: VectorStoreDocument | undefined;
    if (shouldRebuildVectorStore) {
      const conditions = await this.createConditionForCreateVectorStoreFile(
        user,
        data.accessScope,
        data.grantedGroupsForAccessScope,
        data.pagePathPatterns,
      );

      // Delete obsoleted VectorStore
      const obsoletedVectorStoreRelationId = getIdStringForRef(
        aiAssistant.vectorStore,
      );
      await this.deleteVectorStore(obsoletedVectorStoreRelationId);

      newVectorStoreRelation = await this.createVectorStore(data.name);

      // NOTE(review): updateThreads is not awaited either; if it returns a
      // promise it should get a rejection handler as well. Confirm against its
      // definition.
      this.updateThreads(aiAssistantId, newVectorStoreRelation.vectorStoreId);

      // VectorStore creation process does not await.
      // The rejection handler is required: without it a failure in the
      // background job becomes an unhandled promise rejection.
      this.createVectorStoreFileWithStream(
        newVectorStoreRelation,
        conditions,
      ).catch((err) => {
        logger.error(err);
      });
    }

    const newData = {
      ...data,
      vectorStore: newVectorStoreRelation ?? aiAssistant.vectorStore,
    };

    aiAssistant.set({ ...newData });
    let updatedAiAssistant: AiAssistantDocument = await aiAssistant.save();

    if (
      data.shareScope !== AiAssistantShareScope.PUBLIC_ONLY &&
      aiAssistant.isDefault
    ) {
      updatedAiAssistant = await AiAssistantModel.setDefault(
        aiAssistant._id,
        false,
      );
    }

    return updatedAiAssistant;
  }
  /**
   * Lists the AI assistants the user can see, split into the user's own
   * assistants and the ones shared by other users.
   *
   * Assistants of other owners are included when their share scope is
   * PUBLIC_ONLY, or GROUPS with at least one granted group the user is in.
   *
   * @param user - The user whose accessible assistants are listed.
   */
  async getAccessibleAiAssistants(
    user: IUserHasId,
  ): Promise<AccessibleAiAssistants> {
    const internalGroupIds =
      await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user);
    const externalGroupIds =
      await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(user);
    const userGroupIds = [...internalGroupIds, ...externalGroupIds];

    const ownedByOthers = { owner: { $ne: user } };

    const assistants = await AiAssistantModel.find({
      $or: [
        // Case 1: Assistants owned by the user
        { owner: user },
        // Case 2: Public assistants owned by others
        {
          $and: [
            ownedByOthers,
            { shareScope: AiAssistantShareScope.PUBLIC_ONLY },
          ],
        },
        // Case 3: Group-restricted assistants where user is in granted groups
        {
          $and: [
            ownedByOthers,
            { shareScope: AiAssistantShareScope.GROUPS },
            { 'grantedGroupsForShareScope.item': { $in: userGroupIds } },
          ],
        },
      ],
    })
      .populate('grantedGroupsForShareScope.item')
      .populate('grantedGroupsForAccessScope.item');

    // Split the result by ownership in a single pass
    const userIdString = user._id.toString();
    const myAiAssistants: typeof assistants = [];
    const teamAiAssistants: typeof assistants = [];
    for (const assistant of assistants) {
      if (assistant.owner.toString() === userIdString) {
        myAiAssistants.push(assistant);
      } else {
        teamAiAssistants.push(assistant);
      }
    }

    return { myAiAssistants, teamAiAssistants };
  }
  /**
   * Checks whether the pages covered by `pagePathPatterns` outnumber the
   * configured `openai:limitLearnablePageCountPerAssistant`.
   *
   * A path that appears literally in `pagePathPatterns` counts as one page;
   * any other returned path counts as itself plus its descendants.
   *
   * @param user - User passed on to `descendantCountByPaths`.
   * @param pagePathPatterns - Literal paths and glob patterns of target pages.
   * @returns true when the total page count is greater than the limit.
   */
  async isLearnablePageLimitExceeded(
    user: IUserHasId,
    pagePathPatterns: string[],
  ): Promise<boolean> {
    const Page = mongoose.model<IPage, PageModel>('Page');

    const countsByPath = await Page.descendantCountByPaths(
      removeGlobPath(pagePathPatterns),
      user,
      null,
      true,
      true,
    );

    let totalPageCount = 0;
    for (const entry of countsByPath) {
      // Treat as single page when included in "pagePathPatterns"
      const isLiteralPath = pagePathPatterns.includes(entry.path);
      const descendantCount = isLiteralPath ? 0 : entry.descendantCount;
      totalPageCount += descendantCount + 1;
    }

    logger.debug('TotalPageCount: ', totalPageCount);

    const pageCountLimit = configManager.getConfig(
      'openai:limitLearnablePageCountPerAssistant',
    );

    return totalPageCount > pageCountLimit;
  }
  /**
   * Finds the AI assistants whose `pagePathPatterns` contain one of the given
   * page paths, either literally or as one of the glob patterns generated
   * from them.
   *
   * @param pagePaths - Page paths to look up.
   * @param options - Which references (`owner`, `vectorStore`) to populate.
   */
  private async findAiAssistantByPagePath(
    pagePaths: string[],
    options?: {
      shouldPopulateOwner?: boolean;
      shouldPopulateVectorStore?: boolean;
    },
  ): Promise<AiAssistantDocument[]> {
    const { shouldPopulateOwner, shouldPopulateVectorStore } = options ?? {};

    const globPatterns = pagePaths.flatMap((pagePath) =>
      generateGlobPatterns(pagePath),
    );

    const query = AiAssistantModel.find({
      $or: [
        // Case 1: Exact match
        { pagePathPatterns: { $in: pagePaths } },
        // Case 2: Glob pattern match
        { pagePathPatterns: { $in: globPatterns } },
      ],
    });

    if (shouldPopulateOwner) {
      query.populate('owner');
    }
    if (shouldPopulateVectorStore) {
      query.populate('vectorStore');
    }

    return query.exec();
  }
  1442. }
  // Module-level singleton; stays unassigned until initializeOpenaiService
  // decides that the AI feature is enabled and configured.
  let instance: OpenaiService;

  /**
   * Creates the OpenaiService singleton when `app:aiEnabled` is truthy and
   * `openai:serviceType` is one of OpenaiServiceTypes. Otherwise does nothing.
   *
   * @param crowi - Application instance handed to the service constructor.
   */
  export const initializeOpenaiService = (crowi: Crowi): void => {
    const isAiEnabled = configManager.getConfig('app:aiEnabled');
    if (!isAiEnabled) {
      return;
    }

    const serviceType = configManager.getConfig('openai:serviceType');
    if (serviceType == null || !OpenaiServiceTypes.includes(serviceType)) {
      return;
    }

    instance = new OpenaiService(crowi);
  };
  /**
   * Returns the OpenaiService singleton, or `undefined` when it has not been
   * created by initializeOpenaiService.
   */
  export const getOpenaiService = (): IOpenaiService | undefined => {
    return instance != null ? instance : undefined;
  };