// openai.ts
  1. /// <reference types="multer" />
  2. import assert from 'node:assert';
  3. import fs from 'node:fs';
  4. import { Readable, Transform, Writable } from 'node:stream';
  5. import { pipeline } from 'node:stream/promises';
  6. import type { IPage, IUser, Lang, Nullable, Ref } from '@growi/core';
  7. import {
  8. getIdForRef,
  9. getIdStringForRef,
  10. type IUserHasId,
  11. isPopulated,
  12. PageGrant,
  13. } from '@growi/core';
  14. import { deepEquals } from '@growi/core/dist/utils';
  15. import { isGlobPatternPath } from '@growi/core/dist/utils/page-path-utils';
  16. import escapeStringRegexp from 'escape-string-regexp';
  17. import createError from 'http-errors';
  18. import mongoose, { type HydratedDocument, type Types } from 'mongoose';
  19. import type { OpenAI } from 'openai';
  20. import { toFile } from 'openai';
  21. import type { ChatCompletionChunk } from 'openai/resources/chat/completions';
  22. import ExternalUserGroupRelation from '~/features/external-user-group/server/models/external-user-group-relation';
  23. import ThreadRelationModel, {
  24. type ThreadRelationDocument,
  25. } from '~/features/openai/server/models/thread-relation';
  26. import VectorStoreModel, {
  27. type VectorStoreDocument,
  28. } from '~/features/openai/server/models/vector-store';
  29. import VectorStoreFileRelationModel, {
  30. prepareVectorStoreFileRelations,
  31. type VectorStoreFileRelation,
  32. } from '~/features/openai/server/models/vector-store-file-relation';
  33. import type Crowi from '~/server/crowi';
  34. import type {
  35. IAttachmentDocument,
  36. IAttachmentModel,
  37. } from '~/server/models/attachment';
  38. import type { PageDocument, PageModel } from '~/server/models/page';
  39. import UserGroupRelation from '~/server/models/user-group-relation';
  40. import { configManager } from '~/server/service/config-manager';
  41. import { createBatchStream } from '~/server/util/batch-stream';
  42. import loggerFactory from '~/utils/logger';
  43. import { OpenaiServiceTypes } from '../../interfaces/ai';
  44. import type { UpsertAiAssistantData } from '../../interfaces/ai-assistant';
  45. import {
  46. type AccessibleAiAssistants,
  47. type AiAssistant,
  48. AiAssistantAccessScope,
  49. AiAssistantShareScope,
  50. } from '../../interfaces/ai-assistant';
  51. import type { MessageListParams } from '../../interfaces/message';
  52. import { ThreadType } from '../../interfaces/thread-relation';
  53. import type { IVectorStore } from '../../interfaces/vector-store';
  54. import { removeGlobPath } from '../../utils/remove-glob-path';
  55. import AiAssistantModel, {
  56. type AiAssistantDocument,
  57. } from '../models/ai-assistant';
  58. import { convertMarkdownToHtml } from '../utils/convert-markdown-to-html';
  59. import { generateGlobPatterns } from '../utils/generate-glob-patterns';
  60. import { isVectorStoreCompatible } from '../utils/is-vector-store-compatible';
  61. import { getClient, isStreamResponse } from './client-delegator';
  62. import { openaiApiErrorHandler } from './openai-api-error-handler';
  63. import { replaceAnnotationWithPageLink } from './replace-annotation-with-page-link';
  64. const { isDeepEquals } = deepEquals;
  65. const BATCH_SIZE = 100;
  66. const logger = loggerFactory('growi:service:openai');
  67. type VectorStoreFileRelationsMap = Map<string, VectorStoreFileRelation>;
  68. const convertPathPatternsToRegExp = (
  69. pagePathPatterns: string[],
  70. ): Array<string | RegExp> => {
  71. return pagePathPatterns.map((pagePathPattern) => {
  72. if (isGlobPatternPath(pagePathPattern)) {
  73. const trimedPagePathPattern = pagePathPattern.replace('/*', '');
  74. const escapedPagePathPattern = escapeStringRegexp(trimedPagePathPattern);
  75. // https://regex101.com/r/x5KIZL/1
  76. return new RegExp(`^${escapedPagePathPattern}($|/)`);
  77. }
  78. return pagePathPattern;
  79. });
  80. };
  81. export interface IOpenaiService {
  82. generateAndProcessPreMessage(
  83. message: string,
  84. chunkProcessor: (chunk: ChatCompletionChunk) => void,
  85. ): Promise<void>;
  86. createThread(
  87. userId: string,
  88. type: ThreadType,
  89. aiAssistantId?: string,
  90. initialUserMessage?: string,
  91. ): Promise<ThreadRelationDocument>;
  92. getThreadsByAiAssistantId(
  93. aiAssistantId: string,
  94. userId?: string,
  95. ): Promise<ThreadRelationDocument[]>;
  96. deleteThread(threadRelationId: string): Promise<ThreadRelationDocument>;
  97. deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void>; // for CronJob
  98. deleteObsoletedVectorStoreRelations(): Promise<void>; // for CronJob
  99. deleteVectorStore(vectorStoreRelationId: string): Promise<void>;
  100. getMessageData(
  101. threadId: string,
  102. lang?: Lang,
  103. options?: MessageListParams,
  104. ): Promise<OpenAI.Beta.Threads.Messages.MessagesPage>;
  105. createVectorStoreFileOnPageCreate(pages: PageDocument[]): Promise<void>;
  106. updateVectorStoreFileOnPageUpdate(
  107. page: HydratedDocument<PageDocument>,
  108. ): Promise<void>;
  109. deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise<void>;
  110. deleteObsoleteVectorStoreFile(
  111. limit: number,
  112. apiCallInterval: number,
  113. ): Promise<void>; // for CronJob
  114. isAiAssistantUsable(
  115. aiAssistantId: string,
  116. user: IUserHasId,
  117. ): Promise<boolean>;
  118. createAiAssistant(
  119. data: UpsertAiAssistantData,
  120. user: IUserHasId,
  121. ): Promise<AiAssistantDocument>;
  122. updateAiAssistant(
  123. aiAssistantId: string,
  124. data: UpsertAiAssistantData,
  125. user: IUserHasId,
  126. ): Promise<AiAssistantDocument>;
  127. getAccessibleAiAssistants(user: IUserHasId): Promise<AccessibleAiAssistants>;
  128. isLearnablePageLimitExceeded(
  129. user: IUserHasId,
  130. pagePathPatterns: string[],
  131. ): Promise<boolean>;
  132. }
  133. class OpenaiService implements IOpenaiService {
  134. private crowi: Crowi;
  /**
   * Stores the Crowi instance and registers this service's attachment
   * handlers with the attachment service.
   *
   * @param crowi - application instance providing `attachmentService`
   */
  constructor(crowi: Crowi) {
    this.crowi = crowi;

    // Bind the handlers so `this` survives being invoked by the attachment service.
    const onAttach = this.createVectorStoreFileOnUploadAttachment.bind(this);
    const onDetach = this.deleteVectorStoreFileOnDeleteAttachment.bind(this);
    this.createVectorStoreFileOnUploadAttachment = onAttach;
    this.deleteVectorStoreFileOnDeleteAttachment = onDetach;

    crowi.attachmentService.addAttachHandler(onAttach);
    crowi.attachmentService.addDetachHandler(onDetach);
  }
  /**
   * Client for the service type currently configured under
   * `openai:serviceType`; the config is read on every access.
   */
  private get client() {
    return getClient({
      openaiServiceType: configManager.getConfig('openai:serviceType'),
    });
  }
  /**
   * Requests a short streamed completion that acknowledges the user's
   * question and forwards each streamed chunk to `chunkProcessor`.
   *
   * Nothing is forwarded when the client's answer is not a stream response.
   *
   * @param message - the user's message
   * @param chunkProcessor - called once per streamed chunk, in arrival order
   */
  async generateAndProcessPreMessage(
    message: string,
    chunkProcessor: (delta: ChatCompletionChunk) => void,
  ): Promise<void> {
    const instructions = [
      "Generate a message briefly confirming the user's question.",
      'Please generate up to 20 characters',
    ].join('');

    const completion = await this.client.chatCompletion({
      stream: true,
      model: 'gpt-4.1-nano',
      messages: [
        { role: 'system', content: instructions },
        { role: 'user', content: message },
      ],
    });

    if (isStreamResponse(completion)) {
      for await (const chunk of completion) {
        chunkProcessor(chunk);
      }
    }
  }
  /**
   * Asks the chat model for a brief title summarizing `message`.
   *
   * @param message - the user's message the title is derived from
   * @returns the generated title; `null`/`undefined` when the model returned
   *   no content, returned no choices, or the client answered with a stream
   */
  private async generateThreadTitle(
    message: string,
  ): Promise<Nullable<string>> {
    const systemMessage = [
      'Create a brief title (max 5 words) from your message.',
      'Respond in the same language the user uses in their input.',
      'Response should only contain the title.',
    ].join('');

    const threadTitleCompletion = await this.client.chatCompletion({
      model: 'gpt-4.1-nano',
      messages: [
        {
          role: 'system',
          content: systemMessage,
        },
        {
          role: 'user',
          content: message,
        },
      ],
    });

    // A streamed response carries no complete message to read a title from.
    if (isStreamResponse(threadTitleCompletion)) {
      return undefined;
    }

    // `choices` may be empty; optional chaining avoids a TypeError in that case.
    return threadTitleCompletion.choices[0]?.message.content;
  }
  /**
   * Creates a thread through the client and persists a ThreadRelation for it.
   *
   * When `aiAssistantId` is given, the assistant is loaded with its vector
   * store populated and that store's id is passed to the client. When
   * `initialUserMessage` is given, a title is generated in the background and
   * saved onto the relation once available; title failures are only logged.
   *
   * @param userId - owner of the thread
   * @param type - thread type stored on the relation
   * @param aiAssistantId - optional assistant the thread belongs to
   * @param initialUserMessage - optional message used to generate a title
   * @returns the created relation (its `title` is still `null` at this point)
   * @throws whatever the assistant lookup, the client, or the model creation throws
   */
  async createThread(
    userId: string,
    type: ThreadType,
    aiAssistantId?: string,
    initialUserMessage?: string,
  ): Promise<ThreadRelationDocument> {
    const aiAssistant =
      aiAssistantId != null
        ? await AiAssistantModel.findOne({
            _id: { $eq: aiAssistantId },
          }).populate<{ vectorStore: IVectorStore }>('vectorStore')
        : null;

    const thread = await this.client.createThread(
      aiAssistant?.vectorStore?.vectorStoreId,
    );

    const threadRelation = await ThreadRelationModel.create({
      userId,
      type,
      aiAssistant: aiAssistantId,
      threadId: thread.id,
      title: null, // Initialize title as null
    });

    if (initialUserMessage != null) {
      // Do not await, run in background (`void` marks the intentional fire-and-forget)
      void this.generateThreadTitle(initialUserMessage)
        .then(async (generatedTitle) => {
          if (generatedTitle != null) {
            threadRelation.title = generatedTitle;
            await threadRelation.save();
          }
        })
        .catch((err) => {
          logger.error(
            `Failed to generate thread title for threadId ${thread.id}:`,
            err,
          );
        });
    }

    return threadRelation;
  }
  /**
   * Points every thread of the given assistant at `vectorStoreId`.
   *
   * Threads are updated one after another; a failure for one thread is logged
   * and does not stop the remaining updates.
   *
   * @param aiAssistantId - assistant whose threads are updated
   * @param vectorStoreId - vector store id passed to the client for each thread
   */
  private async updateThreads(
    aiAssistantId: string,
    vectorStoreId: string,
  ): Promise<void> {
    const relations = await this.getThreadsByAiAssistantId(aiAssistantId);

    for await (const relation of relations) {
      try {
        logger.debug(
          'Update thread',
          await this.client.updateThread(relation.threadId, vectorStoreId),
        );
      } catch (err) {
        logger.error(err);
      }
    }
  }
  /**
   * Finds the thread relations of an assistant, newest `updatedAt` first.
   *
   * @param aiAssistantId - assistant to look up
   * @param userId - when given, only this user's threads are returned
   * @param type - thread type to match (defaults to `ThreadType.KNOWLEDGE`)
   */
  async getThreadsByAiAssistantId(
    aiAssistantId: string,
    userId?: string,
    type: ThreadType = ThreadType.KNOWLEDGE,
  ): Promise<ThreadRelationDocument[]> {
    const query: { aiAssistant: string; type: ThreadType; userId?: string } = {
      aiAssistant: aiAssistantId,
      type,
      // The userId condition is only added when a user was specified.
      ...(userId != null ? { userId } : {}),
    };

    return await ThreadRelationModel.find(query).sort({ updatedAt: -1 });
  }
  /**
   * Deletes a thread through the client and removes its ThreadRelation.
   *
   * If the client call fails, the error goes to `openaiApiErrorHandler` with a
   * `notFoundError` handler that removes the relation, and is then re-thrown.
   *
   * @param threadRelationId - id of the ThreadRelation document
   * @returns the (removed) relation document
   * @throws a 404 http-error when the relation does not exist, or the error
   *   raised while deleting
   */
  async deleteThread(
    threadRelationId: string,
  ): Promise<ThreadRelationDocument> {
    const threadRelation = await ThreadRelationModel.findById(threadRelationId);
    if (threadRelation == null) {
      throw createError(404, 'ThreadRelation document does not exist');
    }

    const removeRelation = async () => {
      await threadRelation.remove();
    };

    try {
      const response = await this.client.deleteThread(threadRelation.threadId);
      logger.debug('Delete thread', response);
      await removeRelation();
    } catch (err) {
      await openaiApiErrorHandler(err, { notFoundError: removeRelation });
      throw err;
    }

    return threadRelation;
  }
  /**
   * Deletes expired threads through the client, then removes the relations of
   * the threads that were actually deleted.
   *
   * A failed deletion is logged and skipped; its relation is kept.
   *
   * @param limit - passed to `getExpiredThreadRelations`
   * @param apiCallInterval - milliseconds to wait after each successful deletion
   */
  public async deleteExpiredThreads(
    limit: number,
    apiCallInterval: number,
  ): Promise<void> {
    const expiredRelations =
      await ThreadRelationModel.getExpiredThreadRelations(limit);
    if (expiredRelations == null) {
      return;
    }

    const pause = () =>
      new Promise((resolve) => setTimeout(resolve, apiCallInterval));

    const removedThreadIds: string[] = [];
    for await (const relation of expiredRelations) {
      try {
        const response = await this.client.deleteThread(relation.threadId);
        logger.debug('Delete thread', response);
        removedThreadIds.push(relation.threadId);

        // sleep
        await pause();
      } catch (err) {
        logger.error(err);
      }
    }

    await ThreadRelationModel.deleteMany({
      threadId: { $in: removedThreadIds },
    });
  }
  /**
   * Loads the messages of a thread and runs `replaceAnnotationWithPageLink`
   * on every text content part before returning the page.
   *
   * @param threadId - thread to read
   * @param lang - language forwarded to the annotation replacement
   * @param options - list parameters forwarded to the client
   */
  async getMessageData(
    threadId: string,
    lang?: Lang,
    options?: MessageListParams,
  ): Promise<OpenAI.Beta.Threads.Messages.MessagesPage> {
    const messagesPage = await this.client.getMessages(threadId, options);

    for await (const message of messagesPage.data) {
      for await (const part of message.content) {
        if (part.type !== 'text') {
          continue;
        }
        await replaceAnnotationWithPageLink(part, lang);
      }
    }

    return messagesPage;
  }
  /**
   * Returns the distinct, non-deleted VectorStore documents referenced by the
   * VectorStoreFileRelation documents of the given pages.
   *
   * @param pageIds - pages whose file relations are inspected
   */
  private async getVectorStoreRelationsByPageIds(
    pageIds: Types.ObjectId[],
  ): Promise<VectorStoreDocument[]> {
    // Named to avoid shadowing the `pipeline` imported from node:stream/promises.
    const aggregationStages = [
      // 1. file relations belonging to the requested pages
      { $match: { page: { $in: pageIds } } },
      // 2. join the VectorStore document of each relation
      {
        $lookup: {
          from: 'vectorstores',
          localField: 'vectorStoreRelationId',
          foreignField: '_id',
          as: 'vectorStore',
        },
      },
      // 3. one row per joined vector store
      { $unwind: '$vectorStore' },
      // 4. drop vector stores already marked as deleted
      { $match: { 'vectorStore.isDeleted': false } },
      // 5. continue with the vector store document itself
      { $replaceRoot: { newRoot: '$vectorStore' } },
      // 6. de-duplicate by _id
      { $group: { _id: '$_id', doc: { $first: '$$ROOT' } } },
      // 7. unwrap the grouped document again
      { $replaceRoot: { newRoot: '$doc' } },
    ];

    return await VectorStoreFileRelationModel.aggregate<VectorStoreDocument>(
      aggregationStages,
    );
  }
  /**
   * Creates a vector store through the client and persists a VectorStore
   * document holding its id.
   *
   * @param name - name given to the new vector store
   * @returns the created VectorStore document
   * @throws the original error of the failing step (non-Error values are
   *   wrapped in an Error)
   */
  private async createVectorStore(name: string): Promise<VectorStoreDocument> {
    try {
      const newVectorStore = await this.client.createVectorStore(name);

      const newVectorStoreDocument = (await VectorStoreModel.create({
        vectorStoreId: newVectorStore.id,
      })) as VectorStoreDocument;

      return newVectorStoreDocument;
    } catch (err) {
      // Re-throw the original error instead of `new Error(err)`, which would
      // stringify it and drop its class, stack and extra properties.
      throw err instanceof Error ? err : new Error(String(err));
    }
  }
  /**
   * Converts a revision body from markdown to HTML and uploads it as
   * `<page id>.html` through the client.
   *
   * @param revisionBody - markdown body of the page revision
   * @param page - page the body belongs to (passed to the converter, and its
   *   `_id` names the file)
   * @returns the uploaded file object
   */
  private async uploadFile(
    revisionBody: string,
    page: HydratedDocument<PageDocument>,
  ): Promise<OpenAI.Files.FileObject> {
    const html = await convertMarkdownToHtml(revisionBody, {
      page,
      siteUrl: configManager.getConfig('app:siteUrl'),
    });

    const uploadable = await toFile(Readable.from(html), `${page._id}.html`);
    return await this.client.uploadFile(uploadable);
  }
  /**
   * Uploads an attachment through the client, reading it from `readStream`
   * when given and otherwise from `filePath`.
   *
   * @param fileName - name given to the uploaded file
   * @param readStream - stream with the file content (takes precedence)
   * @param filePath - path to read the content from when no stream is given
   * @returns the uploaded file object
   * @throws Error when neither `readStream` nor `filePath` is provided
   */
  private async uploadFileForAttachment(
    fileName: string,
    readStream?: NodeJS.ReadableStream,
    filePath?: string,
  ): Promise<OpenAI.Files.FileObject> {
    let source: NodeJS.ReadableStream;
    if (readStream != null) {
      source = readStream;
    } else if (filePath != null) {
      source = fs.createReadStream(filePath);
    } else {
      throw new Error('readStream and filePath are both null');
    }

    const uploadable = await toFile(source, fileName);
    return await this.client.uploadFile(uploadable);
  }
  /**
   * Deletes a vector store through the client and marks its VectorStore
   * document as deleted.
   *
   * Does nothing when no non-deleted VectorStore document has the given id.
   * If deletion fails, the error is passed to `openaiApiErrorHandler` with a
   * `notFoundError` handler that marks the document as deleted, and is then
   * re-thrown.
   *
   * @param vectorStoreRelationId - id of the VectorStore document
   * @throws the error raised while deleting (non-Error values are wrapped)
   */
  async deleteVectorStore(vectorStoreRelationId: string): Promise<void> {
    const vectorStoreDocument: VectorStoreDocument | null =
      await VectorStoreModel.findOne({
        _id: vectorStoreRelationId,
        isDeleted: false,
      });

    if (vectorStoreDocument == null) {
      return;
    }

    try {
      const deleteVectorStoreResponse = await this.client.deleteVectorStore(
        vectorStoreDocument.vectorStoreId,
      );
      logger.debug('Delete vector store', deleteVectorStoreResponse);
      await vectorStoreDocument.markAsDeleted();
    } catch (err) {
      await openaiApiErrorHandler(err, {
        // Call through the document: passing `vectorStoreDocument.markAsDeleted`
        // directly would detach the method from its `this`.
        notFoundError: async () => {
          await vectorStoreDocument.markAsDeleted();
        },
      });
      // Preserve the original error (class, stack) instead of stringifying it.
      throw err instanceof Error ? err : new Error(String(err));
    }
  }
  /**
   * Streams the attachments of a page in batches, uploads every attachment
   * that `isVectorStoreCompatible` accepts, and records each uploaded file in
   * `vectorStoreFileRelationsMap`.
   *
   * A failure for a single attachment is logged and does not stop the stream.
   *
   * @param pageId - page whose attachments are uploaded
   * @param vectorStoreRelationId - id of the VectorStore document
   * @param vectorStoreFileRelationsMap - map that receives the prepared relations
   */
  private async createVectorStoreFileWithStreamForAttachment(
    pageId: Types.ObjectId,
    vectorStoreRelationId: Types.ObjectId,
    vectorStoreFileRelationsMap: VectorStoreFileRelationsMap,
  ): Promise<void> {
    const Attachment = mongoose.model<
      HydratedDocument<IAttachmentDocument>,
      IAttachmentModel
    >('Attachment');

    // Uploads one attachment and registers the resulting file id.
    const uploadAttachment = async (
      attachment: HydratedDocument<IAttachmentDocument>,
    ): Promise<void> => {
      const isCompatible = isVectorStoreCompatible(
        attachment.originalName,
        attachment.fileFormat,
      );
      if (!isCompatible) {
        return;
      }

      const readStream =
        await this.crowi.fileUploadService.findDeliveryFile(attachment);
      const uploaded = await this.uploadFileForAttachment(
        attachment.originalName,
        readStream,
      );
      prepareVectorStoreFileRelations(
        vectorStoreRelationId,
        pageId,
        uploaded.id,
        vectorStoreFileRelationsMap,
        attachment._id,
      );
    };

    const uploadSink = new Writable({
      objectMode: true,
      write: async (
        attachments: HydratedDocument<IAttachmentDocument>[],
        _encoding,
        callback,
      ) => {
        for await (const attachment of attachments) {
          try {
            await uploadAttachment(attachment);
          } catch (err) {
            logger.error(err);
          }
        }
        callback();
      },
      final: (callback) => {
        logger.debug('Finished uploading attachments');
        callback();
      },
    });

    await pipeline(
      Attachment.find({ page: pageId }).cursor(),
      createBatchStream(BATCH_SIZE),
      uploadSink,
    );
  }
  /**
   * Uploads the revision bodies (and, unless disabled, the attachments) of
   * the given pages and attaches the uploaded files to the vector store.
   *
   * Steps:
   * 1. upload all pages concurrently; individual failures are only logged
   * 2. upsert the VectorStoreFileRelation documents for the uploaded files
   * 3. create the vector store file batch and mark the pages as attached
   *
   * If step 2 or 3 fails, the error is logged and the files of all given
   * pages are deleted again for this vector store.
   *
   * @param vectorStoreRelation - VectorStore document to attach the files to
   * @param pages - pages to upload; at most `BATCH_SIZE` per call
   * @param ignoreAttachments - when true, page attachments are not uploaded
   * @throws AssertionError when more than `BATCH_SIZE` pages are passed
   */
  private async createVectorStoreFile(
    vectorStoreRelation: VectorStoreDocument,
    pages: Array<HydratedDocument<PageDocument>>,
    ignoreAttachments = false,
  ): Promise<void> {
    // Validate the batch size BEFORE any upload is started; asserting after
    // the workers were created would leave them running unobserved.
    assert(
      pages.length <= BATCH_SIZE,
      'workers.length must be less than or equal to BATCH_SIZE',
    );

    const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();

    const processUploadFile = async (page: HydratedDocument<PageDocument>) => {
      if (page._id == null || page.revision == null) {
        return;
      }

      // Use the already populated revision body when it is non-empty,
      // otherwise populate the revision and try again.
      let revisionBody: string | undefined;
      if (isPopulated(page.revision) && page.revision.body.length > 0) {
        revisionBody = page.revision.body;
      } else {
        const pagePopulatedToShowRevision =
          await page.populateDataToShowRevision();
        if (
          pagePopulatedToShowRevision.revision != null &&
          pagePopulatedToShowRevision.revision.body.length > 0
        ) {
          revisionBody = pagePopulatedToShowRevision.revision.body;
        }
      }

      if (revisionBody == null) {
        return;
      }

      const uploadedFile = await this.uploadFile(revisionBody, page);
      prepareVectorStoreFileRelations(
        vectorStoreRelation._id,
        page._id,
        uploadedFile.id,
        vectorStoreFileRelationsMap,
      );

      if (!ignoreAttachments) {
        await this.createVectorStoreFileWithStreamForAttachment(
          page._id,
          vectorStoreRelation._id,
          vectorStoreFileRelationsMap,
        );
      }
    };

    // Start all uploads and wait for every one of them to settle.
    const fileUploadResult = await Promise.allSettled(
      pages.map(processUploadFile),
    );
    for (const result of fileUploadResult) {
      if (result.status === 'rejected') {
        logger.error(result.reason);
      }
    }

    const vectorStoreFileRelations = Array.from(
      vectorStoreFileRelationsMap.values(),
    );
    const uploadedFileIds = vectorStoreFileRelations.flatMap(
      (data) => data.fileIds,
    );

    if (uploadedFileIds.length === 0) {
      return;
    }

    const pageIds = pages.map((page) => page._id);

    try {
      // Save vector store file relation
      await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(
        vectorStoreFileRelations,
      );

      // Create vector store file
      const createVectorStoreFileBatchResponse =
        await this.client.createVectorStoreFileBatch(
          vectorStoreRelation.vectorStoreId,
          uploadedFileIds,
        );
      logger.debug(
        'Create vector store file',
        createVectorStoreFileBatchResponse,
      );

      // Set isAttachedToVectorStore: true when the uploaded file is attached to VectorStore
      await VectorStoreFileRelationModel.markAsAttachedToVectorStore(pageIds);
    } catch (err) {
      logger.error(err);

      // Delete all uploaded files if createVectorStoreFileBatch fails
      for (const pageId of pageIds) {
        await this.deleteVectorStoreFile(vectorStoreRelation._id, pageId);
      }
    }
  }
  /**
   * Deletes all VectorStore documents that are marked as deleted
   * (isDeleted: true) and have no associated VectorStoreFileRelation documents.
   *
   * Intended to be run from a cron job.
   */
  async deleteObsoletedVectorStoreRelations(): Promise<void> {
    const deletedVectorStoreRelations = await VectorStoreModel.find({
      isDeleted: true,
    });
    if (deletedVectorStoreRelations.length === 0) {
      return;
    }

    // One row per vector store that still has at least one file relation.
    // The aggregate yields `{ _id }` objects, so the ids are extracted
    // explicitly instead of treating the rows themselves as ObjectIds.
    const relationGroups = await VectorStoreFileRelationModel.aggregate<{
      _id: Types.ObjectId;
    }>([{ $group: { _id: '$vectorStoreRelationId' } }]);
    const currentVectorStoreRelationIds = relationGroups.map(
      (group) => group._id,
    );

    // When no file relation exists at all, every deleted vector store is
    // obsolete; `$nin: []` matches them all, so there is no early return here.
    await VectorStoreModel.deleteMany({
      _id: { $nin: currentVectorStoreRelationIds },
      isDeleted: true,
    });
  }
  /**
   * Deletes the uploaded file of an attachment relation through the client
   * and removes every VectorStoreFileRelation document of that attachment.
   *
   * Relations without an attachment are ignored. On failure the error is
   * logged and passed to `openaiApiErrorHandler` with a `notFoundError`
   * handler that still removes the relation documents; nothing is re-thrown
   * here.
   *
   * @param vectorStoreFileRelation - relation whose first file id is deleted
   */
  private async deleteVectorStoreFileForAttachment(
    vectorStoreFileRelation: VectorStoreFileRelation,
  ): Promise<void> {
    const attachmentId = vectorStoreFileRelation.attachment;
    if (attachmentId == null) {
      return;
    }

    const purgeAttachmentRelations = async () => {
      await VectorStoreFileRelationModel.deleteMany({
        attachment: attachmentId,
      });
    };

    try {
      // Delete entities in VectorStoreFile
      const [fileId] = vectorStoreFileRelation.fileIds;
      const response = await this.client.deleteFile(fileId);
      logger.debug('Delete vector store file (attachment) ', response);

      // Delete related VectorStoreFileRelation document
      await purgeAttachmentRelations();
    } catch (err) {
      logger.error(err);
      await openaiApiErrorHandler(err, {
        notFoundError: () => purgeAttachmentRelations(),
      });
    }
  }
  /**
   * Deletes the uploaded files recorded for a page in one vector store.
   *
   * Unless `ignoreAttachments` is set, the attachment relations of the page
   * are handled first. Then the files of the relation found for the page are
   * deleted one by one; the relation is removed when no file id is left,
   * otherwise it is saved with the file ids that could not be deleted.
   *
   * @param vectorStoreRelationId - id of the VectorStore document
   * @param pageId - page whose files are deleted
   * @param ignoreAttachments - when true, attachment relations are left alone
   * @param apiCallInterval - optional milliseconds to wait after each
   *   successful file deletion
   */
  private async deleteVectorStoreFile(
    vectorStoreRelationId: Types.ObjectId,
    pageId: Types.ObjectId,
    ignoreAttachments = false,
    apiCallInterval?: number,
  ): Promise<void> {
    if (!ignoreAttachments) {
      // Get all VectorStoreFileRelation documents (attachments) associated with the page
      const attachmentRelations = await VectorStoreFileRelationModel.find({
        vectorStoreRelationId,
        page: pageId,
        attachment: { $exists: true },
      });

      for await (const attachmentRelation of attachmentRelations) {
        try {
          await this.deleteVectorStoreFileForAttachment(attachmentRelation);
        } catch (err) {
          logger.error(err);
        }
      }
    }

    // Delete vector store file and delete vector store file relation
    const relation = await VectorStoreFileRelationModel.findOne({
      vectorStoreRelationId,
      page: pageId,
    });
    if (relation == null) {
      return;
    }

    const removedFileIds: string[] = [];
    for await (const fileId of relation.fileIds) {
      try {
        const response = await this.client.deleteFile(fileId);
        logger.debug('Delete vector store file', response);
        removedFileIds.push(fileId);

        if (apiCallInterval != null) {
          // sleep
          await new Promise((resolve) => setTimeout(resolve, apiCallInterval));
        }
      } catch (err) {
        await openaiApiErrorHandler(err, {
          // A file reported as not found counts as removed.
          notFoundError: async () => {
            removedFileIds.push(fileId);
          },
        });
        logger.error(err);
      }
    }

    const remainingFileIds = relation.fileIds.filter(
      (fileId) => !removedFileIds.includes(fileId),
    );

    if (remainingFileIds.length > 0) {
      relation.fileIds = remainingFileIds;
      await relation.save();
      return;
    }

    await relation.remove();
  }
  /**
   * Deletes the vector store files of the given pages from every non-deleted
   * vector store that references them.
   *
   * Pages are handled one after another; for each page the deletions across
   * vector stores run concurrently and are all awaited, whether they succeed
   * or fail.
   *
   * @param pageIds - pages whose vector store files are deleted
   */
  async deleteVectorStoreFilesByPageIds(
    pageIds: Types.ObjectId[],
  ): Promise<void> {
    const vectorStoreRelations =
      await this.getVectorStoreRelationsByPageIds(pageIds);
    if (vectorStoreRelations == null || vectorStoreRelations.length === 0) {
      return;
    }

    for await (const pageId of pageIds) {
      await Promise.allSettled(
        vectorStoreRelations.map((relation) =>
          this.deleteVectorStoreFile(relation._id, pageId),
        ),
      );
    }
  }
  /**
   * Delete vector store files that belong to vector stores flagged
   * `isDeleted: true`.
   *
   * At most `limit` VectorStoreFileRelation documents are handled per call.
   * A failure while deleting one relation is logged and the loop continues.
   *
   * @param limit maximum number of VectorStoreFileRelation documents to fetch
   * @param apiCallInterval forwarded to `deleteVectorStoreFile` as its interval argument
   */
  async deleteObsoleteVectorStoreFile(
    limit: number,
    apiCallInterval: number,
  ): Promise<void> {
    // VectorStore documents that are marked as deleted
    const deletedVectorStores = await VectorStoreModel.find({ isDeleted: true });
    if (deletedVectorStores.length === 0) {
      return;
    }

    // VectorStoreFileRelation documents still attached to those vector stores
    const deletedVectorStoreIds = deletedVectorStores.map(
      (vectorStore) => vectorStore._id,
    );
    const obsoleteFileRelations = await VectorStoreFileRelationModel.find({
      vectorStoreRelationId: { $in: deletedVectorStoreIds },
    }).limit(limit);
    if (obsoleteFileRelations.length === 0) {
      return;
    }

    for (const fileRelation of obsoleteFileRelations) {
      try {
        await this.deleteVectorStoreFile(
          fileRelation.vectorStoreRelationId,
          fileRelation.page,
          false,
          apiCallInterval,
        );
      } catch (err) {
        // Best effort: keep going with the remaining relations
        logger.error(err);
      }
    }
  }
  /**
   * Remove the vector store file linked to an attachment.
   *
   * Looks up a single VectorStoreFileRelation by attachment id; when none
   * exists this is a no-op. Deletion errors are logged, not rethrown.
   *
   * @param attachmentId id of the attachment being deleted
   */
  private async deleteVectorStoreFileOnDeleteAttachment(attachmentId: string) {
    const fileRelation = await VectorStoreFileRelationModel.findOne({
      attachment: attachmentId,
    });

    if (fileRelation != null) {
      try {
        await this.deleteVectorStoreFileForAttachment(fileRelation);
      } catch (err) {
        logger.error(err);
      }
    }
  }
  /**
   * Keep only the pages that the given assistant may learn, based on
   * `aiAssistant.accessScope`:
   *
   * - PUBLIC_ONLY: public pages
   * - GROUPS: public pages, plus user-group pages granted to a group the owner belongs to
   * - OWNER: the above, plus owner-only pages granted to the owner
   * - any other value: no pages
   *
   * NOTE(review): for GROUPS this checks all groups of the owner, whereas
   * `createConditionForCreateVectorStoreFile` restricts to
   * `grantedGroupsForAccessScope` — confirm the difference is intended.
   *
   * @param aiAssistant assistant whose access scope and owner are evaluated
   * @param pages candidate pages
   * @returns the subset of `pages` matching the access scope
   */
  private async filterPagesByAccessScope(
    aiAssistant: AiAssistantDocument,
    pages: HydratedDocument<PageDocument>[],
  ) {
    type TargetPage = HydratedDocument<PageDocument>;

    const isPublic = (page: TargetPage) =>
      page.grant === PageGrant.GRANT_PUBLIC;

    const isGrantedToAnyGroup = (page: TargetPage, groupIds: string[]) =>
      page.grant === PageGrant.GRANT_USER_GROUP &&
      page.grantedGroups.some((group) =>
        groupIds.includes(getIdStringForRef(group.item)),
      );

    const isGrantedToOwner = (page: TargetPage, ownerId: Ref<IUser>) =>
      page.grant === PageGrant.GRANT_OWNER &&
      page.grantedUsers.some(
        (user) => getIdStringForRef(user) === getIdStringForRef(ownerId),
      );

    // Ids of internal and external user groups related to the owner
    const fetchOwnerGroupIds = async (owner: Ref<IUser>) => {
      const internalGroups =
        await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
      const externalGroups =
        await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
      return [...internalGroups, ...externalGroups].map((group) =>
        getIdStringForRef(group),
      );
    };

    if (aiAssistant.accessScope === AiAssistantAccessScope.PUBLIC_ONLY) {
      return pages.filter(isPublic);
    }

    if (aiAssistant.accessScope === AiAssistantAccessScope.GROUPS) {
      const ownerGroupIds = await fetchOwnerGroupIds(aiAssistant.owner);
      return pages.filter(
        (page) => isPublic(page) || isGrantedToAnyGroup(page, ownerGroupIds),
      );
    }

    if (aiAssistant.accessScope === AiAssistantAccessScope.OWNER) {
      const ownerGroupIds = await fetchOwnerGroupIds(aiAssistant.owner);
      return pages.filter(
        (page) =>
          isPublic(page) ||
          isGrantedToOwner(page, aiAssistant.owner) ||
          isGrantedToAnyGroup(page, ownerGroupIds),
      );
    }

    return [];
  }
  /**
   * Add newly created pages to the vector store of every assistant whose
   * page path patterns match one of the page paths.
   *
   * An assistant is skipped when its owner is not populated, when
   * `isLearnablePageLimitExceeded` reports true for it, or when its vector
   * store is missing or not populated.
   *
   * @param pages the created pages
   */
  async createVectorStoreFileOnPageCreate(
    pages: HydratedDocument<PageDocument>[],
  ): Promise<void> {
    const matchedAiAssistants = await this.findAiAssistantByPagePath(
      pages.map((page) => page.path),
      {
        shouldPopulateOwner: true,
        shouldPopulateVectorStore: true,
      },
    );

    if (matchedAiAssistants.length === 0) {
      return;
    }

    for (const aiAssistant of matchedAiAssistants) {
      if (!isPopulated(aiAssistant.owner)) {
        continue;
      }

      const limitExceeded = await this.isLearnablePageLimitExceeded(
        aiAssistant.owner,
        aiAssistant.pagePathPatterns,
      );
      if (limitExceeded) {
        continue;
      }

      const pagesToVectorize = await this.filterPagesByAccessScope(
        aiAssistant,
        pages,
      );

      const vectorStoreRelation = aiAssistant.vectorStore;
      if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
        continue;
      }

      const pathsToVectorize = pagesToVectorize.map((page) => page.path);
      logger.debug('--------- createVectorStoreFileOnPageCreate ---------');
      logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
      logger.debug('VectorStoreFile pagePath to be created: ', pathsToVectorize);
      logger.debug('-----------------------------------------------------');

      await this.createVectorStoreFile(
        vectorStoreRelation as VectorStoreDocument,
        pagesToVectorize,
      );
    }
  }
  /**
   * Refresh the vector store file of an updated page for every assistant
   * whose page path patterns match the page path.
   *
   * The existing file for the page is always deleted. A new one is created
   * only from the pages that pass `filterPagesByAccessScope`, so a page whose
   * permission moved outside the assistant's access scope is not re-created.
   * Attachments are left untouched in both steps.
   *
   * @param page the updated page
   */
  async updateVectorStoreFileOnPageUpdate(
    page: HydratedDocument<PageDocument>,
  ) {
    const matchedAiAssistants = await this.findAiAssistantByPagePath(
      [page.path],
      { shouldPopulateVectorStore: true },
    );

    if (matchedAiAssistants.length === 0) {
      return;
    }

    for (const aiAssistant of matchedAiAssistants) {
      const pagesToVectorize = await this.filterPagesByAccessScope(
        aiAssistant,
        [page],
      );

      const vectorStoreRelation = aiAssistant.vectorStore;
      if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
        continue;
      }
      const vectorStore = vectorStoreRelation as VectorStoreDocument;

      const pathsToVectorize = pagesToVectorize.map((target) => target.path);
      logger.debug('---------- updateVectorStoreOnPageUpdate ------------');
      logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
      logger.debug('PagePath of VectorStoreFile to be deleted: ', page.path);
      logger.debug(
        'pagePath of VectorStoreFile to be created: ',
        pathsToVectorize,
      );
      logger.debug('-----------------------------------------------------');

      // Do not create a new VectorStoreFile if page is changed to a permission that AiAssistant does not have access to
      const ignoreAttachments = true;
      await this.deleteVectorStoreFile(
        vectorStore._id,
        page._id,
        ignoreAttachments,
      );
      await this.createVectorStoreFile(
        vectorStore,
        pagesToVectorize,
        ignoreAttachments,
      );
    }
  }
  /**
   * Register an uploaded attachment in the vector store of every assistant
   * that covers the attachment's page.
   *
   * Returns without doing anything when the file is not vector-store
   * compatible, the page does not exist, or no assistant matches the page
   * path.
   *
   * The file is uploaded only after at least one assistant is known to be
   * eligible (the page passes its access scope and its vector store is
   * populated). Uploading first would leave an orphaned file on the remote
   * side whenever every assistant ends up being skipped.
   *
   * @param pageId id of the page the attachment belongs to
   * @param attachment the attachment document
   * @param file the uploaded multer file (`originalname`, `mimetype`, `path` are read)
   */
  private async createVectorStoreFileOnUploadAttachment(
    pageId: string,
    attachment: HydratedDocument<IAttachmentDocument>,
    file: Express.Multer.File,
  ): Promise<void> {
    if (!isVectorStoreCompatible(file.originalname, file.mimetype)) {
      return;
    }

    const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>(
      'Page',
    );
    const page = await Page.findById(pageId);
    if (page == null) {
      return;
    }

    const aiAssistants = await this.findAiAssistantByPagePath([page.path], {
      shouldPopulateVectorStore: true,
    });
    if (aiAssistants.length === 0) {
      return;
    }

    // First pass: decide which assistants will actually receive the file
    const eligibleAiAssistants: AiAssistantDocument[] = [];
    for (const aiAssistant of aiAssistants) {
      const pagesToVectorize = await this.filterPagesByAccessScope(
        aiAssistant,
        [page],
      );
      if (pagesToVectorize.length === 0) {
        continue;
      }

      const vectorStoreRelation = aiAssistant.vectorStore;
      if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
        continue;
      }

      eligibleAiAssistants.push(aiAssistant);
    }

    // Avoid uploading a file that no vector store would reference
    if (eligibleAiAssistants.length === 0) {
      return;
    }

    const uploadedFile = await this.uploadFileForAttachment(
      file.originalname,
      undefined,
      file.path,
    );
    logger.debug('Uploaded file', uploadedFile);

    // Second pass: attach the uploaded file and record the relation
    for (const aiAssistant of eligibleAiAssistants) {
      const vectorStoreRelation = aiAssistant.vectorStore;
      // Already verified in the first pass; repeated to narrow the type
      if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
        continue;
      }

      await this.client.createVectorStoreFile(
        vectorStoreRelation.vectorStoreId,
        uploadedFile.id,
      );

      const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap =
        new Map();
      prepareVectorStoreFileRelations(
        vectorStoreRelation._id as Types.ObjectId,
        page._id,
        uploadedFile.id,
        vectorStoreFileRelationsMap,
        attachment._id,
      );

      const vectorStoreFileRelations = Array.from(
        vectorStoreFileRelationsMap.values(),
      );
      await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(
        vectorStoreFileRelations,
      );
    }
  }
  /**
   * Stream every page matching `conditions` (with `revision` populated) in
   * batches of `BATCH_SIZE` and create vector store files for each batch.
   *
   * The returned promise settles when the stream pipeline finishes; it
   * rejects if `createVectorStoreFile` fails for any batch.
   *
   * @param vectorStoreRelation vector store that receives the files
   * @param conditions page query selecting the pages to vectorize
   */
  private async createVectorStoreFileWithStream(
    vectorStoreRelation: VectorStoreDocument,
    conditions: mongoose.FilterQuery<PageDocument>,
  ): Promise<void> {
    const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>(
      'Page',
    );

    const pagesStream = Page.find({ ...conditions })
      .populate('revision')
      .cursor({ batchSize: BATCH_SIZE });
    const batchStream = createBatchStream(BATCH_SIZE);

    const createVectorStoreFile = this.createVectorStoreFile.bind(this);
    const createVectorStoreFileStream = new Transform({
      objectMode: true,
      async transform(
        chunk: HydratedDocument<PageDocument>[],
        _encoding,
        callback,
      ) {
        try {
          logger.debug(
            'Target page path for VectorStoreFile generation: ',
            chunk.map((page) => page.path),
          );
          await createVectorStoreFile(vectorStoreRelation, chunk);
          // This is the last stream of the pipeline and nothing reads its
          // readable side. Pushing the chunk would fill the readable buffer
          // and, once it reaches the highWaterMark, stall the pipeline, so
          // the batch is intentionally not forwarded.
          callback();
        } catch (error) {
          callback(error);
        }
      },
    });

    await pipeline(pagesStream, batchStream, createVectorStoreFileStream);
  }
  /**
   * Build the page query used to collect the pages an assistant learns from.
   *
   * Every returned query is an `$or` of:
   * 1. GRANT_RESTRICTED pages whose path is listed literally (not as a glob)
   *    in `pagePathPatterns`, and
   * 2. pages matching the converted path patterns whose grant fits
   *    `accessScope` (public only / public + granted groups / public +
   *    owner's groups + owner-only pages granted to the owner).
   *
   * @param owner owner of the assistant
   * @param accessScope access scope of the assistant
   * @param grantedGroupsForAccessScope groups used when `accessScope` is GROUPS
   * @param pagePathPatterns literal paths and glob patterns of target pages
   * @returns a mongoose filter for the Page model
   * @throws Error when `accessScope` is GROUPS without granted groups, or is unknown
   */
  private async createConditionForCreateVectorStoreFile(
    owner: AiAssistant['owner'],
    accessScope: AiAssistant['accessScope'],
    grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
    pagePathPatterns: AiAssistant['pagePathPatterns'],
  ): Promise<mongoose.FilterQuery<PageDocument>> {
    const pathRegExps = convertPathPatternsToRegExp(pagePathPatterns);

    // Include pages in search targets when their paths with 'Anyone with the link' permission are directly specified instead of using glob pattern
    const literalPagePaths = pagePathPatterns.filter(
      (pattern) => !isGlobPatternPath(pattern),
    );
    const restrictedPageCondition: mongoose.FilterQuery<PageDocument> = {
      grant: PageGrant.GRANT_RESTRICTED,
      path: { $in: literalPagePaths },
    };

    switch (accessScope) {
      case AiAssistantAccessScope.PUBLIC_ONLY:
        return {
          $or: [
            restrictedPageCondition,
            {
              grant: PageGrant.GRANT_PUBLIC,
              path: { $in: pathRegExps },
            },
          ],
        };

      case AiAssistantAccessScope.GROUPS: {
        const hasGrantedGroups =
          grantedGroupsForAccessScope != null &&
          grantedGroupsForAccessScope.length !== 0;
        if (!hasGrantedGroups) {
          throw new Error('grantedGroups is required when accessScope is GROUPS');
        }

        const grantedGroupIds = grantedGroupsForAccessScope.map((group) =>
          getIdForRef(group.item).toString(),
        );

        return {
          $or: [
            restrictedPageCondition,
            {
              grant: {
                $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP],
              },
              path: { $in: pathRegExps },
              $or: [
                { 'grantedGroups.item': { $in: grantedGroupIds } },
                { grant: PageGrant.GRANT_PUBLIC },
              ],
            },
          ],
        };
      }

      case AiAssistantAccessScope.OWNER: {
        const internalGroups =
          await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
        const externalGroups =
          await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(
            owner,
          );
        const ownerGroupIds = [...internalGroups, ...externalGroups].map(
          (group) => group.toString(),
        );

        return {
          $or: [
            restrictedPageCondition,
            {
              grant: {
                $in: [
                  PageGrant.GRANT_PUBLIC,
                  PageGrant.GRANT_USER_GROUP,
                  PageGrant.GRANT_OWNER,
                ],
              },
              path: { $in: pathRegExps },
              $or: [
                { 'grantedGroups.item': { $in: ownerGroupIds } },
                { grantedUsers: { $in: [getIdForRef(owner)] } },
                { grant: PageGrant.GRANT_PUBLIC },
              ],
            },
          ],
        };
      }

      default:
        throw new Error('Invalid accessScope value');
    }
  }
  /**
   * Validate the granted-group settings of an assistant.
   *
   * Rules enforced, in this order:
   * - `grantedGroupsForShareScope` must be null/undefined unless `shareScope` is GROUPS
   * - `grantedGroupsForAccessScope` must be null/undefined unless `accessScope` is GROUPS
   * - every group in either list must be a group the owner is related to
   *   (internal or external user groups)
   *
   * @throws Error describing the first rule that is violated
   */
  private async validateGrantedUserGroupsForAiAssistant(
    owner: AiAssistant['owner'],
    shareScope: AiAssistant['shareScope'],
    accessScope: AiAssistant['accessScope'],
    grantedGroupsForShareScope: AiAssistant['grantedGroupsForShareScope'],
    grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
  ) {
    // Granted groups for share scope are only allowed with the "groups" share scope
    const isShareScopeGroups = shareScope === AiAssistantShareScope.GROUPS;
    if (!isShareScopeGroups && grantedGroupsForShareScope != null) {
      throw new Error(
        'grantedGroupsForShareScope is specified when shareScope is not “groups”.',
      );
    }

    // Granted groups for access scope are only allowed with the "groups" access scope
    const isAccessScopeGroups = accessScope === AiAssistantAccessScope.GROUPS;
    if (!isAccessScopeGroups && grantedGroupsForAccessScope != null) {
      throw new Error(
        'grantedGroupsForAccessScope is specified when accsessScope is not “groups”.',
      );
    }

    const internalGroups =
      await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
    const externalGroups =
      await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
    const ownerGroupIds = new Set(
      [...internalGroups, ...externalGroups].map((group) => group.toString()),
    );

    // The owner must belong to every group listed for the share scope
    if (
      grantedGroupsForShareScope != null &&
      grantedGroupsForShareScope.length > 0
    ) {
      const shareGroupIds = grantedGroupsForShareScope.map((group) =>
        getIdForRef(group.item).toString(),
      );
      const hasForeignGroup = shareGroupIds.some(
        (groupId) => !ownerGroupIds.has(groupId),
      );
      if (hasForeignGroup) {
        throw new Error(
          'A userGroup to which the owner does not belong is specified in grantedGroupsForShareScope',
        );
      }
    }

    // The owner must belong to every group listed for the access scope
    if (
      grantedGroupsForAccessScope != null &&
      grantedGroupsForAccessScope.length > 0
    ) {
      const accessGroupIds = grantedGroupsForAccessScope.map((group) =>
        getIdForRef(group.item).toString(),
      );
      const hasForeignGroup = accessGroupIds.some(
        (groupId) => !ownerGroupIds.has(groupId),
      );
      if (hasForeignGroup) {
        throw new Error(
          'A userGroup to which the owner does not belong is specified in grantedGroupsForAccessScope',
        );
      }
    }
  }
  /**
   * Decide whether `user` may use the given assistant.
   *
   * The share scope is resolved first: SAME_AS_ACCESS_SCOPE means the
   * assistant's access scope (and its `grantedGroupsForAccessScope`) is used
   * in place of the share scope. The resolved scope is then evaluated:
   *
   * - public: usable by anyone
   * - owner: usable only by the owner
   * - groups: usable by users related to at least one granted group
   *
   * Previously SAME_AS_ACCESS_SCOPE + GROUPS was checked against
   * `grantedGroupsForShareScope`, which
   * `validateGrantedUserGroupsForAiAssistant` requires to be unset unless the
   * share scope is GROUPS, so that combination could never match; and
   * SAME_AS_ACCESS_SCOPE + PUBLIC_ONLY was not handled at all.
   *
   * @param aiAssistantId id of the assistant
   * @param user the user requesting access
   * @returns true when the assistant is usable by the user
   * @throws a 404 http error when the assistant does not exist
   */
  async isAiAssistantUsable(
    aiAssistantId: string,
    user: IUserHasId,
  ): Promise<boolean> {
    const aiAssistant = await AiAssistantModel.findOne({
      _id: { $eq: aiAssistantId },
    });

    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    const isOwner =
      getIdStringForRef(aiAssistant.owner) === getIdStringForRef(user._id);

    // Evaluate every scope flag up front so later checks stay simple
    const isSameAsAccessScope =
      aiAssistant.shareScope === AiAssistantShareScope.SAME_AS_ACCESS_SCOPE;
    const isPublicScope =
      aiAssistant.shareScope === AiAssistantShareScope.PUBLIC_ONLY ||
      (isSameAsAccessScope &&
        aiAssistant.accessScope === AiAssistantAccessScope.PUBLIC_ONLY);
    const isOwnerScope =
      aiAssistant.shareScope === AiAssistantShareScope.OWNER ||
      (isSameAsAccessScope &&
        aiAssistant.accessScope === AiAssistantAccessScope.OWNER);
    const isSharedWithShareGroups =
      aiAssistant.shareScope === AiAssistantShareScope.GROUPS;
    const isSharedWithAccessGroups =
      isSameAsAccessScope &&
      aiAssistant.accessScope === AiAssistantAccessScope.GROUPS;

    if (isPublicScope) {
      return true;
    }

    if (isOwnerScope) {
      return isOwner;
    }

    if (!isSharedWithShareGroups && !isSharedWithAccessGroups) {
      return false;
    }

    const userGroupIds = [
      ...(await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
      ...(await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(
        user,
      )),
    ].map((group) => group.toString());

    // With SAME_AS_ACCESS_SCOPE the groups live in grantedGroupsForAccessScope
    const grantedGroupIds = isSharedWithShareGroups
      ? (aiAssistant.grantedGroupsForShareScope?.map((group) =>
          getIdStringForRef(group.item),
        ) ?? [])
      : (aiAssistant.grantedGroupsForAccessScope?.map((group) =>
          getIdStringForRef(group.item),
        ) ?? []);

    return userGroupIds.some((userGroupId) =>
      grantedGroupIds.includes(userGroupId),
    );
  }
  /**
   * Create an assistant owned by `user` together with a new vector store.
   *
   * The granted groups are validated and the page query is built before
   * anything is created, so invalid input fails without side effects.
   * Filling the vector store with page files runs in the background and is
   * not awaited; a failure there is logged instead of surfacing as an
   * unhandled promise rejection.
   *
   * @param data assistant settings
   * @param user the owner of the new assistant
   * @returns the created assistant document
   * @throws Error when the granted groups or the access scope are invalid
   */
  async createAiAssistant(
    data: UpsertAiAssistantData,
    user: IUserHasId,
  ): Promise<AiAssistantDocument> {
    await this.validateGrantedUserGroupsForAiAssistant(
      user,
      data.shareScope,
      data.accessScope,
      data.grantedGroupsForShareScope,
      data.grantedGroupsForAccessScope,
    );

    const conditions = await this.createConditionForCreateVectorStoreFile(
      user,
      data.accessScope,
      data.grantedGroupsForAccessScope,
      data.pagePathPatterns,
    );

    const vectorStoreRelation = await this.createVectorStore(data.name);
    const aiAssistant = await AiAssistantModel.create({
      ...data,
      owner: user,
      vectorStore: vectorStoreRelation,
    });

    // VectorStore creation process does not await.
    // The rejection handler keeps a background failure from becoming an
    // unhandled promise rejection.
    this.createVectorStoreFileWithStream(vectorStoreRelation, conditions).catch(
      (err) => {
        logger.error(err);
      },
    );

    return aiAssistant;
  }
  /**
   * Update an assistant owned by `user`.
   *
   * The vector store is rebuilt only when `accessScope`, `pagePathPatterns`
   * or the granted groups for the access scope changed: the old vector store
   * is deleted, a new one is created, threads are pointed at it and its files
   * are generated in the background (not awaited; failures are logged instead
   * of surfacing as an unhandled promise rejection).
   *
   * When the assistant is the default one and the new share scope is not
   * PUBLIC_ONLY, its default flag is cleared.
   *
   * @param aiAssistantId id of the assistant to update
   * @param data new assistant settings
   * @param user the requesting user; must be the owner
   * @returns the updated assistant document
   * @throws a 404 http error when no assistant with this id is owned by `user`
   * @throws Error when the granted groups or the access scope are invalid
   */
  async updateAiAssistant(
    aiAssistantId: string,
    data: UpsertAiAssistantData,
    user: IUserHasId,
  ): Promise<AiAssistantDocument> {
    const aiAssistant = await AiAssistantModel.findOne({
      owner: user,
      _id: aiAssistantId,
    });
    if (aiAssistant == null) {
      throw createError(404, 'AiAssistant document does not exist');
    }

    await this.validateGrantedUserGroupsForAiAssistant(
      user,
      data.shareScope,
      data.accessScope,
      data.grantedGroupsForShareScope,
      data.grantedGroupsForAccessScope,
    );

    const grantedGroupIdsForAccessScopeFromReq =
      data.grantedGroupsForAccessScope?.map((group) =>
        getIdStringForRef(group.item),
      ) ?? []; // ObjectId[] -> string[]
    const grantedGroupIdsForAccessScopeFromDb =
      aiAssistant.grantedGroupsForAccessScope?.map((group) =>
        getIdStringForRef(group.item),
      ) ?? []; // ObjectId[] -> string[]

    // If accessScope, pagePathPatterns, grantedGroupsForAccessScope have not changed, do not build VectorStore
    const shouldRebuildVectorStore =
      data.accessScope !== aiAssistant.accessScope ||
      !isDeepEquals(data.pagePathPatterns, aiAssistant.pagePathPatterns) ||
      !isDeepEquals(
        grantedGroupIdsForAccessScopeFromReq,
        grantedGroupIdsForAccessScopeFromDb,
      );

    let newVectorStoreRelation: VectorStoreDocument | undefined;
    if (shouldRebuildVectorStore) {
      const conditions = await this.createConditionForCreateVectorStoreFile(
        user,
        data.accessScope,
        data.grantedGroupsForAccessScope,
        data.pagePathPatterns,
      );

      // Delete obsoleted VectorStore
      const obsoletedVectorStoreRelationId = getIdStringForRef(
        aiAssistant.vectorStore,
      );
      await this.deleteVectorStore(obsoletedVectorStoreRelationId);

      newVectorStoreRelation = await this.createVectorStore(data.name);

      // NOTE(review): not awaited; presumably intentional — confirm that
      // updateThreads handles its own errors.
      this.updateThreads(aiAssistantId, newVectorStoreRelation.vectorStoreId);

      // VectorStore creation process does not await.
      // The rejection handler keeps a background failure from becoming an
      // unhandled promise rejection.
      this.createVectorStoreFileWithStream(
        newVectorStoreRelation,
        conditions,
      ).catch((err) => {
        logger.error(err);
      });
    }

    const newData = {
      ...data,
      vectorStore: newVectorStoreRelation ?? aiAssistant.vectorStore,
    };

    aiAssistant.set({ ...newData });
    let updatedAiAssistant: AiAssistantDocument = await aiAssistant.save();

    // A default assistant that is no longer shared publicly loses its default flag
    if (
      data.shareScope !== AiAssistantShareScope.PUBLIC_ONLY &&
      aiAssistant.isDefault
    ) {
      updatedAiAssistant = await AiAssistantModel.setDefault(
        aiAssistant._id,
        false,
      );
    }

    return updatedAiAssistant;
  }
  /**
   * List the assistants a user can see, split by ownership.
   *
   * The query selects assistants that are owned by the user, assistants of
   * other owners shared with PUBLIC_ONLY, and assistants of other owners
   * shared with GROUPS where one of the granted share-scope groups is a group
   * related to the user. Granted groups of both scopes are populated.
   *
   * @param user the requesting user
   * @returns `myAiAssistants` (owned by the user) and `teamAiAssistants` (all others)
   */
  async getAccessibleAiAssistants(
    user: IUserHasId,
  ): Promise<AccessibleAiAssistants> {
    const internalGroupIds =
      await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user);
    const externalGroupIds =
      await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(user);
    const userGroupIds = [...internalGroupIds, ...externalGroupIds];

    const assistants = await AiAssistantModel.find({
      $or: [
        // Case 1: Assistants owned by the user
        { owner: user },
        // Case 2: Public assistants owned by others
        {
          $and: [
            { owner: { $ne: user } },
            { shareScope: AiAssistantShareScope.PUBLIC_ONLY },
          ],
        },
        // Case 3: Group-restricted assistants where user is in granted groups
        {
          $and: [
            { owner: { $ne: user } },
            { shareScope: AiAssistantShareScope.GROUPS },
            { 'grantedGroupsForShareScope.item': { $in: userGroupIds } },
          ],
        },
      ],
    })
      .populate('grantedGroupsForShareScope.item')
      .populate('grantedGroupsForAccessScope.item');

    // Partition in a single pass, preserving the query order in both lists
    const userId = user._id.toString();
    const myAiAssistants: typeof assistants = [];
    const teamAiAssistants: typeof assistants = [];
    for (const assistant of assistants) {
      if (assistant.owner.toString() === userId) {
        myAiAssistants.push(assistant);
      } else {
        teamAiAssistants.push(assistant);
      }
    }

    return { myAiAssistants, teamAiAssistants };
  }
  /**
   * Check whether the pages covered by `pagePathPatterns` exceed the
   * configured `openai:limitLearnablePageCountPerAssistant`.
   *
   * Each path returned by `descendantCountByPaths` counts as one page plus
   * its descendants, except that a path listed literally in
   * `pagePathPatterns` counts as a single page.
   *
   * @param user user passed to the descendant count lookup
   * @param pagePathPatterns literal paths and glob patterns of target pages
   * @returns true when the total page count is greater than the limit
   */
  async isLearnablePageLimitExceeded(
    user: IUserHasId,
    pagePathPatterns: string[],
  ): Promise<boolean> {
    const Page = mongoose.model<IPage, PageModel>('Page');

    const countsByPath = await Page.descendantCountByPaths(
      removeGlobPath(pagePathPatterns),
      user,
      null,
      true,
      true,
    );

    let totalPageCount = 0;
    for (const entry of countsByPath) {
      // Treat as single page when included in "pagePathPatterns"
      const isLiteralPath = pagePathPatterns.includes(entry.path);
      const descendantCount = isLiteralPath ? 0 : entry.descendantCount;
      totalPageCount += descendantCount + 1;
    }

    logger.debug('TotalPageCount: ', totalPageCount);

    const pageCountLimit = configManager.getConfig(
      'openai:limitLearnablePageCountPerAssistant',
    );

    return totalPageCount > pageCountLimit;
  }
  /**
   * Find the assistants whose `pagePathPatterns` contain one of the given
   * page paths, either literally or as one of the glob patterns produced by
   * `generateGlobPatterns` for that path.
   *
   * @param pagePaths page paths to look up
   * @param options.shouldPopulateOwner populate `owner` when true
   * @param options.shouldPopulateVectorStore populate `vectorStore` when true
   * @returns the matching assistant documents
   */
  private async findAiAssistantByPagePath(
    pagePaths: string[],
    options?: {
      shouldPopulateOwner?: boolean;
      shouldPopulateVectorStore?: boolean;
    },
  ): Promise<AiAssistantDocument[]> {
    const { shouldPopulateOwner = false, shouldPopulateVectorStore = false } =
      options ?? {};

    const globPatterns = pagePaths.flatMap((pagePath) =>
      generateGlobPatterns(pagePath),
    );

    const query = AiAssistantModel.find({
      $or: [
        // Case 1: Exact match
        { pagePathPatterns: { $in: pagePaths } },
        // Case 2: Glob pattern match
        { pagePathPatterns: { $in: globPatterns } },
      ],
    });

    if (shouldPopulateOwner) {
      query.populate('owner');
    }
    if (shouldPopulateVectorStore) {
      query.populate('vectorStore');
    }

    return query.exec();
  }
  1450. }
  // Module-level singleton; stays unassigned until initializeOpenaiService creates it
  let instance: OpenaiService;

  /**
   * Create the OpenaiService singleton.
   *
   * The service is instantiated only when `app:aiEnabled` is truthy and
   * `openai:serviceType` is set to one of `OpenaiServiceTypes`; otherwise
   * this is a no-op.
   *
   * @param crowi application instance handed to the service constructor
   */
  export const initializeOpenaiService = (crowi: Crowi): void => {
    const isAiEnabled = configManager.getConfig('app:aiEnabled');
    const serviceType = configManager.getConfig('openai:serviceType');

    if (!isAiEnabled) {
      return;
    }
    if (serviceType == null || !OpenaiServiceTypes.includes(serviceType)) {
      return;
    }

    instance = new OpenaiService(crowi);
  };
  /**
   * Return the OpenaiService singleton.
   *
   * @returns the service, or undefined when it has not been created
   */
  export const getOpenaiService = (): IOpenaiService | undefined => {
    return instance ?? undefined;
  };