// openai.ts
  1. import assert from 'node:assert';
  2. import { Readable, Transform } from 'stream';
  3. import { pipeline } from 'stream/promises';
  4. import { PageGrant, getIdForRef, isPopulated } from '@growi/core';
  5. import { isGrobPatternPath } from '@growi/core/dist/utils/page-path-utils';
  6. import escapeStringRegexp from 'escape-string-regexp';
  7. import type { HydratedDocument, Types } from 'mongoose';
  8. import mongoose from 'mongoose';
  9. import type OpenAI from 'openai';
  10. import { toFile } from 'openai';
  11. import ExternalUserGroupRelation from '~/features/external-user-group/server/models/external-user-group-relation';
  12. import ThreadRelationModel from '~/features/openai/server/models/thread-relation';
  13. import VectorStoreModel, { type VectorStoreDocument } from '~/features/openai/server/models/vector-store';
  14. import VectorStoreFileRelationModel, {
  15. type VectorStoreFileRelation,
  16. prepareVectorStoreFileRelations,
  17. } from '~/features/openai/server/models/vector-store-file-relation';
  18. import type { PageDocument, PageModel } from '~/server/models/page';
  19. import UserGroupRelation from '~/server/models/user-group-relation';
  20. import { configManager } from '~/server/service/config-manager';
  21. import { createBatchStream } from '~/server/util/batch-stream';
  22. import loggerFactory from '~/utils/logger';
  23. import { OpenaiServiceTypes } from '../../interfaces/ai';
  24. import { type AiAssistant, AiAssistantAccessScope } from '../../interfaces/ai-assistant';
  25. import AiAssistantModel, { type AiAssistantDocument } from '../models/ai-assistant';
  26. import { convertMarkdownToHtml } from '../utils/convert-markdown-to-html';
  27. import { getClient } from './client-delegator';
  28. // import { splitMarkdownIntoChunks } from './markdown-splitter/markdown-token-splitter';
  29. import { oepnaiApiErrorHandler } from './openai-api-error-handler';
  /** Logger scoped to this service module. */
  const logger = loggerFactory('growi:service:openai');

  /**
   * Number of pages handled per batch. Used as the page cursor batch size, the batch-stream size,
   * and the upper bound asserted on the number of pages uploaded concurrently.
   */
  const BATCH_SIZE = 100;

  // const isVectorStoreForPublicScopeExist = false;

  /**
   * Accumulator filled by `prepareVectorStoreFileRelations` while files are uploaded.
   * NOTE(review): the meaning of the string key is defined by `prepareVectorStoreFileRelations` — confirm there.
   */
  type VectorStoreFileRelationsMap = Map<string, VectorStoreFileRelation>;
  /**
   * Converts page path patterns into values usable in a MongoDB `$in` condition on `path`.
   *
   * - A plain path is returned unchanged (exact match).
   * - A wildcard pattern (as judged by `isGrobPatternPath`, e.g. `/docs/*`) becomes a RegExp that matches
   *   the base path itself and every descendant (`/docs`, `/docs/a`, `/docs/a/b`).
   *
   * The RegExp is terminated with `($|/)` so that sibling paths sharing the same prefix
   * (e.g. `/docs-archive`, `/documents`) are NOT matched.
   *
   * @param pagePathPatterns page paths and/or wildcard patterns
   * @returns one entry per input pattern: the original string, or a RegExp for wildcard patterns
   */
  const convertPathPatternsToRegExp = (pagePathPatterns: string[]): Array<string | RegExp> => {
    return pagePathPatterns.map((pagePathPattern) => {
      if (!isGrobPatternPath(pagePathPattern)) {
        return pagePathPattern;
      }

      // Strip only the trailing wildcard segment; anything else is treated as literal text
      const basePath = pagePathPattern.replace(/\/\*$/, '');
      const escapedBasePath = escapeStringRegexp(basePath);

      // Require a path boundary after the base path
      return new RegExp(`^${escapedBasePath}($|/)`);
    });
  };
  /**
   * Public contract of the OpenAI integration service (threads, vector stores, vector store files, AI assistants).
   */
  export interface IOpenaiService {
    /**
     * Creates a new thread bound to `vectorStoreId` when `threadId` is omitted;
     * otherwise retrieves the existing thread identified by `threadId`.
     */
    getOrCreateThread(userId: string, vectorStoreId?: string, threadId?: string): Promise<OpenAI.Beta.Threads.Thread | undefined>;
    // getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument>;
    /**
     * Deletes up to `limit` expired threads, waiting `apiCallInterval` milliseconds between API calls. (for CronJob)
     */
    deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void>; // for CronJob
    /**
     * Removes VectorStore documents that are marked as deleted and are no longer referenced
     * by any VectorStoreFileRelation document. (for CronJob)
     */
    deleteObsolatedVectorStoreRelations(): Promise<void> // for CronJob
    /**
     * Uploads the given pages as files and attaches them to the vector store of `vectorStoreRelation`.
     */
    createVectorStoreFile(vectorStoreRelation: VectorStoreDocument, pages: PageDocument[]): Promise<void>;
    /**
     * Deletes the uploaded files of a page for the given vector store relation, then removes or updates
     * the corresponding VectorStoreFileRelation document.
     *
     * @param apiCallInterval optional wait in milliseconds after each successful file deletion
     */
    deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId, apiCallInterval?: number): Promise<void>;
    /**
     * Deletes files that belong to vector stores marked as deleted, processing at most `limit`
     * VectorStoreFileRelation documents per call. (for CronJob)
     */
    deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void>; // for CronJob
    // rebuildVectorStoreAll(): Promise<void>;
    // rebuildVectorStore(page: HydratedDocument<PageDocument>): Promise<void>;
    /**
     * Creates an AI assistant together with its vector store and starts populating
     * the vector store with the pages matching the assistant's settings.
     */
    createAiAssistant(data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument>;
  }
  56. class OpenaiService implements IOpenaiService {
  /**
   * Client for the configured OpenAI service type.
   * The `openai:serviceType` config value is read on every access and handed to `getClient`.
   */
  private get client() {
    return getClient({
      openaiServiceType: configManager.getConfig('crowi', 'openai:serviceType'),
    });
  }
  /**
   * Creates a new thread, or retrieves an existing one.
   *
   * - `threadId` omitted: a thread is created for `vectorStoreId` and a ThreadRelation document
   *   (`userId`, `threadId`) is stored.
   * - `threadId` given: the ThreadRelation is looked up, the thread entity is retrieved and the
   *   relation's expiration is extended. If the API reports the thread as not found, the
   *   ThreadRelation document is removed before the error is rethrown.
   *
   * @param userId id of the user the thread belongs to
   * @param vectorStoreId vector store to bind a newly created thread to (required when `threadId` is omitted)
   * @param threadId id of an existing thread to retrieve
   * @returns the created or retrieved thread
   * @throws Error when neither `vectorStoreId` nor `threadId` is given, when no ThreadRelation exists
   *         for `threadId`, or when the OpenAI API / database call fails (the original error is rethrown)
   */
  public async getOrCreateThread(userId: string, vectorStoreId?: string, threadId?: string): Promise<OpenAI.Beta.Threads.Thread> {
    // Rethrow the original error so that its type, stack and API status are preserved;
    // only non-Error values are wrapped.
    const toError = (err: unknown): Error => (err instanceof Error ? err : new Error(String(err)));

    if (threadId == null) {
      if (vectorStoreId == null) {
        // Fail fast instead of querying ThreadRelation with an undefined threadId
        throw new Error('Either vectorStoreId or threadId is required');
      }

      try {
        const thread = await this.client.createThread(vectorStoreId);
        await ThreadRelationModel.create({ userId, threadId: thread.id });
        return thread;
      }
      catch (err) {
        throw toError(err);
      }
    }

    // NOTE(review): the lookup is by threadId only; userId is not part of the filter — confirm whether
    // callers verify thread ownership elsewhere.
    const threadRelation = await ThreadRelationModel.findOne({ threadId });
    if (threadRelation == null) {
      throw new Error('ThreadRelation document is not exists');
    }

    // Check if a thread entity exists
    // If the thread entity does not exist, the thread-relation document is deleted
    try {
      const thread = await this.client.retrieveThread(threadRelation.threadId);

      // Update expiration date if thread entity exists
      await threadRelation.updateThreadExpiration();

      return thread;
    }
    catch (err) {
      await oepnaiApiErrorHandler(err, { notFoundError: async() => { await threadRelation.remove() } });
      throw toError(err);
    }
  }
  /**
   * Deletes expired threads from the OpenAI service and removes their ThreadRelation documents. (for CronJob)
   *
   * A thread that the API reports as not found is treated as already deleted, so its ThreadRelation
   * document is removed as well instead of being retried on every run.
   *
   * @param limit maximum number of expired ThreadRelation documents to process
   * @param apiCallInterval wait in milliseconds after each API call (also applied after a failed call)
   */
  public async deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void> {
    const expiredThreadRelations = await ThreadRelationModel.getExpiredThreadRelations(limit);
    if (expiredThreadRelations == null) {
      return;
    }

    const deletedThreadIds: string[] = [];

    // `for await` is kept because the concrete return type of getExpiredThreadRelations is not visible here
    for await (const expiredThreadRelation of expiredThreadRelations) {
      const { threadId } = expiredThreadRelation;
      try {
        const deleteThreadResponse = await this.client.deleteThread(threadId);
        logger.debug('Delete thread', deleteThreadResponse);
        deletedThreadIds.push(threadId);
      }
      catch (err) {
        // The thread entity no longer exists: the relation document is obsolete and can be removed
        await oepnaiApiErrorHandler(err, { notFoundError: async() => { deletedThreadIds.push(threadId) } });
        logger.error(err);
      }

      // sleep
      await new Promise(resolve => setTimeout(resolve, apiCallInterval));
    }

    if (deletedThreadIds.length === 0) {
      return;
    }

    await ThreadRelationModel.deleteMany({ threadId: { $in: deletedThreadIds } });
  }
  109. // TODO: https://redmine.weseek.co.jp/issues/160332
  110. // public async getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument> {
  111. // const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ scopeType: VectorStoreScopeType.PUBLIC, isDeleted: false });
  112. // if (vectorStoreDocument != null && isVectorStoreForPublicScopeExist) {
  113. // return vectorStoreDocument;
  114. // }
  115. // if (vectorStoreDocument != null && !isVectorStoreForPublicScopeExist) {
  116. // try {
  117. // // Check if vector store entity exists
  118. // // If the vector store entity does not exist, the vector store document is deleted
  119. // await this.client.retrieveVectorStore(vectorStoreDocument.vectorStoreId);
  120. // isVectorStoreForPublicScopeExist = true;
  121. // return vectorStoreDocument;
  122. // }
  123. // catch (err) {
  124. // await oepnaiApiErrorHandler(err, { notFoundError: vectorStoreDocument.markAsDeleted });
  125. // throw new Error(err);
  126. // }
  127. // }
  128. // const newVectorStore = await this.client.createVectorStore(VectorStoreScopeType.PUBLIC);
  129. // const newVectorStoreDocument = await VectorStoreModel.create({
  130. // vectorStoreId: newVectorStore.id,
  131. // scopeType: VectorStoreScopeType.PUBLIC,
  132. // }) as VectorStoreDocument;
  133. // isVectorStoreForPublicScopeExist = true;
  134. // return newVectorStoreDocument;
  135. // }
  /**
   * Creates a vector store on the OpenAI service and stores a VectorStore document referencing it.
   *
   * @param name name given to the new vector store
   * @returns the created VectorStore document
   * @throws Error when the API call or the document creation fails (the original error is rethrown)
   */
  private async createVectorStore(name: string): Promise<VectorStoreDocument> {
    try {
      const newVectorStore = await this.client.createVectorStore(name);

      // NOTE(review): if this create() fails, the vector store created above is left without a document
      const newVectorStoreDocument = await VectorStoreModel.create({
        vectorStoreId: newVectorStore.id,
      }) as VectorStoreDocument;

      return newVectorStoreDocument;
    }
    catch (err) {
      // Keep the original error (type, stack, API status); wrap only non-Error values
      throw err instanceof Error ? err : new Error(String(err));
    }
  }
  148. // TODO: https://redmine.weseek.co.jp/issues/160332
  149. // TODO: https://redmine.weseek.co.jp/issues/156643
  150. // private async uploadFileByChunks(pageId: Types.ObjectId, body: string, vectorStoreFileRelationsMap: VectorStoreFileRelationsMap) {
  151. // const chunks = await splitMarkdownIntoChunks(body, 'gpt-4o');
  152. // for await (const [index, chunk] of chunks.entries()) {
  153. // try {
  154. // const file = await toFile(Readable.from(chunk), `${pageId}-chunk-${index}.md`);
  155. // const uploadedFile = await this.client.uploadFile(file);
  156. // prepareVectorStoreFileRelations(pageId, uploadedFile.id, vectorStoreFileRelationsMap);
  157. // }
  158. // catch (err) {
  159. // logger.error(err);
  160. // }
  161. // }
  162. // }
  /**
   * Converts a page's markdown body to HTML and uploads it as `<pageId>.html`.
   *
   * @param pageId id of the page; used for the uploaded file name
   * @param pagePath path of the page, passed to the markdown-to-HTML conversion
   * @param revisionBody markdown body of the page revision
   * @returns the file object returned by the upload
   */
  private async uploadFile(pageId: Types.ObjectId, pagePath: string, revisionBody: string): Promise<OpenAI.Files.FileObject> {
    const html = await convertMarkdownToHtml({ pagePath, revisionBody });
    const htmlFile = await toFile(Readable.from(html), `${pageId}.html`);
    return this.client.uploadFile(htmlFile);
  }
  169. // TODO: https://redmine.weseek.co.jp/issues/160333
  170. // private async deleteVectorStore(vectorStoreScopeType: VectorStoreScopeType): Promise<void> {
  171. // const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ scopeType: vectorStoreScopeType, isDeleted: false });
  172. // if (vectorStoreDocument == null) {
  173. // return;
  174. // }
  175. // try {
  176. // await this.client.deleteVectorStore(vectorStoreDocument.vectorStoreId);
  177. // await vectorStoreDocument.markAsDeleted();
  178. // }
  179. // catch (err) {
  180. // await oepnaiApiErrorHandler(err, { notFoundError: vectorStoreDocument.markAsDeleted });
  181. // throw new Error(err);
  182. // }
  183. // }
  /**
   * Uploads the given pages as files and attaches the uploaded files to the vector store.
   *
   * Flow:
   * 1. Each eligible page (public grant, non-empty revision body) is uploaded concurrently.
   *    Failed uploads are logged and skipped.
   * 2. The VectorStoreFileRelation documents are upserted and the files are attached in one batch call.
   * 3. If step 2 fails, the error is logged and the files of all given pages are deleted again.
   *
   * NOTE(review): pages whose grant is not GRANT_PUBLIC are skipped here even though
   * createConditionForCreateAiAssistant can select group/owner pages — confirm this is intended.
   *
   * @param vectorStoreRelation VectorStore document whose vector store receives the files
   * @param pages pages to upload; at most BATCH_SIZE entries
   * @throws AssertionError when more than BATCH_SIZE pages are given
   */
  async createVectorStoreFile(vectorStoreRelation: VectorStoreDocument, pages: Array<HydratedDocument<PageDocument>>): Promise<void> {
    // Validate BEFORE any upload is started, so that a violation does not leave uploads running unobserved
    assert(pages.length <= BATCH_SIZE, 'workers.length must be less than or equal to BATCH_SIZE');

    // const vectorStore = await this.getOrCreateVectorStoreForPublicScope();
    const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();

    // Returns the non-empty revision body of the page, populating the revision when necessary
    const resolveRevisionBody = async(page: HydratedDocument<PageDocument>): Promise<string | undefined> => {
      if (isPopulated(page.revision) && page.revision.body.length > 0) {
        return page.revision.body;
      }

      const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
      const body = pagePopulatedToShowRevision.revision?.body;
      return body != null && body.length > 0 ? body : undefined;
    };

    const processUploadFile = async(page: HydratedDocument<PageDocument>): Promise<void> => {
      if (page._id == null || page.grant !== PageGrant.GRANT_PUBLIC || page.revision == null) {
        return;
      }

      const body = await resolveRevisionBody(page);
      if (body == null) {
        return;
      }

      const uploadedFile = await this.uploadFile(page._id, page.path, body);
      prepareVectorStoreFileRelations(vectorStoreRelation._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
    };

    // Upload all pages concurrently and wait for every upload to settle
    const fileUploadResult = await Promise.allSettled(pages.map(processUploadFile));
    for (const result of fileUploadResult) {
      if (result.status === 'rejected') {
        logger.error(result.reason);
      }
    }

    const vectorStoreFileRelations = Array.from(vectorStoreFileRelationsMap.values());
    const uploadedFileIds = vectorStoreFileRelations.flatMap(data => data.fileIds);
    if (uploadedFileIds.length === 0) {
      return;
    }

    const pageIds = pages.map(page => page._id);

    try {
      // Save vector store file relation
      await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(vectorStoreFileRelations);

      // Create vector store file
      const createVectorStoreFileBatchResponse = await this.client.createVectorStoreFileBatch(vectorStoreRelation.vectorStoreId, uploadedFileIds);
      logger.debug('Create vector store file', createVectorStoreFileBatchResponse);

      // Set isAttachedToVectorStore: true when the uploaded file is attached to VectorStore
      await VectorStoreFileRelationModel.markAsAttachedToVectorStore(pageIds);
    }
    catch (err) {
      logger.error(err);

      // Delete all uploaded files if createVectorStoreFileBatch fails
      for (const pageId of pageIds) {
        // eslint-disable-next-line no-await-in-loop
        await this.deleteVectorStoreFile(vectorStoreRelation._id, pageId);
      }
    }
  }
  234. // Deletes all VectorStore documents that are marked as deleted (isDeleted: true) and have no associated VectorStoreFileRelation documents
  /**
   * Deletes all VectorStore documents that are marked as deleted (isDeleted: true) and have no
   * associated VectorStoreFileRelation documents. (for CronJob)
   *
   * When no VectorStoreFileRelation references any deleted vector store, every deleted VectorStore
   * document is removed.
   */
  async deleteObsolatedVectorStoreRelations(): Promise<void> {
    const deletedVectorStoreRelations = await VectorStoreModel.find({ isDeleted: true });
    if (deletedVectorStoreRelations.length === 0) {
      return;
    }

    const deletedVectorStoreRelationIds = deletedVectorStoreRelations.map(vectorStoreRelation => vectorStoreRelation._id);

    // Ids of deleted vector stores that are still referenced by at least one file relation
    const referencedVectorStoreRelationIds = await VectorStoreFileRelationModel.distinct(
      'vectorStoreRelationId',
      { vectorStoreRelationId: { $in: deletedVectorStoreRelationIds } },
    );

    // An empty `$nin` list excludes nothing, so unreferenced stores are removed even when no file relation is left
    await VectorStoreModel.deleteMany({
      _id: { $in: deletedVectorStoreRelationIds, $nin: referencedVectorStoreRelationIds },
      isDeleted: true,
    });
  }
  /**
   * Deletes the uploaded files recorded for a page in the given vector store relation.
   *
   * Files deleted successfully, or reported as not found by the API, are dropped from the
   * VectorStoreFileRelation document. The document itself is removed once no file id remains;
   * otherwise it is saved with the remaining file ids.
   *
   * @param vectorStoreRelationId id of the VectorStore document
   * @param pageId id of the page whose files are deleted
   * @param apiCallInterval optional wait in milliseconds after each successful deletion
   */
  async deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId, apiCallInterval?: number): Promise<void> {
    const relation = await VectorStoreFileRelationModel.findOne({ vectorStoreRelationId, page: pageId });
    if (relation == null) {
      return;
    }

    const removedFileIds = new Set<string>();

    for (const fileId of relation.fileIds) {
      try {
        // eslint-disable-next-line no-await-in-loop
        const response = await this.client.deleteFile(fileId);
        logger.debug('Delete vector store file', response);
        removedFileIds.add(fileId);

        if (apiCallInterval != null) {
          // sleep
          // eslint-disable-next-line no-await-in-loop
          await new Promise(resolve => setTimeout(resolve, apiCallInterval));
        }
      }
      catch (err) {
        // A file that no longer exists counts as deleted
        // eslint-disable-next-line no-await-in-loop
        await oepnaiApiErrorHandler(err, { notFoundError: async() => { removedFileIds.add(fileId) } });
        logger.error(err);
      }
    }

    const remainingFileIds = relation.fileIds.filter(fileId => !removedFileIds.has(fileId));
    if (remainingFileIds.length > 0) {
      relation.fileIds = remainingFileIds;
      await relation.save();
      return;
    }

    await relation.remove();
  }
  /**
   * Deletes the files that belong to vector stores marked as deleted. (for CronJob)
   *
   * @param limit maximum number of VectorStoreFileRelation documents processed in one call
   * @param apiCallInterval wait in milliseconds passed on to deleteVectorStoreFile
   */
  async deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void> {
    // VectorStore documents that are marked as deleted
    const deletedStores = await VectorStoreModel.find({ isDeleted: true });
    if (deletedStores.length === 0) {
      return;
    }

    // VectorStoreFileRelation documents that still point to one of those stores
    const deletedStoreIds = deletedStores.map(store => store._id);
    const obsoleteRelations = await VectorStoreFileRelationModel
      .find({ vectorStoreRelationId: { $in: deletedStoreIds } })
      .limit(limit);
    if (obsoleteRelations.length === 0) {
      return;
    }

    // Delete the files one relation at a time; a failure is logged and does not stop the remaining ones
    for (const relation of obsoleteRelations) {
      try {
        // eslint-disable-next-line no-await-in-loop
        await this.deleteVectorStoreFile(relation.vectorStoreRelationId, relation.page, apiCallInterval);
      }
      catch (err) {
        logger.error(err);
      }
    }
  }
  308. // TODO: https://redmine.weseek.co.jp/issues/160332
  309. // async rebuildVectorStoreAll() {
  310. // await this.deleteVectorStore(VectorStoreScopeType.PUBLIC);
  311. // // Create all public pages VectorStoreFile
  312. // const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');
  313. // const pagesStream = Page.find({ grant: PageGrant.GRANT_PUBLIC }).populate('revision').cursor({ batch_size: BATCH_SIZE });
  314. // const batchStrem = createBatchStream(BATCH_SIZE);
  315. // const createVectorStoreFile = this.createVectorStoreFile.bind(this);
  316. // const createVectorStoreFileStream = new Transform({
  317. // objectMode: true,
  318. // async transform(chunk: HydratedDocument<PageDocument>[], encoding, callback) {
  319. // await createVectorStoreFile(chunk);
  320. // this.push(chunk);
  321. // callback();
  322. // },
  323. // });
  324. // await pipeline(pagesStream, batchStrem, createVectorStoreFileStream);
  325. // }
  326. // async rebuildVectorStore(page: HydratedDocument<PageDocument>) {
  327. // const vectorStore = await this.getOrCreateVectorStoreForPublicScope();
  328. // await this.deleteVectorStoreFile(vectorStore._id, page._id);
  329. // await this.createVectorStoreFile([page]);
  330. // }
  /**
   * Streams every page matching `conditions` (with its revision populated) in batches of BATCH_SIZE
   * and passes each batch to createVectorStoreFile.
   *
   * @param vectorStoreRelation VectorStore document whose vector store receives the files
   * @param conditions filter applied to the Page collection
   * @throws when reading the cursor or processing a batch fails (the pipeline rejects)
   */
  private async createVectorStoreFileWithStream(vectorStoreRelation: VectorStoreDocument, conditions: mongoose.FilterQuery<PageDocument>): Promise<void> {
    const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');

    const pagesStream = Page.find({ ...conditions })
      .populate('revision')
      .cursor({ batchSize: BATCH_SIZE });
    const batchStream = createBatchStream(BATCH_SIZE);

    const createVectorStoreFile = this.createVectorStoreFile.bind(this);
    const createVectorStoreFileStream = new Transform({
      objectMode: true,
      async transform(chunk: HydratedDocument<PageDocument>[], encoding, callback) {
        try {
          await createVectorStoreFile(vectorStoreRelation, chunk);
          // This is the last stream of the pipeline and nothing reads its output.
          // Do NOT push the chunk: unread output fills the readable buffer and backpressure
          // would stall the pipeline after a limited number of batches.
          callback();
        }
        catch (error) {
          callback(error instanceof Error ? error : new Error(String(error)));
        }
      },
    });

    await pipeline(pagesStream, batchStream, createVectorStoreFileStream);
  }
  /**
   * Builds the Page filter that selects the pages an AI assistant may learn from.
   *
   * - PUBLIC_ONLY: public pages matching the path patterns.
   * - GROUPS: public pages plus group-granted pages of `grantedGroups`; the owner must belong
   *   to every specified group.
   * - OWNER: public pages, pages granted to any of the owner's groups, and pages granted to the owner.
   *
   * @param owner owner of the AI assistant
   * @param accessScope access scope of the AI assistant
   * @param grantedGroups groups the assistant is granted to (required for GROUPS)
   * @param pagePathPatterns page paths / wildcard patterns to include
   * @returns filter for the Page collection
   * @throws Error when `grantedGroups` is missing for GROUPS, when the owner does not belong to a
   *         specified group, or when `accessScope` is not a known value
   */
  private async createConditionForCreateAiAssistant(
      owner: AiAssistant['owner'],
      accessScope: AiAssistant['accessScope'],
      grantedGroups: AiAssistant['grantedGroups'],
      pagePathPatterns: AiAssistant['pagePathPatterns'],
  ): Promise<mongoose.FilterQuery<PageDocument>> {
    const pathCondition = { $in: convertPathPatternsToRegExp(pagePathPatterns) };

    // Ids (as strings) of every user group and external user group related to the owner
    const findOwnerGroupIds = async(): Promise<string[]> => {
      const userGroupIds = await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
      const externalUserGroupIds = await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
      return [...userGroupIds, ...externalUserGroupIds].map(groupId => groupId.toString());
    };

    switch (accessScope) {
      case AiAssistantAccessScope.PUBLIC_ONLY:
        return {
          grant: PageGrant.GRANT_PUBLIC,
          path: pathCondition,
        };

      case AiAssistantAccessScope.GROUPS: {
        if (grantedGroups == null || grantedGroups.length === 0) {
          throw new Error('grantedGroups is required when accessScope is GROUPS');
        }

        const grantedGroupIds = grantedGroups.map(group => getIdForRef(group.item).toString());
        const ownerGroupIds = await findOwnerGroupIds();

        // Check if the owner belongs to the group specified in grantedGroups
        const hasForeignGroup = grantedGroupIds.some(groupId => !ownerGroupIds.includes(groupId));
        if (hasForeignGroup) {
          throw new Error('A group to which the owner does not belong is specified.');
        }

        return {
          grant: { $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP] },
          path: pathCondition,
          $or: [
            { 'grantedGroups.item': { $in: grantedGroupIds } },
            { grant: PageGrant.GRANT_PUBLIC },
          ],
        };
      }

      case AiAssistantAccessScope.OWNER: {
        const ownerGroupIds = await findOwnerGroupIds();

        return {
          grant: { $in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP, PageGrant.GRANT_OWNER] },
          path: pathCondition,
          $or: [
            { 'grantedGroups.item': { $in: ownerGroupIds } },
            { grantedUsers: { $in: [getIdForRef(owner)] } },
            { grant: PageGrant.GRANT_PUBLIC },
          ],
        };
      }

      default:
        throw new Error('Invalid accessScope value');
    }
  }
  /**
   * Creates an AI assistant together with its vector store.
   *
   * The page filter is built first, so invalid settings fail before anything is created.
   * Population of the vector store runs in the background and is not awaited; a failure there
   * is logged and does not affect the returned assistant.
   *
   * @param data AI assistant attributes (without `vectorStore`)
   * @returns the created AiAssistant document
   * @throws Error when the settings are invalid or when the vector store / assistant cannot be created
   */
  async createAiAssistant(data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument> {
    const conditions = await this.createConditionForCreateAiAssistant(data.owner, data.accessScope, data.grantedGroups, data.pagePathPatterns);

    const vectorStoreRelation = await this.createVectorStore(data.name);
    const aiAssistant = await AiAssistantModel.create({
      ...data, vectorStore: vectorStoreRelation,
    });

    // VectorStore creation process does not await
    // A rejection handler is attached so that a background failure cannot become an unhandled rejection
    this.createVectorStoreFileWithStream(vectorStoreRelation, conditions)
      .catch((err) => {
        logger.error(err);
      });

    return aiAssistant;
  }
  416. }
  /** Lazily created service instance shared by all callers of getOpenaiService. */
  let instance: OpenaiService;

  /**
   * Returns the shared OpenaiService instance, creating it on first use.
   *
   * @returns the service, or `undefined` when AI is not enabled (`app:aiEnabled`) or
   *          `openai:serviceType` is missing / not one of OpenaiServiceTypes
   */
  export const getOpenaiService = (): IOpenaiService | undefined => {
    if (instance == null) {
      const isAiEnabled = configManager.getConfig('crowi', 'app:aiEnabled');
      const serviceType = configManager.getConfig('crowi', 'openai:serviceType');

      const isAvailable = isAiEnabled && serviceType != null && OpenaiServiceTypes.includes(serviceType);
      if (!isAvailable) {
        return undefined;
      }

      instance = new OpenaiService();
    }

    return instance;
  };