openai.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. import assert from 'node:assert';
  2. import { Readable, Transform } from 'stream';
  3. import { pipeline } from 'stream/promises';
  4. import type { IPagePopulatedToShowRevision } from '@growi/core';
  5. import { PageGrant, isPopulated } from '@growi/core';
  6. import type { HydratedDocument, Types } from 'mongoose';
  7. import mongoose from 'mongoose';
  8. import type OpenAI from 'openai';
  9. import { toFile } from 'openai';
  10. import ThreadRelationModel from '~/features/openai/server/models/thread-relation';
  11. import VectorStoreModel, { VectorStoreScopeType, type VectorStoreDocument } from '~/features/openai/server/models/vector-store';
  12. import VectorStoreFileRelationModel, {
  13. type VectorStoreFileRelation,
  14. prepareVectorStoreFileRelations,
  15. } from '~/features/openai/server/models/vector-store-file-relation';
  16. import type { PageDocument, PageModel } from '~/server/models/page';
  17. import { configManager } from '~/server/service/config-manager';
  18. import { createBatchStream } from '~/server/util/batch-stream';
  19. import loggerFactory from '~/utils/logger';
  20. import { OpenaiServiceTypes } from '../../interfaces/ai';
  21. import { convertMarkdownToHtml } from '../utils/convert-markdown-to-html';
  22. import { getClient } from './client-delegator';
  23. // import { splitMarkdownIntoChunks } from './markdown-splitter/markdown-token-splitter';
  24. import { oepnaiApiErrorHandler } from './openai-api-error-handler';
  /** Upper bound on the number of pages handled in one batch (cursor batch size, batch stream size and per-call page limit). */
  const BATCH_SIZE = 100;

  const logger = loggerFactory('growi:service:openai');

  // Process-local flag: set to true once the public-scope vector store has been
  // retrieved from (or created through) the API, so later calls can skip the remote check.
  let isVectorStoreForPublicScopeExist = false;

  // Accumulator filled by prepareVectorStoreFileRelations while files are uploaded.
  // NOTE(review): the meaning of the string key is decided by prepareVectorStoreFileRelations — confirm there.
  type VectorStoreFileRelationsMap = Map<string, VectorStoreFileRelation>;
  /**
   * Type guard telling whether `page` can be treated as already populated for showing its revision.
   *
   * Returns `false` only when the page has a revision that `isPopulated` rejects;
   * a page without any revision (null/undefined) is reported as `true`.
   */
  const isPagePopulatedToShowRevision = (page: HydratedDocument<PageDocument>): page is IPagePopulatedToShowRevision & PageDocument => {
    const revision = page?.revision;
    return revision == null || isPopulated(revision);
  };
  /**
   * Public contract of the OpenAI integration service.
   */
  export interface IOpenaiService {
    /**
     * Creates a new thread when only `vectorStoreId` is given, otherwise retrieves the thread
     * recorded for `threadId`.
     */
    getOrCreateThread(userId: string, vectorStoreId?: string, threadId?: string): Promise<OpenAI.Beta.Threads.Thread | undefined>;

    /** Returns the vector store document of the public scope, creating the vector store when none is usable. */
    getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument>;

    /**
     * Deletes up to `limit` expired threads, waiting `apiCallInterval` milliseconds after each API call.
     * For CronJob.
     */
    deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void>;

    /**
     * Removes vector store documents that are marked as deleted and are no longer referenced
     * by any vector store file relation.
     * For CronJob.
     */
    deleteObsolatedVectorStoreRelations(): Promise<void>;

    /** Uploads the given pages as files and attaches them to the public-scope vector store. */
    createVectorStoreFile(pages: PageDocument[]): Promise<void>;

    /**
     * Deletes the files uploaded for `pageId` under the given vector store relation.
     * When `apiCallInterval` (milliseconds) is given, the implementation sleeps that long after each deleted file.
     */
    deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId, apiCallInterval?: number): Promise<void>;

    /**
     * Deletes files that belong to vector stores marked as deleted, processing at most `limit` relations.
     * For CronJob.
     */
    deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void>;

    /** Deletes the public-scope vector store and rebuilds it from every public page. */
    rebuildVectorStoreAll(): Promise<void>;

    /** Replaces the vector store files of a single page. */
    rebuildVectorStore(page: HydratedDocument<PageDocument>): Promise<void>;
  }
  /**
   * Service wrapping the OpenAI client: manages threads, uploaded files and the
   * public-scope vector store, and keeps the related MongoDB documents in sync.
   */
  class OpenaiService implements IOpenaiService {

    /** Resolves the API client for the currently configured `openai:serviceType` on every access. */
    private get client() {
      const openaiServiceType = configManager.getConfig('crowi', 'openai:serviceType');
      return getClient({ openaiServiceType });
    }

    /**
     * Creates a new thread when `vectorStoreId` is given without `threadId`;
     * otherwise retrieves the thread recorded for `threadId` and extends its expiration.
     *
     * @param userId owner stored on the newly created thread relation
     * @param vectorStoreId vector store the new thread is created with
     * @param threadId id of an existing thread to retrieve
     * @throws Error when no thread relation exists for `threadId`, or when an API call fails
     */
    public async getOrCreateThread(userId: string, vectorStoreId?: string, threadId?: string): Promise<OpenAI.Beta.Threads.Thread> {
      if (vectorStoreId != null && threadId == null) {
        try {
          const thread = await this.client.createThread(vectorStoreId);
          await ThreadRelationModel.create({ userId, threadId: thread.id });
          return thread;
        }
        catch (err) {
          throw new Error(err);
        }
      }

      const threadRelation = await ThreadRelationModel.findOne({ threadId });
      if (threadRelation == null) {
        throw new Error('ThreadRelation document is not exists');
      }

      // Check if a thread entity exists
      // If the thread entity does not exist, the thread-relation document is deleted
      try {
        const thread = await this.client.retrieveThread(threadRelation.threadId);

        // Update expiration date if thread entity exists
        await threadRelation.updateThreadExpiration();

        return thread;
      }
      catch (err) {
        await oepnaiApiErrorHandler(err, { notFoundError: async() => { await threadRelation.remove() } });
        throw new Error(err);
      }
    }

    /**
     * Deletes expired threads through the API and then removes their thread relations.
     * A failure on one thread is logged and does not stop the remaining deletions;
     * only relations whose thread was actually deleted are removed.
     *
     * @param limit maximum number of expired thread relations to fetch
     * @param apiCallInterval milliseconds to wait after each successful delete call
     */
    public async deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void> {
      const expiredThreadRelations = await ThreadRelationModel.getExpiredThreadRelations(limit);
      if (expiredThreadRelations == null) {
        return;
      }

      const deletedThreadIds: string[] = [];
      for await (const expiredThreadRelation of expiredThreadRelations) {
        try {
          const deleteThreadResponse = await this.client.deleteThread(expiredThreadRelation.threadId);
          logger.debug('Delete thread', deleteThreadResponse);
          deletedThreadIds.push(expiredThreadRelation.threadId);

          // sleep
          await new Promise(resolve => setTimeout(resolve, apiCallInterval));
        }
        catch (err) {
          logger.error(err);
        }
      }

      await ThreadRelationModel.deleteMany({ threadId: { $in: deletedThreadIds } });
    }

    /**
     * Returns the vector store document for the public scope.
     * The remote vector store is verified once per process (tracked by a module-level flag);
     * when no non-deleted document exists, a new vector store and document are created.
     *
     * @throws Error when verifying the existing vector store through the API fails
     */
    public async getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument> {
      const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ scopeType: VectorStoreScopeType.PUBLIC, isDeleted: false });

      if (vectorStoreDocument != null && isVectorStoreForPublicScopeExist) {
        return vectorStoreDocument;
      }

      if (vectorStoreDocument != null && !isVectorStoreForPublicScopeExist) {
        try {
          // Check if vector store entity exists
          // If the vector store entity does not exist, the vector store document is deleted
          await this.client.retrieveVectorStore(vectorStoreDocument.vectorStoreId);
          isVectorStoreForPublicScopeExist = true;
          return vectorStoreDocument;
        }
        catch (err) {
          // Call the method on the document itself: passing the bare method reference would lose `this`
          await oepnaiApiErrorHandler(err, { notFoundError: async() => { await vectorStoreDocument.markAsDeleted() } });
          throw new Error(err);
        }
      }

      const newVectorStore = await this.client.createVectorStore(VectorStoreScopeType.PUBLIC);
      const newVectorStoreDocument = await VectorStoreModel.create({
        vectorStoreId: newVectorStore.id,
        scopeType: VectorStoreScopeType.PUBLIC,
      }) as VectorStoreDocument;

      isVectorStoreForPublicScopeExist = true;

      return newVectorStoreDocument;
    }

    // TODO: https://redmine.weseek.co.jp/issues/156643
    // private async uploadFileByChunks(pageId: Types.ObjectId, body: string, vectorStoreFileRelationsMap: VectorStoreFileRelationsMap) {
    //   const chunks = await splitMarkdownIntoChunks(body, 'gpt-4o');
    //   for await (const [index, chunk] of chunks.entries()) {
    //     try {
    //       const file = await toFile(Readable.from(chunk), `${pageId}-chunk-${index}.md`);
    //       const uploadedFile = await this.client.uploadFile(file);
    //       prepareVectorStoreFileRelations(pageId, uploadedFile.id, vectorStoreFileRelationsMap);
    //     }
    //     catch (err) {
    //       logger.error(err);
    //     }
    //   }
    // }

    /** Converts the page to HTML and uploads it as `<pageId>.html`. */
    private async uploadFile(page: IPagePopulatedToShowRevision): Promise<OpenAI.Files.FileObject> {
      const convertedHtml = await convertMarkdownToHtml(page);
      const file = await toFile(Readable.from(convertedHtml), `${page._id}.html`);
      const uploadedFile = await this.client.uploadFile(file);
      return uploadedFile;
    }

    /**
     * Deletes the remote vector store of the given scope and marks its document as deleted.
     * Does nothing when no non-deleted document exists for the scope.
     *
     * @throws Error when the API call or marking the document fails
     */
    private async deleteVectorStore(vectorStoreScopeType: VectorStoreScopeType): Promise<void> {
      const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ scopeType: vectorStoreScopeType, isDeleted: false });
      if (vectorStoreDocument == null) {
        return;
      }

      try {
        await this.client.deleteVectorStore(vectorStoreDocument.vectorStoreId);
        await vectorStoreDocument.markAsDeleted();
      }
      catch (err) {
        // Call the method on the document itself: passing the bare method reference would lose `this`
        await oepnaiApiErrorHandler(err, { notFoundError: async() => { await vectorStoreDocument.markAsDeleted() } });
        throw new Error(err);
      }
    }

    /**
     * Uploads every public page that has a non-empty revision body and attaches the
     * uploaded files to the public-scope vector store.
     *
     * Upload failures of individual pages are logged and skipped. When saving the relations
     * or attaching the files fails, the files recorded for all given pages are deleted again.
     *
     * @param pages pages to upload; at most BATCH_SIZE entries
     * @throws AssertionError when more than BATCH_SIZE pages are given
     */
    async createVectorStoreFile(pages: Array<HydratedDocument<PageDocument>>): Promise<void> {
      // Validate the batch size BEFORE any upload is started, so a failed assertion
      // cannot leave already running uploads behind without anyone awaiting them.
      assert(pages.length <= BATCH_SIZE, 'workers.length must be less than or equal to BATCH_SIZE');

      const vectorStore = await this.getOrCreateVectorStoreForPublicScope();
      const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();

      const processUploadFile = async(page: HydratedDocument<PageDocument>): Promise<void> => {
        if (page._id == null || page.grant !== PageGrant.GRANT_PUBLIC || page.revision == null) {
          return;
        }

        if (isPagePopulatedToShowRevision(page)) {
          // Nothing to upload for an empty body
          if (page.revision.body.length === 0) {
            return;
          }
          const uploadedFile = await this.uploadFile(page);
          prepareVectorStoreFileRelations(vectorStore._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
          return;
        }

        const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
        if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
          const uploadedFile = await this.uploadFile(pagePopulatedToShowRevision);
          prepareVectorStoreFileRelations(vectorStore._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
        }
      };

      // Start workers to process results
      const workers = pages.map(processUploadFile);

      // Wait for all processing to complete.
      const fileUploadResult = await Promise.allSettled(workers);
      fileUploadResult.forEach((result) => {
        if (result.status === 'rejected') {
          logger.error(result.reason);
        }
      });

      const vectorStoreFileRelations = Array.from(vectorStoreFileRelationsMap.values());
      const uploadedFileIds = vectorStoreFileRelations.flatMap(data => data.fileIds);

      if (uploadedFileIds.length === 0) {
        return;
      }

      const pageIds = pages.map(page => page._id);

      try {
        // Save vector store file relation
        await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(vectorStoreFileRelations);

        // Create vector store file
        const createVectorStoreFileBatchResponse = await this.client.createVectorStoreFileBatch(vectorStore.vectorStoreId, uploadedFileIds);
        logger.debug('Create vector store file', createVectorStoreFileBatchResponse);

        // Set isAttachedToVectorStore: true when the uploaded file is attached to VectorStore
        await VectorStoreFileRelationModel.markAsAttachedToVectorStore(pageIds);
      }
      catch (err) {
        logger.error(err);

        // Delete all uploaded files if createVectorStoreFileBatch fails
        for (const pageId of pageIds) {
          // eslint-disable-next-line no-await-in-loop
          await this.deleteVectorStoreFile(vectorStore._id, pageId);
        }
      }
    }

    /**
     * Deletes all VectorStore documents that are marked as deleted (isDeleted: true)
     * and have no associated VectorStoreFileRelation documents.
     */
    async deleteObsolatedVectorStoreRelations(): Promise<void> {
      const deletedVectorStoreRelations = await VectorStoreModel.find({ isDeleted: true });
      if (deletedVectorStoreRelations.length === 0) {
        return;
      }

      // One group per vector store that is still referenced by at least one file relation
      const referencedVectorStoreGroups: Array<{ _id: Types.ObjectId }> = await VectorStoreFileRelationModel.aggregate([
        {
          $group: {
            _id: '$vectorStoreRelationId',
            relationCount: { $sum: 1 },
          },
        },
        { $match: { relationCount: { $gt: 0 } } },
        { $project: { _id: 1 } },
      ]);

      // The aggregation yields `{ _id }` objects; extract the plain ids for the $nin filter
      const currentVectorStoreRelationIds = referencedVectorStoreGroups.map(group => group._id);

      // When no file relation exists at all, the id list is empty and every deleted
      // vector store document is unreferenced, hence removed.
      await VectorStoreModel.deleteMany({ _id: { $nin: currentVectorStoreRelationIds }, isDeleted: true });
    }

    /**
     * Deletes the files uploaded for a page and updates (or removes) the corresponding relation.
     *
     * A file whose deletion fails stays recorded on the relation so it can be retried later;
     * the relation is removed only when no file id is left.
     *
     * @param vectorStoreRelationId id of the vector store document the relation belongs to
     * @param pageId id of the page whose files are deleted
     * @param apiCallInterval optional milliseconds to wait after each successful delete call
     */
    async deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId, apiCallInterval?: number): Promise<void> {
      // Delete vector store file and delete vector store file relation
      const vectorStoreFileRelation = await VectorStoreFileRelationModel.findOne({ vectorStoreRelationId, page: pageId });
      if (vectorStoreFileRelation == null) {
        return;
      }

      const deletedFileIds: string[] = [];
      for (const fileId of vectorStoreFileRelation.fileIds) {
        try {
          // eslint-disable-next-line no-await-in-loop
          const deleteFileResponse = await this.client.deleteFile(fileId);
          logger.debug('Delete vector store file', deleteFileResponse);
          deletedFileIds.push(fileId);
          if (apiCallInterval != null) {
            // sleep
            // eslint-disable-next-line no-await-in-loop
            await new Promise(resolve => setTimeout(resolve, apiCallInterval));
          }
        }
        catch (err) {
          // The notFoundError callback records the file as deleted as well
          // eslint-disable-next-line no-await-in-loop
          await oepnaiApiErrorHandler(err, { notFoundError: async() => { deletedFileIds.push(fileId) } });
          logger.error(err);
        }
      }

      const undeletedFileIds = vectorStoreFileRelation.fileIds.filter(fileId => !deletedFileIds.includes(fileId));

      if (undeletedFileIds.length === 0) {
        await vectorStoreFileRelation.remove();
        return;
      }

      vectorStoreFileRelation.fileIds = undeletedFileIds;
      await vectorStoreFileRelation.save();
    }

    /**
     * Deletes the files that still belong to vector stores marked as deleted.
     * Failures on single relations are logged and do not stop the loop.
     *
     * @param limit maximum number of file relations processed per call
     * @param apiCallInterval milliseconds to wait after each deleted file
     */
    async deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void> {
      // Retrieves all VectorStore documents that are marked as deleted
      const deletedVectorStoreRelations = await VectorStoreModel.find({ isDeleted: true });
      if (deletedVectorStoreRelations.length === 0) {
        return;
      }

      // Retrieves VectorStoreFileRelation documents associated with deleted VectorStore documents
      const obsoleteVectorStoreFileRelations = await VectorStoreFileRelationModel.find(
        { vectorStoreRelationId: { $in: deletedVectorStoreRelations.map(deletedVectorStoreRelation => deletedVectorStoreRelation._id) } },
      ).limit(limit);
      if (obsoleteVectorStoreFileRelations.length === 0) {
        return;
      }

      // Delete obsolete VectorStoreFile
      for (const vectorStoreFileRelation of obsoleteVectorStoreFileRelations) {
        try {
          // eslint-disable-next-line no-await-in-loop
          await this.deleteVectorStoreFile(vectorStoreFileRelation.vectorStoreRelationId, vectorStoreFileRelation.page, apiCallInterval);
        }
        catch (err) {
          logger.error(err);
        }
      }
    }

    /**
     * Deletes the public-scope vector store and re-creates its files from every public page,
     * streaming the pages in batches of BATCH_SIZE.
     *
     * Rejects when processing a batch throws.
     */
    async rebuildVectorStoreAll() {
      await this.deleteVectorStore(VectorStoreScopeType.PUBLIC);

      // Create all public pages VectorStoreFile
      const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');
      const pagesStream = Page.find({ grant: PageGrant.GRANT_PUBLIC }).populate('revision').cursor({ batch_size: BATCH_SIZE });
      const batchStrem = createBatchStream(BATCH_SIZE);

      const createVectorStoreFile = this.createVectorStoreFile.bind(this);
      const createVectorStoreFileStream = new Transform({
        objectMode: true,
        async transform(chunk: HydratedDocument<PageDocument>[], encoding, callback) {
          // This stream is the end of the pipeline: nothing is pushed downstream because
          // nobody reads the readable side, and buffered output would stall the stream.
          let error: Error | null = null;
          try {
            await createVectorStoreFile(chunk);
          }
          catch (err) {
            // Hand the failure to the stream so that the pipeline rejects instead of hanging
            error = err instanceof Error ? err : new Error(String(err));
          }
          callback(error);
        },
      });

      await pipeline(pagesStream, batchStrem, createVectorStoreFileStream);
    }

    /** Replaces the vector store files of one page: deletes the existing files, then uploads the page again. */
    async rebuildVectorStore(page: HydratedDocument<PageDocument>) {
      const vectorStore = await this.getOrCreateVectorStoreForPublicScope();
      await this.deleteVectorStoreFile(vectorStore._id, page._id);
      await this.createVectorStoreFile([page]);
    }

  }
  // Lazily created singleton returned by getOpenaiService()
  let instance: OpenaiService;

  /**
   * Returns the shared OpenaiService instance, creating it on first use.
   *
   * The instance is created only when `app:aiEnabled` is truthy and `openai:serviceType`
   * is set to one of `OpenaiServiceTypes`; otherwise `undefined` is returned and the
   * configuration is evaluated again on the next call.
   */
  export const getOpenaiService = (): IOpenaiService | undefined => {
    if (instance == null) {
      const aiEnabled = configManager.getConfig('crowi', 'app:aiEnabled');
      const openaiServiceType = configManager.getConfig('crowi', 'openai:serviceType');

      const isServiceAvailable = aiEnabled && openaiServiceType != null && OpenaiServiceTypes.includes(openaiServiceType);
      if (!isServiceAvailable) {
        return undefined;
      }

      instance = new OpenaiService();
    }

    return instance;
  };