openai.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. import assert from 'node:assert';
  2. import { Readable, Transform } from 'stream';
  3. import { PageGrant, isPopulated } from '@growi/core';
  4. import type { HydratedDocument, Types } from 'mongoose';
  5. import mongoose from 'mongoose';
  6. import type OpenAI from 'openai';
  7. import { toFile } from 'openai';
  8. import ThreadRelationModel from '~/features/openai/server/models/thread-relation';
  9. import VectorStoreModel, { VectorStoreScopeType, type VectorStoreDocument } from '~/features/openai/server/models/vector-store';
  10. import VectorStoreFileRelationModel, {
  11. type VectorStoreFileRelation,
  12. prepareVectorStoreFileRelations,
  13. } from '~/features/openai/server/models/vector-store-file-relation';
  14. import type { PageDocument, PageModel } from '~/server/models/page';
  15. import { configManager } from '~/server/service/config-manager';
  16. import { createBatchStream } from '~/server/util/batch-stream';
  17. import loggerFactory from '~/utils/logger';
  18. import { OpenaiServiceTypes } from '../../interfaces/ai';
  19. import { getClient } from './client-delegator';
  20. // import { splitMarkdownIntoChunks } from './markdown-splitter/markdown-token-splitter';
  21. import { oepnaiApiErrorHandler } from './openai-api-error-handler';
  // Number of pages handled per batch: used as the cursor batch size, as the batch-stream size,
  // and as the upper bound of pages accepted by a single createVectorStoreFile() call.
  const BATCH_SIZE = 100;

  const logger = loggerFactory('growi:service:openai');

  // In-process flag that is set to true once the vector store entity for the public scope
  // has been retrieved or created; while it is true the remote existence check is skipped.
  let isVectorStoreForPublicScopeExist = false;

  // Accumulator handed to prepareVectorStoreFileRelations() while files are being uploaded.
  type VectorStoreFileRelationsMap = Map<string, VectorStoreFileRelation>;
  /**
   * Public contract of the OpenAI service returned by getOpenaiService().
   */
  export interface IOpenaiService {
    /** Creates a thread for the given vector store, or retrieves the thread identified by `threadId`. */
    getOrCreateThread(userId: string, vectorStoreId?: string, threadId?: string): Promise<OpenAI.Beta.Threads.Thread | undefined>;

    /** Returns the VectorStore document for the public scope, creating the vector store when none is found. */
    getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument>;

    /** Deletes expired threads (for CronJob). */
    deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void>;

    /** Deletes obsolete VectorStore documents (for CronJob). */
    deleteObsolatedVectorStoreRelations(): Promise<void>;

    /** Uploads the given pages and attaches the uploaded files to the vector store. */
    createVectorStoreFile(pages: PageDocument[]): Promise<void>;

    /** Deletes the uploaded files recorded for the given page and vector store relation. */
    deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId): Promise<void>;

    /** Deletes files that belong to VectorStore documents marked as deleted (for CronJob). */
    deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void>;

    /** Rebuilds the vector store from all public pages. */
    rebuildVectorStoreAll(): Promise<void>;

    /** Rebuilds the vector store files of a single page. */
    rebuildVectorStore(page: HydratedDocument<PageDocument>): Promise<void>;
  }
  /**
   * Server-side service wrapping the OpenAI client for threads, uploaded files and vector stores.
   *
   * Bookkeeping is persisted through ThreadRelationModel, VectorStoreModel and
   * VectorStoreFileRelationModel.
   */
  class OpenaiService implements IOpenaiService {

    /**
     * Normalizes a caught value to an Error.
     * An Error instance is returned as-is so that its class, message and stack are preserved when rethrown.
     */
    private static toError(err: unknown): Error {
      return err instanceof Error ? err : new Error(String(err));
    }

    /** Client resolved from the currently configured `openai:serviceType`. */
    private get client() {
      const openaiServiceType = configManager.getConfig('crowi', 'openai:serviceType');
      return getClient({ openaiServiceType });
    }

    /**
     * Creates a new thread for `vectorStoreId`, or retrieves the existing thread `threadId`.
     *
     * - `vectorStoreId` given and `threadId` omitted: a thread is created and a ThreadRelation document is stored for `userId`.
     * - `threadId` given: its ThreadRelation document is looked up, the thread is retrieved and its expiration is updated.
     *
     * @param userId owner recorded on a newly created ThreadRelation document
     * @param vectorStoreId vector store the new thread is created for
     * @param threadId id of an existing thread
     * @returns the created or retrieved thread
     * @throws Error when neither id is given, when no ThreadRelation document exists for `threadId`, or when an API call fails
     */
    public async getOrCreateThread(userId: string, vectorStoreId?: string, threadId?: string): Promise<OpenAI.Beta.Threads.Thread> {
      if (threadId == null) {
        if (vectorStoreId == null) {
          // Without this guard the lookup below would run with `threadId: undefined`
          throw new Error('Either vectorStoreId or threadId is required');
        }

        try {
          const thread = await this.client.createThread(vectorStoreId);
          await ThreadRelationModel.create({ userId, threadId: thread.id });
          return thread;
        }
        catch (err) {
          throw OpenaiService.toError(err);
        }
      }

      // NOTE(review): the lookup is by threadId only; userId is not part of the query — confirm callers verify ownership
      const threadRelation = await ThreadRelationModel.findOne({ threadId });
      if (threadRelation == null) {
        throw new Error('ThreadRelation document is not exists');
      }

      // Check if a thread entity exists
      // If the thread entity does not exist, the thread-relation document is deleted
      try {
        const thread = await this.client.retrieveThread(threadRelation.threadId);

        // Update expiration date if thread entity exists
        await threadRelation.updateThreadExpiration();

        return thread;
      }
      catch (err) {
        await oepnaiApiErrorHandler(err, { notFoundError: async() => { await threadRelation.remove() } });
        throw OpenaiService.toError(err);
      }
    }

    /**
     * Deletes expired threads and then removes their ThreadRelation documents.
     *
     * A failure on one thread is logged and does not stop the loop.
     * A relation is also removed when the not-found handler is invoked for its thread,
     * so that a thread that is already gone is not retried on every run.
     *
     * @param limit maximum number of expired relations fetched
     * @param apiCallInterval milliseconds to wait after each successful delete call
     */
    public async deleteExpiredThreads(limit: number, apiCallInterval: number): Promise<void> {
      const expiredThreadRelations = await ThreadRelationModel.getExpiredThreadRelations(limit);
      if (expiredThreadRelations == null) {
        return;
      }

      const deletedThreadIds: string[] = [];
      for await (const expiredThreadRelation of expiredThreadRelations) {
        const { threadId } = expiredThreadRelation;
        try {
          const deleteThreadResponse = await this.client.deleteThread(threadId);
          logger.debug('Delete thread', deleteThreadResponse);
          deletedThreadIds.push(threadId);

          // sleep
          await new Promise(resolve => setTimeout(resolve, apiCallInterval));
        }
        catch (err) {
          await oepnaiApiErrorHandler(err, { notFoundError: async() => { deletedThreadIds.push(threadId) } });
          logger.error(err);
        }
      }

      if (deletedThreadIds.length === 0) {
        return;
      }
      await ThreadRelationModel.deleteMany({ threadId: { $in: deletedThreadIds } });
    }

    /**
     * Returns the VectorStore document for the public scope.
     *
     * An existing, non-deleted document is verified against the API once per process
     * (tracked by `isVectorStoreForPublicScopeExist`). When no document exists,
     * a vector store is created and a new document is stored.
     *
     * @throws Error when the verification call fails; on the not-found case the document is marked as deleted first
     */
    public async getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument> {
      const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ scopeType: VectorStoreScopeType.PUBLIC, isDeleted: false });

      if (vectorStoreDocument != null) {
        if (isVectorStoreForPublicScopeExist) {
          return vectorStoreDocument;
        }

        try {
          // Check if vector store entity exists
          // If the vector store entity does not exist, the vector store document is deleted
          await this.client.retrieveVectorStore(vectorStoreDocument.vectorStoreId);
          isVectorStoreForPublicScopeExist = true;
          return vectorStoreDocument;
        }
        catch (err) {
          // Invoke markAsDeleted on the document itself; a detached method reference would lose `this`
          await oepnaiApiErrorHandler(err, { notFoundError: async() => { await vectorStoreDocument.markAsDeleted() } });
          throw OpenaiService.toError(err);
        }
      }

      const newVectorStore = await this.client.createVectorStore(VectorStoreScopeType.PUBLIC);
      const newVectorStoreDocument = await VectorStoreModel.create({
        vectorStoreId: newVectorStore.id,
        scopeType: VectorStoreScopeType.PUBLIC,
      }) as VectorStoreDocument;

      isVectorStoreForPublicScopeExist = true;

      return newVectorStoreDocument;
    }

    // TODO: https://redmine.weseek.co.jp/issues/156643
    // private async uploadFileByChunks(pageId: Types.ObjectId, body: string, vectorStoreFileRelationsMap: VectorStoreFileRelationsMap) {
    //   const chunks = await splitMarkdownIntoChunks(body, 'gpt-4o');
    //   for await (const [index, chunk] of chunks.entries()) {
    //     try {
    //       const file = await toFile(Readable.from(chunk), `${pageId}-chunk-${index}.md`);
    //       const uploadedFile = await this.client.uploadFile(file);
    //       prepareVectorStoreFileRelations(pageId, uploadedFile.id, vectorStoreFileRelationsMap);
    //     }
    //     catch (err) {
    //       logger.error(err);
    //     }
    //   }
    // }

    /**
     * Uploads `body` as a file named `<pageId>.md`.
     *
     * @returns the uploaded file object
     */
    private async uploadFile(pageId: Types.ObjectId, body: string): Promise<OpenAI.Files.FileObject> {
      const file = await toFile(Readable.from(body), `${pageId}.md`);
      return this.client.uploadFile(file);
    }

    /**
     * Deletes the vector store of the given scope and marks its document as deleted.
     * Does nothing when no non-deleted document exists for the scope.
     *
     * @throws Error when the delete call fails; on the not-found case the document is marked as deleted first
     */
    private async deleteVectorStore(vectorStoreScopeType: VectorStoreScopeType): Promise<void> {
      const vectorStoreDocument: VectorStoreDocument | null = await VectorStoreModel.findOne({ scopeType: vectorStoreScopeType, isDeleted: false });
      if (vectorStoreDocument == null) {
        return;
      }

      try {
        await this.client.deleteVectorStore(vectorStoreDocument.vectorStoreId);
        await vectorStoreDocument.markAsDeleted();
      }
      catch (err) {
        // Invoke markAsDeleted on the document itself; a detached method reference would lose `this`
        await oepnaiApiErrorHandler(err, { notFoundError: async() => { await vectorStoreDocument.markAsDeleted() } });
        throw OpenaiService.toError(err);
      }
    }

    /**
     * Uploads the bodies of the given public pages and attaches the uploaded files to the public-scope vector store.
     *
     * Pages that are not public, have no revision, or have an empty body are skipped.
     * Upload failures are logged per page. When saving relations or attaching the files fails,
     * the files recorded for the given pages are deleted again.
     *
     * @param pages pages to upload; at most BATCH_SIZE entries
     * @throws AssertionError when more than BATCH_SIZE pages are given
     */
    async createVectorStoreFile(pages: Array<HydratedDocument<PageDocument>>): Promise<void> {
      // Validate the batch size before any upload is started
      assert(pages.length <= BATCH_SIZE, 'workers.length must be less than or equal to BATCH_SIZE');

      const vectorStore = await this.getOrCreateVectorStoreForPublicScope();
      const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();

      const processUploadFile = async(page: PageDocument) => {
        if (page._id == null || page.grant !== PageGrant.GRANT_PUBLIC || page.revision == null) {
          return;
        }

        if (isPopulated(page.revision) && page.revision.body.length > 0) {
          const uploadedFile = await this.uploadFile(page._id, page.revision.body);
          prepareVectorStoreFileRelations(vectorStore._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
          return;
        }

        // The revision is not populated (or its body is empty): populate it and try again
        const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
        if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
          const uploadedFile = await this.uploadFile(page._id, pagePopulatedToShowRevision.revision.body);
          prepareVectorStoreFileRelations(vectorStore._id, page._id, uploadedFile.id, vectorStoreFileRelationsMap);
        }
      };

      // Start workers to process results and wait for all processing to complete
      const fileUploadResult = await Promise.allSettled(pages.map(processUploadFile));
      fileUploadResult.forEach((result) => {
        if (result.status === 'rejected') {
          logger.error(result.reason);
        }
      });

      const vectorStoreFileRelations = Array.from(vectorStoreFileRelationsMap.values());
      const uploadedFileIds = vectorStoreFileRelations.flatMap(data => data.fileIds);

      if (uploadedFileIds.length === 0) {
        return;
      }

      const pageIds = pages.map(page => page._id);

      try {
        // Save vector store file relation
        await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(vectorStoreFileRelations);

        // Create vector store file
        const createVectorStoreFileBatchResponse = await this.client.createVectorStoreFileBatch(vectorStore.vectorStoreId, uploadedFileIds);
        logger.debug('Create vector store file', createVectorStoreFileBatchResponse);

        // Set isAttachedToVectorStore: true when the uploaded file is attached to VectorStore
        await VectorStoreFileRelationModel.markAsAttachedToVectorStore(pageIds);
      }
      catch (err) {
        logger.error(err);

        // Delete all uploaded files if createVectorStoreFileBatch fails.
        // Cleanup is best-effort: a failure on one page must not skip the remaining pages.
        for (const pageId of pageIds) {
          try {
            await this.deleteVectorStoreFile(vectorStore._id, pageId);
          }
          catch (cleanupErr) {
            logger.error(cleanupErr);
          }
        }
      }
    }

    /**
     * Deletes all VectorStore documents that are marked as deleted (isDeleted: true)
     * and have no associated VectorStoreFileRelation documents.
     */
    async deleteObsolatedVectorStoreRelations(): Promise<void> {
      const deletedVectorStoreRelations = await VectorStoreModel.find({ isDeleted: true });
      if (deletedVectorStoreRelations.length === 0) {
        return;
      }

      // One group per vectorStoreRelationId that is still referenced by at least one file relation
      const referencedGroups: Array<{ _id: Types.ObjectId }> = await VectorStoreFileRelationModel.aggregate([
        { $group: { _id: '$vectorStoreRelationId' } },
      ]);
      const currentVectorStoreRelationIds = referencedGroups.map(group => group._id);

      // When nothing is referenced any more the list is empty and every deleted document is removed
      await VectorStoreModel.deleteMany({ _id: { $nin: currentVectorStoreRelationIds }, isDeleted: true });
    }

    /**
     * Deletes the uploaded files recorded for a page, then updates the VectorStoreFileRelation document.
     *
     * The relation document is removed when every file is gone; otherwise it is saved
     * with only the ids of the files that could not be deleted.
     *
     * @param vectorStoreRelationId id of the VectorStore document
     * @param pageId id of the page
     * @param apiCallInterval optional milliseconds to wait after each successful delete call
     */
    async deleteVectorStoreFile(vectorStoreRelationId: Types.ObjectId, pageId: Types.ObjectId, apiCallInterval?: number): Promise<void> {
      // Delete vector store file and delete vector store file relation
      const vectorStoreFileRelation = await VectorStoreFileRelationModel.findOne({ vectorStoreRelationId, pageId });
      if (vectorStoreFileRelation == null) {
        return;
      }

      const deletedFileIds = new Set<string>();
      for (const fileId of vectorStoreFileRelation.fileIds) {
        try {
          const deleteFileResponse = await this.client.deleteFile(fileId);
          logger.debug('Delete vector store file', deleteFileResponse);
          deletedFileIds.add(fileId);
          if (apiCallInterval != null) {
            // sleep
            await new Promise(resolve => setTimeout(resolve, apiCallInterval));
          }
        }
        catch (err) {
          // A file reported as not found is treated as deleted
          await oepnaiApiErrorHandler(err, { notFoundError: async() => { deletedFileIds.add(fileId) } });
          logger.error(err);
        }
      }

      const undeletedFileIds = vectorStoreFileRelation.fileIds.filter(fileId => !deletedFileIds.has(fileId));

      if (undeletedFileIds.length === 0) {
        await vectorStoreFileRelation.remove();
        return;
      }

      vectorStoreFileRelation.fileIds = undeletedFileIds;
      await vectorStoreFileRelation.save();
    }

    /**
     * Deletes the files that belong to VectorStore documents marked as deleted.
     *
     * @param limit maximum number of VectorStoreFileRelation documents processed per call
     * @param apiCallInterval milliseconds to wait after each successful delete call
     */
    async deleteObsoleteVectorStoreFile(limit: number, apiCallInterval: number): Promise<void> {
      // Retrieves all VectorStore documents that are marked as deleted
      const deletedVectorStoreRelations = await VectorStoreModel.find({ isDeleted: true });
      if (deletedVectorStoreRelations.length === 0) {
        return;
      }

      // Retrieves VectorStoreFileRelation documents associated with deleted VectorStore documents
      const deletedVectorStoreRelationIds = deletedVectorStoreRelations.map(deletedVectorStoreRelation => deletedVectorStoreRelation._id);
      const obsoleteVectorStoreFileRelations = await VectorStoreFileRelationModel
        .find({ vectorStoreRelationId: { $in: deletedVectorStoreRelationIds } })
        .limit(limit);
      if (obsoleteVectorStoreFileRelations.length === 0) {
        return;
      }

      // Delete obsolete VectorStoreFile
      for (const vectorStoreFileRelation of obsoleteVectorStoreFileRelations) {
        try {
          await this.deleteVectorStoreFile(vectorStoreFileRelation.vectorStoreRelationId, vectorStoreFileRelation.pageId, apiCallInterval);
        }
        catch (err) {
          logger.error(err);
        }
      }
    }

    /**
     * Deletes the public-scope vector store and starts re-creating vector store files for all public pages.
     *
     * Pages are streamed in batches of BATCH_SIZE. The returned promise resolves once the
     * stream pipeline has been started; it does not wait for the rebuild to finish.
     * Stream errors are logged.
     */
    async rebuildVectorStoreAll() {
      await this.deleteVectorStore(VectorStoreScopeType.PUBLIC);

      // Create all public pages VectorStoreFile
      const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');
      const pagesStream = Page.find({ grant: PageGrant.GRANT_PUBLIC }).populate('revision').cursor({ batchSize: BATCH_SIZE });
      const batchStream = createBatchStream(BATCH_SIZE);

      const createVectorStoreFile = this.createVectorStoreFile.bind(this);
      const createVectorStoreFileStream = new Transform({
        objectMode: true,
        async transform(chunk: HydratedDocument<PageDocument>[], _encoding, callback) {
          let error: Error | undefined;
          try {
            await createVectorStoreFile(chunk);
          }
          catch (err) {
            error = OpenaiService.toError(err);
          }
          // Nothing is pushed downstream: this is the last stage, and pushing to a readable side
          // that nobody consumes would stall the pipeline once its buffer is full.
          // The callback is always invoked so that a failure surfaces as a stream error instead of a hang.
          callback(error);
        },
      });

      // pipe() does not forward errors, so every stage gets its own listener
      const onStreamError = (err: unknown) => {
        logger.error(err);
        // Stop reading pages once any stage has failed
        pagesStream.destroy();
      };
      pagesStream.on('error', onStreamError);
      batchStream.on('error', onStreamError);
      createVectorStoreFileStream.on('error', onStreamError);

      pagesStream
        .pipe(batchStream)
        .pipe(createVectorStoreFileStream);
    }

    /**
     * Re-creates the vector store files of a single page:
     * the files currently recorded for the page are deleted, then the page is uploaded again.
     */
    async rebuildVectorStore(page: HydratedDocument<PageDocument>) {
      const vectorStore = await this.getOrCreateVectorStoreForPublicScope();
      await this.deleteVectorStoreFile(vectorStore._id, page._id);
      await this.createVectorStoreFile([page]);
    }

  }
  // Lazily created singleton; stays unset until AI is enabled with a supported service type.
  let instance: OpenaiService | undefined;

  /**
   * Returns the shared OpenaiService instance.
   *
   * The instance is created on first use when `app:aiEnabled` is truthy and
   * `openai:serviceType` is one of OpenaiServiceTypes; otherwise undefined is returned.
   */
  export const getOpenaiService = (): IOpenaiService | undefined => {
    if (instance == null) {
      const isAiEnabled = configManager.getConfig('crowi', 'app:aiEnabled');
      const serviceType = configManager.getConfig('crowi', 'openai:serviceType');

      if (isAiEnabled && serviceType != null && OpenaiServiceTypes.includes(serviceType)) {
        instance = new OpenaiService();
      }
    }

    return instance;
  };