openai.ts 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. import assert from 'node:assert';
  2. import { Readable, Transform } from 'stream';
  3. import { PageGrant, isPopulated } from '@growi/core';
  4. import type { HydratedDocument, Types } from 'mongoose';
  5. import mongoose from 'mongoose';
  6. import type OpenAI from 'openai';
  7. import { toFile } from 'openai';
  8. import VectorStoreModel, { VectorStoreScopeType, type VectorStoreDocument } from '~/features/openai/server/models/vector-store';
  9. import VectorStoreFileRelationModel, {
  10. type VectorStoreFileRelation,
  11. prepareVectorStoreFileRelations,
  12. } from '~/features/openai/server/models/vector-store-file-relation';
  13. import { OpenaiServiceTypes } from '~/interfaces/ai';
  14. import type { PageDocument, PageModel } from '~/server/models/page';
  15. import { configManager } from '~/server/service/config-manager';
  16. import { createBatchStream } from '~/server/util/batch-stream';
  17. import loggerFactory from '~/utils/logger';
  18. import { getClient } from './client-delegator';
  // Module-scoped logger for this service.
  const logger = loggerFactory('growi:service:openai');

  // Upper bound on pages handled per createVectorStoreFile() call; also used as the
  // cursor batch size and batch-stream size when rebuilding the whole vector store.
  const BATCH_SIZE = 100;

  // In-process flag: set to true once the PUBLIC-scope vector store has been
  // retrieved from (or created through) the OpenAI client, so later lookups can skip that call.
  let isVectorStoreForPublicScopeExist = false;
  /**
   * Contract of the OpenAI integration service returned by `getOpenaiService()`.
   */
  export interface IOpenaiService {
    /** Resolves the vector store document for the PUBLIC scope, creating it when needed. */
    getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument>;

    /** Uploads the given pages as files and attaches them to the vector store. */
    createVectorStoreFile(pages: Array<PageDocument>): Promise<void>;

    /** Deletes the uploaded files related to the given page. */
    deleteVectorStoreFile(pageId: Types.ObjectId): Promise<void>;

    /** Re-creates vector store files for all pages in scope. */
    rebuildVectorStoreAll(): Promise<void>;

    /** Deletes and re-creates the vector store files of a single page. */
    rebuildVectorStore(page: HydratedDocument<PageDocument>): Promise<void>;
  }
  /**
   * Keeps an OpenAI vector store in sync with public pages.
   *
   * Pages are uploaded as `<pageId>.md` files, attached to the PUBLIC-scope
   * vector store, and the page-to-file mapping is persisted through
   * `VectorStoreFileRelationModel`.
   */
  class OpenaiService implements IOpenaiService {

    /**
     * OpenAI client for the currently configured service type.
     * The config is read on every access rather than cached.
     */
    private get client() {
      const openaiServiceType = configManager.getConfig('crowi', 'openai:serviceType');
      return getClient({ openaiServiceType });
    }

    /**
     * Returns the vector store document for the PUBLIC scope.
     *
     * A stored document is trusted immediately once the module-level flag says the
     * remote store was already verified in this process. Otherwise the remote store
     * is retrieved once to verify it; if that fails (or no document exists) a new
     * vector store and document are created.
     *
     * @returns the existing or newly created vector store document
     */
    public async getOrCreateVectorStoreForPublicScope(): Promise<VectorStoreDocument> {
      // NOTE(review): `scorpeType` looks like a misspelling of `scopeType`, but it has to match
      // the field name declared by VectorStoreModel -- confirm against the model before renaming.
      const vectorStoreDocument = await VectorStoreModel.findOne({ scorpeType: VectorStoreScopeType.PUBLIC });

      if (vectorStoreDocument != null && isVectorStoreForPublicScopeExist) {
        return vectorStoreDocument;
      }

      if (vectorStoreDocument != null && !isVectorStoreForPublicScopeExist) {
        try {
          // Verify that the remote vector store referenced by the document is retrievable
          const vectorStore = await this.client.retrieveVectorStore(vectorStoreDocument.vectorStoreId);
          if (vectorStore != null) {
            isVectorStoreForPublicScopeExist = true;
            return vectorStoreDocument;
          }
        }
        catch (err) {
          // Fall through and create a fresh vector store
          logger.error(err);
        }
      }

      const newVectorStore = await this.client.createVectorStore(VectorStoreScopeType.PUBLIC);
      const newVectorStoreDocument = await VectorStoreModel.create({
        vectorStoreId: newVectorStore.id,
        scorpeType: VectorStoreScopeType.PUBLIC,
      });

      isVectorStoreForPublicScopeExist = true;

      return newVectorStoreDocument;
    }

    /**
     * Uploads a page body as a markdown file named `<pageId>.md`.
     *
     * @param pageId id of the page, used as the file name
     * @param body markdown body to upload
     * @returns the file object returned by the client
     */
    private async uploadFile(pageId: Types.ObjectId, body: string): Promise<OpenAI.Files.FileObject> {
      const file = await toFile(Readable.from(body), `${pageId}.md`);
      const uploadedFile = await this.client.uploadFile(file);
      return uploadedFile;
    }

    /**
     * Uploads the bodies of the given public pages and registers the uploaded files
     * in the PUBLIC-scope vector store.
     *
     * Pages that are not public, have no revision, or have an empty body are skipped.
     * Individual upload failures are logged and do not abort the batch. If attaching
     * the files (or saving the relations) fails, the uploaded files are deleted again.
     *
     * @param pages pages to process; at most BATCH_SIZE entries
     * @throws AssertionError when more than BATCH_SIZE pages are passed
     */
    async createVectorStoreFile(pages: Array<PageDocument>): Promise<void> {
      // Validate the batch size BEFORE any upload is started, so an oversized batch
      // neither uploads files nor leaves dangling worker promises behind.
      assert(pages.length <= BATCH_SIZE, 'workers.length must be less than or equal to BATCH_SIZE');

      const vectorStoreFileRelationsMap: Map<string, VectorStoreFileRelation> = new Map();

      const processUploadFile = async(page: PageDocument) => {
        if (page._id != null && page.grant === PageGrant.GRANT_PUBLIC && page.revision != null) {
          if (isPopulated(page.revision) && page.revision.body.length > 0) {
            const uploadedFile = await this.uploadFile(page._id, page.revision.body);
            prepareVectorStoreFileRelations(page._id, uploadedFile.id, vectorStoreFileRelationsMap);
            return;
          }

          // Revision is not populated (or its body is empty): populate it and retry
          const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
          if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
            const uploadedFile = await this.uploadFile(page._id, pagePopulatedToShowRevision.revision.body);
            prepareVectorStoreFileRelations(page._id, uploadedFile.id, vectorStoreFileRelationsMap);
          }
        }
      };

      // Start all uploads concurrently and wait for every one of them to settle
      const fileUploadResult = await Promise.allSettled(pages.map(processUploadFile));
      fileUploadResult.forEach((result) => {
        if (result.status === 'rejected') {
          logger.error(result.reason);
        }
      });

      const vectorStoreFileRelations = Array.from(vectorStoreFileRelationsMap.values());
      const uploadedFileIds = vectorStoreFileRelations.flatMap(data => data.fileIds);

      if (uploadedFileIds.length === 0) {
        return;
      }

      try {
        // Create vector store file
        const vectorStore = await this.getOrCreateVectorStoreForPublicScope();
        const createVectorStoreFileBatchResponse = await this.client.createVectorStoreFileBatch(vectorStore.vectorStoreId, uploadedFileIds);
        logger.debug('Create vector store file', createVectorStoreFileBatchResponse);

        // Save vector store file relation
        await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(vectorStoreFileRelations);
      }
      catch (err) {
        logger.error(err);

        // Delete all uploaded files if createVectorStoreFileBatch fails.
        // The deletions are awaited so that the cleanup has finished when this method
        // resolves and a failing deletion is logged instead of becoming an unhandled rejection.
        const deleteResults = await Promise.allSettled(uploadedFileIds.map(async(fileId) => {
          const deleteFileResponse = await this.client.deleteFile(fileId);
          logger.debug('Delete vector store file (Due to createVectorStoreFileBatch failure)', deleteFileResponse);
        }));
        deleteResults.forEach((result) => {
          if (result.status === 'rejected') {
            logger.error(result.reason);
          }
        });
      }
    }

    /**
     * Deletes every uploaded file related to the page and then the relation itself.
     *
     * Files are deleted one at a time. When some deletions fail, the relation is kept
     * and narrowed down to the file ids that could not be deleted.
     *
     * @param pageId id of the page whose files should be deleted
     */
    async deleteVectorStoreFile(pageId: Types.ObjectId): Promise<void> {
      // Delete vector store file and delete vector store file relation
      const vectorStoreFileRelation = await VectorStoreFileRelationModel.findOne({ pageId });
      if (vectorStoreFileRelation == null) {
        return;
      }

      const deletedFileIds = new Set<string>();
      for (const fileId of vectorStoreFileRelation.fileIds) {
        try {
          // eslint-disable-next-line no-await-in-loop
          const deleteFileResponse = await this.client.deleteFile(fileId);
          logger.debug('Delete vector store file', deleteFileResponse);
          deletedFileIds.add(fileId);
        }
        catch (err) {
          logger.error(err);
        }
      }

      const undeletedFileIds = vectorStoreFileRelation.fileIds.filter(fileId => !deletedFileIds.has(fileId));

      if (undeletedFileIds.length === 0) {
        await vectorStoreFileRelation.remove();
        return;
      }

      // Remember only the files that still have to be deleted
      vectorStoreFileRelation.fileIds = undeletedFileIds;
      await vectorStoreFileRelation.save();
    }

    /**
     * Streams all public pages in batches of BATCH_SIZE through createVectorStoreFile().
     *
     * The returned promise resolves as soon as the pipeline is wired up; it does not
     * wait for the stream to finish.
     */
    async rebuildVectorStoreAll(): Promise<void> {
      // TODO: https://redmine.weseek.co.jp/issues/154364

      // Create all public pages VectorStoreFile
      const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');
      const pagesStream = Page.find({ grant: PageGrant.GRANT_PUBLIC }).populate('revision').cursor({ batch_size: BATCH_SIZE });
      const batchStream = createBatchStream(BATCH_SIZE);

      const createVectorStoreFile = this.createVectorStoreFile.bind(this);
      const createVectorStoreFileStream = new Transform({
        objectMode: true,
        async transform(chunk: PageDocument[], encoding, callback) {
          try {
            await createVectorStoreFile(chunk);
          }
          catch (err) {
            // Log and go on with the next batch; without this the callback would never
            // be invoked and the whole pipeline would hang on the failed batch.
            logger.error(err);
          }

          // Nothing is pushed downstream: this stream has no consumer, so pushed chunks
          // would pile up in the readable buffer until the pipeline stalls.
          callback();
        },
      });

      pagesStream
        .pipe(batchStream)
        .pipe(createVectorStoreFileStream);
    }

    /**
     * Rebuilds the vector store files of a single page by deleting its existing
     * files and uploading the page again.
     *
     * @param page page to rebuild
     */
    async rebuildVectorStore(page: HydratedDocument<PageDocument>): Promise<void> {
      await this.deleteVectorStoreFile(page._id);
      await this.createVectorStoreFile([page]);
    }

  }
  // Lazily created singleton; stays unassigned until the service is first enabled.
  let instance: OpenaiService;

  /**
   * Returns the shared OpenaiService instance, creating it on first use.
   *
   * @returns the service, or `undefined` when no instance exists yet and either
   * `app:aiEnabled` is falsy or `openai:serviceType` is missing / not listed in
   * `OpenaiServiceTypes`
   */
  export const getOpenaiService = (): IOpenaiService | undefined => {
    if (instance == null) {
      const isAiEnabled = configManager.getConfig('crowi', 'app:aiEnabled');
      const serviceType = configManager.getConfig('crowi', 'openai:serviceType');

      const canInstantiate = isAiEnabled && serviceType != null && OpenaiServiceTypes.includes(serviceType);
      if (!canInstantiate) {
        return undefined;
      }

      instance = new OpenaiService();
    }

    return instance;
  };