openai.ts 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. import { Readable, Transform } from 'stream';
  2. import { PageGrant, isPopulated } from '@growi/core';
  3. import type { HydratedDocument, Types } from 'mongoose';
  4. import mongoose from 'mongoose';
  5. import type OpenAI from 'openai';
  6. import { toFile } from 'openai';
  7. import VectorStoreFileRelationModel, {
  8. type VectorStoreFileRelation,
  9. prepareVectorStoreFileRelations,
  10. } from '~/features/openai/server/models/vector-store-file-relation';
  11. import { OpenaiServiceTypes } from '~/interfaces/ai';
  12. import type { PageDocument, PageModel } from '~/server/models/page';
  13. import { configManager } from '~/server/service/config-manager';
  14. import { createBatchStream } from '~/server/util/batch-stream';
  15. import loggerFactory from '~/utils/logger';
  16. import { getClient } from './client-delegator';
  // Module-scoped logger registered under the 'growi:service:openai' name.
  const logger = loggerFactory('growi:service:openai');

  // Batch size shared by the Mongo cursor and the batching stream
  // when all public pages are re-indexed.
  const BATCH_SIZE = 100;
  /**
   * Public surface of the OpenAI integration service handed out by
   * `getOpenaiService()`.
   */
  export interface IOpenaiService {
    /** Uploads the given pages and registers the uploaded files in the vector store. */
    createVectorStoreFile(pages: Array<PageDocument>): Promise<void>;

    /** Replaces the vector store file(s) of a single page. */
    rebuildVectorStore(page: PageDocument): Promise<void>;

    /** Starts (re)creating vector store files for all public pages. */
    rebuildVectorStoreAll(): Promise<void>;
  }
  /**
   * Keeps an OpenAI vector store in sync with public pages.
   *
   * Page bodies are uploaded as `<pageId>.md` files, attached to the vector store
   * in one batch, and the page-to-file mapping is persisted through
   * `VectorStoreFileRelationModel`.
   *
   * Error handling is best-effort throughout: failures are logged rather than
   * propagated to the caller.
   */
  class OpenaiService implements IOpenaiService {

    /**
     * Client for the service type currently stored in config.
     * Resolved on every access, so the config value is re-read each time.
     */
    private get client() {
      const openaiServiceType = configManager.getConfig('crowi', 'app:openaiServiceType');
      return getClient({ openaiServiceType });
    }

    /**
     * Uploads a page body as a Markdown file named after the page id.
     *
     * @param pageId id of the page; used as the file name (`<pageId>.md`)
     * @param body revision body to upload
     * @returns the file object returned by the client
     */
    private async uploadFile(pageId: Types.ObjectId, body: string): Promise<OpenAI.Files.FileObject> {
      const file = await toFile(Readable.from(body), `${pageId}.md`);
      return this.client.uploadFile(file);
    }

    /**
     * Uploads the bodies of the given pages and registers the uploaded files
     * in the vector store.
     *
     * Only pages that have an id, are granted publicly, and have a non-empty
     * revision body are uploaded. Upload failures of individual pages are logged
     * and do not prevent the remaining pages from being processed.
     *
     * @param pages pages to process; revisions that are not populated yet are
     *   populated on demand
     */
    async createVectorStoreFile(pages: Array<PageDocument>): Promise<void> {
      // Filled by prepareVectorStoreFileRelations() as uploads complete.
      const preparedVectorStoreFileRelations: VectorStoreFileRelation[] = [];

      const processUploadFile = async(page: PageDocument) => {
        if (page._id == null || page.grant !== PageGrant.GRANT_PUBLIC || page.revision == null) {
          return;
        }

        // Fast path: the revision is already populated and has content.
        if (isPopulated(page.revision) && page.revision.body.length > 0) {
          const uploadedFile = await this.uploadFile(page._id, page.revision.body);
          prepareVectorStoreFileRelations(page._id, uploadedFile.id, preparedVectorStoreFileRelations);
          return;
        }

        // Otherwise populate the revision first and upload only a non-empty body.
        const pagePopulatedToShowRevision = await page.populateDataToShowRevision();
        if (pagePopulatedToShowRevision.revision != null && pagePopulatedToShowRevision.revision.body.length > 0) {
          const uploadedFile = await this.uploadFile(page._id, pagePopulatedToShowRevision.revision.body);
          prepareVectorStoreFileRelations(page._id, uploadedFile.id, preparedVectorStoreFileRelations);
        }
      };

      // Upload all pages concurrently and wait for every upload to settle.
      const fileUploadResult = await Promise.allSettled(pages.map(processUploadFile));
      fileUploadResult.forEach((result) => {
        if (result.status === 'rejected') {
          logger.error(result.reason);
        }
      });

      const uploadedFileIds = preparedVectorStoreFileRelations.flatMap(data => data.fileIds);

      // Nothing was uploaded (no eligible page, or every upload failed):
      // there is nothing to attach to the vector store or to persist.
      if (uploadedFileIds.length === 0) {
        return;
      }

      try {
        // Create vector store file
        const createVectorStoreFileBatchResponse = await this.client.createVectorStoreFileBatch(uploadedFileIds);
        logger.debug('Create vector store file', createVectorStoreFileBatchResponse);

        // Save vector store file relation
        await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(preparedVectorStoreFileRelations);
      }
      catch (err) {
        logger.error(err);
      }
    }

    /**
     * Deletes the files recorded for the page and updates the stored relation.
     *
     * Files are deleted one by one. If every deletion succeeds the relation
     * document is removed; otherwise it is saved with only the file ids that
     * could not be deleted, so they remain recorded.
     *
     * @param page page whose files should be deleted
     */
    private async deleteVectorStoreFile(page: PageDocument): Promise<void> {
      const vectorStoreFileRelation = await VectorStoreFileRelationModel.findOne({ pageId: page._id });
      if (vectorStoreFileRelation == null) {
        return;
      }

      const deletedFileIds: string[] = [];
      for (const fileId of vectorStoreFileRelation.fileIds) {
        try {
          // eslint-disable-next-line no-await-in-loop
          const deleteFileResponse = await this.client.deleteFile(fileId);
          logger.debug('Delete vector store file', deleteFileResponse);
          deletedFileIds.push(fileId);
        }
        catch (err) {
          // Keep going: the failed id stays in the relation below.
          logger.error(err);
        }
      }

      const undeletedFileIds = vectorStoreFileRelation.fileIds.filter(fileId => !deletedFileIds.includes(fileId));

      if (undeletedFileIds.length === 0) {
        await vectorStoreFileRelation.remove();
        return;
      }

      vectorStoreFileRelation.fileIds = undeletedFileIds;
      await vectorStoreFileRelation.save();
    }

    /**
     * Starts creating vector store files for all public pages.
     *
     * Pages are streamed from MongoDB, grouped into batches of `BATCH_SIZE`,
     * and each batch is passed to `createVectorStoreFile()`.
     *
     * NOTE: the returned promise resolves once the stream pipeline is wired up,
     * not when the rebuild has finished.
     */
    async rebuildVectorStoreAll() {
      // TODO: https://redmine.weseek.co.jp/issues/154364

      // Create all public pages VectorStoreFile
      const Page = mongoose.model<HydratedDocument<PageDocument>, PageModel>('Page');
      const pagesStream = Page.find({ grant: PageGrant.GRANT_PUBLIC }).populate('revision').cursor({ batch_size: BATCH_SIZE });
      const batchStream = createBatchStream(BATCH_SIZE);

      const createVectorStoreFile = this.createVectorStoreFile.bind(this);
      const createVectorStoreFileStream = new Transform({
        objectMode: true,
        async transform(chunk: PageDocument[], _encoding, callback) {
          try {
            await createVectorStoreFile(chunk);
          }
          catch (err) {
            // Best-effort: a failing batch must not stop the remaining batches.
            logger.error(err);
          }
          // This is the last stage and nothing reads its output, so the batch is
          // deliberately not pushed downstream: an unread readable buffer would
          // fill up and the stream would stop accepting further batches.
          callback();
        },
      });

      // pipe() does not forward errors and an unhandled 'error' event is thrown,
      // so every stage gets its own listener.
      const logStreamError = (err: Error) => {
        logger.error(err);
      };

      pagesStream
        .on('error', logStreamError)
        .pipe(batchStream)
        .on('error', logStreamError)
        .pipe(createVectorStoreFileStream)
        .on('error', logStreamError);
    }

    /**
     * Replaces the vector store file(s) of a single page: the existing files
     * are deleted first, then the current page is uploaded again.
     *
     * @param page page to re-index
     */
    async rebuildVectorStore(page: PageDocument) {
      await this.deleteVectorStoreFile(page);
      await this.createVectorStoreFile([page]);
    }

  }
  // Lazily created singleton; assigned on the first successful getOpenaiService() call.
  let instance: OpenaiService;

  /**
   * Returns the shared OpenaiService, creating it on first use.
   *
   * An already created instance is returned without consulting the config again.
   *
   * @returns the service, or `undefined` when no instance exists yet and either
   *   'app:aiEnabled' is falsy or 'app:openaiServiceType' is missing or not one
   *   of `OpenaiServiceTypes`
   */
  export const getOpenaiService = (): IOpenaiService | undefined => {
    if (instance == null) {
      const isAiEnabled = configManager.getConfig('crowi', 'app:aiEnabled');
      const serviceType = configManager.getConfig('crowi', 'app:openaiServiceType');

      const canCreate = isAiEnabled && serviceType != null && OpenaiServiceTypes.includes(serviceType);
      if (!canCreate) {
        return undefined;
      }

      instance = new OpenaiService();
    }

    return instance;
  };