///
import assert from 'node:assert';
import fs from 'node:fs';
import { Readable, Transform, Writable } from 'node:stream';
import { pipeline } from 'node:stream/promises';
import type { IPage, IUser, Lang, Nullable, Ref } from '@growi/core';
import {
getIdForRef,
getIdStringForRef,
type IUserHasId,
isPopulated,
PageGrant,
} from '@growi/core';
import { deepEquals } from '@growi/core/dist/utils';
import { isGlobPatternPath } from '@growi/core/dist/utils/page-path-utils';
import escapeStringRegexp from 'escape-string-regexp';
import createError from 'http-errors';
import mongoose, { type HydratedDocument, type Types } from 'mongoose';
import type { OpenAI } from 'openai';
import { toFile } from 'openai';
import type { ChatCompletionChunk } from 'openai/resources/chat/completions';
import ExternalUserGroupRelation from '~/features/external-user-group/server/models/external-user-group-relation';
import ThreadRelationModel, {
type ThreadRelationDocument,
} from '~/features/openai/server/models/thread-relation';
import VectorStoreModel, {
type VectorStoreDocument,
} from '~/features/openai/server/models/vector-store';
import VectorStoreFileRelationModel, {
prepareVectorStoreFileRelations,
type VectorStoreFileRelation,
} from '~/features/openai/server/models/vector-store-file-relation';
import type Crowi from '~/server/crowi';
import type {
IAttachmentDocument,
IAttachmentModel,
} from '~/server/models/attachment';
import type { PageDocument, PageModel } from '~/server/models/page';
import UserGroupRelation from '~/server/models/user-group-relation';
import { configManager } from '~/server/service/config-manager';
import { createBatchStream } from '~/server/util/batch-stream';
import loggerFactory from '~/utils/logger';
import { OpenaiServiceTypes } from '../../interfaces/ai';
import type { UpsertAiAssistantData } from '../../interfaces/ai-assistant';
import {
type AccessibleAiAssistants,
type AiAssistant,
AiAssistantAccessScope,
AiAssistantShareScope,
} from '../../interfaces/ai-assistant';
import type { MessageListParams } from '../../interfaces/message';
import { ThreadType } from '../../interfaces/thread-relation';
import type { IVectorStore } from '../../interfaces/vector-store';
import { removeGlobPath } from '../../utils/remove-glob-path';
import AiAssistantModel, {
type AiAssistantDocument,
} from '../models/ai-assistant';
import { convertMarkdownToHtml } from '../utils/convert-markdown-to-html';
import { generateGlobPatterns } from '../utils/generate-glob-patterns';
import { isVectorStoreCompatible } from '../utils/is-vector-store-compatible';
import { getClient, isStreamResponse } from './client-delegator';
import { openaiApiErrorHandler } from './openai-api-error-handler';
import { replaceAnnotationWithPageLink } from './replace-annotation-with-page-link';
const { isDeepEquals } = deepEquals;
const BATCH_SIZE = 100;
const logger = loggerFactory('growi:service:openai');
type VectorStoreFileRelationsMap = Map;
const convertPathPatternsToRegExp = (
pagePathPatterns: string[],
): Array => {
return pagePathPatterns.map((pagePathPattern) => {
if (isGlobPatternPath(pagePathPattern)) {
const trimedPagePathPattern = pagePathPattern.replace('/*', '');
const escapedPagePathPattern = escapeStringRegexp(trimedPagePathPattern);
// https://regex101.com/r/x5KIZL/1
return new RegExp(`^${escapedPagePathPattern}($|/)`);
}
return pagePathPattern;
});
};
export interface IOpenaiService {
generateAndProcessPreMessage(
message: string,
chunkProcessor: (chunk: ChatCompletionChunk) => void,
): Promise;
createThread(
userId: string,
type: ThreadType,
aiAssistantId?: string,
initialUserMessage?: string,
): Promise;
getThreadsByAiAssistantId(
aiAssistantId: string,
userId?: string,
): Promise;
deleteThread(threadRelationId: string): Promise;
deleteExpiredThreads(limit: number, apiCallInterval: number): Promise; // for CronJob
deleteObsoletedVectorStoreRelations(): Promise; // for CronJob
deleteVectorStore(vectorStoreRelationId: string): Promise;
getMessageData(
threadId: string,
lang?: Lang,
options?: MessageListParams,
): Promise;
createVectorStoreFileOnPageCreate(pages: PageDocument[]): Promise;
updateVectorStoreFileOnPageUpdate(
page: HydratedDocument,
): Promise;
deleteVectorStoreFilesByPageIds(pageIds: Types.ObjectId[]): Promise;
deleteObsoleteVectorStoreFile(
limit: number,
apiCallInterval: number,
): Promise; // for CronJob
isAiAssistantUsable(
aiAssistantId: string,
user: IUserHasId,
): Promise;
createAiAssistant(
data: UpsertAiAssistantData,
user: IUserHasId,
): Promise;
updateAiAssistant(
aiAssistantId: string,
data: UpsertAiAssistantData,
user: IUserHasId,
): Promise;
getAccessibleAiAssistants(user: IUserHasId): Promise;
isLearnablePageLimitExceeded(
user: IUserHasId,
pagePathPatterns: string[],
): Promise;
}
class OpenaiService implements IOpenaiService {
private crowi: Crowi;
constructor(crowi: Crowi) {
this.crowi = crowi;
this.createVectorStoreFileOnUploadAttachment =
this.createVectorStoreFileOnUploadAttachment.bind(this);
crowi.attachmentService.addAttachHandler(
this.createVectorStoreFileOnUploadAttachment,
);
this.deleteVectorStoreFileOnDeleteAttachment =
this.deleteVectorStoreFileOnDeleteAttachment.bind(this);
crowi.attachmentService.addDetachHandler(
this.deleteVectorStoreFileOnDeleteAttachment,
);
}
private get client() {
const openaiServiceType = configManager.getConfig('openai:serviceType');
return getClient({ openaiServiceType });
}
async generateAndProcessPreMessage(
message: string,
chunkProcessor: (delta: ChatCompletionChunk) => void,
): Promise {
const systemMessage = [
"Generate a message briefly confirming the user's question.",
'Please generate up to 20 characters',
].join('');
const preMessageCompletion = await this.client.chatCompletion({
stream: true,
model: 'gpt-4.1-nano',
messages: [
{
role: 'system',
content: systemMessage,
},
{
role: 'user',
content: message,
},
],
});
if (!isStreamResponse(preMessageCompletion)) {
return;
}
for await (const chunk of preMessageCompletion) {
chunkProcessor(chunk);
}
}
private async generateThreadTitle(
message: string,
): Promise> {
const systemMessage = [
'Create a brief title (max 5 words) from your message.',
'Respond in the same language the user uses in their input.',
'Response should only contain the title.',
].join('');
const threadTitleCompletion = await this.client.chatCompletion({
model: 'gpt-4.1-nano',
messages: [
{
role: 'system',
content: systemMessage,
},
{
role: 'user',
content: message,
},
],
});
if (!isStreamResponse(threadTitleCompletion)) {
const threadTitle = threadTitleCompletion.choices[0].message.content;
return threadTitle;
}
}
async createThread(
userId: string,
type: ThreadType,
aiAssistantId?: string,
initialUserMessage?: string,
): Promise {
try {
const aiAssistant =
aiAssistantId != null
? await AiAssistantModel.findOne({
_id: { $eq: aiAssistantId },
}).populate<{ vectorStore: IVectorStore }>('vectorStore')
: null;
const thread = await this.client.createThread(
aiAssistant?.vectorStore?.vectorStoreId,
);
const threadRelation = await ThreadRelationModel.create({
userId,
type,
aiAssistant: aiAssistantId,
threadId: thread.id,
title: null, // Initialize title as null
});
if (initialUserMessage != null) {
// Do not await, run in background
this.generateThreadTitle(initialUserMessage)
.then(async (generatedTitle) => {
if (generatedTitle != null) {
threadRelation.title = generatedTitle;
await threadRelation.save();
}
})
.catch((err) => {
logger.error(
`Failed to generate thread title for threadId ${thread.id}:`,
err,
);
});
}
return threadRelation;
} catch (err) {
throw err;
}
}
private async updateThreads(
aiAssistantId: string,
vectorStoreId: string,
): Promise {
const threadRelations = await this.getThreadsByAiAssistantId(aiAssistantId);
for await (const threadRelation of threadRelations) {
try {
const updatedThreadResponse = await this.client.updateThread(
threadRelation.threadId,
vectorStoreId,
);
logger.debug('Update thread', updatedThreadResponse);
} catch (err) {
logger.error(err);
}
}
}
async getThreadsByAiAssistantId(
aiAssistantId: string,
userId?: string,
type: ThreadType = ThreadType.KNOWLEDGE,
): Promise {
const query: { aiAssistant: string; type: ThreadType; userId?: string } = {
aiAssistant: aiAssistantId,
type,
};
if (userId != null) {
query.userId = userId;
}
const threadRelations = await ThreadRelationModel.find(query).sort({
updatedAt: -1,
});
return threadRelations;
}
async deleteThread(
threadRelationId: string,
): Promise {
const threadRelation = await ThreadRelationModel.findById(threadRelationId);
if (threadRelation == null) {
throw createError(404, 'ThreadRelation document does not exist');
}
try {
const deletedThreadResponse = await this.client.deleteThread(
threadRelation.threadId,
);
logger.debug('Delete thread', deletedThreadResponse);
await threadRelation.remove();
} catch (err) {
await openaiApiErrorHandler(err, {
notFoundError: async () => {
await threadRelation.remove();
},
});
throw err;
}
return threadRelation;
}
public async deleteExpiredThreads(
limit: number,
apiCallInterval: number,
): Promise {
const expiredThreadRelations =
await ThreadRelationModel.getExpiredThreadRelations(limit);
if (expiredThreadRelations == null) {
return;
}
const deletedThreadIds: string[] = [];
for await (const expiredThreadRelation of expiredThreadRelations) {
try {
const deleteThreadResponse = await this.client.deleteThread(
expiredThreadRelation.threadId,
);
logger.debug('Delete thread', deleteThreadResponse);
deletedThreadIds.push(expiredThreadRelation.threadId);
// sleep
await new Promise((resolve) => setTimeout(resolve, apiCallInterval));
} catch (err) {
logger.error(err);
}
}
await ThreadRelationModel.deleteMany({
threadId: { $in: deletedThreadIds },
});
}
async getMessageData(
threadId: string,
lang?: Lang,
options?: MessageListParams,
): Promise {
const messages = await this.client.getMessages(threadId, options);
for await (const message of messages.data) {
for await (const content of message.content) {
if (content.type === 'text') {
await replaceAnnotationWithPageLink(content, lang);
}
}
}
return messages;
}
private async getVectorStoreRelationsByPageIds(
pageIds: Types.ObjectId[],
): Promise {
const pipeline = [
// Stage 1: Match documents with the given pageId
{
$match: {
page: {
$in: pageIds,
},
},
},
// Stage 2: Lookup VectorStore documents
{
$lookup: {
from: 'vectorstores',
localField: 'vectorStoreRelationId',
foreignField: '_id',
as: 'vectorStore',
},
},
// Stage 3: Unwind the vectorStore array
{
$unwind: '$vectorStore',
},
// Stage 4: Match non-deleted vector stores
{
$match: {
'vectorStore.isDeleted': false,
},
},
// Stage 5: Replace the root with vectorStore document
{
$replaceRoot: {
newRoot: '$vectorStore',
},
},
// Stage 6: Group by _id to remove duplicates
{
$group: {
_id: '$_id',
doc: { $first: '$$ROOT' },
},
},
// Stage 7: Restore the document structure
{
$replaceRoot: {
newRoot: '$doc',
},
},
];
const vectorStoreRelations =
await VectorStoreFileRelationModel.aggregate(
pipeline,
);
return vectorStoreRelations;
}
private async createVectorStore(name: string): Promise {
try {
const newVectorStore = await this.client.createVectorStore(name);
const newVectorStoreDocument = (await VectorStoreModel.create({
vectorStoreId: newVectorStore.id,
})) as VectorStoreDocument;
return newVectorStoreDocument;
} catch (err) {
throw new Error(err);
}
}
private async uploadFile(
revisionBody: string,
page: HydratedDocument,
): Promise {
const siteUrl = configManager.getConfig('app:siteUrl');
const convertedHtml = await convertMarkdownToHtml(revisionBody, {
page,
siteUrl,
});
const file = await toFile(Readable.from(convertedHtml), `${page._id}.html`);
const uploadedFile = await this.client.uploadFile(file);
return uploadedFile;
}
private async uploadFileForAttachment(
fileName: string,
readStream?: NodeJS.ReadableStream,
filePath?: string,
): Promise {
const streamSource: NodeJS.ReadableStream = (() => {
if (readStream != null) {
return readStream;
}
if (filePath != null) {
return fs.createReadStream(filePath);
}
throw new Error('readStream and filePath are both null');
})();
const uploadableFile = await toFile(streamSource, fileName);
const uploadedFile = await this.client.uploadFile(uploadableFile);
return uploadedFile;
}
async deleteVectorStore(vectorStoreRelationId: string): Promise {
const vectorStoreDocument: VectorStoreDocument | null =
await VectorStoreModel.findOne({
_id: vectorStoreRelationId,
isDeleted: false,
});
if (vectorStoreDocument == null) {
return;
}
try {
const deleteVectorStoreResponse = await this.client.deleteVectorStore(
vectorStoreDocument.vectorStoreId,
);
logger.debug('Delete vector store', deleteVectorStoreResponse);
await vectorStoreDocument.markAsDeleted();
} catch (err) {
await openaiApiErrorHandler(err, {
notFoundError: vectorStoreDocument.markAsDeleted,
});
throw new Error(err);
}
}
private async createVectorStoreFileWithStreamForAttachment(
pageId: Types.ObjectId,
vectorStoreRelationId: Types.ObjectId,
vectorStoreFileRelationsMap: VectorStoreFileRelationsMap,
): Promise {
const Attachment = mongoose.model<
HydratedDocument,
IAttachmentModel
>('Attachment');
const attachmentsCursor = Attachment.find({ page: pageId }).cursor();
const batchStream = createBatchStream(BATCH_SIZE);
const uploadFileStreamForAttachment = new Writable({
objectMode: true,
write: async (
attachments: HydratedDocument[],
_encoding,
callback,
) => {
for await (const attachment of attachments) {
try {
if (
!isVectorStoreCompatible(
attachment.originalName,
attachment.fileFormat,
)
) {
continue;
}
const readStream =
await this.crowi.fileUploadService.findDeliveryFile(attachment);
const uploadedFileForAttachment =
await this.uploadFileForAttachment(
attachment.originalName,
readStream,
);
prepareVectorStoreFileRelations(
vectorStoreRelationId,
pageId,
uploadedFileForAttachment.id,
vectorStoreFileRelationsMap,
attachment._id,
);
} catch (err) {
logger.error(err);
}
}
callback();
},
final: (callback) => {
logger.debug('Finished uploading attachments');
callback();
},
});
await pipeline(
attachmentsCursor,
batchStream,
uploadFileStreamForAttachment,
);
}
private async createVectorStoreFile(
vectorStoreRelation: VectorStoreDocument,
pages: Array>,
ignoreAttachments = false,
): Promise {
const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap = new Map();
const processUploadFile = async (page: HydratedDocument) => {
if (page._id != null && page.revision != null) {
if (isPopulated(page.revision) && page.revision.body.length > 0) {
const uploadedFile = await this.uploadFile(page.revision.body, page);
prepareVectorStoreFileRelations(
vectorStoreRelation._id,
page._id,
uploadedFile.id,
vectorStoreFileRelationsMap,
);
if (!ignoreAttachments) {
await this.createVectorStoreFileWithStreamForAttachment(
page._id,
vectorStoreRelation._id,
vectorStoreFileRelationsMap,
);
}
return;
}
const pagePopulatedToShowRevision =
await page.populateDataToShowRevision();
if (
pagePopulatedToShowRevision.revision != null &&
pagePopulatedToShowRevision.revision.body.length > 0
) {
const uploadedFile = await this.uploadFile(
pagePopulatedToShowRevision.revision.body,
page,
);
prepareVectorStoreFileRelations(
vectorStoreRelation._id,
page._id,
uploadedFile.id,
vectorStoreFileRelationsMap,
);
if (!ignoreAttachments) {
await this.createVectorStoreFileWithStreamForAttachment(
page._id,
vectorStoreRelation._id,
vectorStoreFileRelationsMap,
);
}
}
}
};
// Start workers to process results
const workers = pages.map(processUploadFile);
// Wait for all processing to complete.
assert(
workers.length <= BATCH_SIZE,
'workers.length must be less than or equal to BATCH_SIZE',
);
const fileUploadResult = await Promise.allSettled(workers);
fileUploadResult.forEach((result) => {
if (result.status === 'rejected') {
logger.error(result.reason);
}
});
const vectorStoreFileRelations = Array.from(
vectorStoreFileRelationsMap.values(),
);
const uploadedFileIds = vectorStoreFileRelations.flatMap(
(data) => data.fileIds,
);
if (uploadedFileIds.length === 0) {
return;
}
const pageIds = pages.map((page) => page._id);
try {
// Save vector store file relation
await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(
vectorStoreFileRelations,
);
// Create vector store file
const createVectorStoreFileBatchResponse =
await this.client.createVectorStoreFileBatch(
vectorStoreRelation.vectorStoreId,
uploadedFileIds,
);
logger.debug(
'Create vector store file',
createVectorStoreFileBatchResponse,
);
// Set isAttachedToVectorStore: true when the uploaded file is attached to VectorStore
await VectorStoreFileRelationModel.markAsAttachedToVectorStore(pageIds);
} catch (err) {
logger.error(err);
// Delete all uploaded files if createVectorStoreFileBatch fails
for await (const pageId of pageIds) {
await this.deleteVectorStoreFile(vectorStoreRelation._id, pageId);
}
}
}
// Deletes all VectorStore documents that are marked as deleted (isDeleted: true) and have no associated VectorStoreFileRelation documents
async deleteObsoletedVectorStoreRelations(): Promise {
const deletedVectorStoreRelations = await VectorStoreModel.find({
isDeleted: true,
});
if (deletedVectorStoreRelations.length === 0) {
return;
}
const currentVectorStoreRelationIds: Types.ObjectId[] =
await VectorStoreFileRelationModel.aggregate([
{
$group: {
_id: '$vectorStoreRelationId',
relationCount: { $sum: 1 },
},
},
{ $match: { relationCount: { $gt: 0 } } },
{ $project: { _id: 1 } },
]);
if (currentVectorStoreRelationIds.length === 0) {
return;
}
await VectorStoreModel.deleteMany({
_id: { $nin: currentVectorStoreRelationIds },
isDeleted: true,
});
}
private async deleteVectorStoreFileForAttachment(
vectorStoreFileRelation: VectorStoreFileRelation,
): Promise {
if (vectorStoreFileRelation.attachment == null) {
return;
}
const deleteAllAttachmentVectorStoreFileRelations = async () => {
await VectorStoreFileRelationModel.deleteMany({
attachment: vectorStoreFileRelation.attachment,
});
};
try {
// Delete entities in VectorStoreFile
const fileId = vectorStoreFileRelation.fileIds[0];
const deleteFileResponse = await this.client.deleteFile(fileId);
logger.debug(
'Delete vector store file (attachment) ',
deleteFileResponse,
);
// Delete related VectorStoreFileRelation document
const attachmentId = vectorStoreFileRelation.attachment;
if (attachmentId != null) {
await deleteAllAttachmentVectorStoreFileRelations();
}
} catch (err) {
logger.error(err);
await openaiApiErrorHandler(err, {
notFoundError: () => deleteAllAttachmentVectorStoreFileRelations(),
});
}
}
private async deleteVectorStoreFile(
vectorStoreRelationId: Types.ObjectId,
pageId: Types.ObjectId,
ignoreAttachments = false,
apiCallInterval?: number,
): Promise {
if (!ignoreAttachments) {
// Get all VectorStoreFIleDocument (attachments) associated with the page
const vectorStoreFileRelationsForAttachment =
await VectorStoreFileRelationModel.find({
vectorStoreRelationId,
page: pageId,
attachment: { $exists: true },
});
if (vectorStoreFileRelationsForAttachment.length !== 0) {
for await (const vectorStoreFileRelation of vectorStoreFileRelationsForAttachment) {
try {
await this.deleteVectorStoreFileForAttachment(
vectorStoreFileRelation,
);
} catch (err) {
logger.error(err);
}
}
}
}
// Delete vector store file and delete vector store file relation
const vectorStoreFileRelation = await VectorStoreFileRelationModel.findOne({
vectorStoreRelationId,
page: pageId,
});
if (vectorStoreFileRelation == null) {
return;
}
const deletedFileIds: string[] = [];
for await (const fileId of vectorStoreFileRelation.fileIds) {
try {
const deleteFileResponse = await this.client.deleteFile(fileId);
logger.debug('Delete vector store file', deleteFileResponse);
deletedFileIds.push(fileId);
if (apiCallInterval != null) {
// sleep
await new Promise((resolve) => setTimeout(resolve, apiCallInterval));
}
} catch (err) {
await openaiApiErrorHandler(err, {
notFoundError: async () => {
deletedFileIds.push(fileId);
},
});
logger.error(err);
}
}
const undeletedFileIds = vectorStoreFileRelation.fileIds.filter(
(fileId) => !deletedFileIds.includes(fileId),
);
if (undeletedFileIds.length === 0) {
await vectorStoreFileRelation.remove();
return;
}
vectorStoreFileRelation.fileIds = undeletedFileIds;
await vectorStoreFileRelation.save();
}
async deleteVectorStoreFilesByPageIds(
pageIds: Types.ObjectId[],
): Promise {
const vectorStoreRelations =
await this.getVectorStoreRelationsByPageIds(pageIds);
if (vectorStoreRelations != null && vectorStoreRelations.length !== 0) {
for await (const pageId of pageIds) {
const deleteVectorStoreFilePromises = vectorStoreRelations.map(
(vectorStoreRelation) =>
this.deleteVectorStoreFile(vectorStoreRelation._id, pageId),
);
await Promise.allSettled(deleteVectorStoreFilePromises);
}
}
}
async deleteObsoleteVectorStoreFile(
limit: number,
apiCallInterval: number,
): Promise {
// Retrieves all VectorStore documents that are marked as deleted
const deletedVectorStoreRelations = await VectorStoreModel.find({
isDeleted: true,
});
if (deletedVectorStoreRelations.length === 0) {
return;
}
// Retrieves VectorStoreFileRelation documents associated with deleted VectorStore documents
const obsoleteVectorStoreFileRelations =
await VectorStoreFileRelationModel.find({
vectorStoreRelationId: {
$in: deletedVectorStoreRelations.map(
(deletedVectorStoreRelation) => deletedVectorStoreRelation._id,
),
},
}).limit(limit);
if (obsoleteVectorStoreFileRelations.length === 0) {
return;
}
// Delete obsolete VectorStoreFile
for await (const vectorStoreFileRelation of obsoleteVectorStoreFileRelations) {
try {
await this.deleteVectorStoreFile(
vectorStoreFileRelation.vectorStoreRelationId,
vectorStoreFileRelation.page,
false,
apiCallInterval,
);
} catch (err) {
logger.error(err);
}
}
}
private async deleteVectorStoreFileOnDeleteAttachment(attachmentId: string) {
const vectorStoreFileRelation = await VectorStoreFileRelationModel.findOne({
attachment: attachmentId,
});
if (vectorStoreFileRelation == null) {
return;
}
try {
await this.deleteVectorStoreFileForAttachment(vectorStoreFileRelation);
} catch (err) {
logger.error(err);
}
}
private async filterPagesByAccessScope(
aiAssistant: AiAssistantDocument,
pages: HydratedDocument[],
) {
const isPublicPage = (page: HydratedDocument) =>
page.grant === PageGrant.GRANT_PUBLIC;
const isUserGroupAccessible = (
page: HydratedDocument,
ownerUserGroupIds: string[],
) => {
if (page.grant !== PageGrant.GRANT_USER_GROUP) return false;
return page.grantedGroups.some((group) =>
ownerUserGroupIds.includes(getIdStringForRef(group.item)),
);
};
const isOwnerAccessible = (
page: HydratedDocument,
ownerId: Ref,
) => {
if (page.grant !== PageGrant.GRANT_OWNER) return false;
return page.grantedUsers.some(
(user) => getIdStringForRef(user) === getIdStringForRef(ownerId),
);
};
const getOwnerUserGroupIds = async (owner: Ref) => {
const userGroups =
await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
const externalGroups =
await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(owner);
return [...userGroups, ...externalGroups].map((group) =>
getIdStringForRef(group),
);
};
switch (aiAssistant.accessScope) {
case AiAssistantAccessScope.PUBLIC_ONLY:
return pages.filter(isPublicPage);
case AiAssistantAccessScope.GROUPS: {
const ownerUserGroupIds = await getOwnerUserGroupIds(aiAssistant.owner);
return pages.filter(
(page) =>
isPublicPage(page) ||
isUserGroupAccessible(page, ownerUserGroupIds),
);
}
case AiAssistantAccessScope.OWNER: {
const ownerUserGroupIds = await getOwnerUserGroupIds(aiAssistant.owner);
return pages.filter(
(page) =>
isPublicPage(page) ||
isOwnerAccessible(page, aiAssistant.owner) ||
isUserGroupAccessible(page, ownerUserGroupIds),
);
}
default:
return [];
}
}
async createVectorStoreFileOnPageCreate(
pages: HydratedDocument[],
): Promise {
const pagePaths = pages.map((page) => page.path);
const aiAssistants = await this.findAiAssistantByPagePath(pagePaths, {
shouldPopulateOwner: true,
shouldPopulateVectorStore: true,
});
if (aiAssistants.length === 0) {
return;
}
for await (const aiAssistant of aiAssistants) {
if (!isPopulated(aiAssistant.owner)) {
continue;
}
const isLearnablePageLimitExceeded =
await this.isLearnablePageLimitExceeded(
aiAssistant.owner,
aiAssistant.pagePathPatterns,
);
if (isLearnablePageLimitExceeded) {
continue;
}
const pagesToVectorize = await this.filterPagesByAccessScope(
aiAssistant,
pages,
);
const vectorStoreRelation = aiAssistant.vectorStore;
if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
continue;
}
logger.debug('--------- createVectorStoreFileOnPageCreate ---------');
logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
logger.debug(
'VectorStoreFile pagePath to be created: ',
pagesToVectorize.map((page) => page.path),
);
logger.debug('-----------------------------------------------------');
await this.createVectorStoreFile(
vectorStoreRelation as VectorStoreDocument,
pagesToVectorize,
);
}
}
async updateVectorStoreFileOnPageUpdate(
page: HydratedDocument,
) {
const aiAssistants = await this.findAiAssistantByPagePath([page.path], {
shouldPopulateVectorStore: true,
});
if (aiAssistants.length === 0) {
return;
}
for await (const aiAssistant of aiAssistants) {
const pagesToVectorize = await this.filterPagesByAccessScope(
aiAssistant,
[page],
);
const vectorStoreRelation = aiAssistant.vectorStore;
if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
continue;
}
logger.debug('---------- updateVectorStoreOnPageUpdate ------------');
logger.debug('AccessScopeType of aiAssistant: ', aiAssistant.accessScope);
logger.debug('PagePath of VectorStoreFile to be deleted: ', page.path);
logger.debug(
'pagePath of VectorStoreFile to be created: ',
pagesToVectorize.map((page) => page.path),
);
logger.debug('-----------------------------------------------------');
// Do not create a new VectorStoreFile if page is changed to a permission that AiAssistant does not have access to
await this.deleteVectorStoreFile(
(vectorStoreRelation as VectorStoreDocument)._id,
page._id,
true, // ignoreAttachments = true
);
await this.createVectorStoreFile(
vectorStoreRelation as VectorStoreDocument,
pagesToVectorize,
true, // ignoreAttachments = true
);
}
}
private async createVectorStoreFileOnUploadAttachment(
pageId: string,
attachment: HydratedDocument,
file: Express.Multer.File,
): Promise {
if (!isVectorStoreCompatible(file.originalname, file.mimetype)) {
return;
}
const Page = mongoose.model, PageModel>(
'Page',
);
const page = await Page.findById(pageId);
if (page == null) {
return;
}
const aiAssistants = await this.findAiAssistantByPagePath([page.path], {
shouldPopulateVectorStore: true,
});
if (aiAssistants.length === 0) {
return;
}
const uploadedFile = await this.uploadFileForAttachment(
file.originalname,
undefined,
file.path,
);
logger.debug('Uploaded file', uploadedFile);
for await (const aiAssistant of aiAssistants) {
const pagesToVectorize = await this.filterPagesByAccessScope(
aiAssistant,
[page],
);
if (pagesToVectorize.length === 0) {
continue;
}
const vectorStoreRelation = aiAssistant.vectorStore;
if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
continue;
}
await this.client.createVectorStoreFile(
vectorStoreRelation.vectorStoreId,
uploadedFile.id,
);
const vectorStoreFileRelationsMap: VectorStoreFileRelationsMap =
new Map();
prepareVectorStoreFileRelations(
vectorStoreRelation._id as Types.ObjectId,
page._id,
uploadedFile.id,
vectorStoreFileRelationsMap,
attachment._id,
);
const vectorStoreFileRelations = Array.from(
vectorStoreFileRelationsMap.values(),
);
await VectorStoreFileRelationModel.upsertVectorStoreFileRelations(
vectorStoreFileRelations,
);
}
}
private async createVectorStoreFileWithStream(
vectorStoreRelation: VectorStoreDocument,
conditions: mongoose.FilterQuery,
): Promise {
const Page = mongoose.model, PageModel>(
'Page',
);
const pagesStream = Page.find({ ...conditions })
.populate('revision')
.cursor({ batchSize: BATCH_SIZE });
const batchStream = createBatchStream(BATCH_SIZE);
const createVectorStoreFile = this.createVectorStoreFile.bind(this);
const createVectorStoreFileStream = new Transform({
objectMode: true,
async transform(
chunk: HydratedDocument[],
encoding,
callback,
) {
try {
logger.debug(
'Target page path for VectorStoreFile generation: ',
chunk.map((page) => page.path),
);
await createVectorStoreFile(vectorStoreRelation, chunk);
this.push(chunk);
callback();
} catch (error) {
callback(error);
}
},
});
await pipeline(pagesStream, batchStream, createVectorStoreFileStream);
}
private async createConditionForCreateVectorStoreFile(
owner: AiAssistant['owner'],
accessScope: AiAssistant['accessScope'],
grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
pagePathPatterns: AiAssistant['pagePathPatterns'],
): Promise> {
const convertedPagePathPatterns =
convertPathPatternsToRegExp(pagePathPatterns);
// Include pages in search targets when their paths with 'Anyone with the link' permission are directly specified instead of using glob pattern
const nonGrabPagePathPatterns = pagePathPatterns.filter(
(pagePathPattern) => !isGlobPatternPath(pagePathPattern),
);
const baseCondition: mongoose.FilterQuery = {
grant: PageGrant.GRANT_RESTRICTED,
path: { $in: nonGrabPagePathPatterns },
};
if (accessScope === AiAssistantAccessScope.PUBLIC_ONLY) {
return {
$or: [
baseCondition,
{
grant: PageGrant.GRANT_PUBLIC,
path: { $in: convertedPagePathPatterns },
},
],
};
}
if (accessScope === AiAssistantAccessScope.GROUPS) {
if (
grantedGroupsForAccessScope == null ||
grantedGroupsForAccessScope.length === 0
) {
throw new Error('grantedGroups is required when accessScope is GROUPS');
}
const extractedGrantedGroupIdsForAccessScope =
grantedGroupsForAccessScope.map((group) =>
getIdForRef(group.item).toString(),
);
return {
$or: [
baseCondition,
{
grant: {
$in: [PageGrant.GRANT_PUBLIC, PageGrant.GRANT_USER_GROUP],
},
path: { $in: convertedPagePathPatterns },
$or: [
{
'grantedGroups.item': {
$in: extractedGrantedGroupIdsForAccessScope,
},
},
{ grant: PageGrant.GRANT_PUBLIC },
],
},
],
};
}
if (accessScope === AiAssistantAccessScope.OWNER) {
const ownerUserGroups = [
...(await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner)),
...(await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(
owner,
)),
].map((group) => group.toString());
return {
$or: [
baseCondition,
{
grant: {
$in: [
PageGrant.GRANT_PUBLIC,
PageGrant.GRANT_USER_GROUP,
PageGrant.GRANT_OWNER,
],
},
path: { $in: convertedPagePathPatterns },
$or: [
{ 'grantedGroups.item': { $in: ownerUserGroups } },
{ grantedUsers: { $in: [getIdForRef(owner)] } },
{ grant: PageGrant.GRANT_PUBLIC },
],
},
],
};
}
throw new Error('Invalid accessScope value');
}
private async validateGrantedUserGroupsForAiAssistant(
owner: AiAssistant['owner'],
shareScope: AiAssistant['shareScope'],
accessScope: AiAssistant['accessScope'],
grantedGroupsForShareScope: AiAssistant['grantedGroupsForShareScope'],
grantedGroupsForAccessScope: AiAssistant['grantedGroupsForAccessScope'],
) {
// Check if grantedGroupsForShareScope is not specified when shareScope is not a “group”
if (
shareScope !== AiAssistantShareScope.GROUPS &&
grantedGroupsForShareScope != null
) {
throw new Error(
'grantedGroupsForShareScope is specified when shareScope is not “groups”.',
);
}
// Check if grantedGroupsForAccessScope is not specified when accessScope is not a “group”
if (
accessScope !== AiAssistantAccessScope.GROUPS &&
grantedGroupsForAccessScope != null
) {
throw new Error(
'grantedGroupsForAccessScope is specified when accsessScope is not “groups”.',
);
}
const ownerUserGroupIds = [
...(await UserGroupRelation.findAllUserGroupIdsRelatedToUser(owner)),
...(await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(
owner,
)),
].map((group) => group.toString());
// Check if the owner belongs to the group specified in grantedGroupsForShareScope
if (
grantedGroupsForShareScope != null &&
grantedGroupsForShareScope.length > 0
) {
const extractedGrantedGroupIdsForShareScope =
grantedGroupsForShareScope.map((group) =>
getIdForRef(group.item).toString(),
);
const isValid = extractedGrantedGroupIdsForShareScope.every((groupId) =>
ownerUserGroupIds.includes(groupId),
);
if (!isValid) {
throw new Error(
'A userGroup to which the owner does not belong is specified in grantedGroupsForShareScope',
);
}
}
// Check if the owner belongs to the group specified in grantedGroupsForAccessScope
if (
grantedGroupsForAccessScope != null &&
grantedGroupsForAccessScope.length > 0
) {
const extractedGrantedGroupIdsForAccessScope =
grantedGroupsForAccessScope.map((group) =>
getIdForRef(group.item).toString(),
);
const isValid = extractedGrantedGroupIdsForAccessScope.every((groupId) =>
ownerUserGroupIds.includes(groupId),
);
if (!isValid) {
throw new Error(
'A userGroup to which the owner does not belong is specified in grantedGroupsForAccessScope',
);
}
}
}
async isAiAssistantUsable(
aiAssistantId: string,
user: IUserHasId,
): Promise {
const aiAssistant = await AiAssistantModel.findOne({
_id: { $eq: aiAssistantId },
});
if (aiAssistant == null) {
throw createError(404, 'AiAssistant document does not exist');
}
const isOwner =
getIdStringForRef(aiAssistant.owner) === getIdStringForRef(user._id);
if (aiAssistant.shareScope === AiAssistantShareScope.PUBLIC_ONLY) {
return true;
}
if (aiAssistant.shareScope === AiAssistantShareScope.OWNER && isOwner) {
return true;
}
if (
aiAssistant.shareScope === AiAssistantShareScope.SAME_AS_ACCESS_SCOPE &&
aiAssistant.accessScope === AiAssistantAccessScope.OWNER &&
isOwner
) {
return true;
}
if (
aiAssistant.shareScope === AiAssistantShareScope.GROUPS ||
(aiAssistant.shareScope === AiAssistantShareScope.SAME_AS_ACCESS_SCOPE &&
aiAssistant.accessScope === AiAssistantAccessScope.GROUPS)
) {
const userGroupIds = [
...(await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
...(await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(
user,
)),
].map((group) => group.toString());
const grantedGroupIdsForShareScope =
aiAssistant.grantedGroupsForShareScope?.map((group) =>
getIdStringForRef(group.item),
) ?? [];
const isShared = userGroupIds.some((userGroupId) =>
grantedGroupIdsForShareScope.includes(userGroupId),
);
return isShared;
}
return false;
}
async createAiAssistant(
data: UpsertAiAssistantData,
user: IUserHasId,
): Promise {
await this.validateGrantedUserGroupsForAiAssistant(
user,
data.shareScope,
data.accessScope,
data.grantedGroupsForShareScope,
data.grantedGroupsForAccessScope,
);
const conditions = await this.createConditionForCreateVectorStoreFile(
user,
data.accessScope,
data.grantedGroupsForAccessScope,
data.pagePathPatterns,
);
const vectorStoreRelation = await this.createVectorStore(data.name);
const aiAssistant = await AiAssistantModel.create({
...data,
owner: user,
vectorStore: vectorStoreRelation,
});
// VectorStore creation process does not await
this.createVectorStoreFileWithStream(vectorStoreRelation, conditions);
return aiAssistant;
}
async updateAiAssistant(
aiAssistantId: string,
data: UpsertAiAssistantData,
user: IUserHasId,
): Promise {
const aiAssistant = await AiAssistantModel.findOne({
owner: user,
_id: aiAssistantId,
});
if (aiAssistant == null) {
throw createError(404, 'AiAssistant document does not exist');
}
await this.validateGrantedUserGroupsForAiAssistant(
user,
data.shareScope,
data.accessScope,
data.grantedGroupsForShareScope,
data.grantedGroupsForAccessScope,
);
const grantedGroupIdsForAccessScopeFromReq =
data.grantedGroupsForAccessScope?.map((group) =>
getIdStringForRef(group.item),
) ?? []; // ObjectId[] -> string[]
const grantedGroupIdsForAccessScopeFromDb =
aiAssistant.grantedGroupsForAccessScope?.map((group) =>
getIdStringForRef(group.item),
) ?? []; // ObjectId[] -> string[]
// If accessScope, pagePathPatterns, grantedGroupsForAccessScope have not changed, do not build VectorStore
const shouldRebuildVectorStore =
data.accessScope !== aiAssistant.accessScope ||
!isDeepEquals(data.pagePathPatterns, aiAssistant.pagePathPatterns) ||
!isDeepEquals(
grantedGroupIdsForAccessScopeFromReq,
grantedGroupIdsForAccessScopeFromDb,
);
let newVectorStoreRelation: VectorStoreDocument | undefined;
if (shouldRebuildVectorStore) {
const conditions = await this.createConditionForCreateVectorStoreFile(
user,
data.accessScope,
data.grantedGroupsForAccessScope,
data.pagePathPatterns,
);
// Delete obsoleted VectorStore
const obsoletedVectorStoreRelationId = getIdStringForRef(
aiAssistant.vectorStore,
);
await this.deleteVectorStore(obsoletedVectorStoreRelationId);
newVectorStoreRelation = await this.createVectorStore(data.name);
this.updateThreads(aiAssistantId, newVectorStoreRelation.vectorStoreId);
// VectorStore creation process does not await
this.createVectorStoreFileWithStream(newVectorStoreRelation, conditions);
}
const newData = {
...data,
vectorStore: newVectorStoreRelation ?? aiAssistant.vectorStore,
};
aiAssistant.set({ ...newData });
let updatedAiAssistant: AiAssistantDocument = await aiAssistant.save();
if (
data.shareScope !== AiAssistantShareScope.PUBLIC_ONLY &&
aiAssistant.isDefault
) {
updatedAiAssistant = await AiAssistantModel.setDefault(
aiAssistant._id,
false,
);
}
return updatedAiAssistant;
}
async getAccessibleAiAssistants(
user: IUserHasId,
): Promise {
const userGroupIds = [
...(await UserGroupRelation.findAllUserGroupIdsRelatedToUser(user)),
...(await ExternalUserGroupRelation.findAllUserGroupIdsRelatedToUser(
user,
)),
];
const assistants = await AiAssistantModel.find({
$or: [
// Case 1: Assistants owned by the user
{ owner: user },
// Case 2: Public assistants owned by others
{
$and: [
{ owner: { $ne: user } },
{ shareScope: AiAssistantShareScope.PUBLIC_ONLY },
],
},
// Case 3: Group-restricted assistants where user is in granted groups
{
$and: [
{ owner: { $ne: user } },
{ shareScope: AiAssistantShareScope.GROUPS },
{ 'grantedGroupsForShareScope.item': { $in: userGroupIds } },
],
},
],
})
.populate('grantedGroupsForShareScope.item')
.populate('grantedGroupsForAccessScope.item');
return {
myAiAssistants:
assistants.filter(
(assistant) => assistant.owner.toString() === user._id.toString(),
) ?? [],
teamAiAssistants:
assistants.filter(
(assistant) => assistant.owner.toString() !== user._id.toString(),
) ?? [],
};
}
async isLearnablePageLimitExceeded(
user: IUserHasId,
pagePathPatterns: string[],
): Promise {
const normalizedPagePathPatterns = removeGlobPath(pagePathPatterns);
const PageModel = mongoose.model('Page');
const pagePathsWithDescendantCount = await PageModel.descendantCountByPaths(
normalizedPagePathPatterns,
user,
null,
true,
true,
);
const totalPageCount = pagePathsWithDescendantCount.reduce(
(total, pagePathWithDescendantCount) => {
const descendantCount = pagePathPatterns.includes(
pagePathWithDescendantCount.path,
)
? 0 // Treat as single page when included in "pagePathPatterns"
: pagePathWithDescendantCount.descendantCount;
const pageCount = descendantCount + 1;
return total + pageCount;
},
0,
);
logger.debug('TotalPageCount: ', totalPageCount);
const limitLearnablePageCountPerAssistant = configManager.getConfig(
'openai:limitLearnablePageCountPerAssistant',
);
return totalPageCount > limitLearnablePageCountPerAssistant;
}
private async findAiAssistantByPagePath(
pagePaths: string[],
options?: {
shouldPopulateOwner?: boolean;
shouldPopulateVectorStore?: boolean;
},
): Promise {
const pagePathsWithGlobPattern = pagePaths.flatMap((pagePath) =>
generateGlobPatterns(pagePath),
);
const query = AiAssistantModel.find({
$or: [
// Case 1: Exact match
{ pagePathPatterns: { $in: pagePaths } },
// Case 2: Glob pattern match
{ pagePathPatterns: { $in: pagePathsWithGlobPattern } },
],
});
if (options?.shouldPopulateOwner) {
query.populate('owner');
}
if (options?.shouldPopulateVectorStore) {
query.populate('vectorStore');
}
const aiAssistants = await query.exec();
return aiAssistants;
}
}
let instance: OpenaiService;
export const initializeOpenaiService = (crowi: Crowi): void => {
const aiEnabled = configManager.getConfig('app:aiEnabled');
const openaiServiceType = configManager.getConfig('openai:serviceType');
if (
aiEnabled &&
openaiServiceType != null &&
OpenaiServiceTypes.includes(openaiServiceType)
) {
instance = new OpenaiService(crowi);
}
};
export const getOpenaiService = (): IOpenaiService | undefined => {
if (instance != null) {
return instance;
}
return;
};