Просмотр исходного кода

Merge pull request #9702 from weseek/feat/162624-limit-on-learnable-pages

feat: Limit on learnable pages
Yuki Takei 1 год назад
Родитель
Commit
cda450b30e

+ 1 - 0
apps/app/public/static/locales/en_US/translation.json

@@ -504,6 +504,7 @@
     "show_error_detail": "Show error details"
   },
   "modal_ai_assistant": {
+    "edit_page_description": "Edit pages that the assistant can reference.<br> The assistant can reference up to {{limitLearnablePageCountPerAssistant}} pages including child pages.",
     "default_instruction": "You are the knowledge assistant for this Wiki. Please provide support according to the following guidelines:\n\n- Analyze document relevance and connect information\n- Suggest new perspectives\n- Provide accurate information based on understanding the intent of questions\nI will provide information in a structured format when necessary.",
     "page_mode_title": {
       "share": "Assistant Sharing",

+ 1 - 0
apps/app/public/static/locales/fr_FR/translation.json

@@ -499,6 +499,7 @@
     "show_error_detail": "Détails de l'exposition"
   },
   "modal_ai_assistant": {
+    "edit_page_description": "Modifier les pages que l'assistant peut référencer.<br> L'assistant peut référencer jusqu'à {{limitLearnablePageCountPerAssistant}} pages, y compris les pages enfants.",
     "default_instruction": "Vous êtes l'assistant de connaissances pour ce Wiki. Veuillez fournir un support selon les directives suivantes :\n\n- Analyser la pertinence des documents et relier les informations\n- Proposer de nouvelles perspectives\n- Fournir des informations précises en comprenant l'intention des questions\nJe fournirai les informations sous forme structurée si nécessaire.",
     "page_mode_title": {
       "share": "Partage de l'assistant",

+ 1 - 0
apps/app/public/static/locales/ja_JP/translation.json

@@ -538,6 +538,7 @@
   },
   "modal_ai_assistant": {
     "default_instruction": "あなたはこのWikiの知識アシスタントです。以下の方針で支援を行ってください:\n\n- 文書の関連性分析と情報の関連付け\n- 新しい視点の提案\n- 質問の意図を理解した的確な情報提供 必要に応じて構造化された形式で情報を提供します。",
+    "edit_page_description": "アシスタントが参照するページを編集します。<br> 参照できるページは配下ページも含めて {{limitLearnablePageCountPerAssistant}} ページまでです。",
     "page_mode_title": {
       "share": "アシスタントの共有",
       "pages": "参照ページ",

+ 1 - 0
apps/app/public/static/locales/zh_CN/translation.json

@@ -494,6 +494,7 @@
     "show_error_detail": "显示详情"
   },
   "modal_ai_assistant": {
+    "edit_page_description": "编辑助手可以参考的页面。<br> 助手可以参考最多 {{limitLearnablePageCountPerAssistant}} 个页面,包括子页面。",
     "default_instruction": "您是这个Wiki的知识助手。请按照以下方针提供支持:\n\n- 分析文档相关性并连接信息\n- 提出新的观点\n- 理解问题意图并提供准确信息\n必要时我会以结构化的形式提供信息。",
     "page_mode_title": {
       "share": "助理共享",

+ 10 - 4
apps/app/src/features/openai/client/components/AiAssistant/AiAssistantManagementModal/AiAssistantManagementEditPages.tsx

@@ -1,8 +1,10 @@
 import React, { useCallback } from 'react';
 
+import { useTranslation } from 'react-i18next';
 import { ModalBody } from 'reactstrap';
 
 import type { IPageForItem } from '~/interfaces/page';
+import { useLimitLearnablePageCountPerAssistant } from '~/stores-universal/context';
 import { usePageSelectModal } from '~/stores/modal';
 
 import type { SelectedPage } from '../../../../interfaces/selected-page';
@@ -18,6 +20,9 @@ type Props = {
 }
 
 export const AiAssistantManagementEditPages = (props: Props): JSX.Element => {
+  const { t } = useTranslation();
+  const { data: limitLearnablePageCountPerAssistant } = useLimitLearnablePageCountPerAssistant();
+
   const { selectedPages, onSelect, onRemove } = props;
 
   const { open: openPageSelectModal } = usePageSelectModal();
@@ -31,10 +36,11 @@ export const AiAssistantManagementEditPages = (props: Props): JSX.Element => {
       <AiAssistantManagementHeader />
 
       <ModalBody className="px-4">
-        <p className="text-secondary py-1">
-          アシスタントが参照するページを編集します。<br />
-          参照できるページは配下ページも含めて200ページまでです。
-        </p>
+        <p
+          className="text-secondary py-1"
+          // eslint-disable-next-line react/no-danger
+          dangerouslySetInnerHTML={{ __html: t('modal_ai_assistant.edit_page_description', { limitLearnablePageCountPerAssistant }) }}
+        />
 
         <button
           type="button"

+ 4 - 3
apps/app/src/features/openai/client/components/AiAssistant/AiAssistantManagementModal/AiAssistantManagementHome.tsx

@@ -7,7 +7,7 @@ import {
 
 import { AiAssistantShareScope, AiAssistantAccessScope } from '~/features/openai/interfaces/ai-assistant';
 import type { PopulatedGrantedGroup } from '~/interfaces/page-grant';
-import { useCurrentUser } from '~/stores-universal/context';
+import { useCurrentUser, useLimitLearnablePageCountPerAssistant } from '~/stores-universal/context';
 
 import type { SelectedPage } from '../../../../interfaces/selected-page';
 import { determineShareScope } from '../../../../utils/determine-share-scope';
@@ -48,12 +48,11 @@ export const AiAssistantManagementHome = (props: Props): JSX.Element => {
 
   const { t } = useTranslation();
   const { data: currentUser } = useCurrentUser();
+  const { data: limitLearnablePageCountPerAssistant } = useLimitLearnablePageCountPerAssistant();
   const { close: closeAiAssistantManagementModal, changePageMode } = useAiAssistantManagementModal();
 
   const [isShareScopeWarningModalOpen, setIsShareScopeWarningModalOpen] = useState(false);
 
-  const canUpsert = name !== '' && selectedPages.length !== 0;
-
   const totalSelectedPageCount = useMemo(() => {
     return selectedPages.reduce((total, selectedPage) => {
       const descendantCount = selectedPage.isIncludeSubPage
@@ -71,6 +70,8 @@ export const AiAssistantManagementHome = (props: Props): JSX.Element => {
       : t(baseLabel);
   }, [currentUser?.username, t]);
 
+  const canUpsert = name !== '' && selectedPages.length !== 0 && (limitLearnablePageCountPerAssistant ?? 3000) >= totalSelectedPageCount;
+
   const upsertAiAssistantHandler = useCallback(async() => {
     const shouldWarning = () => {
       const isDifferentUserGroup = () => {

+ 1 - 9
apps/app/src/features/openai/client/components/AiAssistant/AiAssistantManagementModal/AiAssistantManagementModal.tsx

@@ -16,6 +16,7 @@ import { useSWRxPagePathsWithDescendantCount } from '~/stores/page';
 import loggerFactory from '~/utils/logger';
 
 import type { SelectedPage } from '../../../../interfaces/selected-page';
+import { removeGlobPath } from '../../../../utils/remove-glob-path';
 import { createAiAssistant, updateAiAssistant } from '../../../services/ai-assistant';
 import { useAiAssistantManagementModal, AiAssistantManagementModalPageMode, useSWRxAiAssistants } from '../../../stores/ai-assistant';
 
@@ -56,15 +57,6 @@ const convertToSelectedPages = (pagePathPatterns: string[], pagePathsWithDescend
   });
 };
 
-const removeGlobPath = (pagePathPattens?: string[]): string[] => {
-  if (pagePathPattens == null) {
-    return [];
-  }
-  return pagePathPattens.map((pagePathPattern) => {
-    return pagePathPattern.endsWith('/*') ? pagePathPattern.slice(0, -2) : pagePathPattern;
-  });
-};
-
 const AiAssistantManagementModalSubstance = (): JSX.Element => {
   // Hooks
   const { t } = useTranslation();

+ 2 - 2
apps/app/src/features/openai/interfaces/ai-assistant.ts

@@ -1,5 +1,5 @@
 import type {
-  IGrantedGroup, IUser, Ref, HasObjectId,
+  IGrantedGroup, IUserHasId, Ref, HasObjectId,
 } from '@growi/core';
 
 import type { IVectorStore } from './vector-store';
@@ -32,7 +32,7 @@ export interface AiAssistant {
   additionalInstruction: string
   pagePathPatterns: string[],
   vectorStore: Ref<IVectorStore>
-  owner: Ref<IUser>
+  owner: Ref<IUserHasId>
   grantedGroupsForShareScope?: IGrantedGroup[]
   grantedGroupsForAccessScope?: IGrantedGroup[]
   shareScope: AiAssistantShareScope

+ 0 - 16
apps/app/src/features/openai/server/models/ai-assistant.ts

@@ -5,12 +5,10 @@ import { type Model, type Document, Schema } from 'mongoose';
 import { getOrCreateModel } from '~/server/util/mongoose-utils';
 
 import { type AiAssistant, AiAssistantShareScope, AiAssistantAccessScope } from '../../interfaces/ai-assistant';
-import { generateGlobPatterns } from '../utils/generate-glob-patterns';
 
 export interface AiAssistantDocument extends AiAssistant, Document {}
 
 interface AiAssistantModel extends Model<AiAssistantDocument> {
-  findByPagePaths(pagePaths: string[]): Promise<AiAssistantDocument[]>;
   setDefault(id: string, isDefault: boolean): Promise<AiAssistantDocument>;
 }
 
@@ -113,20 +111,6 @@ const schema = new Schema<AiAssistantDocument>(
 );
 
 
-schema.statics.findByPagePaths = async function(pagePaths: string[]): Promise<AiAssistantDocument[]> {
-  const pagePathsWithGlobPattern = pagePaths.map(pagePath => generateGlobPatterns(pagePath)).flat();
-  const assistants = await this.find({
-    $or: [
-      // Case 1: Exact match
-      { pagePathPatterns: { $in: pagePaths } },
-      // Case 2: Glob pattern match
-      { pagePathPatterns: { $in: pagePathsWithGlobPattern } },
-    ],
-  }).populate('vectorStore');
-
-  return assistants;
-};
-
 schema.statics.setDefault = async function(id: string, isDefault: boolean): Promise<AiAssistantDocument> {
   const aiAssistant = await this.findOne({ _id: id, shareScope: AiAssistantAccessScope.PUBLIC_ONLY });
   if (aiAssistant == null) {

+ 6 - 0
apps/app/src/features/openai/server/routes/ai-assistant.ts

@@ -37,6 +37,12 @@ export const createAiAssistantFactory: CreateAssistantFactory = (crowi) => {
 
       try {
         const aiAssistantData = { ...req.body, owner: req.user._id };
+
+        const isLearnablePageLimitExceeded = await openaiService.isLearnablePageLimitExceeded(req.user, aiAssistantData.pagePathPatterns);
+        if (isLearnablePageLimitExceeded) {
+          return res.apiv3Err(new ErrorV3('The number of learnable pages exceeds the limit'), 400);
+        }
+
         const aiAssistant = await openaiService.createAiAssistant(aiAssistantData);
 
         return res.apiv3({ aiAssistant });

+ 8 - 2
apps/app/src/features/openai/server/routes/middlewares/upsert-ai-assistant-validator.ts

@@ -1,6 +1,5 @@
 import { GroupType } from '@growi/core';
 import { isGlobPatternPath, isCreatablePage } from '@growi/core/dist/utils/page-path-utils';
-import escapeStringRegexp from 'escape-string-regexp';
 import { type ValidationChain, body } from 'express-validator';
 
 import { AiAssistantShareScope, AiAssistantAccessScope } from '../../../interfaces/ai-assistant';
@@ -31,7 +30,14 @@ export const upsertAiAssistantValidator: ValidationChain[] = [
     .withMessage('pagePathPatterns must be an array of strings')
     .not()
     .isEmpty()
-    .withMessage('pagePathPatterns must not be empty'),
+    .withMessage('pagePathPatterns must not be empty')
+    .custom((pagePathPatterns: string[]) => {
+      // Cap the number of patterns accepted in one request; the message names the actual request field
+      if (pagePathPatterns.length > 300) {
+        throw new Error('pagePathPatterns must be an array of strings with a maximum length of 300');
+      }
+
+      return true;
+    }),
 
   body('pagePathPatterns.*') // each item of pagePathPatterns
     .isString()

+ 6 - 0
apps/app/src/features/openai/server/routes/update-ai-assistant.ts

@@ -51,6 +51,12 @@ export const updateAiAssistantsFactory: UpdateAiAssistantsFactory = (crowi) => {
 
       try {
         const aiAssistantData = { ...req.body, owner: user._id };
+
+        const isLearnablePageLimitExceeded = await openaiService.isLearnablePageLimitExceeded(user, aiAssistantData.pagePathPatterns);
+        if (isLearnablePageLimitExceeded) {
+          return res.apiv3Err(new ErrorV3('The number of learnable pages exceeds the limit'), 400);
+        }
+
         const updatedAiAssistant = await openaiService.updateAiAssistant(id, aiAssistantData);
 
         return res.apiv3({ updatedAiAssistant });

+ 65 - 3
apps/app/src/features/openai/server/services/openai.ts

@@ -2,7 +2,9 @@ import assert from 'node:assert';
 import { Readable, Transform } from 'stream';
 import { pipeline } from 'stream/promises';
 
-import type { IUser, Ref, Lang } from '@growi/core';
+import type {
+  IUser, Ref, Lang, IPage,
+} from '@growi/core';
 import {
   PageGrant, getIdForRef, getIdStringForRef, isPopulated, type IUserHasId,
 } from '@growi/core';
@@ -31,8 +33,10 @@ import {
   type AccessibleAiAssistants, type AiAssistant, AiAssistantAccessScope, AiAssistantShareScope,
 } from '../../interfaces/ai-assistant';
 import type { MessageListParams } from '../../interfaces/message';
+import { removeGlobPath } from '../../utils/remove-glob-path';
 import AiAssistantModel, { type AiAssistantDocument } from '../models/ai-assistant';
 import { convertMarkdownToHtml } from '../utils/convert-markdown-to-html';
+import { generateGlobPatterns } from '../utils/generate-glob-patterns';
 
 import { getClient } from './client-delegator';
 import { openaiApiErrorHandler } from './openai-api-error-handler';
@@ -86,6 +90,7 @@ export interface IOpenaiService {
   updateAiAssistant(aiAssistantId: string, data: Omit<AiAssistant, 'vectorStore'>): Promise<AiAssistantDocument>;
   getAccessibleAiAssistants(user: IUserHasId): Promise<AccessibleAiAssistants>
   deleteAiAssistant(ownerId: string, aiAssistantId: string): Promise<AiAssistantDocument>
+  isLearnablePageLimitExceeded(user: IUserHasId, pagePathPatterns: string[]): Promise<boolean>;
 }
 class OpenaiService implements IOpenaiService {
 
@@ -532,13 +537,22 @@ class OpenaiService implements IOpenaiService {
 
   async createVectorStoreFileOnPageCreate(pages: HydratedDocument<PageDocument>[]): Promise<void> {
     const pagePaths = pages.map(page => page.path);
-    const aiAssistants = await AiAssistantModel.findByPagePaths(pagePaths);
+    const aiAssistants = await this.findAiAssistantByPagePath(pagePaths, { shouldPopulateOwner: true, shouldPopulateVectorStore: true });
 
     if (aiAssistants.length === 0) {
       return;
     }
 
     for await (const aiAssistant of aiAssistants) {
+      if (!isPopulated(aiAssistant.owner)) {
+        continue;
+      }
+
+      const isLearnablePageLimitExceeded = await this.isLearnablePageLimitExceeded(aiAssistant.owner, aiAssistant.pagePathPatterns);
+      if (isLearnablePageLimitExceeded) {
+        continue;
+      }
+
       const pagesToVectorize = await this.filterPagesByAccessScope(aiAssistant, pages);
       const vectorStoreRelation = aiAssistant.vectorStore;
       if (vectorStoreRelation == null || !isPopulated(vectorStoreRelation)) {
@@ -555,7 +569,7 @@ class OpenaiService implements IOpenaiService {
   }
 
   async updateVectorStoreFileOnPageUpdate(page: HydratedDocument<PageDocument>) {
-    const aiAssistants = await AiAssistantModel.findByPagePaths([page.path]);
+    const aiAssistants = await this.findAiAssistantByPagePath([page.path], { shouldPopulateVectorStore: true });
 
     if (aiAssistants.length === 0) {
       return;
@@ -890,6 +904,54 @@ class OpenaiService implements IOpenaiService {
     return deletedAiAssistant;
   }
 
+  /**
+   * Checks whether the pages matched by the given patterns exceed the value of the
+   * 'openai:limitLearnablePageCountPerAssistant' config.
+   *
+   * Each path returned by PageModel.descendantCountByPaths counts as one page. Its descendants
+   * are added unless the path was given only as an exact (non-glob) pattern.
+   *
+   * @param user forwarded to PageModel.descendantCountByPaths
+   * @param pagePathPatterns page paths; a trailing "/*" means the page together with its descendants
+   * @returns true when the total page count is strictly greater than the configured limit
+   */
+  async isLearnablePageLimitExceeded(user: IUserHasId, pagePathPatterns: string[]): Promise<boolean> {
+    const exactPaths = new Set(pagePathPatterns);
+    // Paths whose descendants were explicitly requested through a "<path>/*" pattern
+    const globBasePaths = new Set(removeGlobPath(pagePathPatterns.filter(pattern => pattern.endsWith('/*'))));
+
+    // "/foo" and "/foo/*" normalize to the same path; query each path only once
+    const normalizedPagePathPatterns = [...new Set(removeGlobPath(pagePathPatterns))];
+
+    const PageModel = mongoose.model<IPage, PageModel>('Page');
+    const pagePathsWithDescendantCount = await PageModel.descendantCountByPaths(normalizedPagePathPatterns, user, null, true, true);
+
+    const totalPageCount = pagePathsWithDescendantCount.reduce((total, pagePathWithDescendantCount) => {
+      const { path } = pagePathWithDescendantCount;
+
+      // Treat as a single page only when listed as an exact path AND not also covered by a glob pattern;
+      // otherwise listing both "/foo" and "/foo/*" would hide the descendants of "/foo" from the limit
+      const isSinglePageOnly = exactPaths.has(path) && !globBasePaths.has(path);
+      const descendantCount = isSinglePageOnly ? 0 : pagePathWithDescendantCount.descendantCount;
+
+      return total + descendantCount + 1;
+    }, 0);
+
+    logger.debug('TotalPageCount: ', totalPageCount);
+
+    const limitLearnablePageCountPerAssistant = configManager.getConfig('openai:limitLearnablePageCountPerAssistant');
+    return totalPageCount > limitLearnablePageCountPerAssistant;
+  }
+
+  /**
+   * Looks up AI assistants whose `pagePathPatterns` contain one of the given page paths,
+   * either verbatim or as one of the patterns produced by `generateGlobPatterns` for that path.
+   *
+   * @param pagePaths page paths to match against the stored patterns
+   * @param options set `shouldPopulateOwner` / `shouldPopulateVectorStore` to populate those references
+   * @returns the matching assistant documents
+   */
+  async findAiAssistantByPagePath(
+      pagePaths: string[], options?: { shouldPopulateOwner?: boolean, shouldPopulateVectorStore?: boolean },
+  ): Promise<AiAssistantDocument[]> {
+
+    const globPatterns = pagePaths.flatMap(pagePath => generateGlobPatterns(pagePath));
+
+    // Case 1: Exact match
+    const exactMatchCondition = { pagePathPatterns: { $in: pagePaths } };
+    // Case 2: Glob pattern match
+    const globMatchCondition = { pagePathPatterns: { $in: globPatterns } };
+
+    const query = AiAssistantModel.find({ $or: [exactMatchCondition, globMatchCondition] });
+
+    const { shouldPopulateOwner, shouldPopulateVectorStore } = options ?? {};
+
+    if (shouldPopulateOwner) {
+      query.populate('owner');
+    }
+    if (shouldPopulateVectorStore) {
+      query.populate('vectorStore');
+    }
+
+    return query.exec();
+  }
+
 }
 
 let instance: OpenaiService;

+ 8 - 0
apps/app/src/features/openai/utils/remove-glob-path.ts

@@ -0,0 +1,8 @@
+/**
+ * Strips a trailing "/*" from every page path pattern.
+ *
+ * @param pagePathPattens patterns to normalize; may be omitted
+ * @returns a new array with the suffix removed where present, or an empty array when the input is null/undefined
+ */
+export const removeGlobPath = (pagePathPattens?: string[]): string[] => {
+  const GLOB_SUFFIX = '/*';
+
+  const stripGlobSuffix = (pattern: string): string => {
+    if (!pattern.endsWith(GLOB_SUFFIX)) {
+      return pattern;
+    }
+    return pattern.slice(0, -GLOB_SUFFIX.length);
+  };
+
+  return (pagePathPattens ?? []).map(pattern => stripGlobSuffix(pattern));
+};

+ 4 - 1
apps/app/src/pages/[[...path]].page.tsx

@@ -46,7 +46,7 @@ import {
   useElasticsearchMaxBodyLengthToIndex,
   useIsLocalAccountRegistrationEnabled,
   useIsRomUserAllowedToComment,
-  useIsAiEnabled,
+  useIsAiEnabled, useLimitLearnablePageCountPerAssistant,
 } from '~/stores-universal/context';
 import { useEditingMarkdown } from '~/stores/editor';
 import {
@@ -195,6 +195,7 @@ type Props = CommonProps & {
   rendererConfig: RendererConfig,
 
   aiEnabled: boolean,
+  limitLearnablePageCountPerAssistant: number,
 };
 
 const Page: NextPageWithLayout<Props> = (props: Props) => {
@@ -248,6 +249,7 @@ const Page: NextPageWithLayout<Props> = (props: Props) => {
   useIsRomUserAllowedToComment(props.isRomUserAllowedToComment);
 
   useIsAiEnabled(props.aiEnabled);
+  useLimitLearnablePageCountPerAssistant(props.limitLearnablePageCountPerAssistant);
 
   const { pageWithMeta } = props;
 
@@ -566,6 +568,7 @@ function injectServerConfigurations(context: GetServerSidePropsContext, props: P
   } = crowi;
 
   props.aiEnabled = configManager.getConfig('app:aiEnabled');
+  props.limitLearnablePageCountPerAssistant = configManager.getConfig('openai:limitLearnablePageCountPerAssistant');
 
   props.isSearchServiceConfigured = searchService.isConfigured;
   props.isSearchServiceReachable = searchService.isReachable;

+ 5 - 0
apps/app/src/server/service/config-manager/config-definition.ts

@@ -260,6 +260,7 @@ export const CONFIG_KEYS = [
   'openai:vectorStoreFileDeletionCronExpression',
   'openai:vectorStoreFileDeletionBarchSize',
   'openai:vectorStoreFileDeletionApiCallInterval',
+  'openai:limitLearnablePageCountPerAssistant',
 
   // OpenTelemetry Settings
   'otel:enabled',
@@ -1125,6 +1126,10 @@ Guideline as a RAG:
     envVarName: 'OPENAI_SEARCH_ASSISTANT_INSTRUCTIONS',
     defaultValue: '',
   }),
+  'openai:limitLearnablePageCountPerAssistant': defineConfig<number>({
+    envVarName: 'OPENAI_LIMIT_LEARNABLE_PAGE_COUNT_PER_ASSISTANT',
+    defaultValue: 3000,
+  }),
 
   // OpenTelemetry Settings
   'otel:enabled': defineConfig<boolean>({

+ 4 - 0
apps/app/src/stores-universal/context.tsx

@@ -208,6 +208,10 @@ export const useIsAiEnabled = (initialData?: boolean): SWRResponse<boolean, Erro
   return useContextSWR('isAiEnabled', initialData);
 };
 
+/**
+ * Context hook for the per-assistant learnable page limit.
+ * Delegates to `useContextSWR` under the key 'limitLearnablePageCountPerAssistant'.
+ *
+ * @param initialData optional value passed through to `useContextSWR`
+ */
+export const useLimitLearnablePageCountPerAssistant = (initialData?: number): SWRResponse<number, Error> => {
+  const swrResponse = useContextSWR('limitLearnablePageCountPerAssistant', initialData);
+  return swrResponse;
+};
+
 /** **********************************************************
  *                     Computed contexts
  *********************************************************** */