Преглед изворни кода

Merge pull request #8937 from weseek/imprv/max-length-to-index-for-searching

imprv: Restrict indexing for full text search when the body length exceeds the threshold
Yuki Takei пре 1 година
родитељ
комит
b755b96914

+ 3 - 1
apps/app/public/static/locales/en_US/translation.json

@@ -321,7 +321,9 @@
       "stale": "More than {{count}} year has passed since last update.",
       "stale_plural": "More than {{count}} years have passed since last update.",
       "expiration": "This share link will expire at <strong>{{expiredAt}}</strong>.",
-      "no_deadline": "This page has no expiration date"
+      "no_deadline": "This page has no expiration date",
+      "not_indexed1": "This page may not be indexed by Full-Text search engines.",
+      "not_indexed2": "Page body exceeds the threshold specified by {{threshold}}."
     }
   },
   "page_edit": {

+ 3 - 1
apps/app/public/static/locales/fr_FR/translation.json

@@ -321,7 +321,9 @@
       "stale": "Plus de {{count}} an est passé depuis la dernière mise à jour.",
       "stale_plural": "Plus de {{count}} années sont passées depuis la dernière mise à jour.",
       "expiration": "Ce lien expirera <strong>{{expiredAt}}</strong>.",
-      "no_deadline": "Cette page n'a pas de date d'expiration"
+      "no_deadline": "Cette page n'a pas de date d'expiration",
+      "not_indexed1": "Cette page n'est peut-être pas indexée par les moteurs de recherche Full-Text.",
+      "not_indexed2": "Le corps de la page dépasse le seuil spécifié par {{threshold}}."
     }
   },
   "page_edit": {

+ 3 - 1
apps/app/public/static/locales/ja_JP/translation.json

@@ -354,7 +354,9 @@
       "restricted": "このページの閲覧は制限されています",
       "stale": "このページは最終更新日から{{count}}年以上が経過しています。",
       "expiration": "この共有パーマリンクの有効期限は <strong>{{expiredAt}}</strong> です。",
-      "no_deadline": "このページに有効期限は設定されていません。"
+      "no_deadline": "このページに有効期限は設定されていません。",
+      "not_indexed1": "このページは全文検索エンジンにインデックスされない可能性があります。",
+      "not_indexed2": "ページ本文が閾値を超えています: {{threshold}}。"
     }
   },
   "page_edit": {

+ 3 - 1
apps/app/public/static/locales/zh_CN/translation.json

@@ -311,7 +311,9 @@
       "restricted": "访问此页受到限制",
       "stale": "自上次更新以来,已超过{{count}}年。",
       "stale_plural": "自上次更新以来已过去{{count}}年以上。",
-      "no_deadline": "This page has no expiration date"
+      "no_deadline": "此页面没有到期日期",
+      "not_indexed1": "此页面可能不会被全文搜索引擎索引。",
+      "not_indexed2": "页面正文超过了{{threshold}}指定的阈值。"
 		}
 	},
 	"page_edit": {

+ 31 - 0
apps/app/src/components/PageView/PageAlerts/FullTextSearchNotCoverAlert.tsx

@@ -0,0 +1,31 @@
+import { useTranslation } from 'react-i18next';
+
+import { useElasticsearchMaxBodyLengthToIndex } from '~/stores-universal/context';
+import { useSWRxCurrentPage } from '~/stores/page';
+
+/**
+ * Warning alert displayed when the markdown body of the current page is longer
+ * than the configured maximum body length for full-text indexing.
+ *
+ * Renders an empty fragment while either value is unavailable, or when the
+ * body length does not exceed the limit.
+ */
+export const FullTextSearchNotCoverAlert = (): JSX.Element => {
+  const { t } = useTranslation();
+
+  const { data: maxBodyLength } = useElasticsearchMaxBodyLengthToIndex();
+  const { data: currentPage } = useSWRxCurrentPage();
+
+  const bodyLength = currentPage?.revision?.body?.length;
+
+  // nothing to show until both the page body and the limit are known
+  if (bodyLength == null || maxBodyLength == null) {
+    return <></>;
+  }
+  // the body fits within the limit
+  if (bodyLength <= maxBodyLength) {
+    return <></>;
+  }
+
+  // the threshold is injected as markup, hence the raw HTML rendering below
+  const thresholdHtml = `<code>ELASTICSEARCH_MAX_BODY_LENGTH_TO_INDEX=${maxBodyLength}</code>`;
+  const detailHtml = t('page_page.notice.not_indexed2', { threshold: thresholdHtml });
+
+  return (
+    <div className="alert alert-warning">
+      <strong>{t('Warning')}: {t('page_page.notice.not_indexed1')}</strong><br />
+      <small
+        // eslint-disable-next-line react/no-danger
+        dangerouslySetInnerHTML={{ __html: detailHtml }}
+      />
+    </div>
+  );
+};

+ 2 - 2
apps/app/src/components/PageView/PageAlerts/PageAlerts.tsx

@@ -1,5 +1,3 @@
-import React from 'react';
-
 import dynamic from 'next/dynamic';
 
 import { useIsNotFound } from '~/stores/page';
@@ -9,6 +7,7 @@ import { PageGrantAlert } from './PageGrantAlert';
 import { PageStaleAlert } from './PageStaleAlert';
 import { WipPageAlert } from './WipPageAlert';
 
+const FullTextSearchNotCoverAlert = dynamic(() => import('./FullTextSearchNotCoverAlert').then(mod => mod.FullTextSearchNotCoverAlert), { ssr: false });
 const PageRedirectedAlert = dynamic(() => import('./PageRedirectedAlert').then(mod => mod.PageRedirectedAlert), { ssr: false });
 const FixPageGrantAlert = dynamic(() => import('./FixPageGrantAlert').then(mod => mod.FixPageGrantAlert), { ssr: false });
 const TrashPageAlert = dynamic(() => import('./TrashPageAlert').then(mod => mod.TrashPageAlert), { ssr: false });
@@ -22,6 +21,7 @@ export const PageAlerts = (): JSX.Element => {
       <div className="col-sm-12">
         {/* alerts */}
         { !isNotFound && <FixPageGrantAlert /> }
+        <FullTextSearchNotCoverAlert />
         <WipPageAlert />
         <PageGrantAlert />
         <TrashPageAlert />

+ 0 - 19
apps/app/src/features/comment/server/models/comment.ts

@@ -27,7 +27,6 @@ type Add = (
 type FindCommentsByPageId = (pageId: Types.ObjectId) => Query<CommentDocument[], CommentDocument>;
 type FindCommentsByRevisionId = (revisionId: Types.ObjectId) => Query<CommentDocument[], CommentDocument>;
 type FindCreatorsByPage = (pageId: Types.ObjectId) => Promise<IUser[]>
-type GetPageIdToCommentMap = (pageIds: Types.ObjectId[]) => Promise<Record<string, CommentDocument[]>>
 type CountCommentByPageId = (pageId: Types.ObjectId) => Promise<number>
 
 export interface CommentModel extends Model<CommentDocument> {
@@ -35,7 +34,6 @@ export interface CommentModel extends Model<CommentDocument> {
   findCommentsByPageId: FindCommentsByPageId
   findCommentsByRevisionId: FindCommentsByRevisionId
   findCreatorsByPage: FindCreatorsByPage
-  getPageIdToCommentMap: GetPageIdToCommentMap
   countCommentByPageId: CountCommentByPageId
 }
 
@@ -91,23 +89,6 @@ commentSchema.statics.findCreatorsByPage = async function(page) {
   return this.distinct('creator', { page }).exec();
 };
 
-/**
- * @return {object} key: page._id, value: comments
- */
-commentSchema.statics.getPageIdToCommentMap = async function(pageIds) {
-  const results = await this.aggregate()
-    .match({ page: { $in: pageIds } })
-    .group({ _id: '$page', comments: { $push: '$comment' } });
-
-  // convert to map
-  const idToCommentMap = {};
-  results.forEach((result, i) => {
-    idToCommentMap[result._id] = result.comments;
-  });
-
-  return idToCommentMap;
-};
-
 commentSchema.statics.countCommentByPageId = async function(page) {
   return this.count({ page });
 };

+ 4 - 0
apps/app/src/pages/[[...path]].page.tsx

@@ -42,6 +42,7 @@ import {
   useIsSlackConfigured, useRendererConfig, useGrowiCloudUri,
   useIsAllReplyShown, useIsContainerFluid, useIsNotCreatable,
   useIsUploadAllFileAllowed, useIsUploadEnabled,
+  useElasticsearchMaxBodyLengthToIndex,
 } from '~/stores-universal/context';
 import { useEditingMarkdown } from '~/stores/editor';
 import {
@@ -157,6 +158,7 @@ type Props = CommonProps & {
   isSearchServiceConfigured: boolean,
   isSearchServiceReachable: boolean,
   isSearchScopeChildrenAsDefault: boolean,
+  elasticsearchMaxBodyLengthToIndex: number,
   isEnabledMarp: boolean,
 
   sidebarConfig: ISidebarConfig,
@@ -215,6 +217,7 @@ const Page: NextPageWithLayout<Props> = (props: Props) => {
   useIsEnabledAttachTitleHeader(props.isEnabledAttachTitleHeader);
   useIsSearchServiceConfigured(props.isSearchServiceConfigured);
   useIsSearchServiceReachable(props.isSearchServiceReachable);
+  useElasticsearchMaxBodyLengthToIndex(props.elasticsearchMaxBodyLengthToIndex);
   useIsSearchScopeChildrenAsDefault(props.isSearchScopeChildrenAsDefault);
 
   useIsSlackConfigured(props.isSlackConfigured);
@@ -537,6 +540,7 @@ function injectServerConfigurations(context: GetServerSidePropsContext, props: P
   props.isSearchServiceConfigured = searchService.isConfigured;
   props.isSearchServiceReachable = searchService.isReachable;
   props.isSearchScopeChildrenAsDefault = configManager.getConfig('crowi', 'customize:isSearchScopeChildrenAsDefault');
+  props.elasticsearchMaxBodyLengthToIndex = configManager.getConfig('crowi', 'app:elasticsearchMaxBodyLengthToIndex');
 
   props.isSlackConfigured = crowi.slackIntegrationService.isSlackConfigured;
   // props.isMailerSetup = mailService.isMailerSetup;

+ 6 - 9
apps/app/src/server/models/page-tag-relation.ts

@@ -1,6 +1,6 @@
 import type { ITag } from '@growi/core';
-import type { Document, Model } from 'mongoose';
-import mongoose, { ObjectId } from 'mongoose';
+import type { Document, Model, ObjectId } from 'mongoose';
+import mongoose from 'mongoose';
 import mongoosePaginate from 'mongoose-paginate-v2';
 import uniqueValidator from 'mongoose-unique-validator';
 
@@ -9,13 +9,10 @@ import type { IPageTagRelation } from '~/interfaces/page-tag-relation';
 import type { ObjectIdLike } from '../interfaces/mongoose-utils';
 import { getOrCreateModel } from '../util/mongoose-utils';
 
-import type { IdToNameMap } from './tag';
+import type { IdToNameMap, IdToNamesMap } from './tag';
 import Tag from './tag';
 
 
-const ObjectId = mongoose.Schema.Types.ObjectId;
-
-
 // disable no-return-await for model functions
 /* eslint-disable no-return-await */
 
@@ -36,7 +33,7 @@ type CreateTagListWithCountResult = {
 }
 type CreateTagListWithCount = (this: PageTagRelationModel, opts?: CreateTagListWithCountOpts) => Promise<CreateTagListWithCountResult>;
 
-type GetIdToTagNamesMap = (this: PageTagRelationModel, pageIds: string[]) => Promise<IdToNameMap>;
+type GetIdToTagNamesMap = (this: PageTagRelationModel, pageIds: string[]) => Promise<IdToNamesMap>;
 
 type UpdatePageTags = (this: PageTagRelationModel, pageId: string, tags: string[]) => Promise<void>
 
@@ -54,13 +51,13 @@ export interface PageTagRelationModel extends Model<PageTagRelationDocument> {
  */
 const schema = new mongoose.Schema<PageTagRelationDocument, PageTagRelationModel>({
   relatedPage: {
-    type: ObjectId,
+    type: mongoose.Schema.Types.ObjectId,
     ref: 'Page',
     required: true,
     index: true,
   },
   relatedTag: {
-    type: ObjectId,
+    type: mongoose.Schema.Types.ObjectId,
     ref: 'Tag',
     required: true,
     index: true,

+ 1 - 0
apps/app/src/server/models/tag.ts

@@ -14,6 +14,7 @@ export interface TagDocument {
 }
 
 export type IdToNameMap = {[key: string] : string }
+export type IdToNamesMap = {[key: string] : string[] }
 
 export interface TagModel extends Model<TagDocument>{
   getIdToNameMap(tagIds: ObjectIdLike[]): IdToNameMap

+ 6 - 0
apps/app/src/server/service/config-loader.ts

@@ -282,6 +282,12 @@ const ENV_VAR_NAME_TO_CONFIG_INFO = {
     type:    ValueType.BOOLEAN,
     default: false,
   },
+  ELASTICSEARCH_MAX_BODY_LENGTH_TO_INDEX: {
+    ns:      'crowi',
+    key:     'app:elasticsearchMaxBodyLengthToIndex',
+    type:    ValueType.NUMBER,
+    default: 100000,
+  },
   ELASTICSEARCH_REINDEX_BULK_SIZE: {
     ns:      'crowi',
     key:     'app:elasticsearchReindexBulkSize',

+ 139 - 0
apps/app/src/server/service/search-delegator/aggregate-to-index.ts

@@ -0,0 +1,139 @@
+import type { IPage } from '@growi/core';
+import type { PipelineStage, Query } from 'mongoose';
+
+import type { PageModel } from '~/server/models/page';
+
+/**
+ * Build the aggregation pipeline that shapes Page documents for full-text indexing.
+ *
+ * The pipeline joins revisions, users, comments and bookmarks, computes the
+ * counters, and projects only the fields listed in the final stage.
+ * A page body or a comment longer than `maxBodyLengthToIndex` (counted in code
+ * points) is replaced with an empty string.
+ *
+ * @param maxBodyLengthToIndex maximum length of a body/comment that is kept as-is
+ * @param query optional query whose filter becomes the leading `$match` stage
+ * @returns the pipeline stages
+ */
+export const aggregatePipelineToIndex = (maxBodyLengthToIndex: number, query?: Query<PageModel, IPage>): PipelineStage[] => {
+
+  const basePipeline = query == null
+    ? []
+    : [{ $match: query.getQuery() }];
+
+  return [
+    ...basePipeline,
+
+    // join Revision
+    {
+      $lookup: {
+        from: 'revisions',
+        localField: 'revision',
+        foreignField: '_id',
+        as: 'revision',
+      },
+    },
+    // unwind and filter out pages that do not have a revision
+    {
+      $unwind: {
+        path: '$revision',
+      },
+    },
+    {
+      $addFields: {
+        // $strLenCP raises an error for null/missing input, so fall back to an empty string
+        bodyLength: { $strLenCP: { $ifNull: ['$revision.body', ''] } },
+      },
+    },
+
+    // join User
+    {
+      $lookup: {
+        from: 'users',
+        localField: 'creator',
+        foreignField: '_id',
+        as: 'creator',
+      },
+    },
+    {
+      $unwind: {
+        path: '$creator',
+        preserveNullAndEmptyArrays: true,
+      },
+    },
+
+    // join Comment
+    {
+      $lookup: {
+        from: 'comments',
+        localField: '_id',
+        foreignField: 'page',
+        pipeline: [
+          {
+            $addFields: {
+              // guard against null/missing comment text (see bodyLength above)
+              commentLength: { $strLenCP: { $ifNull: ['$comment', ''] } },
+            },
+          },
+        ],
+        as: 'comments',
+      },
+    },
+    {
+      $addFields: {
+        commentsCount: { $size: '$comments' },
+      },
+    },
+
+    // join Bookmark
+    {
+      $lookup: {
+        from: 'bookmarks',
+        localField: '_id',
+        foreignField: 'page',
+        as: 'bookmarks',
+      },
+    },
+    {
+      $addFields: {
+        bookmarksCount: { $size: '$bookmarks' },
+      },
+    },
+
+    // add counts for embedded arrays
+    // ($size raises an error for a non-array, so a missing field is treated as an empty array)
+    {
+      $addFields: {
+        likeCount: { $size: { $ifNull: ['$liker', []] } },
+      },
+    },
+    {
+      $addFields: {
+        seenUsersCount: { $size: { $ifNull: ['$seenUsers', []] } },
+      },
+    },
+
+    // project
+    {
+      $project: {
+        path: 1,
+        createdAt: 1,
+        updatedAt: 1,
+        grant: 1,
+        grantedUsers: 1,
+        grantedGroups: 1,
+        // keep the body only when it is within the limit
+        'revision.body': {
+          $cond: {
+            if: { $lte: ['$bodyLength', maxBodyLengthToIndex] },
+            then: { $ifNull: ['$revision.body', ''] },
+            else: '',
+          },
+        },
+        // keep each comment only when it is within the limit
+        comments: {
+          $map: {
+            input: '$comments',
+            as: 'comment',
+            in: {
+              $cond: {
+                if: { $lte: ['$$comment.commentLength', maxBodyLengthToIndex] },
+                then: { $ifNull: ['$$comment.comment', ''] },
+                else: '',
+              },
+            },
+          },
+        },
+        commentsCount: 1,
+        bookmarksCount: 1,
+        likeCount: 1,
+        seenUsersCount: 1,
+        'creator.username': 1,
+        'creator.email': 1,
+      },
+    },
+  ];
+};

+ 52 - 0
apps/app/src/server/service/search-delegator/bulk-write.d.ts

@@ -0,0 +1,52 @@
+import type { IPageHasId, PageGrant } from '@growi/core';
+
+/**
+ * Page document shape emitted by the indexing aggregation, including the
+ * tag names that are appended to each document afterwards.
+ */
+export type AggregatedPage =
+  Pick<IPageHasId, '_id' | 'path' | 'createdAt' | 'updatedAt' | 'grant' | 'grantedUsers' | 'grantedGroups'>
+  & {
+    revision: { body: string },
+    creator: { username: string, email: string },
+    comments: string[],
+    commentsCount: number,
+    bookmarksCount: number,
+    likeCount: number,
+    seenUsersCount: number,
+    tagNames: string[],
+  };
+
+/** Action line of a bulk request: the target index, type and document id. */
+export type BulkWriteCommand = {
+  index: {
+    _index: string,
+    _type: '_doc' | undefined,
+    _id: string,
+  },
+};
+
+/** Grant-related part of an indexed document. */
+export type BulkWriteBodyRestriction = {
+  grant: PageGrant,
+  granted_users?: string[],
+  granted_groups: string[],
+};
+
+/** Document line of a bulk request: the searchable fields plus the grant fields. */
+export type BulkWriteBody = BulkWriteBodyRestriction & {
+  path: string,
+  body: string,
+  username?: string,
+  comments?: string[],
+  tag_names?: string[],
+  comment_count: number,
+  bookmark_count: number,
+  like_count: number,
+  seenUsers_count: number,
+  created_at: Date,
+  updated_at: Date,
+};

+ 33 - 135
apps/app/src/server/service/search-delegator/elasticsearch.ts

@@ -1,11 +1,11 @@
 import { Writable, Transform } from 'stream';
 import { URL } from 'url';
 
+import { getIdForRef, type IPage } from '@growi/core';
 import gc from 'expose-gc/function';
 import mongoose from 'mongoose';
 import streamToPromise from 'stream-to-promise';
 
-import { Comment } from '~/features/comment/server';
 import { SearchDelegatorName } from '~/interfaces/named-query';
 import type { ISearchResult, ISearchResultData } from '~/interfaces/search';
 import { SORT_AXIS, SORT_ORDER } from '~/interfaces/search';
@@ -22,6 +22,8 @@ import { configManager } from '../config-manager';
 import type { UpdateOrInsertPagesOpts } from '../interfaces/search';
 
 
+import { aggregatePipelineToIndex } from './aggregate-to-index';
+import type { AggregatedPage, BulkWriteBody, BulkWriteCommand } from './bulk-write';
 import ElasticsearchClient from './elasticsearch-client';
 
 const logger = loggerFactory('growi:service:search-delegator:elasticsearch');
@@ -109,10 +111,6 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
     return `${this.indexName}-alias`;
   }
 
-  shouldIndexed(page) {
-    return page.revision != null;
-  }
-
   initClient() {
     const { host, auth, indexName } = this.getConnectionInfo();
 
@@ -128,7 +126,7 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
     this.indexName = indexName;
   }
 
-  getType() {
+  getType(): '_doc' | undefined {
     return this.isElasticsearchV7 ? '_doc' : undefined;
   }
 
@@ -358,20 +356,9 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
   /**
    * generate object that is related to page.grant*
    */
-  generateDocContentsRelatedToRestriction(page) {
-    let grantedUserIds = null;
-    if (page.grantedUsers != null && page.grantedUsers.length > 0) {
-      grantedUserIds = page.grantedUsers.map((user) => {
-        const userId = (user._id == null) ? user : user._id;
-        return userId.toString();
-      });
-    }
-
-    let grantedGroupIds = [];
-    grantedGroupIds = page.grantedGroups.map((group) => {
-      const groupId = (group.item._id == null) ? group.item : group.item._id;
-      return groupId.toString();
-    });
+  generateDocContentsRelatedToRestriction(page: AggregatedPage) {
+    const grantedUserIds = page.grantedUsers.map(user => getIdForRef(user));
+    const grantedGroupIds = page.grantedGroups.map(group => getIdForRef(group.item));
 
     return {
       grant: page.grant,
@@ -380,10 +367,7 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
     };
   }
 
-  prepareBodyForCreate(body, page) {
-    if (!Array.isArray(body)) {
-      throw new Error('Body must be an array.');
-    }
+  prepareBodyForCreate(page: AggregatedPage): [BulkWriteCommand, BulkWriteBody] {
 
     const command = {
       index: {
@@ -393,27 +377,22 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
       },
     };
 
-    const bookmarkCount = page.bookmarkCount || 0;
-    const seenUsersCount = page.seenUsers?.length || 0;
-    let document = {
+    const document: BulkWriteBody = {
       path: page.path,
       body: page.revision.body,
-      // username: page.creator?.username, // available Node.js v14 and above
-      username: page.creator != null ? page.creator.username : null,
-      comments: page.comments,
-      comment_count: page.commentCount,
-      bookmark_count: bookmarkCount,
-      seenUsers_count: seenUsersCount,
-      like_count: page.liker?.length || 0,
+      username: page.creator?.username,
+      comments: page.commentsCount > 0 ? page.comments : undefined,
+      comment_count: page.commentsCount,
+      bookmark_count: page.bookmarksCount,
+      like_count: page.likeCount,
+      seenUsers_count: page.seenUsersCount,
       created_at: page.createdAt,
       updated_at: page.updatedAt,
       tag_names: page.tagNames,
+      ...this.generateDocContentsRelatedToRestriction(page),
     };
 
-    document = Object.assign(document, this.generateDocContentsRelatedToRestriction(page));
-
-    body.push(command);
-    body.push(document);
+    return [command, document];
   }
 
   prepareBodyForDelete(body, page) {
@@ -456,91 +435,29 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
   async updateOrInsertPages(queryFactory, option: UpdateOrInsertPagesOpts = {}) {
     const { shouldEmitProgress = false, invokeGarbageCollection = false } = option;
 
-    const Page = mongoose.model('Page') as unknown as PageModel;
+    const Page = mongoose.model<IPage, PageModel>('Page');
     const { PageQueryBuilder } = Page;
-    const Bookmark = mongoose.model('Bookmark') as any; // TODO: typescriptize model
 
     const socket = shouldEmitProgress ? this.socketIoService.getAdminSocket() : null;
 
     // prepare functions invoked from custom streams
     const prepareBodyForCreate = this.prepareBodyForCreate.bind(this);
-    const shouldIndexed = this.shouldIndexed.bind(this);
     const bulkWrite = this.client.bulk.bind(this.client);
 
-    const findQuery = new PageQueryBuilder(queryFactory()).query;
-    const countQuery = new PageQueryBuilder(queryFactory()).query;
+    const matchQuery = new PageQueryBuilder(queryFactory()).query;
 
+    const countQuery = new PageQueryBuilder(queryFactory()).query;
     const totalCount = await countQuery.count();
 
-    const readStream = findQuery
-      // populate data which will be referenced by prepareBodyForCreate()
-      .populate([
-        { path: 'creator', model: 'User', select: 'username' },
-        { path: 'revision', model: 'Revision', select: 'body' },
-      ])
-      .lean()
-      .cursor();
-
-    let skipped = 0;
-    const thinOutStream = new Transform({
-      objectMode: true,
-      async transform(doc, encoding, callback) {
-        if (shouldIndexed(doc)) {
-          this.push(doc);
-        }
-        else {
-          skipped++;
-        }
-        callback();
-      },
-    });
+    const maxBodyLengthToIndex = configManager.getConfig('crowi', 'app:elasticsearchMaxBodyLengthToIndex');
+
+    const readStream = Page.aggregate<AggregatedPage>(
+      aggregatePipelineToIndex(maxBodyLengthToIndex, matchQuery),
+    ).cursor();
 
     const bulkSize: number = configManager.getConfig('crowi', 'app:elasticsearchReindexBulkSize');
     const batchStream = createBatchStream(bulkSize);
 
-    const appendBookmarkCountStream = new Transform({
-      objectMode: true,
-      async transform(chunk, encoding, callback) {
-        const pageIds = chunk.map(doc => doc._id);
-
-        const idToCountMap = await Bookmark.getPageIdToCountMap(pageIds);
-        const idsHavingCount = Object.keys(idToCountMap);
-
-        // append count
-        chunk
-          .filter(doc => idsHavingCount.includes(doc._id.toString()))
-          .forEach((doc) => {
-            // append count from idToCountMap
-            doc.bookmarkCount = idToCountMap[doc._id.toString()];
-          });
-
-        this.push(chunk);
-        callback();
-      },
-    });
-
-
-    const appendCommentStream = new Transform({
-      objectMode: true,
-      async transform(chunk, encoding, callback) {
-        const pageIds = chunk.map(doc => doc._id);
-
-        const idToCommentMap = await Comment.getPageIdToCommentMap(pageIds);
-        const idsHavingComment = Object.keys(idToCommentMap);
-
-        // append comments
-        chunk
-          .filter(doc => idsHavingComment.includes(doc._id.toString()))
-          .forEach((doc) => {
-            // append comments from idToCommentMap
-            doc.comments = idToCommentMap[doc._id.toString()];
-          });
-
-        this.push(chunk);
-        callback();
-      },
-    });
-
     const appendTagNamesStream = new Transform({
       objectMode: true,
       async transform(chunk, encoding, callback) {
@@ -552,7 +469,7 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
         // append tagNames
         chunk
           .filter(doc => idsHavingTagNames.includes(doc._id.toString()))
-          .forEach((doc) => {
+          .forEach((doc: AggregatedPage) => {
             // append tagName from idToTagNamesMap
             doc.tagNames = idToTagNamesMap[doc._id.toString()];
           });
@@ -566,8 +483,10 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
     const writeStream = new Writable({
       objectMode: true,
       async write(batch, encoding, callback) {
-        const body = [];
-        batch.forEach(doc => prepareBodyForCreate(body, doc));
+        const body: (BulkWriteCommand|BulkWriteBody)[] = [];
+        batch.forEach((doc: AggregatedPage) => {
+          body.push(...prepareBodyForCreate(doc));
+        });
 
         try {
           const bulkResponse = await bulkWrite({
@@ -580,7 +499,7 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
           logger.info(`Adding pages progressing: (count=${count}, errors=${bulkResponse.errors}, took=${bulkResponse.took}ms)`);
 
           if (shouldEmitProgress) {
-            socket?.emit(SocketEventName.AddPageProgress, { totalCount, count, skipped });
+            socket?.emit(SocketEventName.AddPageProgress, { totalCount, count });
           }
         }
         catch (err) {
@@ -601,20 +520,17 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
         callback();
       },
       final(callback) {
-        logger.info(`Adding pages has completed: (totalCount=${totalCount}, skipped=${skipped})`);
+        logger.info(`Adding pages has completed: (totalCount=${totalCount})`);
 
         if (shouldEmitProgress) {
-          socket?.emit(SocketEventName.FinishAddPage, { totalCount, count, skipped });
+          socket?.emit(SocketEventName.FinishAddPage, { totalCount, count });
         }
         callback();
       },
     });
 
     readStream
-      .pipe(thinOutStream)
       .pipe(batchStream)
-      .pipe(appendBookmarkCountStream)
-      .pipe(appendCommentStream)
       .pipe(appendTagNamesStream)
       .pipe(writeStream);
 
@@ -977,30 +893,12 @@ class ElasticsearchDelegator implements SearchDelegator<Data, ESTermsKey, ESQuer
 
   async syncPageUpdated(page, user) {
     logger.debug('SearchClient.syncPageUpdated', page.path);
-
-    // delete if page should not indexed
-    if (!this.shouldIndexed(page)) {
-      try {
-        await this.deletePages([page]);
-      }
-      catch (err) {
-        logger.error('deletePages:ES Error', err);
-      }
-      return;
-    }
-
     return this.updateOrInsertPageById(page._id);
   }
 
   // remove pages which should not be indexed
   async syncPagesUpdated(pages, user) {
     const shoudDeletePages: any[] = [];
-    pages.forEach((page) => {
-      logger.debug('SearchClient.syncPageUpdated', page.path);
-      if (!this.shouldIndexed(page)) {
-        shoudDeletePages.push(page);
-      }
-    });
 
     // delete if page should not indexed
     try {

+ 4 - 0
apps/app/src/stores-universal/context.tsx

@@ -88,6 +88,10 @@ export const useIsSearchServiceReachable = (initialData?: boolean) : SWRResponse
   return useContextSWR<boolean, Error>('isSearchServiceReachable', initialData);
 };
 
+/**
+ * Context store holding the maximum page body length to be indexed for full-text search.
+ *
+ * @param initialData value to seed the store with
+ * @returns the SWR response wrapping the configured length
+ */
+export const useElasticsearchMaxBodyLengthToIndex = (initialData?: number) : SWRResponse<number, Error> => {
+  // explicit type arguments, in line with the neighbouring search-service hook
+  return useContextSWR<number, Error>('elasticsearchMaxBodyLengthToIndex', initialData);
+};
+
 export const useIsMailerSetup = (initialData?: boolean): SWRResponse<boolean, any> => {
   return useContextSWR('isMailerSetup', initialData);
 };