| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670 |
- import type { IPageHasId } from '@growi/core';
- import { serializeUserSecurely } from '@growi/core/dist/models/serializers';
- import mongoose from 'mongoose';
- import { FilterXSS } from 'xss';
- import { CommentEvent, commentEvent } from '~/features/comment/server';
- import {
- isIncludeAiMenthion,
- removeAiMenthion,
- } from '~/features/search/utils/ai';
- import { SearchDelegatorName } from '~/interfaces/named-query';
- import type {
- IFormattedSearchResult,
- IPageWithSearchMeta,
- ISearchResult,
- } from '~/interfaces/search';
- import loggerFactory from '~/utils/logger';
- import type Crowi from '../crowi';
- import type { ObjectIdLike } from '../interfaces/mongoose-utils';
- import type {
- ParsedQuery,
- QueryTerms,
- SearchableData,
- SearchDelegator,
- SearchQueryParser,
- SearchResolver,
- } from '../interfaces/search';
- import NamedQuery from '../models/named-query';
- import type { PageModel } from '../models/page';
- import { SearchError } from '../models/vo/search-error';
- import { hasIntersection } from '../util/compare-objectId';
- import { configManager } from './config-manager';
- import ElasticsearchDelegator from './search-delegator/elasticsearch';
- import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
// Module-level logger registered under the name 'growi:service:search'.
const logger = loggerFactory('growi:service:search');
/**
 * Type guard that is true for every value except `null` and `undefined`.
 * Intended for use with `Array.prototype.filter` to drop empty slots while
 * narrowing the element type.
 */
const nonNullable = <T>(value: T): value is NonNullable<T> => {
  return value !== null && value !== undefined;
};
// Options for the XSS filter.
// Keep the property key spelled `whiteList` (do not rename it to `whitelist`):
// the key name is dictated by the 'xss' library.
const allowedTagAttributes = {
  em: ['class'],
};
const filterXssOptions = {
  whiteList: allowedTagAttributes,
};
const filterXss = new FilterXSS(filterXssOptions);
/**
 * Normalize a raw query string: trim it, pass it through `removeAiMenthion`,
 * then collapse every run of whitespace into a single space.
 */
const normalizeQueryString = (_queryString: string): string => {
  const trimmed = _queryString.trim();
  const withoutAiMention = removeAiMenthion(trimmed);
  return withoutAiMention.replace(/\s+/g, ' ');
};
/** Normalize a named-query name by stripping leading and trailing whitespace. */
const normalizeNQName = (nqName: string): string => nqName.trim();
/**
 * Load the pages whose `_id` is in `pageIds`, with user references populated.
 *
 * Builds a `PageQueryBuilder` over `Page.find({ _id: { $in: pageIds } })`,
 * populates list data and the `creator` path using the non-confidential user
 * fields, then runs the query twice: once as 'find' (on a clone) and once as
 * 'count'.
 *
 * @param pageIds ids of the pages to load
 * @param crowi object exposing `model(name)` for the 'Page' and 'User' models
 * @returns the found pages together with the count result
 */
const findPageListByIds = async (pageIds: ObjectIdLike[], crowi: any) => {
  const Page = crowi.model('Page') as unknown as PageModel;
  const User = crowi.model('User');

  const idCondition = { _id: { $in: pageIds } };
  const queryBuilder = new Page.PageQueryBuilder(Page.find(idCondition), false);

  // offset and limit are unnecessary here
  queryBuilder.addConditionToPagenate(undefined, undefined);
  // populate lastUpdateUser
  queryBuilder.populateDataToList(User.USER_FIELDS_EXCEPT_CONFIDENTIAL);

  const creatorPopulation = {
    path: 'creator',
    select: User.USER_FIELDS_EXCEPT_CONFIDENTIAL,
  };
  queryBuilder.query = queryBuilder.query.populate(creatorPopulation);

  // The 'find' run uses a clone so the same query can be executed again for the count.
  const pages = await queryBuilder.query.clone().exec('find');
  const totalCount = await queryBuilder.query.exec('count');

  return { pages, totalCount };
};
/**
 * Server-side entry point for page search.
 *
 * The service owns the full-text search delegator (created only when an
 * Elasticsearch URI is configured) and the delegators used for named queries.
 * It subscribes the full-text delegator to page / bookmark / tag / comment
 * events, parses raw keyword strings into `ParsedQuery` objects, resolves the
 * delegator that should run a parsed query, and formats delegator results.
 */
class SearchService implements SearchQueryParser, SearchResolver {
  crowi: Crowi;

  // `null` after construction; `false` after a successful health check or
  // `resetErrorStatus()`; `true` after a failed health check.
  isErrorOccuredOnHealthcheck: boolean | null;

  // `null` after construction; reset to `false` by `resetErrorStatus()`.
  // This class never sets it to `true` itself.
  isErrorOccuredOnSearching: boolean | null;

  // `undefined` when no Elasticsearch URI is configured.
  fullTextSearchDelegator: any & ElasticsearchDelegator;

  nqDelegators: { [key in SearchDelegatorName]: SearchDelegator };

  /**
   * Create the delegators and, when full-text search is configured,
   * initialize the delegator and subscribe it to update events.
   *
   * A failure while creating the delegators is logged and not rethrown.
   */
  constructor(crowi: Crowi) {
    this.crowi = crowi;
    this.isErrorOccuredOnHealthcheck = null;
    this.isErrorOccuredOnSearching = null;

    try {
      this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
      this.nqDelegators = this.generateNQDelegators(
        this.fullTextSearchDelegator,
      );
      logger.info('Succeeded to initialize search delegators');
    } catch (err) {
      logger.error(err);
    }

    if (this.isConfigured) {
      this.fullTextSearchDelegator.init();
      this.registerUpdateEvent();
    }
  }

  /** True when a full-text search delegator has been created. */
  get isConfigured() {
    return this.fullTextSearchDelegator != null;
  }

  /** True when the service is configured and neither error flag is set. */
  get isReachable() {
    return (
      this.isConfigured &&
      !this.isErrorOccuredOnHealthcheck &&
      !this.isErrorOccuredOnSearching
    );
  }

  /** True when the 'app:elasticsearchUri' config value is a non-empty string. */
  get isElasticsearchEnabled() {
    const uri = configManager.getConfig('app:elasticsearchUri');
    return uri != null && uri.length > 0;
  }

  /**
   * Create the full-text search delegator.
   *
   * @returns an `ElasticsearchDelegator` when Elasticsearch is enabled,
   *   otherwise `undefined` (full-text search disabled)
   */
  generateFullTextSearchDelegator() {
    logger.info('Initializing search delegator');

    if (this.isElasticsearchEnabled) {
      logger.info('Elasticsearch is enabled');
      return new ElasticsearchDelegator(this.crowi.socketIoService);
    }

    logger.info(
      'No elasticsearch URI is specified so that full text search is disabled.',
    );
  }

  /**
   * Build the delegator table keyed by `SearchDelegatorName`.
   *
   * @param defaultDelegator delegator registered under `SearchDelegatorName.DEFAULT`
   */
  generateNQDelegators(defaultDelegator: ElasticsearchDelegator): {
    [key in SearchDelegatorName]: SearchDelegator;
  } {
    return {
      [SearchDelegatorName.DEFAULT]: defaultDelegator,
      [SearchDelegatorName.PRIVATE_LEGACY_PAGES]:
        new PrivateLegacyPagesDelegator() as unknown as SearchDelegator,
    };
  }

  /**
   * Subscribe the full-text search delegator to page, bookmark, tag and
   * comment events so that the index follows data changes.
   */
  registerUpdateEvent() {
    const delegator = this.fullTextSearchDelegator;

    const pageEvent = this.crowi.event('page');
    pageEvent.on('create', delegator.syncPageUpdated.bind(delegator));
    pageEvent.on('update', delegator.syncPageUpdated.bind(delegator));
    pageEvent.on('delete', (targetPage, deletedPage, user) => {
      // remove the original page from the index, then index the deleted one
      this.fullTextSearchDelegator.syncPageDeleted(targetPage, user);
      this.fullTextSearchDelegator.syncPageUpdated(deletedPage, user);
    });
    pageEvent.on('revert', (targetPage, revertedPage, user) => {
      // remove the page being reverted from the index, then index the reverted one
      this.fullTextSearchDelegator.syncPageDeleted(targetPage, user);
      this.fullTextSearchDelegator.syncPageUpdated(revertedPage, user);
    });
    pageEvent.on('deleteCompletely', delegator.syncPageDeleted.bind(delegator));
    pageEvent.on(
      'syncDescendantsDelete',
      delegator.syncDescendantsPagesDeleted.bind(delegator),
    );
    pageEvent.on('updateMany', delegator.syncPagesUpdated.bind(delegator));
    pageEvent.on(
      'syncDescendantsUpdate',
      delegator.syncDescendantsPagesUpdated.bind(delegator),
    );
    pageEvent.on('addSeenUsers', delegator.syncPageUpdated.bind(delegator));
    pageEvent.on('rename', () => {
      // NOTE(review): these two statements only create bound functions and
      // never invoke them, so this listener performs no index update.
      // The arguments emitted with 'rename' are not visible from this file;
      // confirm them against the emitter before wiring the actual calls.
      this.fullTextSearchDelegator.syncPageDeleted.bind(
        this.fullTextSearchDelegator,
      );
      this.fullTextSearchDelegator.syncPageUpdated.bind(
        this.fullTextSearchDelegator,
      );
    });

    const bookmarkEvent = this.crowi.event('bookmark');
    bookmarkEvent.on('create', delegator.syncBookmarkChanged.bind(delegator));
    bookmarkEvent.on('delete', delegator.syncBookmarkChanged.bind(delegator));

    const tagEvent = this.crowi.event('tag');
    tagEvent.on('update', delegator.syncTagChanged.bind(delegator));

    commentEvent.on(
      CommentEvent.CREATE,
      delegator.syncCommentChanged.bind(delegator),
    );
    commentEvent.on(
      CommentEvent.UPDATE,
      delegator.syncCommentChanged.bind(delegator),
    );
    commentEvent.on(
      CommentEvent.DELETE,
      delegator.syncCommentChanged.bind(delegator),
    );
  }

  /** Clear both error flags. */
  resetErrorStatus() {
    this.isErrorOccuredOnHealthcheck = false;
    this.isErrorOccuredOnSearching = false;
  }

  /**
   * Re-initialize the delegator's client and verify it with a health check.
   * The error flags are reset only when the health check succeeds; a failing
   * health check rejects with the underlying error.
   */
  async reconnectClient() {
    logger.info('Try to reconnect...');
    this.fullTextSearchDelegator.initClient();

    await this.getInfoForHealth();

    logger.info('Reconnecting succeeded.');
    this.resetErrorStatus();
  }

  /** Return the delegator's info; errors are logged and rethrown. */
  async getInfo() {
    try {
      return await this.fullTextSearchDelegator.getInfo();
    } catch (err) {
      logger.error(err);
      throw err;
    }
  }

  /**
   * Run the delegator's health check and record the outcome in
   * `isErrorOccuredOnHealthcheck`. Errors are logged and rethrown.
   */
  async getInfoForHealth() {
    try {
      const result = await this.fullTextSearchDelegator.getInfoForHealth();
      this.isErrorOccuredOnHealthcheck = false;
      return result;
    } catch (err) {
      logger.error(err);
      // switch error flag, `isErrorOccuredOnHealthcheck` to be `true`
      this.isErrorOccuredOnHealthcheck = true;
      throw err;
    }
  }

  /** Forward to the delegator's `getInfoForAdmin()`. */
  async getInfoForAdmin() {
    return this.fullTextSearchDelegator.getInfoForAdmin();
  }

  /** Forward to the delegator's `normalizeIndices()`. */
  async normalizeIndices() {
    return this.fullTextSearchDelegator.normalizeIndices();
  }

  /** Forward to the delegator's `rebuildIndex()`. */
  async rebuildIndex() {
    return this.fullTextSearchDelegator.rebuildIndex();
  }

  /**
   * Parse a raw query string, optionally through a named query.
   *
   * - Without `nqName`, or when no named query document matches, the
   *   normalized query string and its terms are returned.
   * - When the named query has `aliasOf`, the alias replaces the query: both
   *   `queryString` and `terms` are derived from the normalized alias.
   * - Otherwise the named query's `delegatorName` is attached to the result.
   *
   * @param queryString raw query string
   * @param nqName named query name, or `null` for none
   */
  async parseSearchQuery(
    queryString: string,
    nqName: string | null,
  ): Promise<ParsedQuery> {
    const normalizedQueryString = normalizeQueryString(queryString);
    const terms = this.parseQueryString(normalizedQueryString);

    if (nqName == null) {
      return { queryString: normalizedQueryString, terms };
    }

    const nq = await NamedQuery.findOne({ name: normalizeNQName(nqName) });

    // will delegate to full-text search
    if (nq == null) {
      logger.debug(
        `Delegated to full-text search since a named query document did not found. (nqName="${nqName}")`,
      );
      return { queryString: normalizedQueryString, terms };
    }

    const { aliasOf, delegatorName } = nq;

    if (aliasOf != null) {
      // Parse the terms from the same normalized string that is returned as
      // `queryString`, so the two can never disagree.
      const normalizedAlias = normalizeQueryString(aliasOf);
      return {
        queryString: normalizedAlias,
        terms: this.parseQueryString(normalizedAlias),
      };
    }

    return { queryString: normalizedQueryString, terms, delegatorName };
  }

  /**
   * Pick the delegator for a parsed query (falling back to
   * `SearchDelegatorName.DEFAULT`) and build the data passed to it.
   *
   * @returns a `[delegator, searchableData]` tuple
   */
  async resolve(
    parsedQuery: ParsedQuery,
  ): Promise<[SearchDelegator, SearchableData]> {
    const {
      queryString,
      terms,
      delegatorName = SearchDelegatorName.DEFAULT,
    } = parsedQuery;

    const nqDelegator = this.nqDelegators[delegatorName];
    const data = { queryString, terms };

    return [nqDelegator, data];
  }

  /**
   * Throws SearchError if the delegator does not accept the terms as-is.
   *
   * @param delegator delegator that will run the search
   * @param data data whose `terms` are validated
   * @throws SearchError carrying the keys reported by `delegator.validateTerms`
   */
  private validateSearchableData(
    delegator: SearchDelegator,
    data: SearchableData,
  ): void {
    const { terms } = data;

    if (delegator.isTermsNormalized(terms)) {
      return;
    }

    const unavailableTermsKeys = delegator.validateTerms(terms);

    throw new SearchError(
      'The query string includes unavailable terms.',
      unavailableTermsKeys,
    );
  }

  /**
   * Parse `keyword`, resolve the delegator, validate the terms and run the
   * search.
   *
   * Sets `searchOpts.vector = true` on the passed object when the keyword
   * includes the AI mention.
   *
   * @returns a `[searchResult, delegatorName | null]` tuple
   * @throws SearchError when the terms are not available for the delegator
   */
  async searchKeyword(
    keyword: string,
    nqName: string | null,
    user,
    userGroups,
    searchOpts,
  ): Promise<[ISearchResult<unknown>, string | null]> {
    let parsedQuery: ParsedQuery;
    // parse
    try {
      parsedQuery = await this.parseSearchQuery(keyword, nqName);
    } catch (err) {
      logger.error('Error occurred while parseSearchQuery', err);
      throw err;
    }

    if (isIncludeAiMenthion(keyword)) {
      searchOpts.vector = true;
    }

    let delegator: SearchDelegator;
    let data: SearchableData;
    // resolve
    try {
      [delegator, data] = await this.resolve(parsedQuery);
    } catch (err) {
      logger.error('Error occurred while resolving search delegator', err);
      throw err;
    }

    // throws
    this.validateSearchableData(delegator, data);

    return [
      await delegator.search(data, user, userGroups, searchOpts),
      delegator.name ?? null,
    ];
  }

  /**
   * Split a query string into term buckets.
   *
   * Quoted phrases (optionally prefixed with `-`) are extracted first and kept
   * with their surrounding quotes. The rest is split on single spaces; each
   * word may carry a `prefix:` or `tag:` qualifier and a leading `-` for
   * negation.
   */
  parseQueryString(queryString: string): QueryTerms {
    // terms
    const matchWords: string[] = [];
    const notMatchWords: string[] = [];
    const phraseWords: string[] = [];
    const notPhraseWords: string[] = [];
    const prefixPaths: string[] = [];
    const notPrefixPaths: string[] = [];
    const tags: string[] = [];
    const notTags: string[] = [];

    // First: Parse phrase keywords
    const phraseRegExp = /(-?"[^"]+")/g;
    let remaining = queryString;
    const phrases = queryString.match(phraseRegExp);

    if (phrases !== null) {
      remaining = queryString.replace(phraseRegExp, '');

      // A match always starts with `-` or `"` and ends with `"`, so there is
      // no surrounding whitespace to strip.
      phrases.forEach((phrase) => {
        if (phrase.match(/^-/)) {
          notPhraseWords.push(phrase.replace(/^-/, ''));
        } else {
          phraseWords.push(phrase);
        }
      });
    }

    // Second: Parse other keywords (include minus keywords)
    remaining.split(' ').forEach((word) => {
      if (word === '') {
        return;
      }

      // https://regex101.com/r/pN9XfK/1
      const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
      // https://regex101.com/r/3qw9FQ/1
      const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);

      if (matchNegative != null) {
        if (matchNegative[1] === 'prefix:') {
          notPrefixPaths.push(matchNegative[2]);
        } else if (matchNegative[1] === 'tag:') {
          notTags.push(matchNegative[2]);
        } else {
          notMatchWords.push(matchNegative[2]);
        }
      } else if (matchPositive != null) {
        if (matchPositive[1] === 'prefix:') {
          prefixPaths.push(matchPositive[2]);
        } else if (matchPositive[1] === 'tag:') {
          tags.push(matchPositive[2]);
        } else {
          matchWords.push(matchPositive[2]);
        }
      }
    });

    return {
      match: matchWords,
      not_match: notMatchWords,
      phrase: phraseWords,
      not_phrase: notPhraseWords,
      prefix: prefixPaths,
      not_prefix: notPrefixPaths,
      tag: tags,
      not_tag: notTags,
    };
  }

  // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
  // So far, it determines by delegatorName passed by searchService.searchKeyword
  /** True only for results produced by the default delegator. */
  checkIsFormattable(
    searchResult,
    delegatorName: SearchDelegatorName,
  ): boolean {
    return delegatorName === SearchDelegatorName.DEFAULT;
  }

  /**
   * Format a delegator result for the client.
   *
   * Results from a non-default delegator are only wrapped as `{ data: page }`.
   * For the default delegator, each hit is joined with its page document
   * (hits whose page cannot be loaded are dropped), user references are
   * serialized, and highlight data is sanitized with the XSS filter.
   */
  async formatSearchResult(
    searchResult: ISearchResult<any>,
    delegatorName: SearchDelegatorName,
    user,
    userGroups,
  ): Promise<IFormattedSearchResult> {
    if (!this.checkIsFormattable(searchResult, delegatorName)) {
      const data: IPageWithSearchMeta[] = searchResult.data.map((page) => {
        return {
          data: page as IPageHasId,
        };
      });

      return {
        data,
        meta: searchResult.meta,
      };
    }

    /*
     * Format ElasticSearch result
     */
    const User = this.crowi.model('User');
    const result = {} as IFormattedSearchResult;

    // get page data
    const pageIds: string[] = searchResult.data.map((page) => {
      return page._id;
    });
    const findPageResult = await findPageListByIds(pageIds, this.crowi);

    // Index the loaded pages by id once instead of scanning the list per hit.
    // The first page seen for an id wins, like `Array.prototype.find`.
    const pageDataById = new Map<string, any>();
    for (const loadedPage of findPageResult.pages) {
      if (!pageDataById.has(loadedPage.id)) {
        pageDataById.set(loadedPage.id, loadedPage);
      }
    }

    // set meta data
    result.meta = searchResult.meta;

    // set search result page data
    const pages: (IPageWithSearchMeta | null)[] = searchResult.data.map(
      (data) => {
        const pageData = pageDataById.get(data._id);

        if (pageData == null) {
          return null;
        }

        // add tags and seenUserCount to pageData
        pageData._doc.tags = data._source.tag_names;
        pageData._doc.seenUserCount =
          (pageData.seenUsers && pageData.seenUsers.length) || 0;

        // serialize lastUpdateUser
        if (
          pageData.lastUpdateUser != null &&
          pageData.lastUpdateUser instanceof User
        ) {
          pageData.lastUpdateUser = serializeUserSecurely(
            pageData.lastUpdateUser,
          );
        }

        // build elasticSearchResult from the highlight data, if any
        let elasticSearchResult:
          | { snippet: string | null; highlightedPath: string | null }
          | undefined;
        const highlightData = data._highlight;

        if (highlightData != null) {
          // the snippet is withheld when the viewer may not see the body
          const snippet = this.canShowSnippet(pageData, user, userGroups)
            ? highlightData.body ||
              highlightData['body.en'] ||
              highlightData['body.ja'] ||
              highlightData.comments ||
              highlightData['comments.en'] ||
              highlightData['comments.ja']
            : null;
          const pathMatch =
            highlightData['path.en'] || highlightData['path.ja'];

          elasticSearchResult = {
            snippet:
              snippet != null && typeof snippet[0] === 'string'
                ? filterXss.process(snippet)
                : null,
            highlightedPath:
              pathMatch != null && typeof pathMatch[0] === 'string'
                ? filterXss.process(pathMatch)
                : null,
          };
        }

        // serialize creator
        if (pageData.creator != null && pageData.creator instanceof User) {
          pageData.creator = serializeUserSecurely(pageData.creator);
        }

        // generate pageMeta data
        const pageMeta = {
          bookmarkCount: data._source.bookmark_count || 0,
          elasticSearchResult,
        };

        return { data: pageData, meta: pageMeta };
      },
    );

    result.data = pages.filter(nonNullable);
    return result;
  }

  /**
   * Decide whether a body/comment snippet of `pageData` may be shown.
   *
   * - `GRANT_RESTRICTED`: never.
   * - `GRANT_OWNER`: only when `user` is the first granted user. A page
   *   without any granted user is treated as not showable.
   * - `GRANT_USER_GROUP`: only when `userGroups` intersects the page's
   *   granted groups.
   * - any other grant: always.
   */
  canShowSnippet(pageData, user, userGroups): boolean {
    const Page = mongoose.model('Page') as unknown as PageModel;

    const testGrant = pageData.grant;
    const testGrantedUser = pageData.grantedUsers?.[0];
    const testGrantedGroups = pageData.grantedGroups;

    if (testGrant === Page.GRANT_RESTRICTED) {
      return false;
    }

    if (testGrant === Page.GRANT_OWNER) {
      // without this guard a missing owner would throw on `.toString()`
      if (user == null || testGrantedUser == null) return false;

      return user._id.toString() === testGrantedUser.toString();
    }

    if (testGrant === Page.GRANT_USER_GROUP) {
      if (userGroups == null) return false;

      return hasIntersection(
        userGroups.map((id) => id.toString()),
        testGrantedGroups,
      );
    }

    return true;
  }
}
- export default SearchService;
|