| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430 |
- import xss from 'xss';
- import { SearchDelegatorName } from '~/interfaces/named-query';
- import { IFormattedSearchResult } from '~/interfaces/search';
- import loggerFactory from '~/utils/logger';
- import NamedQuery from '../models/named-query';
- import {
- SearchDelegator, SearchQueryParser, SearchResolver, ParsedQuery, Result, MetaData, SearchableData, QueryTerms,
- } from '../interfaces/search';
- import ElasticsearchDelegator from './search-delegator/elasticsearch';
- import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
- import { PageModel } from '../models/page';
- import { serializeUserSecurely } from '../models/serializers/user-serializer';
- // eslint-disable-next-line no-unused-vars
- const logger = loggerFactory('growi:service:search');
- // options for filtering xss
- const filterXssOptions = {
- whiteList: {
- em: ['class'],
- },
- };
- const filterXss = new xss.FilterXSS(filterXssOptions);
- const normalizeQueryString = (_queryString: string): string => {
- let queryString = _queryString.trim();
- queryString = queryString.replace(/\s+/g, ' ');
- return queryString;
- };
- class SearchService implements SearchQueryParser, SearchResolver {
- crowi!: any
- configManager!: any
- isErrorOccuredOnHealthcheck: boolean | null
- isErrorOccuredOnSearching: boolean | null
- fullTextSearchDelegator: any & SearchDelegator
- nqDelegators: {[key in SearchDelegatorName]: SearchDelegator}
- constructor(crowi) {
- this.crowi = crowi;
- this.configManager = crowi.configManager;
- this.isErrorOccuredOnHealthcheck = null;
- this.isErrorOccuredOnSearching = null;
- try {
- this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
- this.nqDelegators = this.generateNQDelegators(this.fullTextSearchDelegator);
- logger.info('Succeeded to initialize search delegators');
- }
- catch (err) {
- logger.error(err);
- }
- if (this.isConfigured) {
- this.fullTextSearchDelegator.init();
- this.registerUpdateEvent();
- }
- }
- get isConfigured() {
- return this.fullTextSearchDelegator != null;
- }
- get isReachable() {
- return this.isConfigured && !this.isErrorOccuredOnHealthcheck && !this.isErrorOccuredOnSearching;
- }
- get isElasticsearchEnabled() {
- const uri = this.configManager.getConfig('crowi', 'app:elasticsearchUri');
- return uri != null && uri.length > 0;
- }
- generateFullTextSearchDelegator() {
- logger.info('Initializing search delegator');
- if (this.isElasticsearchEnabled) {
- logger.info('Elasticsearch is enabled');
- return new ElasticsearchDelegator(this.configManager, this.crowi.socketIoService);
- }
- logger.info('No elasticsearch URI is specified so that full text search is disabled.');
- }
- generateNQDelegators(defaultDelegator: SearchDelegator): {[key in SearchDelegatorName]: SearchDelegator} {
- return {
- [SearchDelegatorName.DEFAULT]: defaultDelegator,
- [SearchDelegatorName.PRIVATE_LEGACY_PAGES]: new PrivateLegacyPagesDelegator(),
- };
- }
- registerUpdateEvent() {
- const pageEvent = this.crowi.event('page');
- pageEvent.on('create', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
- pageEvent.on('update', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
- pageEvent.on('delete', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
- pageEvent.on('revert', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
- pageEvent.on('deleteCompletely', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
- pageEvent.on('syncDescendantsDelete', this.fullTextSearchDelegator.syncDescendantsPagesDeleted.bind(this.fullTextSearchDelegator));
- pageEvent.on('updateMany', this.fullTextSearchDelegator.syncPagesUpdated.bind(this.fullTextSearchDelegator));
- pageEvent.on('syncDescendantsUpdate', this.fullTextSearchDelegator.syncDescendantsPagesUpdated.bind(this.fullTextSearchDelegator));
- pageEvent.on('addSeenUsers', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
- pageEvent.on('rename', () => {
- this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator);
- this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator);
- });
- const bookmarkEvent = this.crowi.event('bookmark');
- bookmarkEvent.on('create', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
- bookmarkEvent.on('delete', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
- const tagEvent = this.crowi.event('tag');
- tagEvent.on('update', this.fullTextSearchDelegator.syncTagChanged.bind(this.fullTextSearchDelegator));
- const commentEvent = this.crowi.event('comment');
- commentEvent.on('create', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
- commentEvent.on('update', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
- commentEvent.on('delete', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
- }
- resetErrorStatus() {
- this.isErrorOccuredOnHealthcheck = false;
- this.isErrorOccuredOnSearching = false;
- }
- async reconnectClient() {
- logger.info('Try to reconnect...');
- this.fullTextSearchDelegator.initClient();
- try {
- await this.getInfoForHealth();
- logger.info('Reconnecting succeeded.');
- this.resetErrorStatus();
- }
- catch (err) {
- throw err;
- }
- }
- async getInfo() {
- try {
- return await this.fullTextSearchDelegator.getInfo();
- }
- catch (err) {
- logger.error(err);
- throw err;
- }
- }
- async getInfoForHealth() {
- try {
- const result = await this.fullTextSearchDelegator.getInfoForHealth();
- this.isErrorOccuredOnHealthcheck = false;
- return result;
- }
- catch (err) {
- logger.error(err);
- // switch error flag, `isErrorOccuredOnHealthcheck` to be `false`
- this.isErrorOccuredOnHealthcheck = true;
- throw err;
- }
- }
- async getInfoForAdmin() {
- return this.fullTextSearchDelegator.getInfoForAdmin();
- }
- async normalizeIndices() {
- return this.fullTextSearchDelegator.normalizeIndices();
- }
- async rebuildIndex() {
- return this.fullTextSearchDelegator.rebuildIndex();
- }
- async parseSearchQuery(_queryString: string): Promise<ParsedQuery> {
- const regexp = new RegExp(/^\[nq:.+\]$/g); // https://regex101.com/r/FzDUvT/1
- const replaceRegexp = new RegExp(/\[nq:|\]/g);
- const queryString = normalizeQueryString(_queryString);
- // when Normal Query
- if (!regexp.test(queryString)) {
- return { queryString, terms: this.parseQueryString(queryString) };
- }
- // when Named Query
- const name = queryString.replace(replaceRegexp, '');
- const nq = await NamedQuery.findOne({ name });
- // will delegate to full-text search
- if (nq == null) {
- return { queryString, terms: this.parseQueryString(queryString) };
- }
- const { aliasOf, delegatorName } = nq;
- let parsedQuery;
- if (aliasOf != null) {
- parsedQuery = { queryString: normalizeQueryString(aliasOf), terms: this.parseQueryString(aliasOf) };
- }
- if (delegatorName != null) {
- parsedQuery = { queryString, delegatorName };
- }
- return parsedQuery;
- }
- async resolve(parsedQuery: ParsedQuery): Promise<[SearchDelegator, SearchableData | null]> {
- const { queryString, terms, delegatorName } = parsedQuery;
- if (delegatorName != null) {
- const nqDelegator = this.nqDelegators[delegatorName];
- if (nqDelegator != null) {
- return [nqDelegator, null];
- }
- }
- const data = {
- queryString,
- terms: terms as QueryTerms,
- };
- return [this.nqDelegators[SearchDelegatorName.DEFAULT], data];
- }
- async searchKeyword(keyword: string, user, userGroups, searchOpts): Promise<[Result<any> & MetaData, string]> {
- let parsedQuery;
- // parse
- try {
- parsedQuery = await this.parseSearchQuery(keyword);
- }
- catch (err) {
- logger.error('Error occurred while parseSearchQuery', err);
- throw err;
- }
- let delegator;
- let data;
- // resolve
- try {
- [delegator, data] = await this.resolve(parsedQuery);
- }
- catch (err) {
- logger.error('Error occurred while resolving search delegator', err);
- throw err;
- }
- return [await delegator.search(data, user, userGroups, searchOpts), delegator.name];
- }
- parseQueryString(queryString: string): QueryTerms {
- // terms
- const matchWords: string[] = [];
- const notMatchWords: string[] = [];
- const phraseWords: string[] = [];
- const notPhraseWords: string[] = [];
- const prefixPaths: string[] = [];
- const notPrefixPaths: string[] = [];
- const tags: string[] = [];
- const notTags: string[] = [];
- // First: Parse phrase keywords
- const phraseRegExp = new RegExp(/(-?"[^"]+")/g);
- const phrases = queryString.match(phraseRegExp);
- if (phrases !== null) {
- queryString = queryString.replace(phraseRegExp, ''); // eslint-disable-line no-param-reassign
- phrases.forEach((phrase) => {
- phrase.trim();
- if (phrase.match(/^-/)) {
- notPhraseWords.push(phrase.replace(/^-/, ''));
- }
- else {
- phraseWords.push(phrase);
- }
- });
- }
- // Second: Parse other keywords (include minus keywords)
- queryString.split(' ').forEach((word) => {
- if (word === '') {
- return;
- }
- // https://regex101.com/r/pN9XfK/1
- const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
- // https://regex101.com/r/3qw9FQ/1
- const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);
- if (matchNegative != null) {
- if (matchNegative[1] === 'prefix:') {
- notPrefixPaths.push(matchNegative[2]);
- }
- else if (matchNegative[1] === 'tag:') {
- notTags.push(matchNegative[2]);
- }
- else {
- notMatchWords.push(matchNegative[2]);
- }
- }
- else if (matchPositive != null) {
- if (matchPositive[1] === 'prefix:') {
- prefixPaths.push(matchPositive[2]);
- }
- else if (matchPositive[1] === 'tag:') {
- tags.push(matchPositive[2]);
- }
- else {
- matchWords.push(matchPositive[2]);
- }
- }
- });
- const terms = {
- match: matchWords,
- not_match: notMatchWords,
- phrase: phraseWords,
- not_phrase: notPhraseWords,
- prefix: prefixPaths,
- not_prefix: notPrefixPaths,
- tag: tags,
- not_tag: notTags,
- };
- return terms;
- }
- // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
- // So far, it determines by delegatorName passed by searchService.searchKeyword
- checkIsFormattable(searchResult, delegatorName: SearchDelegatorName): boolean {
- return delegatorName === SearchDelegatorName.DEFAULT;
- }
- /**
- * formatting result
- */
- async formatSearchResult(searchResult: Result<any> & MetaData, delegatorName): Promise<IFormattedSearchResult> {
- if (!this.checkIsFormattable(searchResult, delegatorName)) {
- const data = searchResult.data.map((page) => {
- return {
- pageData: page,
- pageMeta: {},
- };
- });
- return {
- data,
- totalCount: data.length,
- meta: searchResult.meta,
- };
- }
- /*
- * Format ElasticSearch result
- */
- const Page = this.crowi.model('Page') as PageModel;
- const User = this.crowi.model('User');
- const result = {} as IFormattedSearchResult;
- // get page data
- const pageIds = searchResult.data.map((page) => { return page._id });
- const findPageResult = await Page.findListByPageIds(pageIds);
- // set meta data
- result.meta = searchResult.meta;
- result.totalCount = findPageResult.totalCount;
- // set search result page data
- result.data = searchResult.data.map((data) => {
- const pageData = findPageResult.pages.find((pageData) => {
- return pageData.id === data._id;
- });
- // add tags and seenUserCount to pageData
- pageData._doc.tags = data._source.tag_names;
- pageData._doc.seenUserCount = (pageData.seenUsers && pageData.seenUsers.length) || 0;
- // serialize lastUpdateUser
- if (pageData.lastUpdateUser != null && pageData.lastUpdateUser instanceof User) {
- pageData.lastUpdateUser = serializeUserSecurely(pageData.lastUpdateUser);
- }
- // const data = searchResult.data.find((data) => {
- // return pageData.id === data._id;
- // });
- // increment elasticSearchResult
- let elasticSearchResult;
- const highlightData = data._highlight;
- if (highlightData != null) {
- const snippet = highlightData['body.en'] || highlightData['body.ja'] || '';
- const pathMatch = highlightData['path.en'] || highlightData['path.ja'] || '';
- const isHtmlInPath = highlightData['path.en'] != null || highlightData['path.ja'] != null;
- elasticSearchResult = {
- snippet: filterXss.process(snippet),
- highlightedPath: filterXss.process(pathMatch),
- isHtmlInPath,
- };
- }
- // generate pageMeta data
- const pageMeta = {
- bookmarkCount: data._source.bookmark_count || 0,
- elasticSearchResult,
- };
- return { pageData, pageMeta };
- });
- return result;
- }
- }
- export default SearchService;
|