// search.ts
  1. import type { IPageHasId } from '@growi/core';
  2. import { serializeUserSecurely } from '@growi/core/dist/models/serializers';
  3. import mongoose from 'mongoose';
  4. import { FilterXSS } from 'xss';
  5. import { CommentEvent, commentEvent } from '~/features/comment/server';
  6. import {
  7. isIncludeAiMenthion,
  8. removeAiMenthion,
  9. } from '~/features/search/utils/ai';
  10. import { SearchDelegatorName } from '~/interfaces/named-query';
  11. import type {
  12. IFormattedSearchResult,
  13. IPageWithSearchMeta,
  14. ISearchResult,
  15. } from '~/interfaces/search';
  16. import loggerFactory from '~/utils/logger';
  17. import type Crowi from '../crowi';
  18. import type { ObjectIdLike } from '../interfaces/mongoose-utils';
  19. import type {
  20. ParsedQuery,
  21. QueryTerms,
  22. SearchableData,
  23. SearchDelegator,
  24. SearchQueryParser,
  25. SearchResolver,
  26. } from '../interfaces/search';
  27. import NamedQuery from '../models/named-query';
  28. import type { PageModel } from '../models/page';
  29. import { SearchError } from '../models/vo/search-error';
  30. import { hasIntersection } from '../util/compare-objectId';
  31. import { configManager } from './config-manager';
  32. import ElasticsearchDelegator from './search-delegator/elasticsearch';
  33. import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
  // Logger shared by everything in this module.
  const logger = loggerFactory('growi:service:search');

  /**
   * Type guard that accepts every value except `null` and `undefined`.
   * Handy as an `Array.prototype.filter` predicate to narrow `(T | null)[]` to `T[]`.
   */
  function nonNullable<T>(value: T): value is NonNullable<T> {
    return value !== null && value !== undefined;
  }
  // Options for the XSS filter used by this module: the `em` tag is whitelisted
  // together with its `class` attribute.
  // Do not rename the property key to 'whitelist': the 'xss' library expects 'whiteList'.
  const filterXssOptions = {
    whiteList: { em: ['class'] },
  };

  // Filter instance built once at module load and reused.
  const filterXss = new FilterXSS(filterXssOptions);
  /**
   * Normalizes a raw search query string.
   *
   * The AI mention is stripped via `removeAiMenthion`, every run of whitespace
   * is collapsed into a single space, and the result is trimmed. Trimming happens
   * last so that whitespace left behind by the mention removal (e.g. a mention at
   * the very start or end of the query) does not survive at the edges.
   *
   * @param _queryString raw query string as typed by the user
   * @returns the normalized query string
   */
  const normalizeQueryString = (_queryString: string): string => {
    return removeAiMenthion(_queryString).replace(/\s+/g, ' ').trim();
  };
  /**
   * Normalizes a named-query name by stripping leading and trailing whitespace.
   *
   * @param nqName name as supplied by the caller
   * @returns the trimmed name
   */
  const normalizeNQName = (nqName: string): string => nqName.trim();
  /**
   * Loads the pages whose `_id` is contained in `pageIds`.
   *
   * The query is assembled with `Page.PageQueryBuilder`; `lastUpdateUser` (via
   * `populateDataToList`) and `creator` are populated with the non-confidential
   * user fields only.
   *
   * @param pageIds ids of the pages to load
   * @param crowi application object used to look up the `Page` and `User` models
   * @returns the matched page documents and the result of the `count` execution
   */
  const findPageListByIds = async (pageIds: ObjectIdLike[], crowi: any) => {
    const Page = crowi.model('Page') as unknown as PageModel;
    const User = crowi.model('User');
    const safeUserFields = User.USER_FIELDS_EXCEPT_CONFIDENTIAL;

    const baseQuery = Page.find({ _id: { $in: pageIds } });
    const builder = new Page.PageQueryBuilder(baseQuery, false);

    // neither offset nor limit is needed here
    builder.addConditionToPagenate(undefined, undefined);
    // populates lastUpdateUser
    builder.populateDataToList(safeUserFields);
    builder.query = builder.query.populate({
      path: 'creator',
      select: safeUserFields,
    });

    // the list is fetched from a clone so the same query can be executed again for the count
    const pages = await builder.query.clone().exec('find');
    const totalCount = await builder.query.exec('count');

    return { pages, totalCount };
  };
  /**
   * Entry point for page search.
   *
   * Owns the full-text search delegator (created only when an Elasticsearch URI
   * is configured) plus the named-query delegators, keeps the search index in
   * sync by subscribing to page / bookmark / tag / comment events, parses query
   * strings into terms, and formats raw search results for clients.
   */
  class SearchService implements SearchQueryParser, SearchResolver {
    crowi: Crowi;

    // Both flags are initialised to `null` by the constructor.
    isErrorOccuredOnHealthcheck: boolean | null;

    isErrorOccuredOnSearching: boolean | null;

    fullTextSearchDelegator: any & ElasticsearchDelegator;

    nqDelegators: { [key in SearchDelegatorName]: SearchDelegator };

    /**
     * Builds the delegators and, when full-text search is configured,
     * initialises the delegator and subscribes to the update events.
     * A failure while building the delegators is logged, not rethrown.
     *
     * @param crowi application object
     */
    constructor(crowi: Crowi) {
      this.crowi = crowi;

      this.isErrorOccuredOnHealthcheck = null;
      this.isErrorOccuredOnSearching = null;

      try {
        this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
        this.nqDelegators = this.generateNQDelegators(
          this.fullTextSearchDelegator,
        );
        logger.info('Succeeded to initialize search delegators');
      } catch (err) {
        logger.error(err);
      }

      if (this.isConfigured) {
        this.fullTextSearchDelegator.init();
        this.registerUpdateEvent();
      }
    }

    /** `true` when a full-text search delegator has been created. */
    get isConfigured() {
      return this.fullTextSearchDelegator != null;
    }

    /** `true` when configured and neither error flag is set. */
    get isReachable() {
      return (
        this.isConfigured &&
        !this.isErrorOccuredOnHealthcheck &&
        !this.isErrorOccuredOnSearching
      );
    }

    /** `true` when the `app:elasticsearchUri` config value is a non-empty string. */
    get isElasticsearchEnabled() {
      const uri = configManager.getConfig('app:elasticsearchUri');
      return uri != null && uri.length > 0;
    }

    /**
     * Creates the full-text search delegator.
     *
     * @returns an `ElasticsearchDelegator`, or `undefined` when no Elasticsearch URI is configured
     */
    generateFullTextSearchDelegator() {
      logger.info('Initializing search delegator');

      if (this.isElasticsearchEnabled) {
        logger.info('Elasticsearch is enabled');
        return new ElasticsearchDelegator(this.crowi.socketIoService);
      }

      logger.info(
        'No elasticsearch URI is specified so that full text search is disabled.',
      );
    }

    /**
     * Builds the delegator table keyed by `SearchDelegatorName`.
     *
     * @param defaultDelegator delegator registered under `SearchDelegatorName.DEFAULT`
     * @returns the delegator table
     */
    generateNQDelegators(defaultDelegator: ElasticsearchDelegator): {
      [key in SearchDelegatorName]: SearchDelegator;
    } {
      return {
        [SearchDelegatorName.DEFAULT]: defaultDelegator,
        [SearchDelegatorName.PRIVATE_LEGACY_PAGES]:
          new PrivateLegacyPagesDelegator() as unknown as SearchDelegator,
      };
    }

    /**
     * Subscribes the full-text search delegator to page, bookmark, tag and
     * comment events so that the index follows data changes.
     */
    registerUpdateEvent() {
      const delegator = this.fullTextSearchDelegator;
      // returns the named delegator method bound to the delegator itself
      const bound = (methodName: string) =>
        delegator[methodName].bind(delegator);

      const pageEvent = this.crowi.event('page');
      pageEvent.on('create', bound('syncPageUpdated'));
      pageEvent.on('update', bound('syncPageUpdated'));
      pageEvent.on('delete', (targetPage, deletedPage, user) => {
        this.fullTextSearchDelegator.syncPageDeleted(targetPage, user);
        this.fullTextSearchDelegator.syncPageUpdated(deletedPage, user);
      });
      pageEvent.on('revert', (targetPage, revertedPage, user) => {
        this.fullTextSearchDelegator.syncPageDeleted(targetPage, user);
        this.fullTextSearchDelegator.syncPageUpdated(revertedPage, user);
      });
      pageEvent.on('deleteCompletely', bound('syncPageDeleted'));
      pageEvent.on('syncDescendantsDelete', bound('syncDescendantsPagesDeleted'));
      pageEvent.on('updateMany', bound('syncPagesUpdated'));
      pageEvent.on('syncDescendantsUpdate', bound('syncDescendantsPagesUpdated'));
      pageEvent.on('addSeenUsers', bound('syncPageUpdated'));
      // NOTE(review): the previous handler only created bound functions and
      // discarded them, i.e. it never touched the index. The listener is kept as
      // an explicit no-op; confirm which arguments 'rename' is emitted with
      // before wiring a real sync here.
      pageEvent.on('rename', () => {});

      const bookmarkEvent = this.crowi.event('bookmark');
      bookmarkEvent.on('create', bound('syncBookmarkChanged'));
      bookmarkEvent.on('delete', bound('syncBookmarkChanged'));

      const tagEvent = this.crowi.event('tag');
      tagEvent.on('update', bound('syncTagChanged'));

      commentEvent.on(CommentEvent.CREATE, bound('syncCommentChanged'));
      commentEvent.on(CommentEvent.UPDATE, bound('syncCommentChanged'));
      commentEvent.on(CommentEvent.DELETE, bound('syncCommentChanged'));
    }

    /** Clears both error flags. */
    resetErrorStatus() {
      this.isErrorOccuredOnHealthcheck = false;
      this.isErrorOccuredOnSearching = false;
    }

    /**
     * Re-initialises the delegator's client and verifies it with a health check.
     * The error flags are cleared only when the health check succeeds; a failing
     * health check rejects with the original error.
     */
    async reconnectClient() {
      logger.info('Try to reconnect...');
      this.fullTextSearchDelegator.initClient();

      await this.getInfoForHealth();

      logger.info('Reconnecting succeeded.');
      this.resetErrorStatus();
    }

    /**
     * Returns the delegator's `getInfo()` result; errors are logged and rethrown.
     */
    async getInfo() {
      try {
        return await this.fullTextSearchDelegator.getInfo();
      } catch (err) {
        logger.error(err);
        throw err;
      }
    }

    /**
     * Returns the delegator's `getInfoForHealth()` result and records the
     * outcome in `isErrorOccuredOnHealthcheck`; errors are logged and rethrown.
     */
    async getInfoForHealth() {
      try {
        const result = await this.fullTextSearchDelegator.getInfoForHealth();
        this.isErrorOccuredOnHealthcheck = false;
        return result;
      } catch (err) {
        logger.error(err);
        // raise the error flag so that `isReachable` turns false
        this.isErrorOccuredOnHealthcheck = true;
        throw err;
      }
    }

    /** Delegates to the full-text search delegator's `getInfoForAdmin()`. */
    async getInfoForAdmin() {
      return this.fullTextSearchDelegator.getInfoForAdmin();
    }

    /** Delegates to the full-text search delegator's `normalizeIndices()`. */
    async normalizeIndices() {
      return this.fullTextSearchDelegator.normalizeIndices();
    }

    /** Delegates to the full-text search delegator's `rebuildIndex()`. */
    async rebuildIndex() {
      return this.fullTextSearchDelegator.rebuildIndex();
    }

    /**
     * Parses a query string, optionally resolving a named query.
     *
     * - Without `nqName`, or when no named query document matches, the
     *   normalized query string and its terms are returned.
     * - When the named query has `aliasOf`, the alias replaces the query string.
     * - Otherwise the named query's `delegatorName` is attached.
     *
     * @param queryString raw query string
     * @param nqName named query name, or `null`
     * @returns the parsed query
     */
    async parseSearchQuery(
      queryString: string,
      nqName: string | null,
    ): Promise<ParsedQuery> {
      const normalizedQueryString = normalizeQueryString(queryString);
      const terms = this.parseQueryString(normalizedQueryString);

      if (nqName == null) {
        return { queryString: normalizedQueryString, terms };
      }

      const nq = await NamedQuery.findOne({ name: normalizeNQName(nqName) });

      // will delegate to full-text search
      if (nq == null) {
        logger.debug(
          `Delegated to full-text search since a named query document did not found. (nqName="${nqName}")`,
        );
        return { queryString: normalizedQueryString, terms };
      }

      const { aliasOf, delegatorName } = nq;

      if (aliasOf != null) {
        // parse the *normalized* alias so that `terms` always agrees with `queryString`
        const aliasQueryString = normalizeQueryString(aliasOf);
        return {
          queryString: aliasQueryString,
          terms: this.parseQueryString(aliasQueryString),
        };
      }

      return { queryString: normalizedQueryString, terms, delegatorName };
    }

    /**
     * Picks the delegator for a parsed query (`SearchDelegatorName.DEFAULT`
     * when the query names none) and packs the data to hand to it.
     *
     * @param parsedQuery result of `parseSearchQuery`
     * @returns a `[delegator, data]` tuple
     */
    async resolve(
      parsedQuery: ParsedQuery,
    ): Promise<[SearchDelegator, SearchableData]> {
      const {
        queryString,
        terms,
        delegatorName = SearchDelegatorName.DEFAULT,
      } = parsedQuery;

      const nqDelegator = this.nqDelegators[delegatorName];
      const data = { queryString, terms };

      return [nqDelegator, data];
    }

    /**
     * Throws SearchError if the terms are not accepted by the delegator.
     *
     * @param delegator delegator that will run the search
     * @param data data to validate
     * @throws {SearchError} carrying the unavailable term keys reported by the delegator
     */
    private validateSearchableData(
      delegator: SearchDelegator,
      data: SearchableData,
    ): void {
      const { terms } = data;

      if (delegator.isTermsNormalized(terms)) {
        return;
      }

      const unavailableTermsKeys = delegator.validateTerms(terms);

      throw new SearchError(
        'The query string includes unavailable terms.',
        unavailableTermsKeys,
      );
    }

    /**
     * Runs a search: parse, resolve the delegator, validate, then search.
     * When the keyword contains the AI mention, `searchOpts.vector` is set to
     * `true` on the object passed in.
     *
     * @param keyword raw query string
     * @param nqName named query name, or `null`
     * @param user passed through to the delegator
     * @param userGroups passed through to the delegator
     * @param searchOpts passed through to the delegator
     * @returns the search result and the delegator's name (`null` when it has none)
     * @throws {SearchError} when the terms are not accepted by the delegator
     */
    async searchKeyword(
      keyword: string,
      nqName: string | null,
      user,
      userGroups,
      searchOpts,
    ): Promise<[ISearchResult<unknown>, string | null]> {
      let parsedQuery: ParsedQuery;
      // parse
      try {
        parsedQuery = await this.parseSearchQuery(keyword, nqName);
      } catch (err) {
        logger.error('Error occurred while parseSearchQuery', err);
        throw err;
      }

      if (isIncludeAiMenthion(keyword)) {
        searchOpts.vector = true;
      }

      let delegator: SearchDelegator;
      let data: SearchableData;
      // resolve
      try {
        [delegator, data] = await this.resolve(parsedQuery);
      } catch (err) {
        logger.error('Error occurred while resolving search delegator', err);
        throw err;
      }

      // throws
      this.validateSearchableData(delegator, data);

      return [
        await delegator.search(data, user, userGroups, searchOpts),
        delegator.name ?? null,
      ];
    }

    /**
     * Splits a query string into search terms.
     *
     * Double-quoted phrases are extracted first (the quotes are kept in the
     * stored phrase; a leading `-` marks a negated phrase and is dropped). The
     * remainder is split on single spaces; each word may carry a `prefix:` or
     * `tag:` qualifier and a leading `-` for negation.
     *
     * @param queryString query string, expected to be normalized already
     * @returns the terms grouped by kind
     */
    parseQueryString(queryString: string): QueryTerms {
      // terms
      const matchWords: string[] = [];
      const notMatchWords: string[] = [];
      const phraseWords: string[] = [];
      const notPhraseWords: string[] = [];
      const prefixPaths: string[] = [];
      const notPrefixPaths: string[] = [];
      const tags: string[] = [];
      const notTags: string[] = [];

      // First: Parse phrase keywords
      const phraseRegExp = /(-?"[^"]+")/g;
      const phrases = queryString.match(phraseRegExp);
      let remaining = queryString;

      if (phrases !== null) {
        remaining = queryString.replace(phraseRegExp, '');

        for (const phrase of phrases) {
          if (phrase.startsWith('-')) {
            notPhraseWords.push(phrase.slice(1));
          } else {
            phraseWords.push(phrase);
          }
        }
      }

      // https://regex101.com/r/pN9XfK/1
      const negativePattern = /^-(prefix:|tag:)?(.+)$/;
      // https://regex101.com/r/3qw9FQ/1
      const positivePattern = /^(prefix:|tag:)?(.+)$/;

      // routes the captured value into the bucket selected by the qualifier
      const dispatch = (
        matched: RegExpMatchArray,
        prefixBucket: string[],
        tagBucket: string[],
        wordBucket: string[],
      ): void => {
        const qualifier = matched[1];
        const value = matched[2];
        if (qualifier === 'prefix:') {
          prefixBucket.push(value);
        } else if (qualifier === 'tag:') {
          tagBucket.push(value);
        } else {
          wordBucket.push(value);
        }
      };

      // Second: Parse other keywords (include minus keywords)
      for (const word of remaining.split(' ')) {
        if (word === '') {
          continue;
        }

        // the negative form takes precedence over the positive one
        const matchNegative = word.match(negativePattern);
        if (matchNegative != null) {
          dispatch(matchNegative, notPrefixPaths, notTags, notMatchWords);
          continue;
        }

        const matchPositive = word.match(positivePattern);
        if (matchPositive != null) {
          dispatch(matchPositive, prefixPaths, tags, matchWords);
        }
      }

      return {
        match: matchWords,
        not_match: notMatchWords,
        phrase: phraseWords,
        not_phrase: notPhraseWords,
        prefix: prefixPaths,
        not_prefix: notPrefixPaths,
        tag: tags,
        not_tag: notTags,
      };
    }

    // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
    // So far, it determines by delegatorName passed by searchService.searchKeyword
    /**
     * Tells whether `formatSearchResult` should apply the Elasticsearch formatting.
     *
     * @param searchResult currently unused
     * @param delegatorName name of the delegator that produced the result
     * @returns `true` only for `SearchDelegatorName.DEFAULT`
     */
    checkIsFormattable(
      searchResult,
      delegatorName: SearchDelegatorName,
    ): boolean {
      return delegatorName === SearchDelegatorName.DEFAULT;
    }

    /**
     * Formats a raw search result.
     *
     * Results from a non-default delegator are only wrapped as `{ data: page }`.
     * For the default delegator each hit is joined with its page document, users
     * are serialized securely, and highlight snippets are passed through the XSS
     * filter. Hits whose page document is not found are dropped.
     *
     * @param searchResult raw result returned by the delegator
     * @param delegatorName name of the delegator that produced the result
     * @param user requesting user, used to decide whether a snippet may be shown
     * @param userGroups ids of the requesting user's groups
     * @returns the formatted result
     */
    async formatSearchResult(
      searchResult: ISearchResult<any>,
      delegatorName: SearchDelegatorName,
      user,
      userGroups,
    ): Promise<IFormattedSearchResult> {
      if (!this.checkIsFormattable(searchResult, delegatorName)) {
        const data: IPageWithSearchMeta[] = searchResult.data.map((page) => ({
          data: page as IPageHasId,
        }));

        return { data, meta: searchResult.meta };
      }

      /*
       * Format ElasticSearch result
       */
      const User = this.crowi.model('User');
      const result = {} as IFormattedSearchResult;

      // get page data
      const pageIds: string[] = searchResult.data.map((page) => page._id);
      const findPageResult = await findPageListByIds(pageIds, this.crowi);

      // index the documents by id once instead of scanning the list for every hit;
      // the first document wins for a given id, as a linear search would
      const pagesById = new Map();
      for (const page of findPageResult.pages) {
        if (!pagesById.has(page.id)) {
          pagesById.set(page.id, page);
        }
      }

      // set meta data
      result.meta = searchResult.meta;

      // set search result page data
      const pages: (IPageWithSearchMeta | null)[] = searchResult.data.map(
        (data) => {
          const pageData = pagesById.get(data._id);

          if (pageData == null) {
            return null;
          }

          // add tags and seenUserCount to pageData
          pageData._doc.tags = data._source.tag_names;
          pageData._doc.seenUserCount = pageData.seenUsers?.length || 0;

          // serialize lastUpdateUser
          if (
            pageData.lastUpdateUser != null &&
            pageData.lastUpdateUser instanceof User
          ) {
            pageData.lastUpdateUser = serializeUserSecurely(
              pageData.lastUpdateUser,
            );
          }

          // build elasticSearchResult (stays undefined without highlight data)
          let elasticSearchResult:
            | { snippet: string | null; highlightedPath: string | null }
            | undefined;
          const highlightData = data._highlight;

          if (highlightData != null) {
            // the first available highlight wins; body is preferred over comments
            const snippet = this.canShowSnippet(pageData, user, userGroups)
              ? highlightData.body ||
                highlightData['body.en'] ||
                highlightData['body.ja'] ||
                highlightData.comments ||
                highlightData['comments.en'] ||
                highlightData['comments.ja']
              : null;
            const pathMatch =
              highlightData['path.en'] || highlightData['path.ja'];

            elasticSearchResult = {
              snippet:
                snippet != null && typeof snippet[0] === 'string'
                  ? filterXss.process(snippet)
                  : null,
              highlightedPath:
                pathMatch != null && typeof pathMatch[0] === 'string'
                  ? filterXss.process(pathMatch)
                  : null,
            };
          }

          // serialize creator
          if (pageData.creator != null && pageData.creator instanceof User) {
            pageData.creator = serializeUserSecurely(pageData.creator);
          }

          // generate pageMeta data
          const pageMeta = {
            bookmarkCount: data._source.bookmark_count || 0,
            elasticSearchResult,
          };

          return { data: pageData, meta: pageMeta };
        },
      );

      result.data = pages.filter(nonNullable);

      return result;
    }

    /**
     * Decides whether a body/comment snippet of the page may be shown to the user.
     *
     * - `GRANT_RESTRICTED`: never.
     * - `GRANT_OWNER`: only when `user` is the first granted user; `false` when
     *   either the user or the granted user is missing.
     * - `GRANT_USER_GROUP`: only when `userGroups` intersects the page's granted groups.
     * - any other grant: always.
     *
     * @param pageData page document
     * @param user requesting user, may be null
     * @param userGroups ids of the requesting user's groups, may be null
     * @returns `true` when the snippet may be shown
     */
    canShowSnippet(pageData, user, userGroups): boolean {
      const Page = mongoose.model('Page') as unknown as PageModel;

      const testGrant = pageData.grant;
      const testGrantedUser = pageData.grantedUsers?.[0];
      const testGrantedGroups = pageData.grantedGroups;

      if (testGrant === Page.GRANT_RESTRICTED) {
        return false;
      }

      if (testGrant === Page.GRANT_OWNER) {
        // without a granted user there is nobody the requester could match
        if (user == null || testGrantedUser == null) return false;

        return user._id.toString() === testGrantedUser.toString();
      }

      if (testGrant === Page.GRANT_USER_GROUP) {
        if (userGroups == null) return false;

        return hasIntersection(
          userGroups.map((id) => id.toString()),
          testGrantedGroups,
        );
      }

      return true;
    }
  }

  export default SearchService;