search.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. import type { IPageHasId } from '@growi/core';
  2. import mongoose from 'mongoose';
  3. import { FilterXSS } from 'xss';
  4. import { CommentEvent, commentEvent } from '~/features/comment/server';
  5. import { SearchDelegatorName } from '~/interfaces/named-query';
  6. import { IFormattedSearchResult, IPageWithSearchMeta, ISearchResult } from '~/interfaces/search';
  7. import loggerFactory from '~/utils/logger';
  8. import { ObjectIdLike } from '../interfaces/mongoose-utils';
  9. import {
  10. SearchDelegator, SearchQueryParser, SearchResolver, ParsedQuery, SearchableData, QueryTerms,
  11. } from '../interfaces/search';
  12. import NamedQuery from '../models/named-query';
  13. import { PageModel } from '../models/page';
  14. import { serializeUserSecurely } from '../models/serializers/user-serializer';
  15. import { SearchError } from '../models/vo/search-error';
  16. import { hasIntersection } from '../util/compare-objectId';
  17. import ElasticsearchDelegator from './search-delegator/elasticsearch';
  18. import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
  19. // Logger for this module, created by loggerFactory with the name 'growi:service:search'.
  20. const logger = loggerFactory('growi:service:search');
  /**
   * Type guard that narrows away `null` and `undefined`.
   * Falsy-but-defined values such as `0`, `''` and `false` are kept.
   */
  const nonNullable = <T>(value: T): value is NonNullable<T> => {
    return value !== null && value !== undefined;
  };
  22. // Options for the XSS filter: the white list contains only the `class` attribute of `em`.
  23. // Do not rename the `whiteList` property key (e.g. to 'whitelist'); the 'xss' library depends on this exact name.
  24. const filterXssOptions = {
  25. whiteList: {
  26. em: ['class'],
  27. },
  28. };
  29. const filterXss = new FilterXSS(filterXssOptions);
  /**
   * Normalizes a raw query string: strips leading/trailing whitespace and
   * collapses every run of whitespace into a single space.
   */
  const normalizeQueryString = (_queryString: string): string => _queryString.trim().replace(/\s+/g, ' ');
  /**
   * Normalizes a named-query name by stripping leading and trailing whitespace.
   */
  const normalizeNQName = (nqName: string): string => nqName.trim();
  /**
   * Loads the pages whose `_id` is contained in `pageIds`, with `creator` populated
   * (restricted to `User.USER_FIELDS_EXCEPT_CONFIDENTIAL`) and the builder's list data populated.
   *
   * @param pageIds ids of the pages to load
   * @param crowi object exposing `model(name)`, used to obtain the `Page` and `User` models
   * @returns the documents returned by the 'find' execution and the value returned by the 'count' execution
   */
  const findPageListByIds = async(pageIds: ObjectIdLike[], crowi: any) => {
    const PageModelRef = crowi.model('Page') as unknown as PageModel;
    const UserModelRef = crowi.model('User');

    const idCondition = { _id: { $in: pageIds } };
    const queryBuilder = new PageModelRef.PageQueryBuilder(PageModelRef.find(idCondition), false);

    // offset and limit are unnecessary here
    queryBuilder.addConditionToPagenate(undefined, undefined);
    // populate lastUpdateUser
    queryBuilder.populateDataToList(UserModelRef.USER_FIELDS_EXCEPT_CONFIDENTIAL);
    queryBuilder.query = queryBuilder.query.populate({
      path: 'creator',
      select: UserModelRef.USER_FIELDS_EXCEPT_CONFIDENTIAL,
    });

    // the 'find' execution runs on a clone; the 'count' execution runs on the builder's own query
    const foundPages = await queryBuilder.query.clone().exec('find');
    const countResult = await queryBuilder.query.exec('count');

    return { pages: foundPages, totalCount: countResult };
  };
  /**
   * Search service.
   *
   * Parses search keywords into structured terms, resolves which search delegator
   * (the full-text delegator or a named-query delegator) handles a query, runs the search,
   * and formats the raw result for the caller. When a full-text delegator is available,
   * the constructor also subscribes it to page / bookmark / tag / comment events.
   */
  class SearchService implements SearchQueryParser, SearchResolver {

    crowi!: any;

    configManager!: any;

    /** `null` until the first health check or reset; `true` after a failed health check. */
    isErrorOccuredOnHealthcheck: boolean | null;

    /** `null` until reset; this class itself only ever resets it to `false`. */
    isErrorOccuredOnSearching: boolean | null;

    /** Full-text search delegator; stays `undefined` when no Elasticsearch URI is configured. */
    fullTextSearchDelegator: any & ElasticsearchDelegator;

    nqDelegators: {[key in SearchDelegatorName]: SearchDelegator};

    /**
     * Creates the delegators and, if a full-text delegator exists, initializes it and
     * registers the update event handlers. A failure while creating the delegators is
     * logged and does not propagate out of the constructor.
     */
    constructor(crowi) {
      this.crowi = crowi;
      this.configManager = crowi.configManager;

      this.isErrorOccuredOnHealthcheck = null;
      this.isErrorOccuredOnSearching = null;

      try {
        this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
        this.nqDelegators = this.generateNQDelegators(this.fullTextSearchDelegator);
        logger.info('Succeeded to initialize search delegators');
      }
      catch (err) {
        logger.error(err);
      }

      if (this.isConfigured) {
        this.fullTextSearchDelegator.init();
        this.registerUpdateEvent();
      }
    }

    /** `true` when a full-text search delegator has been created. */
    get isConfigured() {
      return this.fullTextSearchDelegator != null;
    }

    /** `true` when configured and neither error flag is set. */
    get isReachable() {
      return this.isConfigured && !this.isErrorOccuredOnHealthcheck && !this.isErrorOccuredOnSearching;
    }

    /** `true` when the `app:elasticsearchUri` config value is a non-empty value. */
    get isElasticsearchEnabled() {
      const uri = this.configManager.getConfig('crowi', 'app:elasticsearchUri');
      return uri != null && uri.length > 0;
    }

    /**
     * Creates the full-text search delegator.
     * @returns an ElasticsearchDelegator when Elasticsearch is enabled, otherwise `undefined`
     */
    generateFullTextSearchDelegator() {
      logger.info('Initializing search delegator');

      if (this.isElasticsearchEnabled) {
        logger.info('Elasticsearch is enabled');
        return new ElasticsearchDelegator(this.configManager, this.crowi.socketIoService);
      }

      logger.info('No elasticsearch URI is specified so that full text search is disabled.');
    }

    /**
     * Builds the map from delegator name to delegator instance.
     * @param defaultDelegator delegator registered under `SearchDelegatorName.DEFAULT`
     */
    generateNQDelegators(defaultDelegator: ElasticsearchDelegator): {[key in SearchDelegatorName]: SearchDelegator} {
      return {
        [SearchDelegatorName.DEFAULT]: defaultDelegator,
        [SearchDelegatorName.PRIVATE_LEGACY_PAGES]: new PrivateLegacyPagesDelegator() as unknown as SearchDelegator,
      };
    }

    /**
     * Subscribes the full-text delegator's sync methods to page, bookmark, tag and comment events.
     */
    registerUpdateEvent() {
      const delegator = this.fullTextSearchDelegator;

      const pageEvent = this.crowi.event('page');
      pageEvent.on('create', delegator.syncPageUpdated.bind(delegator));
      pageEvent.on('update', delegator.syncPageUpdated.bind(delegator));
      pageEvent.on('delete', (targetPage, deletedPage, user) => {
        this.fullTextSearchDelegator.syncPageDeleted(targetPage, user);
        this.fullTextSearchDelegator.syncPageUpdated(deletedPage, user);
      });
      pageEvent.on('revert', (targetPage, revertedPage, user) => {
        this.fullTextSearchDelegator.syncPageDeleted(targetPage, user);
        this.fullTextSearchDelegator.syncPageUpdated(revertedPage, user);
      });
      pageEvent.on('deleteCompletely', delegator.syncPageDeleted.bind(delegator));
      pageEvent.on('syncDescendantsDelete', delegator.syncDescendantsPagesDeleted.bind(delegator));
      pageEvent.on('updateMany', delegator.syncPagesUpdated.bind(delegator));
      pageEvent.on('syncDescendantsUpdate', delegator.syncDescendantsPagesUpdated.bind(delegator));
      pageEvent.on('addSeenUsers', delegator.syncPageUpdated.bind(delegator));
      pageEvent.on('rename', () => {
        // NOTE(review): these two statements only create bound functions and never call them,
        // so this handler currently does nothing. The arguments of the 'rename' event are not
        // visible here; confirm the intended behavior before turning these into real calls.
        this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator);
        this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator);
      });

      const bookmarkEvent = this.crowi.event('bookmark');
      bookmarkEvent.on('create', delegator.syncBookmarkChanged.bind(delegator));
      bookmarkEvent.on('delete', delegator.syncBookmarkChanged.bind(delegator));

      const tagEvent = this.crowi.event('tag');
      tagEvent.on('update', delegator.syncTagChanged.bind(delegator));

      commentEvent.on(CommentEvent.CREATE, delegator.syncCommentChanged.bind(delegator));
      commentEvent.on(CommentEvent.UPDATE, delegator.syncCommentChanged.bind(delegator));
      commentEvent.on(CommentEvent.DELETE, delegator.syncCommentChanged.bind(delegator));
    }

    /** Clears both error flags. */
    resetErrorStatus() {
      this.isErrorOccuredOnHealthcheck = false;
      this.isErrorOccuredOnSearching = false;
    }

    /**
     * Re-initializes the delegator's client and runs a health check.
     * The error flags are reset only when the health check succeeds; a failure propagates to the caller.
     */
    async reconnectClient() {
      logger.info('Try to reconnect...');
      this.fullTextSearchDelegator.initClient();

      await this.getInfoForHealth();

      logger.info('Reconnecting succeeded.');
      this.resetErrorStatus();
    }

    /**
     * Returns the delegator's info. Errors are logged and rethrown.
     */
    async getInfo() {
      try {
        return await this.fullTextSearchDelegator.getInfo();
      }
      catch (err) {
        logger.error(err);
        throw err;
      }
    }

    /**
     * Returns the delegator's health info and records the outcome in `isErrorOccuredOnHealthcheck`.
     * Errors are logged and rethrown.
     */
    async getInfoForHealth() {
      try {
        const result = await this.fullTextSearchDelegator.getInfoForHealth();

        this.isErrorOccuredOnHealthcheck = false;
        return result;
      }
      catch (err) {
        logger.error(err);

        // raise the error flag: `isErrorOccuredOnHealthcheck` becomes `true`
        this.isErrorOccuredOnHealthcheck = true;
        throw err;
      }
    }

    async getInfoForAdmin() {
      return this.fullTextSearchDelegator.getInfoForAdmin();
    }

    async normalizeIndices() {
      return this.fullTextSearchDelegator.normalizeIndices();
    }

    async rebuildIndex() {
      return this.fullTextSearchDelegator.rebuildIndex();
    }

    /**
     * Parses a keyword string, optionally through a named query.
     *
     * - Without `nqName`, or when no named query document is found, the normalized keyword is parsed as is.
     * - When the named query has `aliasOf`, the alias string replaces the keyword.
     * - Otherwise the keyword is kept and the named query's `delegatorName` is attached.
     *
     * @param queryString raw keyword string
     * @param nqName named query name, or `null`
     */
    async parseSearchQuery(queryString: string, nqName: string | null): Promise<ParsedQuery> {
      const normalizedQuery = normalizeQueryString(queryString);
      const terms = this.parseQueryString(normalizedQuery);

      if (nqName == null) {
        return { queryString: normalizedQuery, terms };
      }

      const nq = await NamedQuery.findOne({ name: normalizeNQName(nqName) });

      // will delegate to full-text search
      if (nq == null) {
        logger.debug(`Delegated to full-text search since a named query document did not found. (nqName="${nqName}")`);
        return { queryString: normalizedQuery, terms };
      }

      const { aliasOf, delegatorName } = nq;

      if (aliasOf != null) {
        // parse the normalized alias so that the terms always agree with the stored queryString
        const normalizedAlias = normalizeQueryString(aliasOf);
        return { queryString: normalizedAlias, terms: this.parseQueryString(normalizedAlias) };
      }

      return { queryString: normalizedQuery, terms, delegatorName };
    }

    /**
     * Picks the delegator for a parsed query (`SearchDelegatorName.DEFAULT` when none is named)
     * and returns it together with the data to search with.
     */
    async resolve(parsedQuery: ParsedQuery): Promise<[SearchDelegator, SearchableData]> {
      const { queryString, terms, delegatorName = SearchDelegatorName.DEFAULT } = parsedQuery;

      const nqDelegator = this.nqDelegators[delegatorName];
      const data = { queryString, terms };

      return [nqDelegator, data];
    }

    /**
     * Throws SearchError if the delegator reports that the terms are not normalized.
     * @param delegator delegator that will run the search
     * @param data data whose terms are validated
     * @throws {SearchError} carrying the term keys returned by `delegator.validateTerms`
     */
    private validateSearchableData(delegator: SearchDelegator, data: SearchableData): void {
      const { terms } = data;

      if (delegator.isTermsNormalized(terms)) {
        return;
      }

      const unavailableTermsKeys = delegator.validateTerms(terms);

      throw new SearchError('The query string includes unavailable terms.', unavailableTermsKeys);
    }

    /**
     * Parses the keyword, resolves the delegator, validates the terms and runs the search.
     * @returns a tuple of the search result and the delegator's name (`null` when it has none)
     * @throws {SearchError} when the terms are not available for the resolved delegator
     */
    async searchKeyword(keyword: string, nqName: string | null, user, userGroups, searchOpts): Promise<[ISearchResult<unknown>, string | null]> {
      let parsedQuery: ParsedQuery;
      // parse
      try {
        parsedQuery = await this.parseSearchQuery(keyword, nqName);
      }
      catch (err) {
        logger.error('Error occurred while parseSearchQuery', err);
        throw err;
      }

      let delegator: SearchDelegator;
      let data: SearchableData;
      // resolve
      try {
        [delegator, data] = await this.resolve(parsedQuery);
      }
      catch (err) {
        logger.error('Error occurred while resolving search delegator', err);
        throw err;
      }

      // throws
      this.validateSearchableData(delegator, data);

      return [await delegator.search(data, user, userGroups, searchOpts), delegator.name ?? null];
    }

    /**
     * Splits a query string into terms.
     *
     * Quoted phrases (`"..."`, or `-"..."` for exclusion) are extracted first and keep their
     * surrounding double quotes. The remainder is split on single spaces; each word may carry
     * a `prefix:` or `tag:` qualifier and a leading `-` for exclusion.
     *
     * @param queryString query string; words are expected to be separated by single spaces
     */
    parseQueryString(queryString: string): QueryTerms {
      // terms
      const matchWords: string[] = [];
      const notMatchWords: string[] = [];
      const phraseWords: string[] = [];
      const notPhraseWords: string[] = [];
      const prefixPaths: string[] = [];
      const notPrefixPaths: string[] = [];
      const tags: string[] = [];
      const notTags: string[] = [];

      // First: Parse phrase keywords
      const phraseRegExp = /(-?"[^"]+")/g;
      const phrases = queryString.match(phraseRegExp) ?? [];
      for (const phrase of phrases) {
        if (phrase.startsWith('-')) {
          notPhraseWords.push(phrase.slice(1));
        }
        else {
          phraseWords.push(phrase);
        }
      }
      const remainingQuery = queryString.replace(phraseRegExp, '');

      // Second: Parse other keywords (include minus keywords)
      for (const word of remainingQuery.split(' ')) {
        if (word === '') {
          continue;
        }

        // https://regex101.com/r/pN9XfK/1
        const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
        // https://regex101.com/r/3qw9FQ/1
        const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);

        if (matchNegative != null) {
          if (matchNegative[1] === 'prefix:') {
            notPrefixPaths.push(matchNegative[2]);
          }
          else if (matchNegative[1] === 'tag:') {
            notTags.push(matchNegative[2]);
          }
          else {
            notMatchWords.push(matchNegative[2]);
          }
        }
        else if (matchPositive != null) {
          if (matchPositive[1] === 'prefix:') {
            prefixPaths.push(matchPositive[2]);
          }
          else if (matchPositive[1] === 'tag:') {
            tags.push(matchPositive[2]);
          }
          else {
            matchWords.push(matchPositive[2]);
          }
        }
      }

      const terms = {
        match: matchWords,
        not_match: notMatchWords,
        phrase: phraseWords,
        not_phrase: notPhraseWords,
        prefix: prefixPaths,
        not_prefix: notPrefixPaths,
        tag: tags,
        not_tag: notTags,
      };

      return terms;
    }

    // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
    // So far, it determines by delegatorName passed by searchService.searchKeyword
    checkIsFormattable(searchResult, delegatorName: SearchDelegatorName): boolean {
      return delegatorName === SearchDelegatorName.DEFAULT;
    }

    /**
     * Formats a raw search result.
     *
     * Results that are not formattable (see `checkIsFormattable`) are only wrapped as `{ data: page }`.
     * Otherwise each hit is joined with its page document, users are serialized, and
     * highlight data is attached. Hits without a matching page document are dropped.
     */
    async formatSearchResult(searchResult: ISearchResult<any>, delegatorName: SearchDelegatorName, user, userGroups): Promise<IFormattedSearchResult> {
      if (!this.checkIsFormattable(searchResult, delegatorName)) {
        const data: IPageWithSearchMeta[] = searchResult.data.map((page) => {
          return {
            data: page as IPageHasId,
          };
        });

        return {
          data,
          meta: searchResult.meta,
        };
      }

      /*
       * Format ElasticSearch result
       */
      const User = this.crowi.model('User');
      const result = {} as IFormattedSearchResult;

      // get page data
      const pageIds: string[] = searchResult.data.map((page) => { return page._id });
      const findPageResult = await findPageListByIds(pageIds, this.crowi);

      // index the page documents by id once instead of scanning the list for every hit
      const pageDataById = new Map();
      for (const page of findPageResult.pages) {
        if (!pageDataById.has(page.id)) {
          pageDataById.set(page.id, page);
        }
      }

      // set meta data
      result.meta = searchResult.meta;

      // set search result page data
      const pages: (IPageWithSearchMeta | null)[] = searchResult.data.map((data) => {
        const pageData = pageDataById.get(data._id);

        if (pageData == null) {
          return null;
        }

        // add tags and seenUserCount to pageData
        pageData._doc.tags = data._source.tag_names;
        pageData._doc.seenUserCount = (pageData.seenUsers && pageData.seenUsers.length) || 0;

        // serialize lastUpdateUser
        if (pageData.lastUpdateUser != null && pageData.lastUpdateUser instanceof User) {
          pageData.lastUpdateUser = serializeUserSecurely(pageData.lastUpdateUser);
        }

        // build elasticSearchResult from the highlight data, if any
        let elasticSearchResult;
        const highlightData = data._highlight;
        if (highlightData != null) {
          // the snippet is withheld when the viewer is not allowed to see the page body
          const snippet = this.canShowSnippet(pageData, user, userGroups)
            ? highlightData.body
              || highlightData['body.en']
              || highlightData['body.ja']
              || highlightData.comments
              || highlightData['comments.en']
              || highlightData['comments.ja']
            : null;
          const pathMatch = highlightData['path.en'] || highlightData['path.ja'];

          elasticSearchResult = {
            snippet: snippet != null && typeof snippet[0] === 'string' ? filterXss.process(snippet) : null,
            highlightedPath: pathMatch != null && typeof pathMatch[0] === 'string' ? filterXss.process(pathMatch) : null,
          };
        }

        // serialize creator
        if (pageData.creator != null && pageData.creator instanceof User) {
          pageData.creator = serializeUserSecurely(pageData.creator);
        }

        // generate pageMeta data
        const pageMeta = {
          bookmarkCount: data._source.bookmark_count || 0,
          elasticSearchResult,
        };

        return { data: pageData, meta: pageMeta };
      });

      result.data = pages.filter(nonNullable);
      return result;
    }

    /**
     * Decides whether a body snippet of the page may be shown to the given user.
     *
     * - `GRANT_RESTRICTED`: never.
     * - `GRANT_OWNER`: only when the user is the first entry of `grantedUsers`.
     * - `GRANT_USER_GROUP`: only when `userGroups` intersects `grantedGroups`.
     * - any other grant: always.
     */
    canShowSnippet(pageData, user, userGroups): boolean {
      const Page = mongoose.model('Page') as unknown as PageModel;

      const testGrant = pageData.grant;
      const testGrantedUser = pageData.grantedUsers?.[0];
      const testGrantedGroups = pageData.grantedGroups;

      if (testGrant === Page.GRANT_RESTRICTED) {
        return false;
      }

      if (testGrant === Page.GRANT_OWNER) {
        // a page without any granted user cannot match; avoid dereferencing `undefined`
        if (user == null || testGrantedUser == null) return false;

        return user._id.toString() === testGrantedUser.toString();
      }

      if (testGrant === Page.GRANT_USER_GROUP) {
        if (userGroups == null) return false;

        return hasIntersection(userGroups.map(id => id.toString()), testGrantedGroups);
      }

      return true;
    }

  }
  412. export default SearchService;