search.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. import type { IPageHasId } from '@growi/core';
  2. import mongoose from 'mongoose';
  3. import { FilterXSS } from 'xss';
  4. import { SearchDelegatorName } from '~/interfaces/named-query';
  5. import { IFormattedSearchResult, IPageWithSearchMeta, ISearchResult } from '~/interfaces/search';
  6. import loggerFactory from '~/utils/logger';
  7. import { ObjectIdLike } from '../interfaces/mongoose-utils';
  8. import {
  9. SearchDelegator, SearchQueryParser, SearchResolver, ParsedQuery, SearchableData, QueryTerms,
  10. } from '../interfaces/search';
  11. import NamedQuery from '../models/named-query';
  12. import { PageModel } from '../models/page';
  13. import { serializeUserSecurely } from '../models/serializers/user-serializer';
  14. import { SearchError } from '../models/vo/search-error';
  15. import ElasticsearchDelegator from './search-delegator/elasticsearch';
  16. import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
  // Module-scoped logger for the search service.
  // (No `no-unused-vars` suppression is needed: the logger is used throughout this file.)
  const logger = loggerFactory('growi:service:search');
  /**
   * Type guard that is true for every value except `null` and `undefined`.
   * Passing it to `Array.prototype.filter` narrows the element type to `NonNullable<T>`.
   */
  const nonNullable = <T>(value: T): value is NonNullable<T> => {
    return value !== null && value !== undefined;
  };
  // Options for the XSS filter applied to highlighted snippets and paths.
  // Only <em class="..."> is allowed through.
  // NOTE: keep the key spelled `whiteList` (not `whitelist`); the spelling depends on the 'xss' library.
  const filterXssOptions = {
    whiteList: { em: ['class'] },
  };
  const filterXss = new FilterXSS(filterXssOptions);
  /**
   * Normalizes a raw query string: removes leading/trailing whitespace and
   * collapses every run of whitespace characters into a single space.
   */
  const normalizeQueryString = (_queryString: string): string => {
    return _queryString.trim().replace(/\s+/g, ' ');
  };
  /** Normalizes a named-query name by removing leading and trailing whitespace. */
  const normalizeNQName = (nqName: string): string => nqName.trim();
  /**
   * Loads the Page documents whose `_id` is in `pageIds`, populated with
   * user data restricted to `User.USER_FIELDS_EXCEPT_CONFIDENTIAL`.
   *
   * @param pageIds ids of the pages to load
   * @param crowi application context; only `crowi.model()` is used here
   * @returns the documents found and the result of running the same query as a count
   */
  const findPageListByIds = async(pageIds: ObjectIdLike[], crowi: any) => {
    const Page = crowi.model('Page') as unknown as PageModel;
    const { USER_FIELDS_EXCEPT_CONFIDENTIAL: publicUserFields } = crowi.model('User');

    const idCondition = { _id: { $in: pageIds } };
    const builder = new Page.PageQueryBuilder(Page.find(idCondition), false);

    // offset and limit are unnecessary: every requested page is wanted
    builder.addConditionToPagenate(undefined, undefined);
    // populate lastUpdateUser
    builder.populateDataToList(publicUserFields);
    // populate creator as well
    builder.query = builder.query.populate({ path: 'creator', select: publicUserFields });

    // the query is cloned for the 'find' run so that it can be executed again as a 'count'
    const pages = await builder.query.clone().exec('find');
    const totalCount = await builder.query.exec('count');

    return { pages, totalCount };
  };
  /**
   * Service that parses search queries, resolves the delegator that should run them,
   * executes the search, and formats the result.
   *
   * The full-text delegator (ElasticsearchDelegator) is created only when an
   * Elasticsearch URI is configured; otherwise `isConfigured` is false.
   */
  class SearchService implements SearchQueryParser, SearchResolver {

    crowi!: any;

    configManager!: any;

    /** `null` until the first health check (or `resetErrorStatus()`); `true` after a failed health check. */
    isErrorOccuredOnHealthcheck: boolean | null;

    /**
     * `null` initially and `false` after `resetErrorStatus()`.
     * NOTE(review): never set to `true` inside this class — presumably written by callers; confirm.
     */
    isErrorOccuredOnSearching: boolean | null;

    fullTextSearchDelegator: any & ElasticsearchDelegator;

    nqDelegators: {[key in SearchDelegatorName]: SearchDelegator};

    /**
     * @param crowi application context; `configManager`, `socketIoService`, `event()` and `model()` are read from it
     */
    constructor(crowi: any) {
      this.crowi = crowi;
      this.configManager = crowi.configManager;

      this.isErrorOccuredOnHealthcheck = null;
      this.isErrorOccuredOnSearching = null;

      try {
        this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
        this.nqDelegators = this.generateNQDelegators(this.fullTextSearchDelegator);
        logger.info('Succeeded to initialize search delegators');
      }
      catch (err) {
        // a delegator initialization failure is logged, not rethrown, so construction still completes
        logger.error(err);
      }

      if (this.isConfigured) {
        this.fullTextSearchDelegator.init();
        this.registerUpdateEvent();
      }
    }

    /** True when a full-text search delegator has been created. */
    get isConfigured() {
      return this.fullTextSearchDelegator != null;
    }

    /** True when the service is configured and neither error flag is set. */
    get isReachable() {
      return this.isConfigured && !this.isErrorOccuredOnHealthcheck && !this.isErrorOccuredOnSearching;
    }

    /** True when the `app:elasticsearchUri` config value is a non-empty string. */
    get isElasticsearchEnabled() {
      const uri = this.configManager.getConfig('crowi', 'app:elasticsearchUri');
      return uri != null && uri.length > 0;
    }

    /**
     * Creates the full-text search delegator.
     *
     * @returns an ElasticsearchDelegator when Elasticsearch is enabled, otherwise `undefined`
     */
    generateFullTextSearchDelegator(): ElasticsearchDelegator | undefined {
      logger.info('Initializing search delegator');

      if (this.isElasticsearchEnabled) {
        logger.info('Elasticsearch is enabled');
        return new ElasticsearchDelegator(this.configManager, this.crowi.socketIoService);
      }

      logger.info('No elasticsearch URI is specified so that full text search is disabled.');
      return undefined;
    }

    /**
     * Builds the lookup table from named-query delegator name to delegator instance.
     *
     * @param defaultDelegator delegator registered under `SearchDelegatorName.DEFAULT`
     */
    generateNQDelegators(defaultDelegator: ElasticsearchDelegator): {[key in SearchDelegatorName]: SearchDelegator} {
      return {
        [SearchDelegatorName.DEFAULT]: defaultDelegator,
        [SearchDelegatorName.PRIVATE_LEGACY_PAGES]: new PrivateLegacyPagesDelegator() as unknown as SearchDelegator,
      };
    }

    /**
     * Subscribes the full-text delegator's `sync*` handlers to page, bookmark, tag and comment events.
     */
    registerUpdateEvent() {
      const delegator = this.fullTextSearchDelegator;

      const pageEvent = this.crowi.event('page');
      pageEvent.on('create', delegator.syncPageUpdated.bind(delegator));
      pageEvent.on('update', delegator.syncPageUpdated.bind(delegator));
      pageEvent.on('delete', (targetPage, deletedPage, user) => {
        this.fullTextSearchDelegator.syncPageDeleted(targetPage, user);
        this.fullTextSearchDelegator.syncPageUpdated(deletedPage, user);
      });
      pageEvent.on('revert', (targetPage, revertedPage, user) => {
        this.fullTextSearchDelegator.syncPageDeleted(targetPage, user);
        this.fullTextSearchDelegator.syncPageUpdated(revertedPage, user);
      });
      pageEvent.on('deleteCompletely', delegator.syncPageDeleted.bind(delegator));
      pageEvent.on('syncDescendantsDelete', delegator.syncDescendantsPagesDeleted.bind(delegator));
      pageEvent.on('updateMany', delegator.syncPagesUpdated.bind(delegator));
      pageEvent.on('syncDescendantsUpdate', delegator.syncDescendantsPagesUpdated.bind(delegator));
      pageEvent.on('addSeenUsers', delegator.syncPageUpdated.bind(delegator));
      pageEvent.on('rename', () => {
        // NOTE(review): this handler has always been a no-op. The previous body only created bound
        // copies of syncPageDeleted / syncPageUpdated and discarded them without calling them.
        // TODO: confirm the 'rename' event payload and forward it to the delegator if syncing is intended.
      });

      const bookmarkEvent = this.crowi.event('bookmark');
      bookmarkEvent.on('create', delegator.syncBookmarkChanged.bind(delegator));
      bookmarkEvent.on('delete', delegator.syncBookmarkChanged.bind(delegator));

      const tagEvent = this.crowi.event('tag');
      tagEvent.on('update', delegator.syncTagChanged.bind(delegator));

      const commentEvent = this.crowi.event('comment');
      commentEvent.on('create', delegator.syncCommentChanged.bind(delegator));
      commentEvent.on('update', delegator.syncCommentChanged.bind(delegator));
      commentEvent.on('delete', delegator.syncCommentChanged.bind(delegator));
    }

    /** Clears both error flags (sets them to `false`). */
    resetErrorStatus() {
      this.isErrorOccuredOnHealthcheck = false;
      this.isErrorOccuredOnSearching = false;
    }

    /**
     * Re-initializes the delegator's client and verifies it with a health check.
     * The error flags are reset only when the health check succeeds.
     *
     * @throws whatever `getInfoForHealth()` throws when the health check fails
     */
    async reconnectClient() {
      logger.info('Try to reconnect...');
      this.fullTextSearchDelegator.initClient();

      // getInfoForHealth() rethrows on failure, so the lines below run only on success
      await this.getInfoForHealth();

      logger.info('Reconnecting succeeded.');
      this.resetErrorStatus();
    }

    /**
     * Returns the delegator's `getInfo()` result.
     *
     * @throws the delegator's error, after logging it
     */
    async getInfo() {
      try {
        return await this.fullTextSearchDelegator.getInfo();
      }
      catch (err) {
        logger.error(err);
        throw err;
      }
    }

    /**
     * Returns the delegator's `getInfoForHealth()` result and records the outcome
     * in `isErrorOccuredOnHealthcheck`.
     *
     * @throws the delegator's error, after logging it and raising the health-check error flag
     */
    async getInfoForHealth() {
      try {
        const result = await this.fullTextSearchDelegator.getInfoForHealth();

        this.isErrorOccuredOnHealthcheck = false;
        return result;
      }
      catch (err) {
        logger.error(err);

        // switch error flag, `isErrorOccuredOnHealthcheck` to be `true`
        this.isErrorOccuredOnHealthcheck = true;
        throw err;
      }
    }

    /** Delegates to the full-text delegator's `getInfoForAdmin()`. */
    async getInfoForAdmin() {
      return this.fullTextSearchDelegator.getInfoForAdmin();
    }

    /** Delegates to the full-text delegator's `normalizeIndices()`. */
    async normalizeIndices() {
      return this.fullTextSearchDelegator.normalizeIndices();
    }

    /** Delegates to the full-text delegator's `rebuildIndex()`. */
    async rebuildIndex() {
      return this.fullTextSearchDelegator.rebuildIndex();
    }

    /**
     * Parses a query string, optionally resolving a named query.
     *
     * - Without `nqName`, or when no NamedQuery document matches it, the given query string is parsed as-is.
     * - When the named query has `aliasOf`, that alias replaces the given query string.
     * - Otherwise the given query string is kept and the named query's `delegatorName` is attached.
     *
     * @param queryString raw query string; it is normalized before parsing
     * @param nqName named-query name, or `null` for a plain search
     */
    async parseSearchQuery(queryString: string, nqName: string | null): Promise<ParsedQuery> {
      // eslint-disable-next-line no-param-reassign
      queryString = normalizeQueryString(queryString);

      const terms = this.parseQueryString(queryString);

      if (nqName == null) {
        return { queryString, terms };
      }

      const nq = await NamedQuery.findOne({ name: normalizeNQName(nqName) });

      // will delegate to full-text search
      if (nq == null) {
        logger.debug(`Delegated to full-text search since a named query document did not found. (nqName="${nqName}")`);
        return { queryString, terms };
      }

      const { aliasOf, delegatorName } = nq;

      if (aliasOf != null) {
        // parse the normalized alias so that `terms` always corresponds to the returned `queryString`
        const aliasQueryString = normalizeQueryString(aliasOf);
        return { queryString: aliasQueryString, terms: this.parseQueryString(aliasQueryString) };
      }

      return { queryString, terms, delegatorName };
    }

    /**
     * Picks the delegator for a parsed query.
     *
     * @param parsedQuery result of `parseSearchQuery()`; a missing `delegatorName` means the default delegator
     * @returns the delegator and the data (`queryString`, `terms`) to pass to it
     */
    async resolve(parsedQuery: ParsedQuery): Promise<[SearchDelegator, SearchableData]> {
      const { queryString, terms, delegatorName = SearchDelegatorName.DEFAULT } = parsedQuery;

      const nqDelegator = this.nqDelegators[delegatorName];
      const data = { queryString, terms };

      return [nqDelegator, data];
    }

    /**
     * Ensures the terms in `data` are acceptable for `delegator`.
     *
     * @param delegator delegator that will run the search
     * @param data searchable data whose `terms` are checked
     * @throws SearchError carrying the result of `delegator.validateTerms()` when the terms are not normalized
     */
    private validateSearchableData(delegator: SearchDelegator, data: SearchableData): void {
      const { terms } = data;

      if (delegator.isTermsNormalized(terms)) {
        return;
      }

      const unavailableTermsKeys = delegator.validateTerms(terms);

      throw new SearchError('The query string includes unavailable terms.', unavailableTermsKeys);
    }

    /**
     * Runs a search: parse, resolve the delegator, validate, then search.
     *
     * @param keyword raw query string
     * @param nqName named-query name, or `null`
     * @param user passed through to the delegator's `search()`
     * @param userGroups passed through to the delegator's `search()`
     * @param searchOpts passed through to the delegator's `search()`
     * @returns the delegator's search result and the delegator's `name` (`null` when it has none)
     * @throws errors from parsing or resolving (logged first), and SearchError from validation
     */
    async searchKeyword(
        keyword: string, nqName: string | null, user, userGroups, searchOpts,
    ): Promise<[ISearchResult<unknown>, string | null]> {
      let parsedQuery: ParsedQuery;
      // parse
      try {
        parsedQuery = await this.parseSearchQuery(keyword, nqName);
      }
      catch (err) {
        logger.error('Error occurred while parseSearchQuery', err);
        throw err;
      }

      let delegator: SearchDelegator;
      let data: SearchableData;
      // resolve
      try {
        [delegator, data] = await this.resolve(parsedQuery);
      }
      catch (err) {
        logger.error('Error occurred while resolving search delegator', err);
        throw err;
      }

      // throws
      this.validateSearchableData(delegator, data);

      return [await delegator.search(data, user, userGroups, searchOpts), delegator.name ?? null];
    }

    /**
     * Splits a query string into term lists.
     *
     * - `"..."` / `-"..."` become `phrase` / `not_phrase` (the surrounding quotes are kept).
     * - `prefix:x` / `-prefix:x` become `prefix` / `not_prefix`.
     * - `tag:x` / `-tag:x` become `tag` / `not_tag`.
     * - any other word / `-word` becomes `match` / `not_match`.
     *
     * Words are separated by single spaces, so the input is expected to be already normalized.
     */
    parseQueryString(queryString: string): QueryTerms {
      // terms
      const matchWords: string[] = [];
      const notMatchWords: string[] = [];
      const phraseWords: string[] = [];
      const notPhraseWords: string[] = [];
      const prefixPaths: string[] = [];
      const notPrefixPaths: string[] = [];
      const tags: string[] = [];
      const notTags: string[] = [];

      // First: Parse phrase keywords
      const phraseRegExp = /(-?"[^"]+")/g;
      const phrases = queryString.match(phraseRegExp);

      if (phrases !== null) {
        // remove the phrases so that their words are not parsed again below
        queryString = queryString.replace(phraseRegExp, ''); // eslint-disable-line no-param-reassign

        phrases.forEach((phrase) => {
          if (phrase.startsWith('-')) {
            notPhraseWords.push(phrase.slice(1));
          }
          else {
            phraseWords.push(phrase);
          }
        });
      }

      // Second: Parse other keywords (include minus keywords)
      queryString.split(' ').forEach((word) => {
        if (word === '') {
          return;
        }

        // https://regex101.com/r/pN9XfK/1
        const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
        // https://regex101.com/r/3qw9FQ/1
        const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);

        if (matchNegative != null) {
          if (matchNegative[1] === 'prefix:') {
            notPrefixPaths.push(matchNegative[2]);
          }
          else if (matchNegative[1] === 'tag:') {
            notTags.push(matchNegative[2]);
          }
          else {
            notMatchWords.push(matchNegative[2]);
          }
        }
        else if (matchPositive != null) {
          if (matchPositive[1] === 'prefix:') {
            prefixPaths.push(matchPositive[2]);
          }
          else if (matchPositive[1] === 'tag:') {
            tags.push(matchPositive[2]);
          }
          else {
            matchWords.push(matchPositive[2]);
          }
        }
      });

      return {
        match: matchWords,
        not_match: notMatchWords,
        phrase: phraseWords,
        not_phrase: notPhraseWords,
        prefix: prefixPaths,
        not_prefix: notPrefixPaths,
        tag: tags,
        not_tag: notTags,
      };
    }

    // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
    // So far, it determines by delegatorName passed by searchService.searchKeyword
    /** True only for results produced by the default delegator; `searchResult` is currently not inspected. */
    checkIsFormattable(searchResult, delegatorName: SearchDelegatorName): boolean {
      return delegatorName === SearchDelegatorName.DEFAULT;
    }

    /**
     * Formats a search result.
     *
     * Results that are not formattable (see `checkIsFormattable`) are only wrapped as `{ data: page }`.
     * Otherwise each hit is joined with its Page document, enriched with tags, seen-user count,
     * bookmark count and XSS-filtered highlights; hits whose Page document is not found are dropped.
     *
     * @param searchResult raw result returned by the delegator
     * @param delegatorName name of the delegator that produced `searchResult`
     * @param user user used to decide whether a snippet may be shown
     * @param userGroups user groups used to decide whether a snippet may be shown
     */
    async formatSearchResult(
        searchResult: ISearchResult<any>, delegatorName: SearchDelegatorName, user, userGroups,
    ): Promise<IFormattedSearchResult> {
      if (!this.checkIsFormattable(searchResult, delegatorName)) {
        const data: IPageWithSearchMeta[] = searchResult.data.map((page) => {
          return { data: page as IPageHasId };
        });

        return { data, meta: searchResult.meta };
      }

      /*
       * Format ElasticSearch result
       */
      const User = this.crowi.model('User');

      // get page data
      const pageIds: string[] = searchResult.data.map(page => page._id);
      const findPageResult = await findPageListByIds(pageIds, this.crowi);

      // index the documents by id once instead of scanning the list for every hit
      const pageDataById = new Map<string, any>();
      findPageResult.pages.forEach((pageData) => {
        if (!pageDataById.has(pageData.id)) {
          pageDataById.set(pageData.id, pageData);
        }
      });

      // set search result page data
      const pages: (IPageWithSearchMeta | null)[] = searchResult.data.map((data) => {
        const pageData = pageDataById.get(data._id);

        if (pageData == null) {
          return null;
        }

        // add tags and seenUserCount to pageData
        pageData._doc.tags = data._source.tag_names;
        pageData._doc.seenUserCount = (pageData.seenUsers && pageData.seenUsers.length) || 0;

        // serialize lastUpdateUser
        if (pageData.lastUpdateUser != null && pageData.lastUpdateUser instanceof User) {
          pageData.lastUpdateUser = serializeUserSecurely(pageData.lastUpdateUser);
        }

        // build elasticSearchResult (stays undefined when the hit has no highlight data)
        let elasticSearchResult;
        const highlightData = data._highlight;
        if (highlightData != null) {
          // the body/comment snippet is exposed only to users allowed to see it
          const snippet = this.canShowSnippet(pageData, user, userGroups)
            // eslint-disable-next-line max-len
            ? highlightData.body || highlightData['body.en'] || highlightData['body.ja'] || highlightData.comments || highlightData['comments.en'] || highlightData['comments.ja']
            : null;
          const pathMatch = highlightData['path.en'] || highlightData['path.ja'];

          elasticSearchResult = {
            snippet: snippet != null && typeof snippet[0] === 'string' ? filterXss.process(snippet) : null,
            highlightedPath: pathMatch != null && typeof pathMatch[0] === 'string' ? filterXss.process(pathMatch) : null,
          };
        }

        // serialize creator
        if (pageData.creator != null && pageData.creator instanceof User) {
          pageData.creator = serializeUserSecurely(pageData.creator);
        }

        // generate pageMeta data
        const pageMeta = {
          bookmarkCount: data._source.bookmark_count || 0,
          elasticSearchResult,
        };

        return { data: pageData, meta: pageMeta };
      });

      return {
        meta: searchResult.meta,
        data: pages.filter(nonNullable),
      };
    }

    /**
     * Decides whether the content snippet of a page may be shown to the given user.
     *
     * - `GRANT_RESTRICTED`: never.
     * - `GRANT_OWNER`: only when `user` is the first entry of `grantedUsers`.
     * - `GRANT_USER_GROUP`: only when `grantedGroup` is one of `userGroups`.
     * - any other grant: always.
     *
     * Missing data (no user, no granted user, no groups, no granted group) means "do not show".
     */
    canShowSnippet(pageData, user, userGroups): boolean {
      const Page = mongoose.model('Page') as unknown as PageModel;

      const { grant, grantedGroup } = pageData;
      const grantedUser = pageData.grantedUsers?.[0];

      if (grant === Page.GRANT_RESTRICTED) {
        return false;
      }

      if (grant === Page.GRANT_OWNER) {
        // without both sides of the comparison ownership cannot be confirmed
        if (user == null || grantedUser == null) return false;

        return user._id.toString() === grantedUser.toString();
      }

      if (grant === Page.GRANT_USER_GROUP) {
        // without both sides of the comparison membership cannot be confirmed
        if (userGroups == null || grantedGroup == null) return false;

        return userGroups.map(id => id.toString()).includes(grantedGroup.toString());
      }

      return true;
    }

  }
  411. export default SearchService;