search.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516
  1. import mongoose from 'mongoose';
  2. import { FilterXSS } from 'xss';
  3. import { SearchDelegatorName } from '~/interfaces/named-query';
  4. import { IPageHasId } from '~/interfaces/page';
  5. import { IFormattedSearchResult, IPageWithSearchMeta, ISearchResult } from '~/interfaces/search';
  6. import loggerFactory from '~/utils/logger';
  7. import { ObjectIdLike } from '../interfaces/mongoose-utils';
  8. import {
  9. SearchDelegator, SearchQueryParser, SearchResolver, ParsedQuery, SearchableData, QueryTerms,
  10. } from '../interfaces/search';
  11. import NamedQuery from '../models/named-query';
  12. import { PageModel } from '../models/page';
  13. import { serializeUserSecurely } from '../models/serializers/user-serializer';
  14. import { SearchError } from '../models/vo/search-error';
  15. import ElasticsearchDelegator from './search-delegator/elasticsearch';
  16. import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
// Module-level logger obtained from loggerFactory with the name 'growi:service:search'.
// eslint-disable-next-line no-unused-vars
const logger = loggerFactory('growi:service:search');
  19. const nonNullable = <T>(value: T): value is NonNullable<T> => value != null;
  20. // options for filtering xss
  21. const filterXssOptions = {
  22. whiteList: {
  23. em: ['class'],
  24. },
  25. };
  26. const filterXss = new FilterXSS(filterXssOptions);
  27. const normalizeQueryString = (_queryString: string): string => {
  28. let queryString = _queryString.trim();
  29. queryString = queryString.replace(/\s+/g, ' ');
  30. return queryString;
  31. };
  32. const normalizeNQName = (nqName: string): string => {
  33. return nqName.trim();
  34. };
  35. const findPageListByIds = async(pageIds: ObjectIdLike[], crowi: any) => {
  36. const Page = crowi.model('Page') as unknown as PageModel;
  37. const User = crowi.model('User');
  38. const builder = new Page.PageQueryBuilder(Page.find(({ _id: { $in: pageIds } })), false);
  39. builder.addConditionToPagenate(undefined, undefined); // offset and limit are unnesessary
  40. builder.populateDataToList(User.USER_FIELDS_EXCEPT_CONFIDENTIAL); // populate lastUpdateUser
  41. builder.query = builder.query.populate({
  42. path: 'creator',
  43. select: User.USER_FIELDS_EXCEPT_CONFIDENTIAL,
  44. });
  45. const pages = await builder.query.clone().exec('find');
  46. const totalCount = await builder.query.exec('count');
  47. return {
  48. pages,
  49. totalCount,
  50. };
  51. };
  52. class SearchService implements SearchQueryParser, SearchResolver {
  53. crowi!: any;
  54. configManager!: any;
  55. isErrorOccuredOnHealthcheck: boolean | null;
  56. isErrorOccuredOnSearching: boolean | null;
  57. fullTextSearchDelegator: any & ElasticsearchDelegator;
  58. nqDelegators: {[key in SearchDelegatorName]: SearchDelegator};
  59. constructor(crowi) {
  60. this.crowi = crowi;
  61. this.configManager = crowi.configManager;
  62. this.isErrorOccuredOnHealthcheck = null;
  63. this.isErrorOccuredOnSearching = null;
  64. try {
  65. this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
  66. this.nqDelegators = this.generateNQDelegators(this.fullTextSearchDelegator);
  67. logger.info('Succeeded to initialize search delegators');
  68. }
  69. catch (err) {
  70. logger.error(err);
  71. }
  72. if (this.isConfigured) {
  73. this.fullTextSearchDelegator.init();
  74. this.registerUpdateEvent();
  75. }
  76. }
  77. get isConfigured() {
  78. return this.fullTextSearchDelegator != null;
  79. }
  80. get isReachable() {
  81. return this.isConfigured && !this.isErrorOccuredOnHealthcheck && !this.isErrorOccuredOnSearching;
  82. }
  83. get isElasticsearchEnabled() {
  84. const uri = this.configManager.getConfig('crowi', 'app:elasticsearchUri');
  85. return uri != null && uri.length > 0;
  86. }
  87. generateFullTextSearchDelegator() {
  88. logger.info('Initializing search delegator');
  89. if (this.isElasticsearchEnabled) {
  90. logger.info('Elasticsearch is enabled');
  91. return new ElasticsearchDelegator(this.configManager, this.crowi.socketIoService);
  92. }
  93. logger.info('No elasticsearch URI is specified so that full text search is disabled.');
  94. }
  95. generateNQDelegators(defaultDelegator: ElasticsearchDelegator): {[key in SearchDelegatorName]: SearchDelegator} {
  96. return {
  97. [SearchDelegatorName.DEFAULT]: defaultDelegator,
  98. [SearchDelegatorName.PRIVATE_LEGACY_PAGES]: new PrivateLegacyPagesDelegator() as unknown as SearchDelegator,
  99. };
  100. }
  101. registerUpdateEvent() {
  102. const pageEvent = this.crowi.event('page');
  103. pageEvent.on('create', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  104. pageEvent.on('update', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  105. pageEvent.on('delete', (targetPage, deletedPage, user) => {
  106. this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator)(targetPage, user);
  107. this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator)(deletedPage, user);
  108. });
  109. pageEvent.on('revert', (targetPage, revertedPage, user) => {
  110. this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator)(targetPage, user);
  111. this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator)(revertedPage, user);
  112. });
  113. pageEvent.on('deleteCompletely', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  114. pageEvent.on('syncDescendantsDelete', this.fullTextSearchDelegator.syncDescendantsPagesDeleted.bind(this.fullTextSearchDelegator));
  115. pageEvent.on('updateMany', this.fullTextSearchDelegator.syncPagesUpdated.bind(this.fullTextSearchDelegator));
  116. pageEvent.on('syncDescendantsUpdate', this.fullTextSearchDelegator.syncDescendantsPagesUpdated.bind(this.fullTextSearchDelegator));
  117. pageEvent.on('addSeenUsers', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  118. pageEvent.on('rename', () => {
  119. this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator);
  120. this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator);
  121. });
  122. const bookmarkEvent = this.crowi.event('bookmark');
  123. bookmarkEvent.on('create', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  124. bookmarkEvent.on('delete', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  125. const tagEvent = this.crowi.event('tag');
  126. tagEvent.on('update', this.fullTextSearchDelegator.syncTagChanged.bind(this.fullTextSearchDelegator));
  127. const commentEvent = this.crowi.event('comment');
  128. commentEvent.on('create', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  129. commentEvent.on('update', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  130. commentEvent.on('delete', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  131. }
  132. resetErrorStatus() {
  133. this.isErrorOccuredOnHealthcheck = false;
  134. this.isErrorOccuredOnSearching = false;
  135. }
  136. async reconnectClient() {
  137. logger.info('Try to reconnect...');
  138. this.fullTextSearchDelegator.initClient();
  139. try {
  140. await this.getInfoForHealth();
  141. logger.info('Reconnecting succeeded.');
  142. this.resetErrorStatus();
  143. }
  144. catch (err) {
  145. throw err;
  146. }
  147. }
  148. async getInfo() {
  149. try {
  150. return await this.fullTextSearchDelegator.getInfo();
  151. }
  152. catch (err) {
  153. logger.error(err);
  154. throw err;
  155. }
  156. }
  157. async getInfoForHealth() {
  158. try {
  159. const result = await this.fullTextSearchDelegator.getInfoForHealth();
  160. this.isErrorOccuredOnHealthcheck = false;
  161. return result;
  162. }
  163. catch (err) {
  164. logger.error(err);
  165. // switch error flag, `isErrorOccuredOnHealthcheck` to be `false`
  166. this.isErrorOccuredOnHealthcheck = true;
  167. throw err;
  168. }
  169. }
  170. async getInfoForAdmin() {
  171. return this.fullTextSearchDelegator.getInfoForAdmin();
  172. }
  173. async normalizeIndices() {
  174. return this.fullTextSearchDelegator.normalizeIndices();
  175. }
  176. async rebuildIndex() {
  177. return this.fullTextSearchDelegator.rebuildIndex();
  178. }
  179. async parseSearchQuery(queryString: string, nqName: string | null): Promise<ParsedQuery> {
  180. // eslint-disable-next-line no-param-reassign
  181. queryString = normalizeQueryString(queryString);
  182. const terms = this.parseQueryString(queryString);
  183. if (nqName == null) {
  184. return { queryString, terms };
  185. }
  186. const nq = await NamedQuery.findOne({ name: normalizeNQName(nqName) });
  187. // will delegate to full-text search
  188. if (nq == null) {
  189. logger.debug(`Delegated to full-text search since a named query document did not found. (nqName="${nqName}")`);
  190. return { queryString, terms };
  191. }
  192. const { aliasOf, delegatorName } = nq;
  193. let parsedQuery: ParsedQuery;
  194. if (aliasOf != null) {
  195. parsedQuery = { queryString: normalizeQueryString(aliasOf), terms: this.parseQueryString(aliasOf) };
  196. }
  197. else {
  198. parsedQuery = { queryString, terms, delegatorName };
  199. }
  200. return parsedQuery;
  201. }
  202. async resolve(parsedQuery: ParsedQuery): Promise<[SearchDelegator, SearchableData]> {
  203. const { queryString, terms, delegatorName = SearchDelegatorName.DEFAULT } = parsedQuery;
  204. const nqDeledator = this.nqDelegators[delegatorName];
  205. const data = {
  206. queryString,
  207. terms,
  208. };
  209. return [nqDeledator, data];
  210. }
  211. /**
  212. * Throws SearchError if data is corrupted.
  213. * @param {SearchableData} data
  214. * @param {SearchDelegator} delegator
  215. * @throws {SearchError} SearchError
  216. */
  217. private validateSearchableData(delegator: SearchDelegator, data: SearchableData): void {
  218. const { terms } = data;
  219. if (delegator.isTermsNormalized(terms)) {
  220. return;
  221. }
  222. const unavailableTermsKeys = delegator.validateTerms(terms);
  223. throw new SearchError('The query string includes unavailable terms.', unavailableTermsKeys);
  224. }
  225. async searchKeyword(keyword: string, nqName: string | null, user, userGroups, searchOpts): Promise<[ISearchResult<unknown>, string | null]> {
  226. let parsedQuery: ParsedQuery;
  227. // parse
  228. try {
  229. parsedQuery = await this.parseSearchQuery(keyword, nqName);
  230. }
  231. catch (err) {
  232. logger.error('Error occurred while parseSearchQuery', err);
  233. throw err;
  234. }
  235. let delegator: SearchDelegator;
  236. let data: SearchableData;
  237. // resolve
  238. try {
  239. [delegator, data] = await this.resolve(parsedQuery);
  240. }
  241. catch (err) {
  242. logger.error('Error occurred while resolving search delegator', err);
  243. throw err;
  244. }
  245. // throws
  246. this.validateSearchableData(delegator, data);
  247. return [await delegator.search(data, user, userGroups, searchOpts), delegator.name ?? null];
  248. }
  249. parseQueryString(queryString: string): QueryTerms {
  250. // terms
  251. const matchWords: string[] = [];
  252. const notMatchWords: string[] = [];
  253. const phraseWords: string[] = [];
  254. const notPhraseWords: string[] = [];
  255. const prefixPaths: string[] = [];
  256. const notPrefixPaths: string[] = [];
  257. const tags: string[] = [];
  258. const notTags: string[] = [];
  259. // First: Parse phrase keywords
  260. const phraseRegExp = new RegExp(/(-?"[^"]+")/g);
  261. const phrases = queryString.match(phraseRegExp);
  262. if (phrases !== null) {
  263. queryString = queryString.replace(phraseRegExp, ''); // eslint-disable-line no-param-reassign
  264. phrases.forEach((phrase) => {
  265. phrase.trim();
  266. if (phrase.match(/^-/)) {
  267. notPhraseWords.push(phrase.replace(/^-/, ''));
  268. }
  269. else {
  270. phraseWords.push(phrase);
  271. }
  272. });
  273. }
  274. // Second: Parse other keywords (include minus keywords)
  275. queryString.split(' ').forEach((word) => {
  276. if (word === '') {
  277. return;
  278. }
  279. // https://regex101.com/r/pN9XfK/1
  280. const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
  281. // https://regex101.com/r/3qw9FQ/1
  282. const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);
  283. if (matchNegative != null) {
  284. if (matchNegative[1] === 'prefix:') {
  285. notPrefixPaths.push(matchNegative[2]);
  286. }
  287. else if (matchNegative[1] === 'tag:') {
  288. notTags.push(matchNegative[2]);
  289. }
  290. else {
  291. notMatchWords.push(matchNegative[2]);
  292. }
  293. }
  294. else if (matchPositive != null) {
  295. if (matchPositive[1] === 'prefix:') {
  296. prefixPaths.push(matchPositive[2]);
  297. }
  298. else if (matchPositive[1] === 'tag:') {
  299. tags.push(matchPositive[2]);
  300. }
  301. else {
  302. matchWords.push(matchPositive[2]);
  303. }
  304. }
  305. });
  306. const terms = {
  307. match: matchWords,
  308. not_match: notMatchWords,
  309. phrase: phraseWords,
  310. not_phrase: notPhraseWords,
  311. prefix: prefixPaths,
  312. not_prefix: notPrefixPaths,
  313. tag: tags,
  314. not_tag: notTags,
  315. };
  316. return terms;
  317. }
  318. // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
  319. // So far, it determines by delegatorName passed by searchService.searchKeyword
  320. checkIsFormattable(searchResult, delegatorName: SearchDelegatorName): boolean {
  321. return delegatorName === SearchDelegatorName.DEFAULT;
  322. }
  323. /**
  324. * formatting result
  325. */
  326. async formatSearchResult(searchResult: ISearchResult<any>, delegatorName: SearchDelegatorName, user, userGroups): Promise<IFormattedSearchResult> {
  327. if (!this.checkIsFormattable(searchResult, delegatorName)) {
  328. const data: IPageWithSearchMeta[] = searchResult.data.map((page) => {
  329. return {
  330. data: page as IPageHasId,
  331. };
  332. });
  333. return {
  334. data,
  335. meta: searchResult.meta,
  336. };
  337. }
  338. /*
  339. * Format ElasticSearch result
  340. */
  341. const User = this.crowi.model('User');
  342. const result = {} as IFormattedSearchResult;
  343. // get page data
  344. const pageIds: string[] = searchResult.data.map((page) => { return page._id });
  345. const findPageResult = await findPageListByIds(pageIds, this.crowi);
  346. // set meta data
  347. result.meta = searchResult.meta;
  348. // set search result page data
  349. const pages: (IPageWithSearchMeta | null)[] = searchResult.data.map((data) => {
  350. const pageData = findPageResult.pages.find((pageData) => {
  351. return pageData.id === data._id;
  352. });
  353. if (pageData == null) {
  354. return null;
  355. }
  356. // add tags and seenUserCount to pageData
  357. pageData._doc.tags = data._source.tag_names;
  358. pageData._doc.seenUserCount = (pageData.seenUsers && pageData.seenUsers.length) || 0;
  359. // serialize lastUpdateUser
  360. if (pageData.lastUpdateUser != null && pageData.lastUpdateUser instanceof User) {
  361. pageData.lastUpdateUser = serializeUserSecurely(pageData.lastUpdateUser);
  362. }
  363. // increment elasticSearchResult
  364. let elasticSearchResult;
  365. const highlightData = data._highlight;
  366. if (highlightData != null) {
  367. const snippet = this.canShowSnippet(pageData, user, userGroups)
  368. // eslint-disable-next-line max-len
  369. ? highlightData.body || highlightData['body.en'] || highlightData['body.ja'] || highlightData.comments || highlightData['comments.en'] || highlightData['comments.ja']
  370. : null;
  371. const pathMatch = highlightData['path.en'] || highlightData['path.ja'];
  372. elasticSearchResult = {
  373. snippet: snippet != null && typeof snippet[0] === 'string' ? filterXss.process(snippet) : null,
  374. highlightedPath: pathMatch != null && typeof pathMatch[0] === 'string' ? filterXss.process(pathMatch) : null,
  375. };
  376. }
  377. // serialize creator
  378. if (pageData.creator != null && pageData.creator instanceof User) {
  379. pageData.creator = serializeUserSecurely(pageData.creator);
  380. }
  381. // generate pageMeta data
  382. const pageMeta = {
  383. bookmarkCount: data._source.bookmark_count || 0,
  384. elasticSearchResult,
  385. };
  386. return { data: pageData, meta: pageMeta };
  387. });
  388. result.data = pages.filter(nonNullable);
  389. return result;
  390. }
  391. canShowSnippet(pageData, user, userGroups): boolean {
  392. const Page = mongoose.model('Page') as unknown as PageModel;
  393. const testGrant = pageData.grant;
  394. const testGrantedUser = pageData.grantedUsers?.[0];
  395. const testGrantedGroup = pageData.grantedGroup;
  396. if (testGrant === Page.GRANT_RESTRICTED) {
  397. return false;
  398. }
  399. if (testGrant === Page.GRANT_OWNER) {
  400. if (user == null) return false;
  401. return user._id.toString() === testGrantedUser.toString();
  402. }
  403. if (testGrant === Page.GRANT_USER_GROUP) {
  404. if (userGroups == null) return false;
  405. return userGroups.map(id => id.toString()).includes(testGrantedGroup.toString());
  406. }
  407. return true;
  408. }
  409. }
  410. export default SearchService;