search.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511
  1. import mongoose from 'mongoose';
  2. import xss from 'xss';
  3. import { SearchDelegatorName } from '~/interfaces/named-query';
  4. import { IPageHasId } from '~/interfaces/page';
  5. import { IFormattedSearchResult, IPageWithSearchMeta, ISearchResult } from '~/interfaces/search';
  6. import loggerFactory from '~/utils/logger';
  7. import { ObjectIdLike } from '../interfaces/mongoose-utils';
  8. import {
  9. SearchDelegator, SearchQueryParser, SearchResolver, ParsedQuery, SearchableData, QueryTerms,
  10. } from '../interfaces/search';
  11. import NamedQuery from '../models/named-query';
  12. import { PageModel } from '../models/page';
  13. import { serializeUserSecurely } from '../models/serializers/user-serializer';
  14. import { SearchError } from '../models/vo/search-error';
  15. import ElasticsearchDelegator from './search-delegator/elasticsearch';
  16. import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
  17. // eslint-disable-next-line no-unused-vars
  18. const logger = loggerFactory('growi:service:search');
  19. const nonNullable = <T>(value: T): value is NonNullable<T> => value != null;
  20. // options for filtering xss
  21. const filterXssOptions = {
  22. whiteList: {
  23. em: ['class'],
  24. },
  25. };
  26. const filterXss = new xss.FilterXSS(filterXssOptions);
  27. const normalizeQueryString = (_queryString: string): string => {
  28. let queryString = _queryString.trim();
  29. queryString = queryString.replace(/\s+/g, ' ');
  30. return queryString;
  31. };
  32. const normalizeNQName = (nqName: string): string => {
  33. return nqName.trim();
  34. };
  35. const findPageListByIds = async(pageIds: ObjectIdLike[], crowi: any) => {
  36. const Page = crowi.model('Page') as unknown as PageModel;
  37. const User = crowi.model('User');
  38. const builder = new Page.PageQueryBuilder(Page.find(({ _id: { $in: pageIds } })), false);
  39. builder.addConditionToPagenate(undefined, undefined); // offset and limit are unnesessary
  40. builder.populateDataToList(User.USER_FIELDS_EXCEPT_CONFIDENTIAL); // populate lastUpdateUser
  41. builder.query = builder.query.populate({
  42. path: 'creator',
  43. select: User.USER_FIELDS_EXCEPT_CONFIDENTIAL,
  44. });
  45. const pages = await builder.query.clone().exec('find');
  46. const totalCount = await builder.query.exec('count');
  47. return {
  48. pages,
  49. totalCount,
  50. };
  51. };
  52. class SearchService implements SearchQueryParser, SearchResolver {
  53. crowi!: any;
  54. configManager!: any;
  55. isErrorOccuredOnHealthcheck: boolean | null;
  56. isErrorOccuredOnSearching: boolean | null;
  57. fullTextSearchDelegator: any & ElasticsearchDelegator;
  58. nqDelegators: {[key in SearchDelegatorName]: SearchDelegator};
  59. constructor(crowi) {
  60. this.crowi = crowi;
  61. this.configManager = crowi.configManager;
  62. this.isErrorOccuredOnHealthcheck = null;
  63. this.isErrorOccuredOnSearching = null;
  64. try {
  65. this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
  66. this.nqDelegators = this.generateNQDelegators(this.fullTextSearchDelegator);
  67. logger.info('Succeeded to initialize search delegators');
  68. }
  69. catch (err) {
  70. logger.error(err);
  71. }
  72. if (this.isConfigured) {
  73. this.fullTextSearchDelegator.init();
  74. this.registerUpdateEvent();
  75. }
  76. }
  77. get isConfigured() {
  78. return this.fullTextSearchDelegator != null;
  79. }
  80. get isReachable() {
  81. return this.isConfigured && !this.isErrorOccuredOnHealthcheck && !this.isErrorOccuredOnSearching;
  82. }
  83. get isElasticsearchEnabled() {
  84. const uri = this.configManager.getConfig('crowi', 'app:elasticsearchUri');
  85. return uri != null && uri.length > 0;
  86. }
  87. generateFullTextSearchDelegator() {
  88. logger.info('Initializing search delegator');
  89. if (this.isElasticsearchEnabled) {
  90. logger.info('Elasticsearch is enabled');
  91. return new ElasticsearchDelegator(this.configManager, this.crowi.socketIoService);
  92. }
  93. logger.info('No elasticsearch URI is specified so that full text search is disabled.');
  94. }
  95. generateNQDelegators(defaultDelegator: ElasticsearchDelegator): {[key in SearchDelegatorName]: SearchDelegator} {
  96. return {
  97. [SearchDelegatorName.DEFAULT]: defaultDelegator,
  98. [SearchDelegatorName.PRIVATE_LEGACY_PAGES]: new PrivateLegacyPagesDelegator() as unknown as SearchDelegator,
  99. };
  100. }
  101. registerUpdateEvent() {
  102. const pageEvent = this.crowi.event('page');
  103. pageEvent.on('create', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  104. pageEvent.on('update', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  105. pageEvent.on('delete', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  106. pageEvent.on('revert', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  107. pageEvent.on('deleteCompletely', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  108. pageEvent.on('syncDescendantsDelete', this.fullTextSearchDelegator.syncDescendantsPagesDeleted.bind(this.fullTextSearchDelegator));
  109. pageEvent.on('updateMany', this.fullTextSearchDelegator.syncPagesUpdated.bind(this.fullTextSearchDelegator));
  110. pageEvent.on('syncDescendantsUpdate', this.fullTextSearchDelegator.syncDescendantsPagesUpdated.bind(this.fullTextSearchDelegator));
  111. pageEvent.on('addSeenUsers', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  112. pageEvent.on('rename', () => {
  113. this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator);
  114. this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator);
  115. });
  116. const bookmarkEvent = this.crowi.event('bookmark');
  117. bookmarkEvent.on('create', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  118. bookmarkEvent.on('delete', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  119. const tagEvent = this.crowi.event('tag');
  120. tagEvent.on('update', this.fullTextSearchDelegator.syncTagChanged.bind(this.fullTextSearchDelegator));
  121. const commentEvent = this.crowi.event('comment');
  122. commentEvent.on('create', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  123. commentEvent.on('update', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  124. commentEvent.on('delete', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  125. }
  126. resetErrorStatus() {
  127. this.isErrorOccuredOnHealthcheck = false;
  128. this.isErrorOccuredOnSearching = false;
  129. }
  130. async reconnectClient() {
  131. logger.info('Try to reconnect...');
  132. this.fullTextSearchDelegator.initClient();
  133. try {
  134. await this.getInfoForHealth();
  135. logger.info('Reconnecting succeeded.');
  136. this.resetErrorStatus();
  137. }
  138. catch (err) {
  139. throw err;
  140. }
  141. }
  142. async getInfo() {
  143. try {
  144. return await this.fullTextSearchDelegator.getInfo();
  145. }
  146. catch (err) {
  147. logger.error(err);
  148. throw err;
  149. }
  150. }
  151. async getInfoForHealth() {
  152. try {
  153. const result = await this.fullTextSearchDelegator.getInfoForHealth();
  154. this.isErrorOccuredOnHealthcheck = false;
  155. return result;
  156. }
  157. catch (err) {
  158. logger.error(err);
  159. // switch error flag, `isErrorOccuredOnHealthcheck` to be `false`
  160. this.isErrorOccuredOnHealthcheck = true;
  161. throw err;
  162. }
  163. }
  164. async getInfoForAdmin() {
  165. return this.fullTextSearchDelegator.getInfoForAdmin();
  166. }
  167. async normalizeIndices() {
  168. return this.fullTextSearchDelegator.normalizeIndices();
  169. }
  170. async rebuildIndex() {
  171. return this.fullTextSearchDelegator.rebuildIndex();
  172. }
  173. async parseSearchQuery(queryString: string, nqName: string | null): Promise<ParsedQuery> {
  174. // eslint-disable-next-line no-param-reassign
  175. queryString = normalizeQueryString(queryString);
  176. const terms = this.parseQueryString(queryString);
  177. if (nqName == null) {
  178. return { queryString, terms };
  179. }
  180. const nq = await NamedQuery.findOne({ name: normalizeNQName(nqName) });
  181. // will delegate to full-text search
  182. if (nq == null) {
  183. logger.debug(`Delegated to full-text search since a named query document did not found. (nqName="${nqName}")`);
  184. return { queryString, terms };
  185. }
  186. const { aliasOf, delegatorName } = nq;
  187. let parsedQuery: ParsedQuery;
  188. if (aliasOf != null) {
  189. parsedQuery = { queryString: normalizeQueryString(aliasOf), terms: this.parseQueryString(aliasOf) };
  190. }
  191. else {
  192. parsedQuery = { queryString, terms, delegatorName };
  193. }
  194. return parsedQuery;
  195. }
  196. async resolve(parsedQuery: ParsedQuery): Promise<[SearchDelegator, SearchableData]> {
  197. const { queryString, terms, delegatorName = SearchDelegatorName.DEFAULT } = parsedQuery;
  198. const nqDeledator = this.nqDelegators[delegatorName];
  199. const data = {
  200. queryString,
  201. terms,
  202. };
  203. return [nqDeledator, data];
  204. }
  205. /**
  206. * Throws SearchError if data is corrupted.
  207. * @param {SearchableData} data
  208. * @param {SearchDelegator} delegator
  209. * @throws {SearchError} SearchError
  210. */
  211. private validateSearchableData(delegator: SearchDelegator, data: SearchableData): void {
  212. const { terms } = data;
  213. if (delegator.isTermsNormalized(terms)) {
  214. return;
  215. }
  216. const unavailableTermsKeys = delegator.validateTerms(terms);
  217. throw new SearchError('The query string includes unavailable terms.', unavailableTermsKeys);
  218. }
  219. async searchKeyword(keyword: string, nqName: string | null, user, userGroups, searchOpts): Promise<[ISearchResult<unknown>, string | null]> {
  220. let parsedQuery: ParsedQuery;
  221. // parse
  222. try {
  223. parsedQuery = await this.parseSearchQuery(keyword, nqName);
  224. }
  225. catch (err) {
  226. logger.error('Error occurred while parseSearchQuery', err);
  227. throw err;
  228. }
  229. let delegator: SearchDelegator;
  230. let data: SearchableData;
  231. // resolve
  232. try {
  233. [delegator, data] = await this.resolve(parsedQuery);
  234. }
  235. catch (err) {
  236. logger.error('Error occurred while resolving search delegator', err);
  237. throw err;
  238. }
  239. // throws
  240. this.validateSearchableData(delegator, data);
  241. return [await delegator.search(data, user, userGroups, searchOpts), delegator.name ?? null];
  242. }
  243. parseQueryString(queryString: string): QueryTerms {
  244. // terms
  245. const matchWords: string[] = [];
  246. const notMatchWords: string[] = [];
  247. const phraseWords: string[] = [];
  248. const notPhraseWords: string[] = [];
  249. const prefixPaths: string[] = [];
  250. const notPrefixPaths: string[] = [];
  251. const tags: string[] = [];
  252. const notTags: string[] = [];
  253. // First: Parse phrase keywords
  254. const phraseRegExp = new RegExp(/(-?"[^"]+")/g);
  255. const phrases = queryString.match(phraseRegExp);
  256. if (phrases !== null) {
  257. queryString = queryString.replace(phraseRegExp, ''); // eslint-disable-line no-param-reassign
  258. phrases.forEach((phrase) => {
  259. phrase.trim();
  260. if (phrase.match(/^-/)) {
  261. notPhraseWords.push(phrase.replace(/^-/, ''));
  262. }
  263. else {
  264. phraseWords.push(phrase);
  265. }
  266. });
  267. }
  268. // Second: Parse other keywords (include minus keywords)
  269. queryString.split(' ').forEach((word) => {
  270. if (word === '') {
  271. return;
  272. }
  273. // https://regex101.com/r/pN9XfK/1
  274. const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
  275. // https://regex101.com/r/3qw9FQ/1
  276. const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);
  277. if (matchNegative != null) {
  278. if (matchNegative[1] === 'prefix:') {
  279. notPrefixPaths.push(matchNegative[2]);
  280. }
  281. else if (matchNegative[1] === 'tag:') {
  282. notTags.push(matchNegative[2]);
  283. }
  284. else {
  285. notMatchWords.push(matchNegative[2]);
  286. }
  287. }
  288. else if (matchPositive != null) {
  289. if (matchPositive[1] === 'prefix:') {
  290. prefixPaths.push(matchPositive[2]);
  291. }
  292. else if (matchPositive[1] === 'tag:') {
  293. tags.push(matchPositive[2]);
  294. }
  295. else {
  296. matchWords.push(matchPositive[2]);
  297. }
  298. }
  299. });
  300. const terms = {
  301. match: matchWords,
  302. not_match: notMatchWords,
  303. phrase: phraseWords,
  304. not_phrase: notPhraseWords,
  305. prefix: prefixPaths,
  306. not_prefix: notPrefixPaths,
  307. tag: tags,
  308. not_tag: notTags,
  309. };
  310. return terms;
  311. }
  312. // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
  313. // So far, it determines by delegatorName passed by searchService.searchKeyword
  314. checkIsFormattable(searchResult, delegatorName: SearchDelegatorName): boolean {
  315. return delegatorName === SearchDelegatorName.DEFAULT;
  316. }
  317. /**
  318. * formatting result
  319. */
  320. async formatSearchResult(searchResult: ISearchResult<any>, delegatorName: SearchDelegatorName, user, userGroups): Promise<IFormattedSearchResult> {
  321. if (!this.checkIsFormattable(searchResult, delegatorName)) {
  322. const data: IPageWithSearchMeta[] = searchResult.data.map((page) => {
  323. return {
  324. data: page as IPageHasId,
  325. };
  326. });
  327. return {
  328. data,
  329. meta: searchResult.meta,
  330. };
  331. }
  332. /*
  333. * Format ElasticSearch result
  334. */
  335. const User = this.crowi.model('User');
  336. const result = {} as IFormattedSearchResult;
  337. // get page data
  338. const pageIds: string[] = searchResult.data.map((page) => { return page._id });
  339. const findPageResult = await findPageListByIds(pageIds, this.crowi);
  340. // set meta data
  341. result.meta = searchResult.meta;
  342. // set search result page data
  343. const pages: (IPageWithSearchMeta | null)[] = searchResult.data.map((data) => {
  344. const pageData = findPageResult.pages.find((pageData) => {
  345. return pageData.id === data._id;
  346. });
  347. if (pageData == null) {
  348. return null;
  349. }
  350. // add tags and seenUserCount to pageData
  351. pageData._doc.tags = data._source.tag_names;
  352. pageData._doc.seenUserCount = (pageData.seenUsers && pageData.seenUsers.length) || 0;
  353. // serialize lastUpdateUser
  354. if (pageData.lastUpdateUser != null && pageData.lastUpdateUser instanceof User) {
  355. pageData.lastUpdateUser = serializeUserSecurely(pageData.lastUpdateUser);
  356. }
  357. // increment elasticSearchResult
  358. let elasticSearchResult;
  359. const highlightData = data._highlight;
  360. if (highlightData != null) {
  361. const snippet = this.canShowSnippet(pageData, user, userGroups)
  362. ? highlightData['body.en'] || highlightData['body.ja'] || highlightData['comments.en'] || highlightData['comments.ja']
  363. : null;
  364. const pathMatch = highlightData['path.en'] || highlightData['path.ja'];
  365. const isHtmlInPath = highlightData['path.en'] != null || highlightData['path.ja'] != null;
  366. elasticSearchResult = {
  367. snippet: snippet != null && typeof snippet[0] === 'string' ? filterXss.process(snippet) : null,
  368. highlightedPath: pathMatch != null && typeof pathMatch[0] === 'string' ? filterXss.process(pathMatch) : null,
  369. isHtmlInPath,
  370. };
  371. }
  372. // serialize creator
  373. if (pageData.creator != null && pageData.creator instanceof User) {
  374. pageData.creator = serializeUserSecurely(pageData.creator);
  375. }
  376. // generate pageMeta data
  377. const pageMeta = {
  378. bookmarkCount: data._source.bookmark_count || 0,
  379. elasticSearchResult,
  380. };
  381. return { data: pageData, meta: pageMeta };
  382. });
  383. result.data = pages.filter(nonNullable);
  384. return result;
  385. }
  386. canShowSnippet(pageData, user, userGroups): boolean {
  387. const Page = mongoose.model('Page') as unknown as PageModel;
  388. const testGrant = pageData.grant;
  389. const testGrantedUser = pageData.grantedUsers?.[0];
  390. const testGrantedGroup = pageData.grantedGroup;
  391. if (testGrant === Page.GRANT_RESTRICTED) {
  392. return false;
  393. }
  394. if (testGrant === Page.GRANT_OWNER) {
  395. if (user == null) return false;
  396. return user._id.toString() === testGrantedUser.toString();
  397. }
  398. if (testGrant === Page.GRANT_USER_GROUP) {
  399. if (userGroups == null) return false;
  400. return userGroups.map(id => id.toString()).includes(testGrantedGroup.toString());
  401. }
  402. return true;
  403. }
  404. }
  405. export default SearchService;