search.ts 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. import xss from 'xss';
  2. import mongoose from 'mongoose';
  3. import { SearchDelegatorName } from '~/interfaces/named-query';
  4. import { IPageWithMeta } from '~/interfaces/page';
  5. import { IFormattedSearchResult, IPageSearchMeta, ISearchResult } from '~/interfaces/search';
  6. import loggerFactory from '~/utils/logger';
  7. import NamedQuery from '../models/named-query';
  8. import {
  9. SearchDelegator, SearchQueryParser, SearchResolver, ParsedQuery, SearchableData, QueryTerms,
  10. } from '../interfaces/search';
  11. import ElasticsearchDelegator from './search-delegator/elasticsearch';
  12. import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
  13. import { PageModel } from '../models/page';
  14. import { serializeUserSecurely } from '../models/serializers/user-serializer';
  15. import { ObjectIdLike } from '../interfaces/mongoose-utils';
  16. // eslint-disable-next-line no-unused-vars
  17. const logger = loggerFactory('growi:service:search');
  18. const nonNullable = <T>(value: T): value is NonNullable<T> => value != null;
  19. // options for filtering xss
  20. const filterXssOptions = {
  21. whiteList: {
  22. em: ['class'],
  23. },
  24. };
  25. const filterXss = new xss.FilterXSS(filterXssOptions);
  26. const normalizeQueryString = (_queryString: string): string => {
  27. let queryString = _queryString.trim();
  28. queryString = queryString.replace(/\s+/g, ' ');
  29. return queryString;
  30. };
  31. const findPageListByIds = async(pageIds: ObjectIdLike[], crowi: any) => {
  32. const Page = crowi.model('Page') as unknown as PageModel;
  33. const User = crowi.model('User');
  34. const builder = new Page.PageQueryBuilder(Page.find(({ _id: { $in: pageIds } })), false);
  35. builder.addConditionToPagenate(undefined, undefined); // offset and limit are unnesessary
  36. builder.populateDataToList(User.USER_FIELDS_EXCEPT_CONFIDENTIAL); // populate lastUpdateUser
  37. builder.query = builder.query.populate({
  38. path: 'creator',
  39. select: User.USER_FIELDS_EXCEPT_CONFIDENTIAL,
  40. });
  41. const pages = await builder.query.clone().exec('find');
  42. const totalCount = await builder.query.exec('count');
  43. return {
  44. pages,
  45. totalCount,
  46. };
  47. };
  48. class SearchService implements SearchQueryParser, SearchResolver {
  49. crowi!: any
  50. configManager!: any
  51. isErrorOccuredOnHealthcheck: boolean | null
  52. isErrorOccuredOnSearching: boolean | null
  53. fullTextSearchDelegator: any & SearchDelegator
  54. nqDelegators: {[key in SearchDelegatorName]: SearchDelegator}
  55. constructor(crowi) {
  56. this.crowi = crowi;
  57. this.configManager = crowi.configManager;
  58. this.isErrorOccuredOnHealthcheck = null;
  59. this.isErrorOccuredOnSearching = null;
  60. try {
  61. this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
  62. this.nqDelegators = this.generateNQDelegators(this.fullTextSearchDelegator);
  63. logger.info('Succeeded to initialize search delegators');
  64. }
  65. catch (err) {
  66. logger.error(err);
  67. }
  68. if (this.isConfigured) {
  69. this.fullTextSearchDelegator.init();
  70. this.registerUpdateEvent();
  71. }
  72. }
  73. get isConfigured() {
  74. return this.fullTextSearchDelegator != null;
  75. }
  76. get isReachable() {
  77. return this.isConfigured && !this.isErrorOccuredOnHealthcheck && !this.isErrorOccuredOnSearching;
  78. }
  79. get isElasticsearchEnabled() {
  80. const uri = this.configManager.getConfig('crowi', 'app:elasticsearchUri');
  81. return uri != null && uri.length > 0;
  82. }
  83. generateFullTextSearchDelegator() {
  84. logger.info('Initializing search delegator');
  85. if (this.isElasticsearchEnabled) {
  86. logger.info('Elasticsearch is enabled');
  87. return new ElasticsearchDelegator(this.configManager, this.crowi.socketIoService);
  88. }
  89. logger.info('No elasticsearch URI is specified so that full text search is disabled.');
  90. }
  91. generateNQDelegators(defaultDelegator: SearchDelegator): {[key in SearchDelegatorName]: SearchDelegator} {
  92. return {
  93. [SearchDelegatorName.DEFAULT]: defaultDelegator,
  94. [SearchDelegatorName.PRIVATE_LEGACY_PAGES]: new PrivateLegacyPagesDelegator(),
  95. };
  96. }
  97. registerUpdateEvent() {
  98. const pageEvent = this.crowi.event('page');
  99. pageEvent.on('create', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  100. pageEvent.on('update', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  101. pageEvent.on('delete', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  102. pageEvent.on('revert', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  103. pageEvent.on('deleteCompletely', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  104. pageEvent.on('syncDescendantsDelete', this.fullTextSearchDelegator.syncDescendantsPagesDeleted.bind(this.fullTextSearchDelegator));
  105. pageEvent.on('updateMany', this.fullTextSearchDelegator.syncPagesUpdated.bind(this.fullTextSearchDelegator));
  106. pageEvent.on('syncDescendantsUpdate', this.fullTextSearchDelegator.syncDescendantsPagesUpdated.bind(this.fullTextSearchDelegator));
  107. pageEvent.on('addSeenUsers', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  108. pageEvent.on('rename', () => {
  109. this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator);
  110. this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator);
  111. });
  112. const bookmarkEvent = this.crowi.event('bookmark');
  113. bookmarkEvent.on('create', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  114. bookmarkEvent.on('delete', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  115. const tagEvent = this.crowi.event('tag');
  116. tagEvent.on('update', this.fullTextSearchDelegator.syncTagChanged.bind(this.fullTextSearchDelegator));
  117. const commentEvent = this.crowi.event('comment');
  118. commentEvent.on('create', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  119. commentEvent.on('update', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  120. commentEvent.on('delete', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  121. }
  122. resetErrorStatus() {
  123. this.isErrorOccuredOnHealthcheck = false;
  124. this.isErrorOccuredOnSearching = false;
  125. }
  126. async reconnectClient() {
  127. logger.info('Try to reconnect...');
  128. this.fullTextSearchDelegator.initClient();
  129. try {
  130. await this.getInfoForHealth();
  131. logger.info('Reconnecting succeeded.');
  132. this.resetErrorStatus();
  133. }
  134. catch (err) {
  135. throw err;
  136. }
  137. }
  138. async getInfo() {
  139. try {
  140. return await this.fullTextSearchDelegator.getInfo();
  141. }
  142. catch (err) {
  143. logger.error(err);
  144. throw err;
  145. }
  146. }
  147. async getInfoForHealth() {
  148. try {
  149. const result = await this.fullTextSearchDelegator.getInfoForHealth();
  150. this.isErrorOccuredOnHealthcheck = false;
  151. return result;
  152. }
  153. catch (err) {
  154. logger.error(err);
  155. // switch error flag, `isErrorOccuredOnHealthcheck` to be `false`
  156. this.isErrorOccuredOnHealthcheck = true;
  157. throw err;
  158. }
  159. }
  160. async getInfoForAdmin() {
  161. return this.fullTextSearchDelegator.getInfoForAdmin();
  162. }
  163. async normalizeIndices() {
  164. return this.fullTextSearchDelegator.normalizeIndices();
  165. }
  166. async rebuildIndex() {
  167. return this.fullTextSearchDelegator.rebuildIndex();
  168. }
  169. async parseSearchQuery(_queryString: string): Promise<ParsedQuery> {
  170. const regexp = new RegExp(/^\[nq:.+\]$/g); // https://regex101.com/r/FzDUvT/1
  171. const replaceRegexp = new RegExp(/\[nq:|\]/g);
  172. const queryString = normalizeQueryString(_queryString);
  173. // when Normal Query
  174. if (!regexp.test(queryString)) {
  175. return { queryString, terms: this.parseQueryString(queryString) };
  176. }
  177. // when Named Query
  178. const name = queryString.replace(replaceRegexp, '');
  179. const nq = await NamedQuery.findOne({ name });
  180. // will delegate to full-text search
  181. if (nq == null) {
  182. return { queryString, terms: this.parseQueryString(queryString) };
  183. }
  184. const { aliasOf, delegatorName } = nq;
  185. let parsedQuery;
  186. if (aliasOf != null) {
  187. parsedQuery = { queryString: normalizeQueryString(aliasOf), terms: this.parseQueryString(aliasOf) };
  188. }
  189. if (delegatorName != null) {
  190. parsedQuery = { queryString, delegatorName };
  191. }
  192. return parsedQuery;
  193. }
  194. async resolve(parsedQuery: ParsedQuery): Promise<[SearchDelegator, SearchableData | null]> {
  195. const { queryString, terms, delegatorName } = parsedQuery;
  196. if (delegatorName != null) {
  197. const nqDelegator = this.nqDelegators[delegatorName];
  198. if (nqDelegator != null) {
  199. return [nqDelegator, null];
  200. }
  201. }
  202. const data = {
  203. queryString,
  204. terms: terms as QueryTerms,
  205. };
  206. return [this.nqDelegators[SearchDelegatorName.DEFAULT], data];
  207. }
  208. async searchKeyword(keyword: string, user, userGroups, searchOpts): Promise<[ISearchResult<unknown>, string]> {
  209. let parsedQuery;
  210. // parse
  211. try {
  212. parsedQuery = await this.parseSearchQuery(keyword);
  213. }
  214. catch (err) {
  215. logger.error('Error occurred while parseSearchQuery', err);
  216. throw err;
  217. }
  218. let delegator;
  219. let data;
  220. // resolve
  221. try {
  222. [delegator, data] = await this.resolve(parsedQuery);
  223. }
  224. catch (err) {
  225. logger.error('Error occurred while resolving search delegator', err);
  226. throw err;
  227. }
  228. return [await delegator.search(data, user, userGroups, searchOpts), delegator.name];
  229. }
  230. parseQueryString(queryString: string): QueryTerms {
  231. // terms
  232. const matchWords: string[] = [];
  233. const notMatchWords: string[] = [];
  234. const phraseWords: string[] = [];
  235. const notPhraseWords: string[] = [];
  236. const prefixPaths: string[] = [];
  237. const notPrefixPaths: string[] = [];
  238. const tags: string[] = [];
  239. const notTags: string[] = [];
  240. // First: Parse phrase keywords
  241. const phraseRegExp = new RegExp(/(-?"[^"]+")/g);
  242. const phrases = queryString.match(phraseRegExp);
  243. if (phrases !== null) {
  244. queryString = queryString.replace(phraseRegExp, ''); // eslint-disable-line no-param-reassign
  245. phrases.forEach((phrase) => {
  246. phrase.trim();
  247. if (phrase.match(/^-/)) {
  248. notPhraseWords.push(phrase.replace(/^-/, ''));
  249. }
  250. else {
  251. phraseWords.push(phrase);
  252. }
  253. });
  254. }
  255. // Second: Parse other keywords (include minus keywords)
  256. queryString.split(' ').forEach((word) => {
  257. if (word === '') {
  258. return;
  259. }
  260. // https://regex101.com/r/pN9XfK/1
  261. const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
  262. // https://regex101.com/r/3qw9FQ/1
  263. const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);
  264. if (matchNegative != null) {
  265. if (matchNegative[1] === 'prefix:') {
  266. notPrefixPaths.push(matchNegative[2]);
  267. }
  268. else if (matchNegative[1] === 'tag:') {
  269. notTags.push(matchNegative[2]);
  270. }
  271. else {
  272. notMatchWords.push(matchNegative[2]);
  273. }
  274. }
  275. else if (matchPositive != null) {
  276. if (matchPositive[1] === 'prefix:') {
  277. prefixPaths.push(matchPositive[2]);
  278. }
  279. else if (matchPositive[1] === 'tag:') {
  280. tags.push(matchPositive[2]);
  281. }
  282. else {
  283. matchWords.push(matchPositive[2]);
  284. }
  285. }
  286. });
  287. const terms = {
  288. match: matchWords,
  289. not_match: notMatchWords,
  290. phrase: phraseWords,
  291. not_phrase: notPhraseWords,
  292. prefix: prefixPaths,
  293. not_prefix: notPrefixPaths,
  294. tag: tags,
  295. not_tag: notTags,
  296. };
  297. return terms;
  298. }
  299. // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
  300. // So far, it determines by delegatorName passed by searchService.searchKeyword
  301. checkIsFormattable(searchResult, delegatorName: SearchDelegatorName): boolean {
  302. return delegatorName === SearchDelegatorName.DEFAULT;
  303. }
  304. /**
  305. * formatting result
  306. */
  307. async formatSearchResult(searchResult: ISearchResult<any>, delegatorName: SearchDelegatorName, user, userGroups): Promise<IFormattedSearchResult> {
  308. if (!this.checkIsFormattable(searchResult, delegatorName)) {
  309. const data: IPageWithMeta<IPageSearchMeta>[] = searchResult.data.map((page) => {
  310. return {
  311. data: page,
  312. };
  313. });
  314. return {
  315. data,
  316. meta: searchResult.meta,
  317. };
  318. }
  319. /*
  320. * Format ElasticSearch result
  321. */
  322. const User = this.crowi.model('User');
  323. const result = {} as IFormattedSearchResult;
  324. // get page data
  325. const pageIds = searchResult.data.map((page) => { return page._id });
  326. const findPageResult = await findPageListByIds(pageIds, this.crowi);
  327. // set meta data
  328. result.meta = searchResult.meta;
  329. // set search result page data
  330. const pages: (IPageWithMeta<IPageSearchMeta> | null)[] = searchResult.data.map((data) => {
  331. const pageData = findPageResult.pages.find((pageData) => {
  332. return pageData.id === data._id;
  333. });
  334. if (pageData == null) {
  335. return null;
  336. }
  337. // add tags and seenUserCount to pageData
  338. pageData._doc.tags = data._source.tag_names;
  339. pageData._doc.seenUserCount = (pageData.seenUsers && pageData.seenUsers.length) || 0;
  340. // serialize lastUpdateUser
  341. if (pageData.lastUpdateUser != null && pageData.lastUpdateUser instanceof User) {
  342. pageData.lastUpdateUser = serializeUserSecurely(pageData.lastUpdateUser);
  343. }
  344. // increment elasticSearchResult
  345. let elasticSearchResult;
  346. const highlightData = data._highlight;
  347. if (highlightData != null) {
  348. const snippet = this.canShowSnippet(pageData, user, userGroups)
  349. ? highlightData['body.en'] || highlightData['body.ja'] || highlightData['comments.en'] || highlightData['comments.ja']
  350. : null;
  351. const pathMatch = highlightData['path.en'] || highlightData['path.ja'];
  352. const isHtmlInPath = highlightData['path.en'] != null || highlightData['path.ja'] != null;
  353. elasticSearchResult = {
  354. snippet: snippet != null && typeof snippet[0] === 'string' ? filterXss.process(snippet) : null,
  355. highlightedPath: pathMatch != null && typeof pathMatch[0] === 'string' ? filterXss.process(pathMatch) : null,
  356. isHtmlInPath,
  357. };
  358. }
  359. // serialize creator
  360. if (pageData.creator != null && pageData.creator instanceof User) {
  361. pageData.creator = serializeUserSecurely(pageData.creator);
  362. }
  363. // generate pageMeta data
  364. const pageMeta = {
  365. bookmarkCount: data._source.bookmark_count || 0,
  366. elasticSearchResult,
  367. };
  368. return { data: pageData, meta: pageMeta };
  369. });
  370. result.data = pages.filter(nonNullable);
  371. return result;
  372. }
  373. canShowSnippet(pageData, user, userGroups): boolean {
  374. const Page = mongoose.model('Page') as unknown as PageModel;
  375. const testGrant = pageData.grant;
  376. const testGrantedUser = pageData.grantedUsers?.[0];
  377. const testGrantedGroup = pageData.grantedGroup;
  378. if (testGrant === Page.GRANT_RESTRICTED) {
  379. return false;
  380. }
  381. if (testGrant === Page.GRANT_OWNER) {
  382. if (user == null) return false;
  383. return user._id.toString() === testGrantedUser.toString();
  384. }
  385. if (testGrant === Page.GRANT_USER_GROUP) {
  386. if (userGroups == null) return false;
  387. return userGroups.map(id => id.toString()).includes(testGrantedGroup.toString());
  388. }
  389. return true;
  390. }
  391. }
  392. export default SearchService;