// search.ts
  1. import xss from 'xss';
  2. import { SearchDelegatorName } from '~/interfaces/named-query';
  3. import { IPageWithMeta } from '~/interfaces/page';
  4. import { IFormattedSearchResult, IPageSearchMeta, ISearchResult } from '~/interfaces/search';
  5. import loggerFactory from '~/utils/logger';
  6. import NamedQuery from '../models/named-query';
  7. import {
  8. SearchDelegator, SearchQueryParser, SearchResolver, ParsedQuery, SearchableData, QueryTerms,
  9. } from '../interfaces/search';
  10. import ElasticsearchDelegator from './search-delegator/elasticsearch';
  11. import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
  12. import { PageModel } from '../models/page';
  13. import { serializeUserSecurely } from '../models/serializers/user-serializer';
  14. // eslint-disable-next-line no-unused-vars
  15. const logger = loggerFactory('growi:service:search');
  16. const nonNullable = <T>(value: T): value is NonNullable<T> => value != null;
  17. // options for filtering xss
  18. const filterXssOptions = {
  19. whiteList: {
  20. em: ['class'],
  21. },
  22. };
  23. const filterXss = new xss.FilterXSS(filterXssOptions);
  24. const normalizeQueryString = (_queryString: string): string => {
  25. let queryString = _queryString.trim();
  26. queryString = queryString.replace(/\s+/g, ' ');
  27. return queryString;
  28. };
  29. class SearchService implements SearchQueryParser, SearchResolver {
  30. crowi!: any
  31. configManager!: any
  32. isErrorOccuredOnHealthcheck: boolean | null
  33. isErrorOccuredOnSearching: boolean | null
  34. fullTextSearchDelegator: any & SearchDelegator
  35. nqDelegators: {[key in SearchDelegatorName]: SearchDelegator}
  36. constructor(crowi) {
  37. this.crowi = crowi;
  38. this.configManager = crowi.configManager;
  39. this.isErrorOccuredOnHealthcheck = null;
  40. this.isErrorOccuredOnSearching = null;
  41. try {
  42. this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
  43. this.nqDelegators = this.generateNQDelegators(this.fullTextSearchDelegator);
  44. logger.info('Succeeded to initialize search delegators');
  45. }
  46. catch (err) {
  47. logger.error(err);
  48. }
  49. if (this.isConfigured) {
  50. this.fullTextSearchDelegator.init();
  51. this.registerUpdateEvent();
  52. }
  53. }
  54. get isConfigured() {
  55. return this.fullTextSearchDelegator != null;
  56. }
  57. get isReachable() {
  58. return this.isConfigured && !this.isErrorOccuredOnHealthcheck && !this.isErrorOccuredOnSearching;
  59. }
  60. get isElasticsearchEnabled() {
  61. const uri = this.configManager.getConfig('crowi', 'app:elasticsearchUri');
  62. return uri != null && uri.length > 0;
  63. }
  64. generateFullTextSearchDelegator() {
  65. logger.info('Initializing search delegator');
  66. if (this.isElasticsearchEnabled) {
  67. logger.info('Elasticsearch is enabled');
  68. return new ElasticsearchDelegator(this.configManager, this.crowi.socketIoService);
  69. }
  70. logger.info('No elasticsearch URI is specified so that full text search is disabled.');
  71. }
  72. generateNQDelegators(defaultDelegator: SearchDelegator): {[key in SearchDelegatorName]: SearchDelegator} {
  73. return {
  74. [SearchDelegatorName.DEFAULT]: defaultDelegator,
  75. [SearchDelegatorName.PRIVATE_LEGACY_PAGES]: new PrivateLegacyPagesDelegator(),
  76. };
  77. }
  78. registerUpdateEvent() {
  79. const pageEvent = this.crowi.event('page');
  80. pageEvent.on('create', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  81. pageEvent.on('update', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  82. pageEvent.on('delete', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  83. pageEvent.on('revert', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  84. pageEvent.on('deleteCompletely', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  85. pageEvent.on('syncDescendantsDelete', this.fullTextSearchDelegator.syncDescendantsPagesDeleted.bind(this.fullTextSearchDelegator));
  86. pageEvent.on('updateMany', this.fullTextSearchDelegator.syncPagesUpdated.bind(this.fullTextSearchDelegator));
  87. pageEvent.on('syncDescendantsUpdate', this.fullTextSearchDelegator.syncDescendantsPagesUpdated.bind(this.fullTextSearchDelegator));
  88. pageEvent.on('addSeenUsers', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  89. pageEvent.on('rename', () => {
  90. this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator);
  91. this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator);
  92. });
  93. const bookmarkEvent = this.crowi.event('bookmark');
  94. bookmarkEvent.on('create', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  95. bookmarkEvent.on('delete', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  96. const tagEvent = this.crowi.event('tag');
  97. tagEvent.on('update', this.fullTextSearchDelegator.syncTagChanged.bind(this.fullTextSearchDelegator));
  98. const commentEvent = this.crowi.event('comment');
  99. commentEvent.on('create', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  100. commentEvent.on('update', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  101. commentEvent.on('delete', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  102. }
  103. resetErrorStatus() {
  104. this.isErrorOccuredOnHealthcheck = false;
  105. this.isErrorOccuredOnSearching = false;
  106. }
  107. async reconnectClient() {
  108. logger.info('Try to reconnect...');
  109. this.fullTextSearchDelegator.initClient();
  110. try {
  111. await this.getInfoForHealth();
  112. logger.info('Reconnecting succeeded.');
  113. this.resetErrorStatus();
  114. }
  115. catch (err) {
  116. throw err;
  117. }
  118. }
  119. async getInfo() {
  120. try {
  121. return await this.fullTextSearchDelegator.getInfo();
  122. }
  123. catch (err) {
  124. logger.error(err);
  125. throw err;
  126. }
  127. }
  128. async getInfoForHealth() {
  129. try {
  130. const result = await this.fullTextSearchDelegator.getInfoForHealth();
  131. this.isErrorOccuredOnHealthcheck = false;
  132. return result;
  133. }
  134. catch (err) {
  135. logger.error(err);
  136. // switch error flag, `isErrorOccuredOnHealthcheck` to be `false`
  137. this.isErrorOccuredOnHealthcheck = true;
  138. throw err;
  139. }
  140. }
  141. async getInfoForAdmin() {
  142. return this.fullTextSearchDelegator.getInfoForAdmin();
  143. }
  144. async normalizeIndices() {
  145. return this.fullTextSearchDelegator.normalizeIndices();
  146. }
  147. async rebuildIndex() {
  148. return this.fullTextSearchDelegator.rebuildIndex();
  149. }
  150. async parseSearchQuery(_queryString: string): Promise<ParsedQuery> {
  151. const regexp = new RegExp(/^\[nq:.+\]$/g); // https://regex101.com/r/FzDUvT/1
  152. const replaceRegexp = new RegExp(/\[nq:|\]/g);
  153. const queryString = normalizeQueryString(_queryString);
  154. // when Normal Query
  155. if (!regexp.test(queryString)) {
  156. return { queryString, terms: this.parseQueryString(queryString) };
  157. }
  158. // when Named Query
  159. const name = queryString.replace(replaceRegexp, '');
  160. const nq = await NamedQuery.findOne({ name });
  161. // will delegate to full-text search
  162. if (nq == null) {
  163. return { queryString, terms: this.parseQueryString(queryString) };
  164. }
  165. const { aliasOf, delegatorName } = nq;
  166. let parsedQuery;
  167. if (aliasOf != null) {
  168. parsedQuery = { queryString: normalizeQueryString(aliasOf), terms: this.parseQueryString(aliasOf) };
  169. }
  170. if (delegatorName != null) {
  171. parsedQuery = { queryString, delegatorName };
  172. }
  173. return parsedQuery;
  174. }
  175. async resolve(parsedQuery: ParsedQuery): Promise<[SearchDelegator, SearchableData | null]> {
  176. const { queryString, terms, delegatorName } = parsedQuery;
  177. if (delegatorName != null) {
  178. const nqDelegator = this.nqDelegators[delegatorName];
  179. if (nqDelegator != null) {
  180. return [nqDelegator, null];
  181. }
  182. }
  183. const data = {
  184. queryString,
  185. terms: terms as QueryTerms,
  186. };
  187. return [this.nqDelegators[SearchDelegatorName.DEFAULT], data];
  188. }
  189. async searchKeyword(keyword: string, user, userGroups, searchOpts): Promise<[ISearchResult<unknown>, string]> {
  190. let parsedQuery;
  191. // parse
  192. try {
  193. parsedQuery = await this.parseSearchQuery(keyword);
  194. }
  195. catch (err) {
  196. logger.error('Error occurred while parseSearchQuery', err);
  197. throw err;
  198. }
  199. let delegator;
  200. let data;
  201. // resolve
  202. try {
  203. [delegator, data] = await this.resolve(parsedQuery);
  204. }
  205. catch (err) {
  206. logger.error('Error occurred while resolving search delegator', err);
  207. throw err;
  208. }
  209. return [await delegator.search(data, user, userGroups, searchOpts), delegator.name];
  210. }
  211. parseQueryString(queryString: string): QueryTerms {
  212. // terms
  213. const matchWords: string[] = [];
  214. const notMatchWords: string[] = [];
  215. const phraseWords: string[] = [];
  216. const notPhraseWords: string[] = [];
  217. const prefixPaths: string[] = [];
  218. const notPrefixPaths: string[] = [];
  219. const tags: string[] = [];
  220. const notTags: string[] = [];
  221. // First: Parse phrase keywords
  222. const phraseRegExp = new RegExp(/(-?"[^"]+")/g);
  223. const phrases = queryString.match(phraseRegExp);
  224. if (phrases !== null) {
  225. queryString = queryString.replace(phraseRegExp, ''); // eslint-disable-line no-param-reassign
  226. phrases.forEach((phrase) => {
  227. phrase.trim();
  228. if (phrase.match(/^-/)) {
  229. notPhraseWords.push(phrase.replace(/^-/, ''));
  230. }
  231. else {
  232. phraseWords.push(phrase);
  233. }
  234. });
  235. }
  236. // Second: Parse other keywords (include minus keywords)
  237. queryString.split(' ').forEach((word) => {
  238. if (word === '') {
  239. return;
  240. }
  241. // https://regex101.com/r/pN9XfK/1
  242. const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
  243. // https://regex101.com/r/3qw9FQ/1
  244. const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);
  245. if (matchNegative != null) {
  246. if (matchNegative[1] === 'prefix:') {
  247. notPrefixPaths.push(matchNegative[2]);
  248. }
  249. else if (matchNegative[1] === 'tag:') {
  250. notTags.push(matchNegative[2]);
  251. }
  252. else {
  253. notMatchWords.push(matchNegative[2]);
  254. }
  255. }
  256. else if (matchPositive != null) {
  257. if (matchPositive[1] === 'prefix:') {
  258. prefixPaths.push(matchPositive[2]);
  259. }
  260. else if (matchPositive[1] === 'tag:') {
  261. tags.push(matchPositive[2]);
  262. }
  263. else {
  264. matchWords.push(matchPositive[2]);
  265. }
  266. }
  267. });
  268. const terms = {
  269. match: matchWords,
  270. not_match: notMatchWords,
  271. phrase: phraseWords,
  272. not_phrase: notPhraseWords,
  273. prefix: prefixPaths,
  274. not_prefix: notPrefixPaths,
  275. tag: tags,
  276. not_tag: notTags,
  277. };
  278. return terms;
  279. }
  280. // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
  281. // So far, it determines by delegatorName passed by searchService.searchKeyword
  282. checkIsFormattable(searchResult, delegatorName: SearchDelegatorName): boolean {
  283. return delegatorName === SearchDelegatorName.DEFAULT;
  284. }
  285. /**
  286. * formatting result
  287. */
  288. async formatSearchResult(searchResult: ISearchResult<any>, delegatorName): Promise<IFormattedSearchResult> {
  289. if (!this.checkIsFormattable(searchResult, delegatorName)) {
  290. const data: IPageWithMeta<IPageSearchMeta>[] = searchResult.data.map((page) => {
  291. return {
  292. data: page,
  293. };
  294. });
  295. return {
  296. data,
  297. meta: searchResult.meta,
  298. };
  299. }
  300. /*
  301. * Format ElasticSearch result
  302. */
  303. const Page = this.crowi.model('Page') as unknown as PageModel;
  304. const User = this.crowi.model('User');
  305. const result = {} as IFormattedSearchResult;
  306. // get page data
  307. const pageIds = searchResult.data.map((page) => { return page._id });
  308. const findPageResult = await Page.findListByPageIds(pageIds);
  309. // set meta data
  310. result.meta = searchResult.meta;
  311. // set search result page data
  312. const pages: (IPageWithMeta<IPageSearchMeta> | null)[] = searchResult.data.map((data) => {
  313. const pageData = findPageResult.pages.find((pageData) => {
  314. return pageData.id === data._id;
  315. });
  316. if (pageData == null) {
  317. return null;
  318. }
  319. // add tags and seenUserCount to pageData
  320. pageData._doc.tags = data._source.tag_names;
  321. pageData._doc.seenUserCount = (pageData.seenUsers && pageData.seenUsers.length) || 0;
  322. // serialize lastUpdateUser
  323. if (pageData.lastUpdateUser != null && pageData.lastUpdateUser instanceof User) {
  324. pageData.lastUpdateUser = serializeUserSecurely(pageData.lastUpdateUser);
  325. }
  326. // serialize creator
  327. if (pageData.creator != null && pageData.creator instanceof User) {
  328. pageData.creator = serializeUserSecurely(pageData.creator);
  329. }
  330. // const data = searchResult.data.find((data) => {
  331. // return pageData.id === data._id;
  332. // });
  333. // increment elasticSearchResult
  334. let elasticSearchResult;
  335. const highlightData = data._highlight;
  336. if (highlightData != null) {
  337. const snippet = highlightData['body.en'] || highlightData['body.ja'] || highlightData['comments.en'] || highlightData['comments.ja'] || '';
  338. const pathMatch = highlightData['path.en'] || highlightData['path.ja'] || '';
  339. const isHtmlInPath = highlightData['path.en'] != null || highlightData['path.ja'] != null;
  340. elasticSearchResult = {
  341. snippet: filterXss.process(snippet),
  342. highlightedPath: filterXss.process(pathMatch),
  343. isHtmlInPath,
  344. };
  345. }
  346. // generate pageMeta data
  347. const pageMeta = {
  348. bookmarkCount: data._source.bookmark_count || 0,
  349. elasticSearchResult,
  350. };
  351. console.log('pageData:', pageData);
  352. console.log('pageMEta:', pageMeta);
  353. return { data: pageData, meta: pageMeta };
  354. });
  355. result.data = pages.filter(nonNullable);
  356. return result;
  357. }
  358. }
  359. export default SearchService;