search.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. import xss from 'xss';
  2. import { SearchDelegatorName } from '~/interfaces/named-query';
  3. import { IFormattedSearchResult } from '~/interfaces/search';
  4. import loggerFactory from '~/utils/logger';
  5. import NamedQuery from '../models/named-query';
  6. import {
  7. SearchDelegator, SearchQueryParser, SearchResolver, ParsedQuery, Result, MetaData, SearchableData, QueryTerms,
  8. } from '../interfaces/search';
  9. import ElasticsearchDelegator from './search-delegator/elasticsearch';
  10. import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
  11. import { PageModel } from '../models/page';
  12. import { serializeUserSecurely } from '../models/serializers/user-serializer';
  // Module-wide logger for the search service.
  const logger = loggerFactory('growi:service:search');

  /**
   * Allow list handed to the XSS filter that sanitizes search highlight
   * fragments (snippet / highlighted path): only `<em>` elements carrying a
   * `class` attribute are listed.
   */
  const filterXssOptions = { whiteList: { em: ['class'] } };

  // Shared filter instance; reused for every search result that is formatted.
  const filterXss = new xss.FilterXSS(filterXssOptions);
  /**
   * Canonicalize a raw query string: strip leading/trailing whitespace and
   * collapse every run of whitespace characters into a single space.
   *
   * @param _queryString raw query string as typed by the user
   * @returns the trimmed, single-spaced query string
   */
  const normalizeQueryString = (_queryString: string): string => _queryString.trim().replace(/\s+/g, ' ');
  /**
   * Front door for searching.
   *
   * Responsibilities visible in this class:
   * - create the full-text search delegator (Elasticsearch) when a URI is configured
   * - forward page / bookmark / tag / comment events to that delegator so it can
   *   keep its index in sync
   * - parse query strings (plain queries and `[nq:<name>]` named queries)
   * - pick the delegator that should execute a parsed query
   * - convert delegator results into `IFormattedSearchResult`
   */
  class SearchService implements SearchQueryParser, SearchResolver {

    crowi!: any

    configManager!: any

    // null until the first health check; false after a success, true after a failure
    isErrorOccuredOnHealthcheck: boolean | null

    // initialized to null and reset to false by resetErrorStatus();
    // nothing inside this class ever sets it to true
    isErrorOccuredOnSearching: boolean | null

    // stays undefined when no Elasticsearch URI is configured (see isConfigured)
    fullTextSearchDelegator: any & SearchDelegator

    nqDelegators: {[key in SearchDelegatorName]: SearchDelegator}

    /**
     * @param crowi application object; this class reads `configManager`,
     *   `socketIoService`, `event(name)` and `model(name)` from it
     */
    constructor(crowi) {
      this.crowi = crowi;
      this.configManager = crowi.configManager;

      this.isErrorOccuredOnHealthcheck = null;
      this.isErrorOccuredOnSearching = null;

      // A failure here must not break construction: the service simply stays unconfigured.
      try {
        this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
        this.nqDelegators = this.generateNQDelegators(this.fullTextSearchDelegator);
        logger.info('Succeeded to initialize search delegators');
      }
      catch (err) {
        logger.error(err);
      }

      if (this.isConfigured) {
        this.fullTextSearchDelegator.init();
        this.registerUpdateEvent();
      }
    }

    /** True when a full-text search delegator has been created. */
    get isConfigured() {
      return this.fullTextSearchDelegator != null;
    }

    /** True when configured and neither error flag is set. */
    get isReachable() {
      return this.isConfigured && !this.isErrorOccuredOnHealthcheck && !this.isErrorOccuredOnSearching;
    }

    /** True when the `app:elasticsearchUri` config value is a non-empty string. */
    get isElasticsearchEnabled() {
      const uri = this.configManager.getConfig('crowi', 'app:elasticsearchUri');
      return uri != null && uri.length > 0;
    }

    /**
     * Create the full-text search delegator.
     *
     * @returns an ElasticsearchDelegator when Elasticsearch is enabled, otherwise undefined
     */
    generateFullTextSearchDelegator() {
      logger.info('Initializing search delegator');

      if (this.isElasticsearchEnabled) {
        logger.info('Elasticsearch is enabled');
        return new ElasticsearchDelegator(this.configManager, this.crowi.socketIoService);
      }

      logger.info('No elasticsearch URI is specified so that full text search is disabled.');
      return undefined;
    }

    /**
     * Build the lookup table from delegator name to delegator instance.
     *
     * @param defaultDelegator delegator registered under SearchDelegatorName.DEFAULT
     */
    generateNQDelegators(defaultDelegator: SearchDelegator): {[key in SearchDelegatorName]: SearchDelegator} {
      return {
        [SearchDelegatorName.DEFAULT]: defaultDelegator,
        [SearchDelegatorName.PRIVATE_LEGACY_PAGES]: new PrivateLegacyPagesDelegator(),
      };
    }

    /**
     * Subscribe the full-text delegator's sync methods to the page, bookmark,
     * tag and comment event emitters obtained from `crowi.event(name)`.
     */
    registerUpdateEvent() {
      const delegator = this.fullTextSearchDelegator;

      const pageEvent = this.crowi.event('page');
      pageEvent.on('create', delegator.syncPageUpdated.bind(delegator));
      pageEvent.on('update', delegator.syncPageUpdated.bind(delegator));
      pageEvent.on('delete', delegator.syncPageDeleted.bind(delegator));
      pageEvent.on('revert', delegator.syncPageDeleted.bind(delegator));
      pageEvent.on('deleteCompletely', delegator.syncPageDeleted.bind(delegator));
      pageEvent.on('syncDescendantsDelete', delegator.syncDescendantsPagesDeleted.bind(delegator));
      pageEvent.on('updateMany', delegator.syncPagesUpdated.bind(delegator));
      pageEvent.on('syncDescendantsUpdate', delegator.syncDescendantsPagesUpdated.bind(delegator));
      pageEvent.on('addSeenUsers', delegator.syncPageUpdated.bind(delegator));
      // The previous handler only created bound functions and never called them,
      // so a rename did not touch the index at all. Run delete then update, in
      // that order, with whatever arguments the event carries.
      // NOTE(review): assumes 'rename' is emitted with the same arguments as
      // 'delete' / 'update' - confirm against the emitter.
      pageEvent.on('rename', async(...args: unknown[]) => {
        try {
          await delegator.syncPageDeleted(...args);
          await delegator.syncPageUpdated(...args);
        }
        catch (err) {
          // an async listener must not reject: nothing awaits its promise
          logger.error('Failed to sync the search index after renaming a page', err);
        }
      });

      const bookmarkEvent = this.crowi.event('bookmark');
      bookmarkEvent.on('create', delegator.syncBookmarkChanged.bind(delegator));
      bookmarkEvent.on('delete', delegator.syncBookmarkChanged.bind(delegator));

      const tagEvent = this.crowi.event('tag');
      tagEvent.on('update', delegator.syncTagChanged.bind(delegator));

      const commentEvent = this.crowi.event('comment');
      commentEvent.on('create', delegator.syncCommentChanged.bind(delegator));
      commentEvent.on('update', delegator.syncCommentChanged.bind(delegator));
      commentEvent.on('delete', delegator.syncCommentChanged.bind(delegator));
    }

    /** Clear both error flags. */
    resetErrorStatus() {
      this.isErrorOccuredOnHealthcheck = false;
      this.isErrorOccuredOnSearching = false;
    }

    /**
     * Re-create the delegator's client and verify it with a health check.
     * The error flags are cleared only when the health check succeeds.
     *
     * @throws whatever getInfoForHealth() throws
     */
    async reconnectClient() {
      logger.info('Try to reconnect...');
      this.fullTextSearchDelegator.initClient();

      await this.getInfoForHealth();

      logger.info('Reconnecting succeeded.');
      this.resetErrorStatus();
    }

    /**
     * Return the delegator's info; failures are logged and rethrown.
     */
    async getInfo() {
      try {
        return await this.fullTextSearchDelegator.getInfo();
      }
      catch (err) {
        logger.error(err);
        throw err;
      }
    }

    /**
     * Return the delegator's health info and record the outcome in
     * `isErrorOccuredOnHealthcheck`. Failures are logged and rethrown.
     */
    async getInfoForHealth() {
      try {
        const result = await this.fullTextSearchDelegator.getInfoForHealth();

        this.isErrorOccuredOnHealthcheck = false;
        return result;
      }
      catch (err) {
        logger.error(err);

        // switch error flag, `isErrorOccuredOnHealthcheck` to be `true`
        this.isErrorOccuredOnHealthcheck = true;
        throw err;
      }
    }

    /** Pass-through to the delegator's getInfoForAdmin(). */
    async getInfoForAdmin() {
      return this.fullTextSearchDelegator.getInfoForAdmin();
    }

    /** Pass-through to the delegator's normalizeIndices(). */
    async normalizeIndices() {
      return this.fullTextSearchDelegator.normalizeIndices();
    }

    /** Pass-through to the delegator's rebuildIndex(). */
    async rebuildIndex() {
      return this.fullTextSearchDelegator.rebuildIndex();
    }

    /**
     * Turn a raw query string into a ParsedQuery.
     *
     * - A plain query is normalized and split into terms.
     * - A query of the form `[nq:<name>]` is looked up as a NamedQuery:
     *   - with `delegatorName`: the result carries that delegator name
     *     (this takes precedence when `aliasOf` is also set)
     *   - with `aliasOf`: the alias is normalized and parsed as a plain query
     *   - not found, or found with neither field: the original string is
     *     parsed as a plain query
     *
     * @param _queryString raw query string
     */
    async parseSearchQuery(_queryString: string): Promise<ParsedQuery> {
      const queryString = normalizeQueryString(_queryString);

      // when Normal Query
      const isNamedQuery = /^\[nq:.+\]$/.test(queryString); // https://regex101.com/r/FzDUvT/1
      if (!isNamedQuery) {
        return { queryString, terms: this.parseQueryString(queryString) };
      }

      // when Named Query
      const name = queryString.replace(/\[nq:|\]/g, '');
      const nq = await NamedQuery.findOne({ name });

      // will delegate to full-text search
      if (nq == null) {
        return { queryString, terms: this.parseQueryString(queryString) };
      }

      const { aliasOf, delegatorName } = nq;

      if (delegatorName != null) {
        return { queryString, delegatorName };
      }

      if (aliasOf != null) {
        // parse the normalized alias so that terms and queryString agree
        const aliasedQueryString = normalizeQueryString(aliasOf);
        return { queryString: aliasedQueryString, terms: this.parseQueryString(aliasedQueryString) };
      }

      // The named query defines nothing usable: never return undefined,
      // treat the input as a plain full-text query instead.
      return { queryString, terms: this.parseQueryString(queryString) };
    }

    /**
     * Choose the delegator for a parsed query.
     *
     * @returns `[delegator, null]` when `delegatorName` maps to a registered
     *   delegator, otherwise `[default delegator, { queryString, terms }]`
     */
    async resolve(parsedQuery: ParsedQuery): Promise<[SearchDelegator, SearchableData | null]> {
      const { queryString, terms, delegatorName } = parsedQuery;

      if (delegatorName != null) {
        const nqDelegator = this.nqDelegators[delegatorName];
        if (nqDelegator != null) {
          return [nqDelegator, null];
        }
      }

      const data = {
        queryString,
        terms: terms as QueryTerms,
      };
      return [this.nqDelegators[SearchDelegatorName.DEFAULT], data];
    }

    /**
     * Parse `keyword`, resolve the delegator and run the search.
     *
     * @returns a tuple of the delegator's search result and the delegator's name
     * @throws errors from parsing or resolving (logged first) and from the search itself
     */
    async searchKeyword(keyword: string, user, userGroups, searchOpts): Promise<[Result<any> & MetaData, string]> {
      let parsedQuery;
      // parse
      try {
        parsedQuery = await this.parseSearchQuery(keyword);
      }
      catch (err) {
        logger.error('Error occurred while parseSearchQuery', err);
        throw err;
      }

      let delegator;
      let data;
      // resolve
      try {
        [delegator, data] = await this.resolve(parsedQuery);
      }
      catch (err) {
        logger.error('Error occurred while resolving search delegator', err);
        throw err;
      }

      return [await delegator.search(data, user, userGroups, searchOpts), delegator.name];
    }

    /**
     * Split a query string into categorized terms.
     *
     * - `"..."` / `-"..."`        : phrase / not_phrase (double quotes are kept in the stored value)
     * - `prefix:x` / `-prefix:x`  : prefix / not_prefix
     * - `tag:x` / `-tag:x`        : tag / not_tag
     * - `x` / `-x`                : match / not_match
     *
     * Words are separated by single spaces; empty words are ignored.
     */
    parseQueryString(queryString: string): QueryTerms {
      // terms
      const matchWords: string[] = [];
      const notMatchWords: string[] = [];
      const phraseWords: string[] = [];
      const notPhraseWords: string[] = [];
      const prefixPaths: string[] = [];
      const notPrefixPaths: string[] = [];
      const tags: string[] = [];
      const notTags: string[] = [];

      // First: Parse phrase keywords and cut them out of the string
      const phraseRegExp = /(-?"[^"]+")/g;
      const phrases = queryString.match(phraseRegExp) || [];
      const remaining = queryString.replace(phraseRegExp, '');

      phrases.forEach((phrase) => {
        if (phrase.startsWith('-')) {
          notPhraseWords.push(phrase.slice(1));
        }
        else {
          phraseWords.push(phrase);
        }
      });

      // Second: Parse other keywords (include minus keywords)
      const negativeRegExp = /^-(prefix:|tag:)?(.+)$/; // https://regex101.com/r/pN9XfK/1
      const positiveRegExp = /^(prefix:|tag:)?(.+)$/; // https://regex101.com/r/3qw9FQ/1

      remaining.split(' ').forEach((word) => {
        if (word === '') {
          return;
        }

        // the negative form wins; a lone "-" does not match it and is kept as a positive word
        const matchNegative = word.match(negativeRegExp);
        const matched = matchNegative || word.match(positiveRegExp);
        if (matched == null) {
          return;
        }

        const isNegative = matchNegative != null;
        const qualifier = matched[1];
        const value = matched[2];

        if (qualifier === 'prefix:') {
          (isNegative ? notPrefixPaths : prefixPaths).push(value);
        }
        else if (qualifier === 'tag:') {
          (isNegative ? notTags : tags).push(value);
        }
        else {
          (isNegative ? notMatchWords : matchWords).push(value);
        }
      });

      return {
        match: matchWords,
        not_match: notMatchWords,
        phrase: phraseWords,
        not_phrase: notPhraseWords,
        prefix: prefixPaths,
        not_prefix: notPrefixPaths,
        tag: tags,
        not_tag: notTags,
      };
    }

    // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
    // So far, it determines by delegatorName passed by searchService.searchKeyword
    /**
     * @returns true only for results produced by the DEFAULT delegator
     */
    checkIsFormattable(searchResult, delegatorName: SearchDelegatorName): boolean {
      return delegatorName === SearchDelegatorName.DEFAULT;
    }

    /**
     * Convert a delegator result into `IFormattedSearchResult`.
     *
     * Results that are not formattable (see checkIsFormattable) are wrapped
     * as-is with an empty `pageMeta`. Otherwise each hit is joined with its
     * page document loaded via `Page.findListByPageIds` and enriched with tags,
     * seen-user count, bookmark count and XSS-filtered highlight fragments.
     * Hits for which no page document was returned are skipped.
     *
     * @param searchResult result returned by a delegator's search()
     * @param delegatorName name of the delegator that produced `searchResult`
     */
    async formatSearchResult(searchResult: Result<any> & MetaData, delegatorName): Promise<IFormattedSearchResult> {
      if (!this.checkIsFormattable(searchResult, delegatorName)) {
        const data = searchResult.data.map((page) => {
          return {
            pageData: page,
            pageMeta: {},
          };
        });

        return {
          data,
          totalCount: data.length,
          meta: searchResult.meta,
        };
      }

      /*
       * Format ElasticSearch result
       */
      const Page = this.crowi.model('Page') as PageModel;
      const User = this.crowi.model('User');
      const result = {} as IFormattedSearchResult;

      // get page data
      const pageIds = searchResult.data.map((hit) => { return hit._id });
      const findPageResult = await Page.findListByPageIds(pageIds);

      // index pages by id once instead of scanning the list for every hit;
      // the first page wins on duplicate ids, like Array.prototype.find
      const pageById = new Map();
      findPageResult.pages.forEach((page) => {
        if (!pageById.has(page.id)) {
          pageById.set(page.id, page);
        }
      });

      // set meta data
      result.meta = searchResult.meta;
      result.totalCount = findPageResult.totalCount;

      // set search result page data
      const formatted: IFormattedSearchResult['data'] = [];
      searchResult.data.forEach((data) => {
        const pageData = pageById.get(data._id);

        // the index may still hold a hit whose page document was not returned
        if (pageData == null) {
          logger.warn(`Page '${data._id}' was found in the search result but could not be loaded. Skipped.`);
          return;
        }

        // add tags and seenUserCount to pageData
        pageData._doc.tags = data._source.tag_names;
        pageData._doc.seenUserCount = (pageData.seenUsers && pageData.seenUsers.length) || 0;

        // serialize lastUpdateUser
        if (pageData.lastUpdateUser != null && pageData.lastUpdateUser instanceof User) {
          pageData.lastUpdateUser = serializeUserSecurely(pageData.lastUpdateUser);
        }

        // highlight fragments; stays undefined when the hit has no highlight
        let elasticSearchResult;
        const highlightData = data._highlight;
        if (highlightData != null) {
          const snippet = highlightData['body.en'] || highlightData['body.ja'] || '';
          const pathMatch = highlightData['path.en'] || highlightData['path.ja'] || '';
          const isHtmlInPath = highlightData['path.en'] != null || highlightData['path.ja'] != null;

          elasticSearchResult = {
            snippet: filterXss.process(snippet),
            highlightedPath: filterXss.process(pathMatch),
            isHtmlInPath,
          };
        }

        // generate pageMeta data
        const pageMeta = {
          bookmarkCount: data._source.bookmark_count || 0,
          elasticSearchResult,
        };

        formatted.push({ pageData, pageMeta });
      });
      result.data = formatted;

      return result;
    }

  }
  350. export default SearchService;