search.ts 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. import RE2 from 're2';
  2. import xss from 'xss';
  3. import { SearchDelegatorName } from '~/interfaces/named-query';
  4. import NamedQuery from '../models/named-query';
  5. import {
  6. SearchDelegator, SearchQueryParser, SearchResolver, ParsedQuery, Result, MetaData, SearchableData, QueryTerms,
  7. } from '../interfaces/search';
  8. import ElasticsearchDelegator from './search-delegator/elasticsearch';
  9. import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
  10. import loggerFactory from '~/utils/logger';
  11. import { PageModel } from '../models/page';
  12. import { serializeUserSecurely } from '../models/serializers/user-serializer';
  13. import { IPageSearchResultData } from '~/interfaces/search';
  14. // eslint-disable-next-line no-unused-vars
  15. const logger = loggerFactory('growi:service:search');
  16. // options for filtering xss
  17. const filterXssOptions = {
  18. whiteList: {
  19. em: ['class'],
  20. },
  21. };
  22. const filterXss = new xss.FilterXSS(filterXssOptions);
  23. const normalizeQueryString = (_queryString: string): string => {
  24. let queryString = _queryString.trim();
  25. queryString = queryString.replace(/\s+/g, ' ');
  26. return queryString;
  27. };
  28. export type FormattedSearchResult = {
  29. data: IPageSearchResultData[]
  30. totalCount: number
  31. meta: {
  32. total: number
  33. took?: number
  34. count?: number
  35. }
  36. }
  37. class SearchService implements SearchQueryParser, SearchResolver {
  38. crowi!: any
  39. configManager!: any
  40. isErrorOccuredOnHealthcheck: boolean | null
  41. isErrorOccuredOnSearching: boolean | null
  42. fullTextSearchDelegator: any & SearchDelegator
  43. nqDelegators: {[key in SearchDelegatorName]: SearchDelegator}
  44. constructor(crowi) {
  45. this.crowi = crowi;
  46. this.configManager = crowi.configManager;
  47. this.isErrorOccuredOnHealthcheck = null;
  48. this.isErrorOccuredOnSearching = null;
  49. try {
  50. this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
  51. this.nqDelegators = this.generateNQDelegators(this.fullTextSearchDelegator);
  52. logger.info('Succeeded to initialize search delegators');
  53. }
  54. catch (err) {
  55. logger.error(err);
  56. }
  57. if (this.isConfigured) {
  58. this.fullTextSearchDelegator.init();
  59. this.registerUpdateEvent();
  60. }
  61. }
  62. get isConfigured() {
  63. return this.fullTextSearchDelegator != null;
  64. }
  65. get isReachable() {
  66. return this.isConfigured && !this.isErrorOccuredOnHealthcheck && !this.isErrorOccuredOnSearching;
  67. }
  68. get isElasticsearchEnabled() {
  69. const uri = this.configManager.getConfig('crowi', 'app:elasticsearchUri');
  70. return uri != null && uri.length > 0;
  71. }
  72. generateFullTextSearchDelegator() {
  73. logger.info('Initializing search delegator');
  74. if (this.isElasticsearchEnabled) {
  75. logger.info('Elasticsearch is enabled');
  76. return new ElasticsearchDelegator(this.configManager, this.crowi.socketIoService);
  77. }
  78. logger.info('No elasticsearch URI is specified so that full text search is disabled.');
  79. }
  80. generateNQDelegators(defaultDelegator: SearchDelegator): {[key in SearchDelegatorName]: SearchDelegator} {
  81. return {
  82. [SearchDelegatorName.DEFAULT]: defaultDelegator,
  83. [SearchDelegatorName.PRIVATE_LEGACY_PAGES]: new PrivateLegacyPagesDelegator(),
  84. };
  85. }
  86. registerUpdateEvent() {
  87. const pageEvent = this.crowi.event('page');
  88. pageEvent.on('create', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  89. pageEvent.on('update', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  90. pageEvent.on('deleteCompletely', this.fullTextSearchDelegator.syncPagesDeletedCompletely.bind(this.fullTextSearchDelegator));
  91. pageEvent.on('delete', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  92. pageEvent.on('updateMany', this.fullTextSearchDelegator.syncPagesUpdated.bind(this.fullTextSearchDelegator));
  93. pageEvent.on('syncDescendants', this.fullTextSearchDelegator.syncDescendantsPagesUpdated.bind(this.fullTextSearchDelegator));
  94. pageEvent.on('addSeenUsers', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  95. const bookmarkEvent = this.crowi.event('bookmark');
  96. bookmarkEvent.on('create', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  97. bookmarkEvent.on('delete', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  98. const tagEvent = this.crowi.event('tag');
  99. tagEvent.on('update', this.fullTextSearchDelegator.syncTagChanged.bind(this.fullTextSearchDelegator));
  100. const commentEvent = this.crowi.event('comment');
  101. commentEvent.on('create', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  102. commentEvent.on('update', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  103. commentEvent.on('delete', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  104. }
  105. resetErrorStatus() {
  106. this.isErrorOccuredOnHealthcheck = false;
  107. this.isErrorOccuredOnSearching = false;
  108. }
  109. async reconnectClient() {
  110. logger.info('Try to reconnect...');
  111. this.fullTextSearchDelegator.initClient();
  112. try {
  113. await this.getInfoForHealth();
  114. logger.info('Reconnecting succeeded.');
  115. this.resetErrorStatus();
  116. }
  117. catch (err) {
  118. throw err;
  119. }
  120. }
  121. async getInfo() {
  122. try {
  123. return await this.fullTextSearchDelegator.getInfo();
  124. }
  125. catch (err) {
  126. logger.error(err);
  127. throw err;
  128. }
  129. }
  130. async getInfoForHealth() {
  131. try {
  132. const result = await this.fullTextSearchDelegator.getInfoForHealth();
  133. this.isErrorOccuredOnHealthcheck = false;
  134. return result;
  135. }
  136. catch (err) {
  137. logger.error(err);
  138. // switch error flag, `isErrorOccuredOnHealthcheck` to be `false`
  139. this.isErrorOccuredOnHealthcheck = true;
  140. throw err;
  141. }
  142. }
  143. async getInfoForAdmin() {
  144. return this.fullTextSearchDelegator.getInfoForAdmin();
  145. }
  146. async normalizeIndices() {
  147. return this.fullTextSearchDelegator.normalizeIndices();
  148. }
  149. async rebuildIndex() {
  150. return this.fullTextSearchDelegator.rebuildIndex();
  151. }
  152. async parseSearchQuery(_queryString: string): Promise<ParsedQuery> {
  153. const regexp = new RE2(/^\[nq:.+\]$/g); // https://regex101.com/r/FzDUvT/1
  154. const replaceRegexp = new RE2(/\[nq:|\]/g);
  155. const queryString = normalizeQueryString(_queryString);
  156. // when Normal Query
  157. if (!regexp.test(queryString)) {
  158. return { queryString, terms: this.parseQueryString(queryString) };
  159. }
  160. // when Named Query
  161. const name = queryString.replace(replaceRegexp, '');
  162. const nq = await NamedQuery.findOne({ name });
  163. // will delegate to full-text search
  164. if (nq == null) {
  165. return { queryString, terms: this.parseQueryString(queryString) };
  166. }
  167. const { aliasOf, delegatorName } = nq;
  168. let parsedQuery;
  169. if (aliasOf != null) {
  170. parsedQuery = { queryString: normalizeQueryString(aliasOf), terms: this.parseQueryString(aliasOf) };
  171. }
  172. if (delegatorName != null) {
  173. parsedQuery = { queryString, delegatorName };
  174. }
  175. return parsedQuery;
  176. }
  177. async resolve(parsedQuery: ParsedQuery): Promise<[SearchDelegator, SearchableData | null]> {
  178. const { queryString, terms, delegatorName } = parsedQuery;
  179. if (delegatorName != null) {
  180. const nqDelegator = this.nqDelegators[delegatorName];
  181. if (nqDelegator != null) {
  182. return [nqDelegator, null];
  183. }
  184. }
  185. const data = {
  186. queryString,
  187. terms: terms as QueryTerms,
  188. };
  189. return [this.nqDelegators[SearchDelegatorName.DEFAULT], data];
  190. }
  191. async searchKeyword(keyword: string, user, userGroups, searchOpts): Promise<[Result<any> & MetaData, string]> {
  192. let parsedQuery;
  193. // parse
  194. try {
  195. parsedQuery = await this.parseSearchQuery(keyword);
  196. }
  197. catch (err) {
  198. logger.error('Error occurred while parseSearchQuery', err);
  199. throw err;
  200. }
  201. let delegator;
  202. let data;
  203. // resolve
  204. try {
  205. [delegator, data] = await this.resolve(parsedQuery);
  206. }
  207. catch (err) {
  208. logger.error('Error occurred while resolving search delegator', err);
  209. throw err;
  210. }
  211. return [await delegator.search(data, user, userGroups, searchOpts), delegator.name];
  212. }
  213. parseQueryString(queryString: string): QueryTerms {
  214. // terms
  215. const matchWords: string[] = [];
  216. const notMatchWords: string[] = [];
  217. const phraseWords: string[] = [];
  218. const notPhraseWords: string[] = [];
  219. const prefixPaths: string[] = [];
  220. const notPrefixPaths: string[] = [];
  221. const tags: string[] = [];
  222. const notTags: string[] = [];
  223. // First: Parse phrase keywords
  224. const phraseRegExp = new RegExp(/(-?"[^"]+")/g);
  225. const phrases = queryString.match(phraseRegExp);
  226. if (phrases !== null) {
  227. queryString = queryString.replace(phraseRegExp, ''); // eslint-disable-line no-param-reassign
  228. phrases.forEach((phrase) => {
  229. phrase.trim();
  230. if (phrase.match(/^-/)) {
  231. notPhraseWords.push(phrase.replace(/^-/, ''));
  232. }
  233. else {
  234. phraseWords.push(phrase);
  235. }
  236. });
  237. }
  238. // Second: Parse other keywords (include minus keywords)
  239. queryString.split(' ').forEach((word) => {
  240. if (word === '') {
  241. return;
  242. }
  243. // https://regex101.com/r/pN9XfK/1
  244. const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
  245. // https://regex101.com/r/3qw9FQ/1
  246. const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);
  247. if (matchNegative != null) {
  248. if (matchNegative[1] === 'prefix:') {
  249. notPrefixPaths.push(matchNegative[2]);
  250. }
  251. else if (matchNegative[1] === 'tag:') {
  252. notTags.push(matchNegative[2]);
  253. }
  254. else {
  255. notMatchWords.push(matchNegative[2]);
  256. }
  257. }
  258. else if (matchPositive != null) {
  259. if (matchPositive[1] === 'prefix:') {
  260. prefixPaths.push(matchPositive[2]);
  261. }
  262. else if (matchPositive[1] === 'tag:') {
  263. tags.push(matchPositive[2]);
  264. }
  265. else {
  266. matchWords.push(matchPositive[2]);
  267. }
  268. }
  269. });
  270. const terms = {
  271. match: matchWords,
  272. not_match: notMatchWords,
  273. phrase: phraseWords,
  274. not_phrase: notPhraseWords,
  275. prefix: prefixPaths,
  276. not_prefix: notPrefixPaths,
  277. tag: tags,
  278. not_tag: notTags,
  279. };
  280. return terms;
  281. }
  282. // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
  283. // So far, it determines by delegatorName passed by searchService.searchKeyword
  284. checkIsFormattable(searchResult, delegatorName): boolean {
  285. return delegatorName === SearchDelegatorName.DEFAULT;
  286. }
  287. /**
  288. * formatting result
  289. */
  290. async formatSearchResult(searchResult: Result<any> & MetaData, delegatorName): Promise<FormattedSearchResult> {
  291. if (!this.checkIsFormattable(searchResult, delegatorName)) {
  292. const data = searchResult.data.map((page) => {
  293. return {
  294. pageData: page,
  295. pageMeta: {},
  296. };
  297. });
  298. return {
  299. data,
  300. totalCount: data.length,
  301. meta: searchResult.meta,
  302. };
  303. }
  304. /*
  305. * Format ElasticSearch result
  306. */
  307. const Page = this.crowi.model('Page') as PageModel;
  308. const User = this.crowi.model('User');
  309. const result = {} as FormattedSearchResult;
  310. // get page data
  311. const pageIds = searchResult.data.map((page) => { return page._id });
  312. const findPageResult = await Page.findListByPageIds(pageIds);
  313. // set meta data
  314. result.meta = searchResult.meta;
  315. result.totalCount = findPageResult.totalCount;
  316. // set search result page data
  317. result.data = searchResult.data.map((data) => {
  318. const pageData = findPageResult.pages.find((pageData) => {
  319. return pageData.id === data._id;
  320. });
  321. // add tags and seenUserCount to pageData
  322. pageData._doc.tags = data._source.tag_names;
  323. pageData._doc.seenUserCount = (pageData.seenUsers && pageData.seenUsers.length) || 0;
  324. // serialize lastUpdateUser
  325. if (pageData.lastUpdateUser != null && pageData.lastUpdateUser instanceof User) {
  326. pageData.lastUpdateUser = serializeUserSecurely(pageData.lastUpdateUser);
  327. }
  328. // increment elasticSearchResult
  329. let elasticSearchResult;
  330. const highlightData = data._highlight;
  331. if (highlightData != null) {
  332. const snippet = highlightData['body.en'] || highlightData['body.ja'] || '';
  333. const pathMatch = highlightData['path.en'] || highlightData['path.ja'] || '';
  334. elasticSearchResult = {
  335. snippet: filterXss.process(snippet),
  336. highlightedPath: filterXss.process(pathMatch),
  337. };
  338. }
  339. // generate pageMeta data
  340. const pageMeta = {
  341. bookmarkCount: data._source.bookmark_count || 0,
  342. elasticSearchResult,
  343. };
  344. return { pageData, pageMeta };
  345. });
  346. return result;
  347. }
  348. }
  349. export default SearchService;