// search.ts
  1. import xss from 'xss';
  2. import mongoose from 'mongoose';
  3. import { SearchDelegatorName } from '~/interfaces/named-query';
  4. import { IPageWithMeta } from '~/interfaces/page';
  5. import { IFormattedSearchResult, IPageSearchMeta, ISearchResult } from '~/interfaces/search';
  6. import loggerFactory from '~/utils/logger';
  7. import NamedQuery from '../models/named-query';
  8. import {
  9. SearchDelegator, SearchQueryParser, SearchResolver, ParsedQuery, SearchableData, QueryTerms,
  10. } from '../interfaces/search';
  11. import ElasticsearchDelegator from './search-delegator/elasticsearch';
  12. import PrivateLegacyPagesDelegator from './search-delegator/private-legacy-pages';
  13. import { PageModel } from '../models/page';
  14. import { serializeUserSecurely } from '../models/serializers/user-serializer';
// Module-level logger, created under the 'growi:service:search' namespace.
// eslint-disable-next-line no-unused-vars
const logger = loggerFactory('growi:service:search');
  17. const nonNullable = <T>(value: T): value is NonNullable<T> => value != null;
  18. // options for filtering xss
  19. const filterXssOptions = {
  20. whiteList: {
  21. em: ['class'],
  22. },
  23. };
  24. const filterXss = new xss.FilterXSS(filterXssOptions);
  25. const normalizeQueryString = (_queryString: string): string => {
  26. let queryString = _queryString.trim();
  27. queryString = queryString.replace(/\s+/g, ' ');
  28. return queryString;
  29. };
  30. class SearchService implements SearchQueryParser, SearchResolver {
  31. crowi!: any
  32. configManager!: any
  33. isErrorOccuredOnHealthcheck: boolean | null
  34. isErrorOccuredOnSearching: boolean | null
  35. fullTextSearchDelegator: any & SearchDelegator
  36. nqDelegators: {[key in SearchDelegatorName]: SearchDelegator}
  37. constructor(crowi) {
  38. this.crowi = crowi;
  39. this.configManager = crowi.configManager;
  40. this.isErrorOccuredOnHealthcheck = null;
  41. this.isErrorOccuredOnSearching = null;
  42. try {
  43. this.fullTextSearchDelegator = this.generateFullTextSearchDelegator();
  44. this.nqDelegators = this.generateNQDelegators(this.fullTextSearchDelegator);
  45. logger.info('Succeeded to initialize search delegators');
  46. }
  47. catch (err) {
  48. logger.error(err);
  49. }
  50. if (this.isConfigured) {
  51. this.fullTextSearchDelegator.init();
  52. this.registerUpdateEvent();
  53. }
  54. }
  55. get isConfigured() {
  56. return this.fullTextSearchDelegator != null;
  57. }
  58. get isReachable() {
  59. return this.isConfigured && !this.isErrorOccuredOnHealthcheck && !this.isErrorOccuredOnSearching;
  60. }
  61. get isElasticsearchEnabled() {
  62. const uri = this.configManager.getConfig('crowi', 'app:elasticsearchUri');
  63. return uri != null && uri.length > 0;
  64. }
  65. generateFullTextSearchDelegator() {
  66. logger.info('Initializing search delegator');
  67. if (this.isElasticsearchEnabled) {
  68. logger.info('Elasticsearch is enabled');
  69. return new ElasticsearchDelegator(this.configManager, this.crowi.socketIoService);
  70. }
  71. logger.info('No elasticsearch URI is specified so that full text search is disabled.');
  72. }
  73. generateNQDelegators(defaultDelegator: SearchDelegator): {[key in SearchDelegatorName]: SearchDelegator} {
  74. return {
  75. [SearchDelegatorName.DEFAULT]: defaultDelegator,
  76. [SearchDelegatorName.PRIVATE_LEGACY_PAGES]: new PrivateLegacyPagesDelegator(),
  77. };
  78. }
  79. registerUpdateEvent() {
  80. const pageEvent = this.crowi.event('page');
  81. pageEvent.on('create', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  82. pageEvent.on('update', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  83. pageEvent.on('delete', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  84. pageEvent.on('revert', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  85. pageEvent.on('deleteCompletely', this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator));
  86. pageEvent.on('syncDescendantsDelete', this.fullTextSearchDelegator.syncDescendantsPagesDeleted.bind(this.fullTextSearchDelegator));
  87. pageEvent.on('updateMany', this.fullTextSearchDelegator.syncPagesUpdated.bind(this.fullTextSearchDelegator));
  88. pageEvent.on('syncDescendantsUpdate', this.fullTextSearchDelegator.syncDescendantsPagesUpdated.bind(this.fullTextSearchDelegator));
  89. pageEvent.on('addSeenUsers', this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator));
  90. pageEvent.on('rename', () => {
  91. this.fullTextSearchDelegator.syncPageDeleted.bind(this.fullTextSearchDelegator);
  92. this.fullTextSearchDelegator.syncPageUpdated.bind(this.fullTextSearchDelegator);
  93. });
  94. const bookmarkEvent = this.crowi.event('bookmark');
  95. bookmarkEvent.on('create', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  96. bookmarkEvent.on('delete', this.fullTextSearchDelegator.syncBookmarkChanged.bind(this.fullTextSearchDelegator));
  97. const tagEvent = this.crowi.event('tag');
  98. tagEvent.on('update', this.fullTextSearchDelegator.syncTagChanged.bind(this.fullTextSearchDelegator));
  99. const commentEvent = this.crowi.event('comment');
  100. commentEvent.on('create', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  101. commentEvent.on('update', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  102. commentEvent.on('delete', this.fullTextSearchDelegator.syncCommentChanged.bind(this.fullTextSearchDelegator));
  103. }
  104. resetErrorStatus() {
  105. this.isErrorOccuredOnHealthcheck = false;
  106. this.isErrorOccuredOnSearching = false;
  107. }
  108. async reconnectClient() {
  109. logger.info('Try to reconnect...');
  110. this.fullTextSearchDelegator.initClient();
  111. try {
  112. await this.getInfoForHealth();
  113. logger.info('Reconnecting succeeded.');
  114. this.resetErrorStatus();
  115. }
  116. catch (err) {
  117. throw err;
  118. }
  119. }
  120. async getInfo() {
  121. try {
  122. return await this.fullTextSearchDelegator.getInfo();
  123. }
  124. catch (err) {
  125. logger.error(err);
  126. throw err;
  127. }
  128. }
  129. async getInfoForHealth() {
  130. try {
  131. const result = await this.fullTextSearchDelegator.getInfoForHealth();
  132. this.isErrorOccuredOnHealthcheck = false;
  133. return result;
  134. }
  135. catch (err) {
  136. logger.error(err);
  137. // switch error flag, `isErrorOccuredOnHealthcheck` to be `false`
  138. this.isErrorOccuredOnHealthcheck = true;
  139. throw err;
  140. }
  141. }
  142. async getInfoForAdmin() {
  143. return this.fullTextSearchDelegator.getInfoForAdmin();
  144. }
  145. async normalizeIndices() {
  146. return this.fullTextSearchDelegator.normalizeIndices();
  147. }
  148. async rebuildIndex() {
  149. return this.fullTextSearchDelegator.rebuildIndex();
  150. }
  151. async parseSearchQuery(_queryString: string): Promise<ParsedQuery> {
  152. const regexp = new RegExp(/^\[nq:.+\]$/g); // https://regex101.com/r/FzDUvT/1
  153. const replaceRegexp = new RegExp(/\[nq:|\]/g);
  154. const queryString = normalizeQueryString(_queryString);
  155. // when Normal Query
  156. if (!regexp.test(queryString)) {
  157. return { queryString, terms: this.parseQueryString(queryString) };
  158. }
  159. // when Named Query
  160. const name = queryString.replace(replaceRegexp, '');
  161. const nq = await NamedQuery.findOne({ name });
  162. // will delegate to full-text search
  163. if (nq == null) {
  164. return { queryString, terms: this.parseQueryString(queryString) };
  165. }
  166. const { aliasOf, delegatorName } = nq;
  167. let parsedQuery;
  168. if (aliasOf != null) {
  169. parsedQuery = { queryString: normalizeQueryString(aliasOf), terms: this.parseQueryString(aliasOf) };
  170. }
  171. if (delegatorName != null) {
  172. parsedQuery = { queryString, delegatorName };
  173. }
  174. return parsedQuery;
  175. }
  176. async resolve(parsedQuery: ParsedQuery): Promise<[SearchDelegator, SearchableData | null]> {
  177. const { queryString, terms, delegatorName } = parsedQuery;
  178. if (delegatorName != null) {
  179. const nqDelegator = this.nqDelegators[delegatorName];
  180. if (nqDelegator != null) {
  181. return [nqDelegator, null];
  182. }
  183. }
  184. const data = {
  185. queryString,
  186. terms: terms as QueryTerms,
  187. };
  188. return [this.nqDelegators[SearchDelegatorName.DEFAULT], data];
  189. }
  190. async searchKeyword(keyword: string, user, userGroups, searchOpts): Promise<[ISearchResult<unknown>, string]> {
  191. let parsedQuery;
  192. // parse
  193. try {
  194. parsedQuery = await this.parseSearchQuery(keyword);
  195. }
  196. catch (err) {
  197. logger.error('Error occurred while parseSearchQuery', err);
  198. throw err;
  199. }
  200. let delegator;
  201. let data;
  202. // resolve
  203. try {
  204. [delegator, data] = await this.resolve(parsedQuery);
  205. }
  206. catch (err) {
  207. logger.error('Error occurred while resolving search delegator', err);
  208. throw err;
  209. }
  210. return [await delegator.search(data, user, userGroups, searchOpts), delegator.name];
  211. }
  212. parseQueryString(queryString: string): QueryTerms {
  213. // terms
  214. const matchWords: string[] = [];
  215. const notMatchWords: string[] = [];
  216. const phraseWords: string[] = [];
  217. const notPhraseWords: string[] = [];
  218. const prefixPaths: string[] = [];
  219. const notPrefixPaths: string[] = [];
  220. const tags: string[] = [];
  221. const notTags: string[] = [];
  222. // First: Parse phrase keywords
  223. const phraseRegExp = new RegExp(/(-?"[^"]+")/g);
  224. const phrases = queryString.match(phraseRegExp);
  225. if (phrases !== null) {
  226. queryString = queryString.replace(phraseRegExp, ''); // eslint-disable-line no-param-reassign
  227. phrases.forEach((phrase) => {
  228. phrase.trim();
  229. if (phrase.match(/^-/)) {
  230. notPhraseWords.push(phrase.replace(/^-/, ''));
  231. }
  232. else {
  233. phraseWords.push(phrase);
  234. }
  235. });
  236. }
  237. // Second: Parse other keywords (include minus keywords)
  238. queryString.split(' ').forEach((word) => {
  239. if (word === '') {
  240. return;
  241. }
  242. // https://regex101.com/r/pN9XfK/1
  243. const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
  244. // https://regex101.com/r/3qw9FQ/1
  245. const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);
  246. if (matchNegative != null) {
  247. if (matchNegative[1] === 'prefix:') {
  248. notPrefixPaths.push(matchNegative[2]);
  249. }
  250. else if (matchNegative[1] === 'tag:') {
  251. notTags.push(matchNegative[2]);
  252. }
  253. else {
  254. notMatchWords.push(matchNegative[2]);
  255. }
  256. }
  257. else if (matchPositive != null) {
  258. if (matchPositive[1] === 'prefix:') {
  259. prefixPaths.push(matchPositive[2]);
  260. }
  261. else if (matchPositive[1] === 'tag:') {
  262. tags.push(matchPositive[2]);
  263. }
  264. else {
  265. matchWords.push(matchPositive[2]);
  266. }
  267. }
  268. });
  269. const terms = {
  270. match: matchWords,
  271. not_match: notMatchWords,
  272. phrase: phraseWords,
  273. not_phrase: notPhraseWords,
  274. prefix: prefixPaths,
  275. not_prefix: notPrefixPaths,
  276. tag: tags,
  277. not_tag: notTags,
  278. };
  279. return terms;
  280. }
  281. // TODO: optimize the way to check isFormattable e.g. check data schema of searchResult
  282. // So far, it determines by delegatorName passed by searchService.searchKeyword
  283. checkIsFormattable(searchResult, delegatorName: SearchDelegatorName): boolean {
  284. return delegatorName === SearchDelegatorName.DEFAULT;
  285. }
  286. /**
  287. * formatting result
  288. */
  289. async formatSearchResult(searchResult: ISearchResult<any>, delegatorName: SearchDelegatorName, user, userGroups): Promise<IFormattedSearchResult> {
  290. if (!this.checkIsFormattable(searchResult, delegatorName)) {
  291. const data: IPageWithMeta<IPageSearchMeta>[] = searchResult.data.map((page) => {
  292. return {
  293. data: page,
  294. };
  295. });
  296. return {
  297. data,
  298. meta: searchResult.meta,
  299. };
  300. }
  301. /*
  302. * Format ElasticSearch result
  303. */
  304. const Page = this.crowi.model('Page') as unknown as PageModel;
  305. const User = this.crowi.model('User');
  306. const result = {} as IFormattedSearchResult;
  307. // get page data
  308. const pageIds = searchResult.data.map((page) => { return page._id });
  309. const findPageResult = await Page.findListByPageIds(pageIds);
  310. // set meta data
  311. result.meta = searchResult.meta;
  312. // set search result page data
  313. const pages: (IPageWithMeta<IPageSearchMeta> | null)[] = searchResult.data.map((data) => {
  314. const pageData = findPageResult.pages.find((pageData) => {
  315. return pageData.id === data._id;
  316. });
  317. if (pageData == null) {
  318. return null;
  319. }
  320. // add tags and seenUserCount to pageData
  321. pageData._doc.tags = data._source.tag_names;
  322. pageData._doc.seenUserCount = (pageData.seenUsers && pageData.seenUsers.length) || 0;
  323. // serialize lastUpdateUser
  324. if (pageData.lastUpdateUser != null && pageData.lastUpdateUser instanceof User) {
  325. pageData.lastUpdateUser = serializeUserSecurely(pageData.lastUpdateUser);
  326. }
  327. // increment elasticSearchResult
  328. let elasticSearchResult;
  329. const highlightData = data._highlight;
  330. if (highlightData != null) {
  331. const snippet = this.canShowSnippet(pageData, user, userGroups) ? highlightData['body.en'] || highlightData['body.ja'] : null;
  332. const pathMatch = highlightData['path.en'] || highlightData['path.ja'];
  333. const isHtmlInPath = highlightData['path.en'] != null || highlightData['path.ja'] != null;
  334. elasticSearchResult = {
  335. snippet: typeof snippet === 'string' ? filterXss.process(snippet) : null,
  336. highlightedPath: typeof pathMatch === 'string' ? filterXss.process(pathMatch) : null,
  337. isHtmlInPath,
  338. };
  339. }
  340. // generate pageMeta data
  341. const pageMeta = {
  342. bookmarkCount: data._source.bookmark_count || 0,
  343. elasticSearchResult,
  344. };
  345. return { data: pageData, meta: pageMeta };
  346. });
  347. result.data = pages.filter(nonNullable);
  348. return result;
  349. }
  350. canShowSnippet(pageData, user, userGroups): boolean {
  351. const Page = mongoose.model('Page') as unknown as PageModel;
  352. const testGrant = pageData.grant;
  353. const testGrantedUser = pageData.grantedUsers?.[0];
  354. const testGrantedGroup = pageData.grantedGroup;
  355. if (testGrant === Page.GRANT_RESTRICTED) {
  356. return false;
  357. }
  358. if (testGrant === Page.GRANT_OWNER) {
  359. return user._id.toString() === testGrantedUser.toString();
  360. }
  361. if (testGrant === Page.GRANT_USER_GROUP) {
  362. return userGroups.map(d => d.toString()).include(testGrantedGroup.toString());
  363. }
  364. return true;
  365. }
  366. }
  367. export default SearchService;