search.js 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936
  1. /**
  2. * Search
  3. */
  4. const elasticsearch = require('elasticsearch');
  5. const debug = require('debug')('growi:lib:search');
  6. const logger = require('@alias/logger')('growi:lib:search');
  7. const { URL } = require('url');
  8. const {
  9. Writable, Transform,
  10. } = require('stream');
  11. const streamToPromise = require('stream-to-promise');
  12. const { createBatchStream } = require('./batch-stream');
  13. const BULK_REINDEX_SIZE = 100;
  /**
   * Search client backed by Elasticsearch.
   *
   * Stores defaults and references to crowi services, then tries to create the
   * Elasticsearch client and to subscribe to update events. An error thrown by
   * either step is logged and not re-thrown.
   *
   * @param {object} crowi application context; `event()`, `configManager` and `resourceDir` are read here
   * @param {string} esUri Elasticsearch URI (parsed later by initClient())
   */
  function SearchClient(crowi, esUri) {
    Object.assign(this, {
      // paging defaults used by appendResultSize()
      DEFAULT_OFFSET: 0,
      DEFAULT_LIMIT: 50,

      // node information, filled in by checkESVersion()
      esNodeName: '-',
      esNodeNames: [],
      esVersion: 'unknown',
      esVersions: [],
      esPlugin: [],
      esPlugins: [],

      esUri,
      crowi,
    });

    this.searchEvent = crowi.event('search');
    this.configManager = this.crowi.configManager;

    // In Elasticsearch RegExp, we don't need to use ^ and $.
    // Ref: https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-regexp-query.html#_standard_operators
    const portalQuery = { regexp: { 'path.raw': '.*/' } };
    const publicQuery = { regexp: { 'path.raw': '.*[^/]' } };
    const userQuery = { prefix: { 'path.raw': '/user/' } };
    this.queries = {
      PORTAL: portalQuery,
      PUBLIC: publicQuery,
      USER: userQuery,
    };

    // assigned by initClient()
    this.client = null;
    this.indexName = null;
    this.aliasName = null;

    this.mappingFile = `${crowi.resourceDir}search/mappings.json`;

    try {
      this.initClient();
      this.registerUpdateEvent();
    }
    catch (err) {
      logger.error(err);
    }
  }
  /**
   * Create the Elasticsearch client from `this.esUri` and derive the index and alias names.
   *
   * When the URI has a path (e.g. `http://user:pass@host:9200/growi`) the path
   * becomes the index name and the credentials are handed to the client as
   * `httpAuth`. Otherwise the URI is passed to the client unchanged and the
   * index name stays `crowi`.
   *
   * Sets `this.client`, `this.indexName` and `this.aliasName`.
   *
   * @throws {TypeError} when `this.esUri` cannot be parsed by `new URL()`
   */
  SearchClient.prototype.initClient = function() {
    let indexName = 'crowi';
    let host = this.esUri;
    let httpAuth = '';

    const isSearchboxSsl = this.configManager.getConfig('crowi', 'app:searchboxSslUrl') != null;
    const url = new URL(this.esUri);

    if (url.pathname !== '/') {
      // WHATWG URL objects have no `auth` property (that is the legacy url.parse() shape);
      // credentials are exposed as percent-encoded `username` / `password`.
      const hasAuth = url.username !== '' || url.password !== '';
      const authPrefix = hasAuth ? `${url.username}:${url.password}@` : '';

      host = isSearchboxSsl
        ? `${url.protocol}//${authPrefix}${url.hostname}:443` // use 443 when Searchbox
        : `${url.protocol}//${url.host}`;

      indexName = url.pathname.substring(1); // omit heading slash

      if (hasAuth) {
        const decode = (value) => {
          try {
            return decodeURIComponent(value);
          }
          catch (err) {
            // not valid percent-encoding: keep the raw text
            return value;
          }
        };
        httpAuth = `${decode(url.username)}:${decode(url.password)}`;
      }
    }

    this.client = new elasticsearch.Client({
      host,
      httpAuth,
      requestTimeout: 5000,
      // log: 'debug',
    });
    this.indexName = indexName;
    this.aliasName = `${this.indexName}-alias`;
  };
  /**
   * Ask the Elasticsearch client for its `info()`.
   *
   * @returns the value returned by `client.info({})`
   */
  SearchClient.prototype.getInfo = function() {
    const { client } = this;
    const params = {};
    return client.info(params);
  };
  /**
   * Read node information from Elasticsearch and store node names, versions and plugins.
   *
   * `esNodeNames` / `esVersions` / `esPlugins` hold one entry per node;
   * `esNodeName` / `esVersion` / `esPlugin` hold the values of the last node listed.
   * The lists are rebuilt on every call, so calling this repeatedly does not
   * accumulate duplicate entries. They are only replaced once the response has
   * been read successfully.
   *
   * Errors are logged, never thrown.
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.checkESVersion = async function() {
    try {
      const nodes = await this.client.nodes.info();
      if (!nodes._nodes || !nodes.nodes) {
        throw new Error('no nodes info');
      }

      const nodeNames = [];
      const versions = [];
      const plugins = [];
      for (const [nodeName, nodeInfo] of Object.entries(nodes.nodes)) {
        nodeNames.push(nodeName);
        versions.push(nodeInfo.version);
        plugins.push(nodeInfo.plugins);
      }

      this.esNodeNames = nodeNames;
      this.esVersions = versions;
      this.esPlugins = plugins;

      // keep the single-value fields in sync with the last node, as before
      if (nodeNames.length > 0) {
        const last = nodeNames.length - 1;
        this.esNodeName = nodeNames[last];
        this.esVersion = versions[last];
        this.esPlugin = plugins[last];
      }
    }
    catch (error) {
      logger.error('es check version error:', error);
    }
  };
  /**
   * Subscribe the index-sync handlers to the crowi 'page', 'bookmark' and 'tag' events.
   *
   * Every handler is bound to this client before it is registered.
   */
  SearchClient.prototype.registerUpdateEvent = function() {
    // [emitter name, [[event name, handler], ...]] in registration order
    const subscriptions = [
      ['page', [
        ['create', this.syncPageUpdated],
        ['update', this.syncPageUpdated],
        ['delete', this.syncPageDeleted],
      ]],
      ['bookmark', [
        ['create', this.syncBookmarkChanged],
        ['delete', this.syncBookmarkChanged],
      ]],
      ['tag', [
        ['update', this.syncTagChanged],
      ]],
    ];

    subscriptions.forEach(([emitterName, handlers]) => {
      const emitter = this.crowi.event(emitterName);
      handlers.forEach(([eventName, handler]) => {
        emitter.on(eventName, handler.bind(this));
      });
    });
  };
  /**
   * Decide whether a page belongs in the search index.
   *
   * @param {object} page page document
   * @returns {boolean} true when the page has a creator and a revision and has no redirectTo
   */
  SearchClient.prototype.shouldIndexed = function(page) {
    const { creator, revision, redirectTo } = page;
    if (creator == null || revision == null) {
      return false;
    }
    return redirectTo == null;
  };
  /**
   * Bring the indices into their initial state:
   * drop a left-over `<index>-tmp` index, create the main index when it is
   * missing, and create the alias for the main index when it is missing.
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.initIndices = async function() {
    // FIXME: comment out experimentally
    // await this.checkESVersion();

    const { indexName, aliasName } = this;
    const { indices } = this.client;
    const tmpIndexName = `${indexName}-tmp`;

    // remove tmp index
    const hasTmpIndex = await indices.exists({ index: tmpIndexName });
    if (hasTmpIndex) {
      await indices.delete({ index: tmpIndexName });
    }

    // create index
    const hasIndex = await indices.exists({ index: indexName });
    if (!hasIndex) {
      await this.createIndex(indexName);
    }

    // create alias
    const aliasParams = { name: aliasName, index: indexName };
    const hasAlias = await indices.existsAlias({ name: aliasName, index: indexName });
    if (!hasAlias) {
      await indices.putAlias(aliasParams);
    }
  };
  /**
   * Create an index whose request body is the content of `this.mappingFile`.
   *
   * @param {string} index name of the index to create
   * @returns {Promise} result of `client.indices.create()`
   */
  SearchClient.prototype.createIndex = async function(index) {
    const mappings = require(this.mappingFile);
    const params = { index, body: mappings };
    return this.client.indices.create(params);
  };
  /**
   * Rebuild the main index from scratch.
   *
   * Steps: initialise the indices, start copying the main index into
   * `<index>-tmp`, point the alias at the tmp index, recreate the main index
   * and add all pages to it, point the alias back, and drop the tmp index.
   *
   * @param {*} uri not used by this method
   * @returns {Promise<void>}
   */
  SearchClient.prototype.buildIndex = async function(uri) {
    await this.initIndices();

    const { client, indexName } = this;
    const aliasName = `${indexName}-alias`;
    const tmpIndexName = `${indexName}-tmp`;

    // attach the alias to `to` and detach it from `from` in a single request
    const moveAlias = (from, to) => {
      return client.indices.updateAliases({
        body: {
          actions: [
            { add: { alias: aliasName, index: to } },
            { remove: { alias: aliasName, index: from } },
          ],
        },
      });
    };

    // reindex to tmp index
    // NOTE(review): waitForCompletion is false, so the copy may still be running when
    // the source index is deleted below -- confirm this is intended
    await this.createIndex(tmpIndexName);
    await client.reindex({
      waitForCompletion: false,
      body: {
        source: { index: indexName },
        dest: { index: tmpIndexName },
      },
    });

    // update alias
    await moveAlias(indexName, tmpIndexName);

    // flush index
    await client.indices.delete({ index: indexName });
    await this.createIndex(indexName);
    await this.addAllPages();

    // update alias
    await moveAlias(tmpIndexName, indexName);

    // remove tmp index
    await client.indices.delete({ index: tmpIndexName });
  };
  /**
   * Build the part of an index document that is derived from page.grant*.
   *
   * `grantedUsers` entries and `grantedGroup` may be either documents (with `_id`)
   * or bare ids; both forms are converted to id strings.
   *
   * @param {object} page page document
   * @returns {{grant: *, granted_users: string[]|null, granted_group: string|null}}
   *   `granted_users` is null when the page has no granted users,
   *   `granted_group` is null when the page has no granted group
   */
  function generateDocContentsRelatedToRestriction(page) {
    const toIdString = (ref) => {
      const id = (ref._id == null) ? ref : ref._id;
      return id.toString();
    };

    const { grantedUsers, grantedGroup } = page;
    const hasGrantedUsers = grantedUsers != null && grantedUsers.length > 0;

    return {
      grant: page.grant,
      granted_users: hasGrantedUsers ? grantedUsers.map(user => toIdString(user)) : null,
      granted_group: grantedGroup != null ? toIdString(grantedGroup) : null,
    };
  }
  /**
   * Append the bulk-API "index" command and the document for one page to `body`.
   *
   * The page is expected to carry `creator.username` and `revision.body`
   * (updateOrInsertPages() populates both). `bookmarkCount` and `liker` are
   * optional and count as 0 when absent.
   *
   * @param {Array} body bulk request body; two entries are pushed onto it
   * @param {object} page page document
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForCreate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const command = {
      index: {
        _index: this.indexName,
        _type: 'pages',
        _id: page._id.toString(),
      },
    };

    const bookmarkCount = page.bookmarkCount || 0;
    // the fallback must guard the dereference: `page.liker.length || 0` threw when liker was missing
    const likeCount = (page.liker != null && page.liker.length) || 0;

    const document = {
      path: page.path,
      body: page.revision.body,
      username: page.creator.username,
      comment_count: page.commentCount,
      bookmark_count: bookmarkCount,
      like_count: likeCount,
      created_at: page.createdAt,
      updated_at: page.updatedAt,
      tag_names: page.tagNames,
      ...generateDocContentsRelatedToRestriction(page),
    };

    body.push(command);
    body.push(document);
  };
  /**
   * Append the bulk-API "delete" command for one page to `body`.
   *
   * @param {Array} body bulk request body; one entry is pushed onto it
   * @param {object} page page document (only `_id` is read)
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForDelete = function(body, page) {
    const isArray = Array.isArray(body);
    if (!isArray) {
      throw new Error('Body must be an array.');
    }

    const target = { _index: this.indexName, _type: 'pages', _id: page._id.toString() };
    body.push({ delete: target });
  };
  /**
   * Index every page returned by `Page.find()`, emitting progress events.
   *
   * @returns {Promise} result of updateOrInsertPages()
   */
  SearchClient.prototype.addAllPages = async function() {
    const Page = this.crowi.model('Page');
    const findAllPages = () => {
      return Page.find();
    };
    return this.updateOrInsertPages(findAllPages, true);
  };
  /**
   * Index the single page found by `Page.findById(pageId)`.
   *
   * @param {*} pageId id handed to `Page.findById()`
   * @returns {Promise} result of updateOrInsertPages()
   */
  SearchClient.prototype.updateOrInsertPageById = async function(pageId) {
    const Page = this.crowi.model('Page');
    const findPage = () => {
      return Page.findById(pageId);
    };
    return this.updateOrInsertPages(findPage);
  };
  /**
   * Index the pages selected by a query.
   *
   * The pages are streamed through these stages:
   *   cursor -> drop pages rejected by shouldIndexed() -> batch (BULK_REINDEX_SIZE)
   *   -> attach bookmark counts -> attach tag names -> bulk write to Elasticsearch
   *
   * A failed bulk request is logged and the stream carries on with the next
   * batch (best effort). A failure in any other stage aborts the pipeline and
   * rejects the returned promise.
   *
   * @param {function} queryFactory factory method to generate a Mongoose Query instance
   *   (invoked twice: once for the cursor, once for the total count)
   * @param {boolean} [isEmittingProgressEvent=false] when true, 'addPageProgress' and
   *   'finishAddPage' are emitted on the search event with (totalCount, count, skipped)
   * @returns {Promise} the promise returned by streamToPromise() for the write stream
   */
  SearchClient.prototype.updateOrInsertPages = async function(queryFactory, isEmittingProgressEvent = false) {
    const Page = this.crowi.model('Page');
    const { PageQueryBuilder } = Page;
    const Bookmark = this.crowi.model('Bookmark');
    const PageTagRelation = this.crowi.model('PageTagRelation');

    const searchEvent = this.searchEvent;

    // prepare functions invoked from custom streams
    const prepareBodyForCreate = this.prepareBodyForCreate.bind(this);
    const shouldIndexed = this.shouldIndexed.bind(this);
    const bulkWrite = this.client.bulk.bind(this.client);

    const findQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;
    const countQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;

    const totalCount = await countQuery.count();

    const readStream = findQuery
      // populate data which will be referenced by prepareBodyForCreate()
      .populate([
        { path: 'creator', model: 'User', select: 'username' },
        { path: 'revision', model: 'Revision', select: 'body' },
      ])
      .snapshot()
      .lean()
      .cursor();

    let skipped = 0;
    const thinOutStream = new Transform({
      objectMode: true,
      transform(doc, encoding, callback) {
        let isTarget;
        try {
          isTarget = shouldIndexed(doc);
        }
        catch (err) {
          callback(err);
          return;
        }

        if (isTarget) {
          this.push(doc);
        }
        else {
          skipped++;
        }
        callback();
      },
    });

    const batchStream = createBatchStream(BULK_REINDEX_SIZE);

    // Creates a stream that sets `propName` on every doc of a batch for which
    // `fetchIdToValueMap(pageIds)` returned a value.
    const createAppendStream = (fetchIdToValueMap, propName) => {
      return new Transform({
        objectMode: true,
        async transform(chunk, encoding, callback) {
          try {
            const pageIds = chunk.map(doc => doc._id);
            const idToValueMap = await fetchIdToValueMap(pageIds);

            chunk.forEach((doc) => {
              const id = doc._id.toString();
              if (Object.prototype.hasOwnProperty.call(idToValueMap, id)) {
                doc[propName] = idToValueMap[id];
              }
            });
          }
          catch (err) {
            // report the failure to the stream instead of leaving the callback uncalled
            callback(err);
            return;
          }

          callback(null, chunk);
        },
      });
    };

    const appendBookmarkCountStream = createAppendStream(
      pageIds => Bookmark.getPageIdToCountMap(pageIds), 'bookmarkCount',
    );
    const appendTagNamesStream = createAppendStream(
      pageIds => PageTagRelation.getIdToTagNamesMap(pageIds), 'tagNames',
    );

    let count = 0;
    const writeStream = new Writable({
      objectMode: true,
      async write(batch, encoding, callback) {
        try {
          const body = [];
          batch.forEach(doc => prepareBodyForCreate(body, doc));

          const res = await bulkWrite({
            body,
            requestTimeout: Infinity,
          });

          count += (res.items || []).length;

          logger.info(`Adding pages progressing: (count=${count}, errors=${res.errors}, took=${res.took}ms)`);

          if (isEmittingProgressEvent) {
            searchEvent.emit('addPageProgress', totalCount, count, skipped);
          }
        }
        catch (err) {
          // best effort: log and continue with the next batch
          logger.error('addAllPages error on add anyway: ', err);
        }

        callback();
      },
      final(callback) {
        logger.info(`Adding pages has terminated: (totalCount=${totalCount}, skipped=${skipped})`);

        if (isEmittingProgressEvent) {
          searchEvent.emit('finishAddPage', totalCount, count, skipped);
        }
        callback();
      },
    });

    // pipe() does not forward errors downstream. Route every upstream failure to the
    // write stream so that the returned promise rejects instead of never settling.
    [readStream, thinOutStream, batchStream, appendBookmarkCountStream, appendTagNamesStream].forEach((stream) => {
      stream.on('error', err => writeStream.destroy(err));
    });

    readStream
      .pipe(thinOutStream)
      .pipe(batchStream)
      .pipe(appendBookmarkCountStream)
      .pipe(appendTagNamesStream)
      .pipe(writeStream);

    return streamToPromise(writeStream);
  };
  /**
   * Remove the given pages from the index with a single bulk request.
   *
   * @param {Array<object>} pages page documents
   * @returns the value returned by `client.bulk()`
   */
  SearchClient.prototype.deletePages = function(pages) {
    const body = [];
    pages.forEach((page) => {
      this.prepareBodyForDelete(body, page);
    });

    logger.debug('deletePages(): Sending Request to ES', body);
    return this.client.bulk({ body });
  };
  /**
   * Run a search request and reduce the response to meta information and hits.
   *
   * When NODE_ENV is 'development', the query is first sent to validateQuery
   * and the explanations are written to the debug log.
   *
   * search returning type:
   * {
   *   meta: { took: Integer, total: Integer, results: Integer },
   *   data: [ { _id, _score, _source } ... ],
   * }
   *
   * @param {object} query search request as built by createSearchQuerySortedBy*()
   */
  SearchClient.prototype.search = async function(query) {
    const { client } = this;

    // for debug
    if (process.env.NODE_ENV === 'development') {
      const validation = await client.indices.validateQuery({
        explain: true,
        body: { query: query.body.query },
      });
      logger.debug('ES returns explanations: ', validation.explanations);
    }

    const result = await client.search(query);

    // for debug
    logger.debug('ES result: ', result);

    const { hits } = result.hits;
    const meta = {
      took: result.took,
      total: result.hits.total,
      results: hits.length,
    };
    const data = hits.map(({ _id, _score, _source }) => ({ _id, _score, _source }));

    return { meta, data };
  };
  /**
   * Create a search request skeleton sorted by `updated_at` (newest first).
   *
   * @param {object} [option] `option.fields` overrides the returned `_source` fields
   * @returns {object} request with an empty `body.query` and the default from/size
   */
  SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option) {
    // getting path by default is almost for debug
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const fields = (option && option.fields) || defaultFields;

    // default is only id field, sorted by updated_at
    const body = {
      sort: [{ updated_at: { order: 'desc' } }],
      query: {}, // query
      _source: fields,
    };
    const query = { index: this.aliasName, type: 'pages', body };

    this.appendResultSize(query);

    return query;
  };
  /**
   * Create a search request skeleton sorted by `_score` (highest first).
   *
   * @param {object} [option] `option.fields` overrides the returned `_source` fields
   * @returns {object} request with an empty `body.query` and the default from/size
   */
  SearchClient.prototype.createSearchQuerySortedByScore = function(option) {
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const fields = (option && option.fields) || defaultFields;

    // sort by score
    const body = {
      sort: [{ _score: { order: 'desc' } }],
      query: {}, // query
      _source: fields,
    };
    const query = { index: this.aliasName, type: 'pages', body };

    this.appendResultSize(query);

    return query;
  };
  /**
   * Set paging on a search request.
   *
   * @param {object} query search request; `from` and `size` are assigned on it
   * @param {number} [from] offset; DEFAULT_OFFSET is used when falsy
   * @param {number} [size] page size; DEFAULT_LIMIT is used when falsy
   */
  SearchClient.prototype.appendResultSize = function(query, from, size) {
    const offset = from || this.DEFAULT_OFFSET;
    const limit = size || this.DEFAULT_LIMIT;

    query.from = offset;
    query.size = limit;
  };
  /**
   * Make sure `query.body.query.bool` exists and has `filter`, `must` and
   * `must_not` arrays. Clauses that are already arrays are left untouched.
   *
   * @param {object} query request created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
   * @returns {object} the same `query` object
   */
  SearchClient.prototype.initializeBoolQuery = function(query) {
    const root = query.body.query;
    if (!root.bool) {
      root.bool = {};
    }

    const { bool } = root;
    ['filter', 'must', 'must_not'].forEach((clause) => {
      if (!Array.isArray(bool[clause])) {
        bool[clause] = [];
      }
    });

    return query;
  };
  /**
   * Translate a user query string into bool-query clauses and append them to `query`.
   *
   * The string is parsed by parseQueryString(); the resulting keyword groups map to:
   *   match / phrase          -> bool.must
   *   not_match / not_phrase  -> bool.must_not
   *   prefix, not_prefix, tag, not_tag -> bool.filter (wrapped in a nested bool)
   *
   * Every phrase becomes its own clause; clauses are appended one by one so
   * that `must` / `must_not` stay flat lists of query objects.
   *
   * @param {object} query search request (mutated)
   * @param {string} queryString raw query string
   */
  SearchClient.prototype.appendCriteriaForQueryString = function(query, queryString) {
    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign

    const { bool } = query.body.query;

    // parse
    const parsedKeywords = this.parseQueryString(queryString);

    // each phrase is a quoted string
    const toPhraseQuery = (phrase) => {
      return {
        multi_match: {
          query: phrase,
          type: 'phrase',
          fields: [
            // Not use "*.ja" fields here, because we want to analyze (parse) search words
            'path.raw^2',
            'body',
          ],
        },
      };
    };
    const toPrefixQuery = (path) => {
      return { prefix: { 'path.raw': path } };
    };
    const toTagQuery = (tag) => {
      return { term: { tag_names: tag } };
    };

    if (parsedKeywords.match.length > 0) {
      bool.must.push({
        multi_match: {
          query: parsedKeywords.match.join(' '),
          type: 'most_fields',
          fields: ['path.ja^2', 'path.en^2', 'body.ja', 'body.en'],
        },
      });
    }

    if (parsedKeywords.not_match.length > 0) {
      bool.must_not.push({
        multi_match: {
          query: parsedKeywords.not_match.join(' '),
          fields: ['path.ja', 'path.en', 'body.ja', 'body.en'],
          operator: 'or',
        },
      });
    }

    if (parsedKeywords.phrase.length > 0) {
      // spread: pushing the array itself would nest an array inside `must`
      bool.must.push(...parsedKeywords.phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.not_phrase.length > 0) {
      bool.must_not.push(...parsedKeywords.not_phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.prefix.length > 0) {
      bool.filter.push({ bool: { should: parsedKeywords.prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.not_prefix.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.tag.length > 0) {
      bool.filter.push({ bool: { must: parsedKeywords.tag.map(toTagQuery) } });
    }

    if (parsedKeywords.not_tag.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_tag.map(toTagQuery) } });
    }
  };
  /**
   * Append a filter that restricts hits to pages the viewer may see in lists.
   *
   * A page matches when any of these holds:
   *   - it is public
   *   - it is GRANT_RESTRICTED and the user is one of its granted users
   *   - it is GRANT_SPECIFIED / GRANT_OWNER and either the
   *     'security:list-policy:hideRestrictedByOwner' config is off, or the user is a granted user
   *   - it is GRANT_USER_GROUP and either the
   *     'security:list-policy:hideRestrictedByGroup' config is off, or its group is one of `userGroups`
   *
   * @param {object} query search request (mutated)
   * @param {object} [user] viewer; `user._id` is read
   * @param {Array<object>} [userGroups] groups of the viewer; each `_id` is read
   * @returns {Promise<void>}
   */
  SearchClient.prototype.filterPagesByViewer = async function(query, user, userGroups) {
    const showPagesRestrictedByOwner = !this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByOwner');
    const showPagesRestrictedByGroup = !this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByGroup');

    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign

    const Page = this.crowi.model('Page');
    const {
      GRANT_PUBLIC, GRANT_RESTRICTED, GRANT_SPECIFIED, GRANT_OWNER, GRANT_USER_GROUP,
    } = Page;

    const hasUser = user != null;
    const byGrant = grant => ({ term: { grant } });
    // pages of the given grant that additionally satisfy `condition`
    const byGrantAnd = (grant, condition) => ({ bool: { must: [byGrant(grant), condition] } });
    const grantedToUser = () => ({ term: { granted_users: user._id.toString() } });

    const grantConditions = [byGrant(GRANT_PUBLIC)];

    // ensure to hit to GRANT_RESTRICTED pages that the user specified at own
    if (hasUser) {
      grantConditions.push(byGrantAnd(GRANT_RESTRICTED, grantedToUser()));
    }

    if (showPagesRestrictedByOwner) {
      grantConditions.push(byGrant(GRANT_SPECIFIED), byGrant(GRANT_OWNER));
    }
    else if (hasUser) {
      grantConditions.push(
        byGrantAnd(GRANT_SPECIFIED, grantedToUser()),
        byGrantAnd(GRANT_OWNER, grantedToUser()),
      );
    }

    if (showPagesRestrictedByGroup) {
      grantConditions.push(byGrant(GRANT_USER_GROUP));
    }
    else if (userGroups != null && userGroups.length > 0) {
      const userGroupIds = userGroups.map(group => group._id.toString());
      grantConditions.push(byGrantAnd(GRANT_USER_GROUP, { terms: { granted_group: userGroupIds } }));
    }

    query.body.query.bool.filter.push({ bool: { should: grantConditions } });
  };
  /**
   * Restrict hits to portal pages: exclude `queries.USER`, require `queries.PORTAL`.
   *
   * @param {object} query search request (mutated)
   */
  SearchClient.prototype.filterPortalPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PORTAL } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PORTAL);
  };
  /**
   * Restrict hits to public pages: exclude `queries.USER`, require `queries.PUBLIC`.
   *
   * @param {object} query search request (mutated)
   */
  SearchClient.prototype.filterPublicPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PUBLIC } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PUBLIC);
  };
  /**
   * Restrict hits to user pages by requiring `queries.USER`.
   *
   * @param {object} query search request (mutated)
   */
  SearchClient.prototype.filterUserPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    bool.filter.push(this.queries.USER);
  };
  /**
   * Apply the page-type filter that matches `type`.
   *
   * @param {object} query search request (mutated when `type` is a known page type)
   * @param {*} type one of Page.TYPE_PORTAL / Page.TYPE_PUBLIC / Page.TYPE_USER; anything else is ignored
   * @returns the result of the selected filter method, or `query` itself for an unknown type
   */
  SearchClient.prototype.filterPagesByType = function(query, type) {
    const Page = this.crowi.model('Page');

    if (type === Page.TYPE_PORTAL) {
      return this.filterPortalPages(query);
    }
    if (type === Page.TYPE_PUBLIC) {
      return this.filterPublicPages(query);
    }
    if (type === Page.TYPE_USER) {
      return this.filterUserPages(query);
    }
    return query;
  };
  /**
   * Wrap `query.body.query` in a function_score query that adds a
   * bookmark-count based boost to the text score.
   *
   * The boost factor is `10000 / <number of users>`. The user count comes from
   * an asynchronous model call and has to be awaited: dividing by the pending
   * query object yields NaN. This method is therefore asynchronous and callers
   * must await it before sending the query.
   *
   * @param {object} query search request (mutated)
   * @param {string} queryString raw query string (only used for the debug log of min_score)
   * @returns {Promise<void>}
   */
  SearchClient.prototype.appendFunctionScore = async function(query, queryString) {
    const User = this.crowi.model('User');
    const count = (await User.count({})) || 1; // fall back to 1 to avoid dividing by zero

    const minScore = queryString.length * 0.1 - 1; // increase with length
    logger.debug('min_score: ', minScore);

    query.body.query = {
      function_score: {
        query: { ...query.body.query },
        // // disable min_score -- 2019.02.28 Yuki Takei
        // // more precise adjustment is needed...
        // min_score: minScore,
        field_value_factor: {
          field: 'bookmark_count',
          modifier: 'log1p',
          factor: 10000 / count,
          missing: 0,
        },
        boost_mode: 'sum',
      },
    };
  };

  /**
   * Search pages by a user query string, restricted to what the viewer may see.
   *
   * @param {string} queryString raw query string
   * @param {object} [user] viewer
   * @param {Array<object>} [userGroups] groups of the viewer
   * @param {object} [option] `offset`, `limit` and `type` are read; all optional
   * @returns {Promise<object>} result of search()
   */
  SearchClient.prototype.searchKeyword = async function(queryString, user, userGroups, option) {
    const { offset, limit, type: pageType } = option || {};
    const from = offset || null;
    const size = limit || null;
    const type = pageType || null;

    const query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForQueryString(query, queryString);

    this.filterPagesByType(query, type);
    await this.filterPagesByViewer(query, user, userGroups);

    this.appendResultSize(query, from, size);

    await this.appendFunctionScore(query, queryString);

    return this.search(query);
  };
  /**
   * Split a raw query string into keyword groups.
   *
   * Recognised forms (a leading `-` negates each of them):
   *   "quoted phrase"   -> phrase / not_phrase (the double quotes are kept)
   *   prefix:<path>     -> prefix / not_prefix
   *   tag:<name>        -> tag / not_tag
   *   anything else     -> match / not_match
   *
   * @param {string} queryString raw query string
   * @returns {{match: string[], not_match: string[], phrase: string[], not_phrase: string[],
   *   prefix: string[], not_prefix: string[], tag: string[], not_tag: string[]}}
   */
  SearchClient.prototype.parseQueryString = function(queryString) {
    const parsed = {
      match: [],
      not_match: [],
      phrase: [],
      not_phrase: [],
      prefix: [],
      not_prefix: [],
      tag: [],
      not_tag: [],
    };

    // collapse every run of whitespace into a single space
    let remaining = queryString.replace(/\s+/g, ' ');

    // First: Parse phrase keywords
    const phraseRegExp = new RegExp(/(-?"[^"]+")/g);
    const phrases = remaining.match(phraseRegExp);
    if (phrases !== null) {
      remaining = remaining.replace(phraseRegExp, '');

      for (const phrase of phrases) {
        if (phrase.startsWith('-')) {
          parsed.not_phrase.push(phrase.slice(1));
        }
        else {
          parsed.phrase.push(phrase);
        }
      }
    }

    // destination lists, keyed by the operator captured from a word
    const negativeTargets = { 'prefix:': parsed.not_prefix, 'tag:': parsed.not_tag, plain: parsed.not_match };
    const positiveTargets = { 'prefix:': parsed.prefix, 'tag:': parsed.tag, plain: parsed.match };

    // Second: Parse other keywords (include minus keywords)
    const words = remaining.split(' ').filter(word => word !== '');
    for (const word of words) {
      // https://regex101.com/r/pN9XfK/1
      const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
      // https://regex101.com/r/3qw9FQ/1
      const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);

      const isNegative = matchNegative != null;
      const matched = isNegative ? matchNegative : matchPositive;
      if (matched == null) {
        continue;
      }

      const targets = isNegative ? negativeTargets : positiveTargets;
      const [, operator, value] = matched;
      const target = operator != null ? targets[operator] : targets.plain;
      target.push(value);
    }

    return parsed;
  };
  /**
   * Event handler for page creation and update: keep the index in sync with the page.
   *
   * A page that should be indexed is (re-)indexed. Any other page is removed
   * from the index; an error raised by that removal is logged, not thrown.
   *
   * @param {object} page page document
   * @param {object} user not used by this method
   * @returns {Promise} result of updateOrInsertPageById(), or undefined after a removal
   */
  SearchClient.prototype.syncPageUpdated = async function(page, user) {
    logger.debug('SearchClient.syncPageUpdated', page.path);

    if (this.shouldIndexed(page)) {
      return this.updateOrInsertPageById(page._id);
    }

    // delete if page should not indexed
    try {
      await this.deletePages([page]);
    }
    catch (err) {
      logger.error('deletePages:ES Error', err);
    }
    return undefined;
  };
  /**
   * Event handler for page deletion: remove the page from the index.
   *
   * An error raised by the removal is logged, not thrown.
   *
   * @param {object} page page document
   * @param {object} user not used by this method
   * @returns {Promise} result of deletePages(), or undefined when it failed
   */
  SearchClient.prototype.syncPageDeleted = async function(page, user) {
    debug('SearchClient.syncPageDeleted', page.path);

    let result;
    try {
      result = await this.deletePages([page]);
    }
    catch (err) {
      logger.error('deletePages:ES Error', err);
    }
    return result;
  };
  /**
   * Event handler for bookmark creation and deletion: re-index the bookmarked page.
   *
   * @param {*} pageId id of the page whose bookmarks changed
   * @returns {Promise} result of updateOrInsertPageById()
   */
  SearchClient.prototype.syncBookmarkChanged = async function(pageId) {
    logger.debug('SearchClient.syncBookmarkChanged', pageId);

    const indexing = this.updateOrInsertPageById(pageId);
    return indexing;
  };
  /**
   * Event handler for tag updates: re-index the page whose tags changed.
   *
   * @param {object} page page document (`path` and `_id` are read)
   * @returns {Promise} result of updateOrInsertPageById()
   */
  SearchClient.prototype.syncTagChanged = async function(page) {
    logger.debug('SearchClient.syncTagChanged', page.path);

    const indexing = this.updateOrInsertPageById(page._id);
    return indexing;
  };
  783. module.exports = SearchClient;