search.js 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935
  1. /**
  2. * Search
  3. */
  4. const elasticsearch = require('elasticsearch');
  5. const debug = require('debug')('growi:lib:search');
  6. const logger = require('@alias/logger')('growi:lib:search');
  7. const { URL } = require('url');
  8. const {
  9. Writable, Transform,
  10. } = require('stream');
  11. const streamToPromise = require('stream-to-promise');
  12. const { createBatchStream } = require('./batch-stream');
  13. const BULK_REINDEX_SIZE = 100;
  /**
   * Search client built around an Elasticsearch connection.
   *
   * The constructor only stores state, then calls initClient() and
   * registerUpdateEvent(); an error thrown by either is logged, not rethrown.
   *
   * @param {object} crowi application context (`event()`, `configManager` and `resourceDir` are read here)
   * @param {string} esUri Elasticsearch URI, parsed later by initClient()
   */
  function SearchClient(crowi, esUri) {
    Object.assign(this, {
      // paging defaults applied by appendResultSize()
      DEFAULT_OFFSET: 0,
      DEFAULT_LIMIT: 50,

      // node information, filled in by checkESVersion()
      esNodeName: '-',
      esNodeNames: [],
      esVersion: 'unknown',
      esVersions: [],
      esPlugin: [],
      esPlugins: [],

      esUri,
      crowi,
      searchEvent: crowi.event('search'),
      configManager: crowi.configManager,

      // In Elasticsearch RegExp, ^ and $ are not needed.
      // Ref: https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-regexp-query.html#_standard_operators
      queries: {
        PORTAL: {
          regexp: {
            'path.raw': '.*/',
          },
        },
        PUBLIC: {
          regexp: {
            'path.raw': '.*[^/]',
          },
        },
        USER: {
          prefix: {
            'path.raw': '/user/',
          },
        },
      },

      // assigned by initClient()
      client: null,
      indexName: null,
      aliasName: null,

      mappingFile: `${crowi.resourceDir}search/mappings.json`,
    });

    try {
      this.initClient();
      this.registerUpdateEvent();
    }
    catch (err) {
      logger.error(err);
    }
  }
  /**
   * Create the Elasticsearch client from `this.esUri` and derive the index and alias names.
   *
   * When the URI has a path (e.g. `http://user:pass@host:9200/growi`), the path
   * becomes the index name and the URI credentials are passed as `httpAuth`.
   * Otherwise the URI is used as the host as-is and the index name is 'crowi'.
   * Sets `this.client`, `this.indexName` and `this.aliasName`.
   */
  SearchClient.prototype.initClient = function() {
    let indexName = 'crowi';
    let host = this.esUri;
    let httpAuth = '';

    const isSearchboxSsl = this.configManager.getConfig('crowi', 'app:searchboxSslUrl') != null;

    // username/password are percent-encoded by URL; fall back to the raw value if it cannot be decoded
    const decode = (value) => {
      try {
        return decodeURIComponent(value);
      }
      catch (err) {
        return value;
      }
    };

    const url = new URL(this.esUri);
    if (url.pathname !== '/') {
      // The WHATWG URL class has no `auth` property (that belonged to the legacy url.parse() result),
      // so the credentials have to be rebuilt from `username` and `password`.
      const hasAuth = url.username !== '';
      const hasPassword = url.password !== '';
      const encodedAuth = hasPassword ? `${url.username}:${url.password}` : url.username;

      host = isSearchboxSsl
        ? `${url.protocol}//${hasAuth ? `${encodedAuth}@` : ''}${url.hostname}:443` // use 443 when Searchbox
        : `${url.protocol}//${url.host}`;

      indexName = url.pathname.substring(1); // omit heading slash

      if (hasAuth) {
        httpAuth = hasPassword ? `${decode(url.username)}:${decode(url.password)}` : decode(url.username);
      }
    }

    this.client = new elasticsearch.Client({
      host,
      httpAuth,
      requestTimeout: 5000,
      // log: 'debug',
    });
    this.indexName = indexName;
    this.aliasName = `${this.indexName}-alias`;
  };
  /**
   * Call the Elasticsearch client's `info` API with an empty parameter object.
   *
   * @returns {*} whatever `client.info()` returns (presumably a Promise; confirm against the client library)
   */
  SearchClient.prototype.getInfo = function() {
    const { client } = this;
    const params = {};
    return client.info(params);
  };
  /**
   * Read node names, versions and plugins from `client.nodes.info()` and store them on the instance.
   *
   * `esNodeNames` / `esVersions` / `esPlugins` hold one entry per node;
   * `esNodeName` / `esVersion` / `esPlugin` hold the values of the last node listed.
   * The lists are replaced on every call, so calling this repeatedly
   * (initIndices() does so on each run) no longer accumulates duplicates.
   * Errors are logged and swallowed; the previous values are kept in that case.
   */
  SearchClient.prototype.checkESVersion = async function() {
    try {
      const nodes = await this.client.nodes.info();
      if (!nodes._nodes || !nodes.nodes) {
        throw new Error('no nodes info');
      }

      const nodeNames = [];
      const versions = [];
      const plugins = [];
      for (const [nodeName, nodeInfo] of Object.entries(nodes.nodes)) {
        nodeNames.push(nodeName);
        versions.push(nodeInfo.version);
        plugins.push(nodeInfo.plugins);
      }

      this.esNodeNames = nodeNames;
      this.esVersions = versions;
      this.esPlugins = plugins;

      if (nodeNames.length > 0) {
        const last = nodeNames.length - 1;
        this.esNodeName = nodeNames[last];
        this.esVersion = versions[last];
        this.esPlugin = plugins[last];
      }
    }
    catch (error) {
      logger.error('es check version error:', error);
    }
  };
  /**
   * Subscribe the sync* handlers to the 'page', 'bookmark' and 'tag' events of crowi.
   * Every handler is bound to this instance before it is registered.
   */
  SearchClient.prototype.registerUpdateEvent = function() {
    // [event name, [[action, handler], ...]] in registration order
    const subscriptions = [
      ['page', [
        ['create', this.syncPageUpdated],
        ['update', this.syncPageUpdated],
        ['delete', this.syncPageDeleted],
      ]],
      ['bookmark', [
        ['create', this.syncBookmarkChanged],
        ['delete', this.syncBookmarkChanged],
      ]],
      ['tag', [
        ['update', this.syncTagChanged],
      ]],
    ];

    subscriptions.forEach(([eventName, handlers]) => {
      const emitter = this.crowi.event(eventName);
      handlers.forEach(([action, handler]) => {
        emitter.on(action, handler.bind(this));
      });
    });
  };
  /**
   * Decide whether a page belongs in the search index.
   *
   * @param {object} page page document
   * @returns {boolean} true only when the page has a creator and a revision and is not a redirect
   */
  SearchClient.prototype.shouldIndexed = function(page) {
    if (page.creator == null) {
      return false;
    }
    if (page.revision == null) {
      return false;
    }
    return page.redirectTo == null;
  };
  /**
   * Bring the indices into their expected initial state:
   * refresh the node information, drop a leftover "-tmp" index,
   * create the main index if it is missing and make sure the alias points to it.
   */
  SearchClient.prototype.initIndices = async function() {
    await this.checkESVersion();

    const { indices } = this.client;
    const index = this.indexName;
    const name = this.aliasName;
    const tmpIndex = `${index}-tmp`;

    // remove tmp index
    if (await indices.exists({ index: tmpIndex })) {
      await indices.delete({ index: tmpIndex });
    }

    // create index
    const hasIndex = await indices.exists({ index });
    if (!hasIndex) {
      await this.createIndex(index);
    }

    // create alias
    const hasAlias = await indices.existsAlias({ name, index });
    if (!hasAlias) {
      await indices.putAlias({ name, index });
    }
  };
  141. SearchClient.prototype.createIndex = async function(index) {
  142. const body = require(this.mappingFile);
  143. return this.client.indices.create({ index, body });
  144. };
  /**
   * Rebuild the main index from scratch.
   *
   * Steps: initIndices() -> copy the current index into "<index>-tmp" -> point the alias at the tmp index
   * -> drop and recreate the main index -> addAllPages() -> point the alias back -> drop the tmp index.
   *
   * NOTE(review): the reindex request is sent with `waitForCompletion: false`, so the following steps
   * presumably do not wait for the copy to finish before the source index is deleted; confirm this is intended.
   *
   * @param {*} uri not used by this implementation
   */
  SearchClient.prototype.buildIndex = async function(uri) {
    await this.initIndices();

    const { client, indexName } = this;
    const aliasName = `${indexName}-alias`;
    const tmpIndexName = `${indexName}-tmp`;

    // attach the alias to `to` and detach it from `from` within one request
    const switchAlias = (from, to) => {
      return client.indices.updateAliases({
        body: {
          actions: [
            { add: { alias: aliasName, index: to } },
            { remove: { alias: aliasName, index: from } },
          ],
        },
      });
    };

    // reindex to tmp index
    await this.createIndex(tmpIndexName);
    await client.reindex({
      waitForCompletion: false,
      body: {
        source: { index: indexName },
        dest: { index: tmpIndexName },
      },
    });

    // serve searches from the tmp index while the main one is rebuilt
    await switchAlias(indexName, tmpIndexName);

    // flush index
    await client.indices.delete({ index: indexName });
    await this.createIndex(indexName);
    await this.addAllPages();

    // serve searches from the rebuilt main index again
    await switchAlias(tmpIndexName, indexName);

    // remove tmp index
    await client.indices.delete({ index: tmpIndexName });
  };
  /**
   * Build the part of an index document that is derived from page.grant*.
   *
   * @param {object} page page document; `grantedUsers` / `grantedGroup` may hold documents or bare ids
   * @returns {{grant: *, granted_users: ?string[], granted_group: ?string}}
   *   `granted_users` is null when there are no granted users, `granted_group` is null when no group is set
   */
  function generateDocContentsRelatedToRestriction(page) {
    // accepts either a document (having `_id`) or the id itself
    const toIdString = (ref) => {
      const id = (ref._id == null) ? ref : ref._id;
      return id.toString();
    };

    const { grantedUsers, grantedGroup } = page;
    const hasGrantedUsers = grantedUsers != null && grantedUsers.length > 0;

    return {
      grant: page.grant,
      granted_users: hasGrantedUsers ? grantedUsers.map(user => toIdString(user)) : null,
      granted_group: grantedGroup != null ? toIdString(grantedGroup) : null,
    };
  }
  /**
   * Append the bulk "index" command and the document for one page to a bulk request body.
   *
   * @param {Array} body bulk request body to append to (mutated)
   * @param {object} page page document; `revision.body` and `creator.username` must be available
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForCreate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const command = {
      index: {
        _index: this.indexName,
        _type: 'pages',
        _id: page._id.toString(),
      },
    };

    const document = Object.assign(
      {
        path: page.path,
        body: page.revision.body,
        username: page.creator.username,
        comment_count: page.commentCount,
        bookmark_count: page.bookmarkCount || 0,
        // a page without a `liker` array counts as having no likes instead of throwing
        like_count: (page.liker || []).length,
        created_at: page.createdAt,
        updated_at: page.updatedAt,
        tag_names: page.tagNames,
      },
      generateDocContentsRelatedToRestriction(page),
    );

    body.push(command);
    body.push(document);
  };
  /**
   * Append the bulk "delete" command for one page to a bulk request body.
   *
   * @param {Array} body bulk request body to append to (mutated)
   * @param {object} page page whose `_id` identifies the indexed document
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForDelete = function(body, page) {
    const isArray = Array.isArray(body);
    if (isArray === false) {
      throw new Error('Body must be an array.');
    }

    body.push({
      delete: {
        _index: this.indexName,
        _type: 'pages',
        _id: page._id.toString(),
      },
    });
  };
  /**
   * Index every page returned by `Page.find()`, emitting progress events while doing so.
   *
   * @returns {Promise<*>} result of updateOrInsertPages()
   */
  SearchClient.prototype.addAllPages = async function() {
    const PageModel = this.crowi.model('Page');
    const findAllPages = () => PageModel.find();
    const isEmittingProgressEvent = true;

    return this.updateOrInsertPages(findAllPages, isEmittingProgressEvent);
  };
  /**
   * Index (or re-index) the single page found by `Page.findById(pageId)`.
   *
   * @param {*} pageId id handed to `Page.findById()`
   * @returns {Promise<*>} result of updateOrInsertPages()
   */
  SearchClient.prototype.updateOrInsertPageById = async function(pageId) {
    const PageModel = this.crowi.model('Page');
    const findTargetPage = () => PageModel.findById(pageId);

    return this.updateOrInsertPages(findTargetPage);
  };
  /**
   * Stream the pages selected by a query into Elasticsearch with bulk requests.
   *
   * Pipeline: query cursor -> drop pages rejected by shouldIndexed() -> createBatchStream(BULK_REINDEX_SIZE)
   * (the following stages treat each chunk as an array of pages) -> attach `bookmarkCount` -> attach `tagNames`
   * -> bulk write.
   *
   * A failing bulk request is logged and the remaining batches are still processed (best effort).
   * An error raised by any earlier stage is forwarded to the write stream, so the returned promise
   * rejects instead of never settling.
   *
   * @param {function} queryFactory factory method to generate a Mongoose Query instance (called twice: find and count)
   * @param {boolean} [isEmittingProgressEvent=false] emit 'addPageProgress' / 'finishAddPage' on the search event
   * @returns {Promise<*>} the promise created by streamToPromise() for the write stream
   */
  SearchClient.prototype.updateOrInsertPages = async function(queryFactory, isEmittingProgressEvent = false) {
    const Page = this.crowi.model('Page');
    const { PageQueryBuilder } = Page;
    const Bookmark = this.crowi.model('Bookmark');
    const PageTagRelation = this.crowi.model('PageTagRelation');

    const searchEvent = this.searchEvent;

    // prepare functions invoked from custom streams
    const prepareBodyForCreate = this.prepareBodyForCreate.bind(this);
    const shouldIndexed = this.shouldIndexed.bind(this);
    const bulkWrite = this.client.bulk.bind(this.client);

    const findQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;
    const countQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;

    const totalCount = await countQuery.count();

    const readStream = findQuery
      // populate data which will be referenced by prepareBodyForCreate()
      .populate([
        { path: 'creator', model: 'User', select: 'username' },
        { path: 'revision', model: 'Revision', select: 'body' },
      ])
      .snapshot()
      .lean()
      .cursor();

    let skipped = 0;
    const thinOutStream = new Transform({
      objectMode: true,
      transform(doc, encoding, callback) {
        if (shouldIndexed(doc)) {
          this.push(doc);
        }
        else {
          skipped++;
        }
        callback();
      },
    });

    const batchStream = createBatchStream(BULK_REINDEX_SIZE);

    // Build a transform that looks up a { pageId: value } map for each chunk
    // and stores the value on every page that has an entry in the map.
    const createAppendStream = (fetchIdToValueMap, propertyName) => {
      return new Transform({
        objectMode: true,
        async transform(chunk, encoding, callback) {
          try {
            const idToValueMap = await fetchIdToValueMap(chunk.map(doc => doc._id));
            chunk.forEach((doc) => {
              const id = doc._id.toString();
              if (Object.prototype.hasOwnProperty.call(idToValueMap, id)) {
                doc[propertyName] = idToValueMap[id];
              }
            });
          }
          catch (err) {
            // report the failure to the stream; otherwise the callback is never invoked and the pipeline stalls
            callback(err);
            return;
          }
          callback(null, chunk);
        },
      });
    };

    const appendBookmarkCountStream = createAppendStream(
      pageIds => Bookmark.getPageIdToCountMap(pageIds),
      'bookmarkCount',
    );
    const appendTagNamesStream = createAppendStream(
      pageIds => PageTagRelation.getIdToTagNamesMap(pageIds),
      'tagNames',
    );

    let count = 0;
    const writeStream = new Writable({
      objectMode: true,
      async write(batch, encoding, callback) {
        try {
          // built inside the try block so that a malformed page cannot keep callback() from being called
          const body = [];
          batch.forEach(doc => prepareBodyForCreate(body, doc));

          const res = await bulkWrite({
            body,
            requestTimeout: Infinity,
          });

          count += (res.items || []).length;

          logger.info(`Adding pages progressing: (count=${count}, errors=${res.errors}, took=${res.took}ms)`);

          if (isEmittingProgressEvent) {
            searchEvent.emit('addPageProgress', totalCount, count, skipped);
          }
        }
        catch (err) {
          logger.error('addAllPages error on add anyway: ', err);
        }

        callback();
      },
      final(callback) {
        logger.info(`Adding pages has terminated: (totalCount=${totalCount}, skipped=${skipped})`);

        if (isEmittingProgressEvent) {
          searchEvent.emit('finishAddPage', totalCount, count, skipped);
        }
        callback();
      },
    });

    // pipe() does not forward errors downstream, so hand them to the write stream explicitly
    [readStream, thinOutStream, batchStream, appendBookmarkCountStream, appendTagNamesStream].forEach((stream) => {
      stream.on('error', (err) => {
        writeStream.destroy(err);
      });
    });

    readStream
      .pipe(thinOutStream)
      .pipe(batchStream)
      .pipe(appendBookmarkCountStream)
      .pipe(appendTagNamesStream)
      .pipe(writeStream);

    return streamToPromise(writeStream);
  };
  /**
   * Remove the given pages from the index with a single bulk request.
   *
   * @param {Array<object>} pages pages to delete; each one is passed to prepareBodyForDelete()
   * @returns {*} result of `client.bulk()`
   */
  SearchClient.prototype.deletePages = function(pages) {
    const body = [];
    pages.forEach((page) => {
      this.prepareBodyForDelete(body, page);
    });

    logger.debug('deletePages(): Sending Request to ES', body);

    return this.client.bulk({ body });
  };
  /**
   * Run a search request and reduce the response to the fields callers use.
   *
   * search returning type:
   * {
   *   meta: { took: Integer, total: Integer, results: Integer },
   *   data: [ { _id, _score, _source } ... ],
   * }
   *
   * When NODE_ENV is 'development' the query is additionally sent to
   * `indices.validateQuery` and the explanations are logged.
   *
   * @param {object} query request parameters passed to `client.search()`
   */
  SearchClient.prototype.search = async function(query) {
    // for debug
    if (process.env.NODE_ENV === 'development') {
      const validation = await this.client.indices.validateQuery({
        explain: true,
        body: {
          query: query.body.query,
        },
      });
      logger.debug('ES returns explanations: ', validation.explanations);
    }

    const result = await this.client.search(query);

    // for debug
    logger.debug('ES result: ', result);

    const { took, hits } = result;
    const meta = {
      took,
      total: hits.total,
      results: hits.hits.length,
    };
    const data = hits.hits.map(hit => ({ _id: hit._id, _score: hit._score, _source: hit._source }));

    return { meta, data };
  };
  /**
   * Create a search request skeleton sorted by `updated_at` (descending).
   *
   * @param {object} [option] `option.fields` overrides the returned `_source` fields
   * @returns {object} request with an empty `body.query` and the default paging applied
   */
  SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option) {
    // getting path by default is almost for debug
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const fields = (option && option.fields) || defaultFields;

    const query = {
      index: this.aliasName,
      type: 'pages',
      body: {
        sort: [{ updated_at: { order: 'desc' } }],
        query: {}, // filled in by the append*/filter* methods
        _source: fields,
      },
    };
    this.appendResultSize(query);

    return query;
  };
  /**
   * Create a search request skeleton sorted by `_score` (descending).
   *
   * @param {object} [option] `option.fields` overrides the returned `_source` fields
   * @returns {object} request with an empty `body.query` and the default paging applied
   */
  SearchClient.prototype.createSearchQuerySortedByScore = function(option) {
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const fields = (option && option.fields) || defaultFields;

    // sort by score
    const query = {
      index: this.aliasName,
      type: 'pages',
      body: {
        sort: [{ _score: { order: 'desc' } }],
        query: {}, // filled in by the append*/filter* methods
        _source: fields,
      },
    };
    this.appendResultSize(query);

    return query;
  };
  /**
   * Set the paging parameters of a search request.
   *
   * @param {object} query request to modify (mutated)
   * @param {number} [from] offset; a falsy value falls back to DEFAULT_OFFSET
   * @param {number} [size] limit; a falsy value falls back to DEFAULT_LIMIT
   */
  SearchClient.prototype.appendResultSize = function(query, from, size) {
    const { DEFAULT_OFFSET, DEFAULT_LIMIT } = this;

    query.from = from ? from : DEFAULT_OFFSET;
    query.size = size ? size : DEFAULT_LIMIT;
  };
  /**
   * Make sure `query.body.query.bool` exists and that its `filter`, `must`
   * and `must_not` clauses are arrays. Existing arrays are left untouched.
   *
   * @param {object} query request created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
   * @returns {object} the same (mutated) query
   */
  SearchClient.prototype.initializeBoolQuery = function(query) {
    const root = query.body.query;
    if (!root.bool) {
      root.bool = {};
    }

    const { bool } = root;
    ['filter', 'must', 'must_not'].forEach((clause) => {
      if (!Array.isArray(bool[clause])) {
        bool[clause] = [];
      }
    });

    return query;
  };
  /**
   * Translate a user query string into bool-query clauses and append them to `query`.
   *
   * - plain words      -> one `multi_match` in `must` / `must_not`
   * - quoted phrases   -> one phrase `multi_match` per phrase in `must` / `must_not`
   * - prefix: / tag:   -> `prefix` / `term` queries wrapped in a bool inside `filter`
   *
   * Phrase queries are appended one by one; pushing the whole list as a single
   * element (as before) produced a nested array inside `must` / `must_not`.
   *
   * @param {object} query request to modify (mutated)
   * @param {string} queryString raw query string, parsed by parseQueryString()
   */
  SearchClient.prototype.appendCriteriaForQueryString = function(query, queryString) {
    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign
    const { bool } = query.body.query;

    // parse
    const parsedKeywords = this.parseQueryString(queryString);

    const toPhraseQuery = (phrase) => {
      return {
        multi_match: {
          query: phrase, // each phrase is quoted words
          type: 'phrase',
          fields: [
            // Not use "*.ja" fields here, because we want to analyze (parse) search words
            'path.raw^2',
            'body',
          ],
        },
      };
    };
    const toPrefixQuery = (path) => { return { prefix: { 'path.raw': path } } };
    const toTagQuery = (tag) => { return { term: { tag_names: tag } } };

    if (parsedKeywords.match.length > 0) {
      bool.must.push({
        multi_match: {
          query: parsedKeywords.match.join(' '),
          type: 'most_fields',
          fields: ['path.ja^2', 'path.en^2', 'body.ja', 'body.en'],
        },
      });
    }

    if (parsedKeywords.not_match.length > 0) {
      bool.must_not.push({
        multi_match: {
          query: parsedKeywords.not_match.join(' '),
          fields: ['path.ja', 'path.en', 'body.ja', 'body.en'],
          operator: 'or',
        },
      });
    }

    if (parsedKeywords.phrase.length > 0) {
      bool.must.push(...parsedKeywords.phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.not_phrase.length > 0) {
      bool.must_not.push(...parsedKeywords.not_phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.prefix.length > 0) {
      bool.filter.push({ bool: { should: parsedKeywords.prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.not_prefix.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.tag.length > 0) {
      bool.filter.push({ bool: { must: parsedKeywords.tag.map(toTagQuery) } });
    }

    if (parsedKeywords.not_tag.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_tag.map(toTagQuery) } });
    }
  };
  /**
   * Restrict a search to the pages the viewer is allowed to see in lists.
   *
   * Appends one `bool.should` filter made of grant conditions:
   * public pages always match; restricted pages match only for a granted user;
   * pages limited to specified users / the owner / a user group match for everyone unless the
   * matching "hideRestrictedBy*" config is set, in which case the viewer must be granted.
   *
   * @param {object} query request to modify (mutated)
   * @param {?object} user viewer, or null/undefined for an anonymous viewer
   * @param {?Array<object>} userGroups groups the viewer belongs to
   */
  SearchClient.prototype.filterPagesByViewer = async function(query, user, userGroups) {
    const isHidingRestrictedByOwner = this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByOwner');
    const isHidingRestrictedByGroup = this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByGroup');

    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign

    const {
      GRANT_PUBLIC, GRANT_RESTRICTED, GRANT_SPECIFIED, GRANT_OWNER, GRANT_USER_GROUP,
    } = this.crowi.model('Page');

    const hasViewer = user != null;
    // matches every page having the grant
    const byGrant = (grant) => { return { term: { grant } } };
    // matches pages having the grant only when the viewer is one of the granted users
    const byGrantForViewer = (grant) => {
      return {
        bool: {
          must: [
            byGrant(grant),
            { term: { granted_users: user._id.toString() } },
          ],
        },
      };
    };

    const grantConditions = [byGrant(GRANT_PUBLIC)];

    // ensure to hit to GRANT_RESTRICTED pages that the user specified at own
    if (hasViewer) {
      grantConditions.push(byGrantForViewer(GRANT_RESTRICTED));
    }

    if (!isHidingRestrictedByOwner) {
      grantConditions.push(byGrant(GRANT_SPECIFIED), byGrant(GRANT_OWNER));
    }
    else if (hasViewer) {
      grantConditions.push(byGrantForViewer(GRANT_SPECIFIED), byGrantForViewer(GRANT_OWNER));
    }

    if (!isHidingRestrictedByGroup) {
      grantConditions.push(byGrant(GRANT_USER_GROUP));
    }
    else if (userGroups != null && userGroups.length > 0) {
      const userGroupIds = userGroups.map(group => group._id.toString());
      grantConditions.push({
        bool: {
          must: [
            byGrant(GRANT_USER_GROUP),
            { terms: { granted_group: userGroupIds } },
          ],
        },
      });
    }

    query.body.query.bool.filter.push({ bool: { should: grantConditions } });
  };
  /**
   * Limit a search to portal pages: exclude the USER query and require the PORTAL query.
   *
   * @param {object} query request to modify (mutated)
   */
  SearchClient.prototype.filterPortalPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PORTAL } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PORTAL);
  };
  /**
   * Limit a search to public pages: exclude the USER query and require the PUBLIC query.
   *
   * @param {object} query request to modify (mutated)
   */
  SearchClient.prototype.filterPublicPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PUBLIC } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PUBLIC);
  };
  /**
   * Limit a search to user pages by requiring the USER query.
   *
   * @param {object} query request to modify (mutated)
   */
  SearchClient.prototype.filterUserPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;

    bool.filter.push(this.queries.USER);
  };
  /**
   * Apply the page-type filter matching `type`.
   *
   * @param {object} query request to modify (mutated)
   * @param {*} type one of Page.TYPE_PORTAL / Page.TYPE_PUBLIC / Page.TYPE_USER; anything else applies no filter
   * @returns {*} the result of the selected filter method, or `query` itself when no type matches
   */
  SearchClient.prototype.filterPagesByType = function(query, type) {
    const { TYPE_PORTAL, TYPE_PUBLIC, TYPE_USER } = this.crowi.model('Page');

    if (type === TYPE_PORTAL) {
      return this.filterPortalPages(query);
    }
    if (type === TYPE_PUBLIC) {
      return this.filterPublicPages(query);
    }
    if (type === TYPE_USER) {
      return this.filterUserPages(query);
    }
    return query;
  };
  /**
   * Wrap the current query in a `function_score` that boosts pages by their bookmark count.
   * The boost factor is 10000 divided by the number of users (at least 1).
   *
   * This method is asynchronous because the user count has to be awaited:
   * dividing by the un-awaited result of `User.count()` yielded NaN as the factor.
   *
   * @param {object} query request to modify (mutated)
   * @param {string} queryString raw query string (only used for the debug log)
   * @returns {Promise<void>}
   */
  SearchClient.prototype.appendFunctionScore = async function(query, queryString) {
    const User = this.crowi.model('User');
    const count = (await User.count({})) || 1;

    const minScore = queryString.length * 0.1 - 1; // increase with length
    logger.debug('min_score: ', minScore);

    query.body.query = {
      function_score: {
        query: { ...query.body.query },
        // min_score is intentionally not applied (disabled 2019.02.28 by Yuki Takei):
        // more precise adjustment is needed before `min_score: minScore` can be used
        field_value_factor: {
          field: 'bookmark_count',
          modifier: 'log1p',
          factor: 10000 / count,
          missing: 0,
        },
        boost_mode: 'sum',
      },
    };
  };

  /**
   * Search pages by a user query string, restricted to what the viewer may see.
   *
   * @param {string} queryString raw query string
   * @param {?object} user viewer
   * @param {?Array<object>} userGroups groups the viewer belongs to
   * @param {object} [option] `offset`, `limit` and `type` are read; a missing option means defaults
   * @returns {Promise<object>} result of search()
   */
  SearchClient.prototype.searchKeyword = async function(queryString, user, userGroups, option) {
    const { offset, limit, type } = option || {};
    const from = offset || null;
    const size = limit || null;

    const query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForQueryString(query, queryString);

    this.filterPagesByType(query, type || null);
    await this.filterPagesByViewer(query, user, userGroups);

    this.appendResultSize(query, from, size);

    // must be awaited, otherwise search() would run before the function score is applied
    await this.appendFunctionScore(query, queryString);

    return this.search(query);
  };
  /**
   * Split a user query string into keyword groups.
   *
   * - `"quoted phrase"` / `-"quoted phrase"` -> phrase / not_phrase (the quotes are kept)
   * - `prefix:/path` / `-prefix:/path`       -> prefix / not_prefix
   * - `tag:name` / `-tag:name`               -> tag / not_tag
   * - any other word / `-word`               -> match / not_match
   *
   * @param {string} queryString raw query string
   * @returns {{match: string[], not_match: string[], phrase: string[], not_phrase: string[],
   *   prefix: string[], not_prefix: string[], tag: string[], not_tag: string[]}}
   */
  SearchClient.prototype.parseQueryString = function(queryString) {
    const keywords = {
      match: [],
      not_match: [],
      phrase: [],
      not_phrase: [],
      prefix: [],
      not_prefix: [],
      tag: [],
      not_tag: [],
    };

    let remaining = queryString.replace(/\s+/g, ' ');

    // First: Parse phrase keywords
    const phrasePattern = /(-?"[^"]+")/g;
    const phrases = remaining.match(phrasePattern);
    if (phrases !== null) {
      remaining = remaining.replace(phrasePattern, '');

      for (const phrase of phrases) {
        if (phrase.startsWith('-')) {
          keywords.not_phrase.push(phrase.slice(1));
        }
        else {
          keywords.phrase.push(phrase);
        }
      }
    }

    // Second: Parse other keywords (include minus keywords)
    for (const word of remaining.split(' ')) {
      if (word === '') {
        continue;
      }

      // https://regex101.com/r/pN9XfK/1
      const negative = word.match(/^-(prefix:|tag:)?(.+)$/);
      // https://regex101.com/r/3qw9FQ/1
      const parsed = negative != null ? negative : word.match(/^(prefix:|tag:)?(.+)$/);
      if (parsed == null) {
        continue;
      }

      const [, operator, value] = parsed;
      let group = 'match';
      if (operator === 'prefix:') {
        group = 'prefix';
      }
      else if (operator === 'tag:') {
        group = 'tag';
      }

      keywords[negative != null ? `not_${group}` : group].push(value);
    }

    return keywords;
  };
  /**
   * Event handler for page creation/update: re-index the page,
   * or remove it from the index when it should not be indexed.
   *
   * @param {object} page created/updated page
   * @param {*} user not used by this implementation
   * @returns {Promise<*>} result of updateOrInsertPageById(), or undefined after a removal
   */
  SearchClient.prototype.syncPageUpdated = async function(page, user) {
    logger.debug('SearchClient.syncPageUpdated', page.path);

    if (this.shouldIndexed(page)) {
      return this.updateOrInsertPageById(page._id);
    }

    // delete if page should not indexed
    try {
      await this.deletePages([page]);
    }
    catch (err) {
      logger.error('deletePages:ES Error', err);
    }
  };
  /**
   * Event handler for page deletion: remove the page from the index.
   * A failure is logged and not rethrown.
   *
   * @param {object} page deleted page
   * @param {*} user not used by this implementation
   * @returns {Promise<*>} result of deletePages(), or undefined when it failed
   */
  SearchClient.prototype.syncPageDeleted = async function(page, user) {
    debug('SearchClient.syncPageDeleted', page.path);

    let result;
    try {
      result = await this.deletePages([page]);
    }
    catch (err) {
      logger.error('deletePages:ES Error', err);
    }
    return result;
  };
  /**
   * Event handler for bookmark creation/deletion: re-index the bookmarked page.
   *
   * @param {*} pageId id of the page whose bookmarks changed
   * @returns {Promise<*>} result of updateOrInsertPageById()
   */
  SearchClient.prototype.syncBookmarkChanged = async function(pageId) {
    logger.debug('SearchClient.syncBookmarkChanged', pageId);

    const result = await this.updateOrInsertPageById(pageId);
    return result;
  };
  /**
   * Event handler for tag updates: re-index the page whose tags changed.
   *
   * @param {object} page page whose tags changed
   * @returns {Promise<*>} result of updateOrInsertPageById()
   */
  SearchClient.prototype.syncTagChanged = async function(page) {
    const { path, _id: pageId } = page;
    logger.debug('SearchClient.syncTagChanged', path);

    const result = await this.updateOrInsertPageById(pageId);
    return result;
  };
  782. module.exports = SearchClient;