search.js 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935
  1. /**
  2. * Search
  3. */
  4. const elasticsearch = require('elasticsearch');
  5. const debug = require('debug')('growi:lib:search');
  6. const logger = require('@alias/logger')('growi:lib:search');
  7. const {
  8. Writable, Transform,
  9. } = require('stream');
  10. const streamToPromise = require('stream-to-promise');
  11. const BULK_REINDEX_SIZE = 100;
  /**
   * Search client that wraps an Elasticsearch connection for page search.
   *
   * @param {object} crowi application object; `event()`, `configManager` and `resourceDir` are read here
   * @param {string} esUri Elasticsearch URI, optionally followed by `/{indexName}` (see parseUri())
   */
  function SearchClient(crowi, esUri) {
    // paging defaults used by appendResultSize()
    this.DEFAULT_OFFSET = 0;
    this.DEFAULT_LIMIT = 50;

    // node information; placeholders until checkESVersion() fills them in
    this.esNodeName = '-';
    this.esNodeNames = [];
    this.esVersion = 'unknown';
    this.esVersions = [];
    this.esPlugin = [];
    this.esPlugins = [];

    this.esUri = esUri;
    this.crowi = crowi;
    this.searchEvent = crowi.event('search');
    this.configManager = this.crowi.configManager;

    // In Elasticsearch RegExp, we don't need to use ^ and $.
    // Ref: https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-regexp-query.html#_standard_operators
    const onRawPath = (queryType, value) => ({ [queryType]: { 'path.raw': value } });
    this.queries = {
      PORTAL: onRawPath('regexp', '.*/'),
      PUBLIC: onRawPath('regexp', '.*[^/]'),
      USER: onRawPath('prefix', '/user/'),
    };

    const { host, indexName } = this.parseUri(this.esUri);
    this.host = host;
    this.indexName = indexName;
    this.aliasName = `${this.indexName}-alias`;

    this.client = new elasticsearch.Client({
      host: this.host,
      requestTimeout: 5000,
      // log: 'debug',
    });

    this.registerUpdateEvent();

    // resourceDir is concatenated as-is, so it has to end with a path separator
    this.mappingFile = `${crowi.resourceDir}search/mappings.json`;
  }
  /**
   * Ask the Elasticsearch client for its `info()`.
   *
   * @returns {*} whatever `client.info({})` returns
   */
  SearchClient.prototype.getInfo = function() {
    const { client } = this;
    return client.info({});
  };
  /**
   * Read node information from the cluster and record node names, versions and plugins.
   *
   * The plural lists (`esNodeNames`, `esVersions`, `esPlugins`) are rebuilt on every
   * successful call, so calling this repeatedly (initIndices() does so on each run)
   * does not accumulate duplicate entries. The singular properties hold the values of
   * the last node iterated.
   *
   * Failures are logged and swallowed; in that case the previously recorded values
   * are left untouched.
   */
  SearchClient.prototype.checkESVersion = async function() {
    try {
      const nodes = await this.client.nodes.info();
      if (!nodes._nodes || !nodes.nodes) {
        throw new Error('no nodes info');
      }

      const nodeNames = [];
      const versions = [];
      const plugins = [];

      for (const [nodeName, nodeInfo] of Object.entries(nodes.nodes)) {
        this.esNodeName = nodeName;
        nodeNames.push(nodeName);
        this.esVersion = nodeInfo.version;
        versions.push(nodeInfo.version);
        this.esPlugin = nodeInfo.plugins;
        plugins.push(nodeInfo.plugins);
      }

      // replace the lists only after the whole response has been processed
      this.esNodeNames = nodeNames;
      this.esVersions = versions;
      this.esPlugins = plugins;
    }
    catch (error) {
      logger.error('es check version error:', error);
    }
  };
  /**
   * Subscribe the sync handlers to the 'page', 'bookmark' and 'tag' event emitters
   * obtained from `crowi.event()`. Every handler is bound to this instance.
   */
  SearchClient.prototype.registerUpdateEvent = function() {
    // fetches the emitter once and attaches each handler in declaration order
    const subscribe = (emitterName, handlersByEvent) => {
      const emitter = this.crowi.event(emitterName);
      Object.entries(handlersByEvent).forEach(([eventName, handler]) => {
        emitter.on(eventName, handler.bind(this));
      });
    };

    subscribe('page', {
      create: this.syncPageUpdated,
      update: this.syncPageUpdated,
      delete: this.syncPageDeleted,
    });
    subscribe('bookmark', {
      create: this.syncBookmarkChanged,
      delete: this.syncBookmarkChanged,
    });
    subscribe('tag', {
      update: this.syncTagChanged,
    });
  };
  /**
   * Decide whether a page belongs in the search index.
   *
   * @param {object} page
   * @returns {boolean} true only when the page has a creator and a revision and is not a redirect
   */
  SearchClient.prototype.shouldIndexed = function(page) {
    if (page.creator == null) {
      return false;
    }
    if (page.revision == null) {
      return false;
    }
    return page.redirectTo == null;
  };
  /**
   * Split an Elasticsearch URI into the host part and the index name.
   *
   * BONSAI_URL has the following format:
   *   => https://{ID}:{PASSWORD}@{HOST}
   *
   * @param {string} uri
   * @returns {{host: string, indexName: string}} when the URI has no `/{indexName}` suffix,
   *   the whole URI is returned as host and the index name falls back to 'crowi'
   */
  SearchClient.prototype.parseUri = function(uri) {
    const matched = uri.match(/^(https?:\/\/[^/]+)\/(.+)$/);
    if (!matched) {
      return { host: uri, indexName: 'crowi' };
    }

    const [, host, indexName] = matched;
    return { host, indexName };
  };
  /**
   * Bring the indices into their expected initial state:
   * drop a leftover `{indexName}-tmp` index, create the main index when it is missing,
   * and make sure the alias points at the main index.
   */
  SearchClient.prototype.initIndices = async function() {
    await this.checkESVersion();

    const { client, indexName, aliasName } = this;
    const { indices } = client;
    const tmpIndexName = `${indexName}-tmp`;

    // remove tmp index
    const hasTmpIndex = await indices.exists({ index: tmpIndexName });
    if (hasTmpIndex) {
      await indices.delete({ index: tmpIndexName });
    }

    // create index
    const hasIndex = await indices.exists({ index: indexName });
    if (!hasIndex) {
      await this.createIndex(indexName);
    }

    // create alias
    const aliasTarget = { name: aliasName, index: indexName };
    const hasAlias = await indices.existsAlias({ ...aliasTarget });
    if (!hasAlias) {
      await indices.putAlias({ ...aliasTarget });
    }
  };
  /**
   * Create an index whose request body is loaded from the mapping file.
   *
   * @param {string} index name of the index to create
   * @returns {Promise<*>} result of `client.indices.create()`
   */
  SearchClient.prototype.createIndex = async function(index) {
    // the mapping definition is loaded from the path set up in the constructor
    const mappings = require(this.mappingFile);
    const { indices } = this.client;
    return indices.create({ index, body: mappings });
  };
  /**
   * Rebuild the main index from scratch.
   *
   * Steps: copy the current index into `{indexName}-tmp`, point the alias at the tmp
   * index, recreate the main index and re-add all pages, then point the alias back
   * and drop the tmp index.
   *
   * @param {string} [uri] not used by this method
   */
  SearchClient.prototype.buildIndex = async function(uri) {
    await this.initIndices();

    const { client, indexName } = this;
    const aliasName = `${indexName}-alias`;
    const tmpIndexName = `${indexName}-tmp`;

    // moves the alias from one index to the other within a single request
    const switchAlias = (toIndex, fromIndex) => {
      return client.indices.updateAliases({
        body: {
          actions: [
            { add: { alias: aliasName, index: toIndex } },
            { remove: { alias: aliasName, index: fromIndex } },
          ],
        },
      });
    };

    // reindex to tmp index
    // NOTE(review): with waitForCompletion=false the copy presumably keeps running in
    // the background while the source index is deleted below -- confirm this is intended
    await this.createIndex(tmpIndexName);
    await client.reindex({
      waitForCompletion: false,
      body: {
        source: { index: indexName },
        dest: { index: tmpIndexName },
      },
    });

    // update alias
    await switchAlias(tmpIndexName, indexName);

    // flush index
    await client.indices.delete({ index: indexName });
    await this.createIndex(indexName);
    await this.addAllPages();

    // update alias
    await switchAlias(indexName, tmpIndexName);

    // remove tmp index
    await client.indices.delete({ index: tmpIndexName });
  };
  /**
   * Generate the part of an index document that is related to page.grant*
   *
   * @param {object} page
   * @returns {{grant: *, granted_users: ?string[], granted_group: ?string}}
   *   `granted_users` is null when the page has no granted users,
   *   `granted_group` is null when the page has no granted group
   */
  function generateDocContentsRelatedToRestriction(page) {
    // a reference is either an object carrying `_id` or the id itself
    const toIdString = (ref) => {
      const id = (ref._id == null) ? ref : ref._id;
      return id.toString();
    };

    const { grantedUsers, grantedGroup } = page;
    const hasGrantedUsers = grantedUsers != null && grantedUsers.length > 0;

    return {
      grant: page.grant,
      granted_users: hasGrantedUsers ? grantedUsers.map(ref => toIdString(ref)) : null,
      granted_group: (grantedGroup != null) ? toIdString(grantedGroup) : null,
    };
  }
  /**
   * Append the bulk "index" command and the document for one page to a bulk request body.
   *
   * @param {Array} body bulk request body; two entries (command, document) are pushed onto it
   * @param {object} page page with `revision` and `creator` populated
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForCreate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const command = {
      index: {
        _index: this.indexName,
        _type: 'pages',
        _id: page._id.toString(),
      },
    };

    const bookmarkCount = page.bookmarkCount || 0;
    // A page without a `liker` list counts as zero likes. The fallback has to be applied
    // before `.length` is read, otherwise a missing list throws instead of yielding 0.
    const likeCount = (page.liker != null) ? (page.liker.length || 0) : 0;

    const document = {
      path: page.path,
      body: page.revision.body,
      username: page.creator.username,
      comment_count: page.commentCount,
      bookmark_count: bookmarkCount,
      like_count: likeCount,
      created_at: page.createdAt,
      updated_at: page.updatedAt,
      tag_names: page.tagNames,
      ...generateDocContentsRelatedToRestriction(page),
    };

    body.push(command);
    body.push(document);
  };
  /**
   * Append the bulk "delete" command for one page to a bulk request body.
   *
   * @param {Array} body bulk request body; one entry is pushed onto it
   * @param {object} page page whose `_id` identifies the document to delete
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForDelete = function(body, page) {
    const isArrayBody = Array.isArray(body);
    if (!isArrayBody) {
      throw new Error('Body must be an array.');
    }

    const target = {
      _index: this.indexName,
      _type: 'pages',
      _id: page._id.toString(),
    };
    body.push({ delete: target });
  };
  /**
   * Index every page, with progress events enabled.
   *
   * @returns {Promise<*>} result of updateOrInsertPages()
   */
  SearchClient.prototype.addAllPages = async function() {
    const Page = this.crowi.model('Page');
    const findAllPages = () => Page.find();
    const isEmittingProgressEvent = true;
    return this.updateOrInsertPages(findAllPages, isEmittingProgressEvent);
  };
  /**
   * Index (create or update) the single page with the given id.
   *
   * @param {*} pageId id handed to `Page.findById()`
   * @returns {Promise<*>} result of updateOrInsertPages()
   */
  SearchClient.prototype.updateOrInsertPageById = async function(pageId) {
    const Page = this.crowi.model('Page');
    const findTargetPage = () => Page.findById(pageId);
    return this.updateOrInsertPages(findTargetPage);
  };
  /**
   * Stream the pages selected by a query into Elasticsearch with bulk requests.
   *
   * Pipeline: query cursor -> drop pages that should not be indexed -> group into batches
   * of BULK_REINDEX_SIZE -> attach bookmark counts -> attach tag names -> bulk write.
   *
   * A failing bulk request (or a batch whose request body cannot be built) is logged and
   * skipped, and the remaining batches are still processed. An error in any other stage
   * rejects the returned promise; plain `.pipe()` would not forward such errors, which
   * would leave the promise pending forever.
   *
   * @param {function} queryFactory factory method to generate a Mongoose Query instance
   * @param {boolean} [isEmittingProgressEvent=false] when true, 'addPageProgress' and
   *   'finishAddPage' are emitted on the search event emitter
   * @returns {Promise<*>} settles when the write stream has finished or the pipeline failed
   */
  SearchClient.prototype.updateOrInsertPages = async function(queryFactory, isEmittingProgressEvent = false) {
    const Page = this.crowi.model('Page');
    const { PageQueryBuilder } = Page;
    const Bookmark = this.crowi.model('Bookmark');
    const PageTagRelation = this.crowi.model('PageTagRelation');

    const searchEvent = this.searchEvent;
    const prepareBodyForCreate = this.prepareBodyForCreate.bind(this);
    const shouldIndexed = this.shouldIndexed.bind(this);
    const bulkWrite = this.client.bulk.bind(this.client);

    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

    // the factory is invoked twice: once for the documents, once for the total count
    const findQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;
    const countQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;

    const totalCount = await countQuery.count();

    const readStream = findQuery
      // populate data which will be referenced by prepareBodyForCreate()
      .populate([
        { path: 'creator', model: 'User', select: 'username' },
        { path: 'revision', model: 'Revision', select: 'body' },
      ])
      .snapshot()
      .lean()
      .cursor();

    let skipped = 0;
    const thinOutStream = new Transform({
      objectMode: true,
      transform(doc, encoding, callback) {
        let isTarget;
        try {
          isTarget = shouldIndexed(doc);
        }
        catch (err) {
          callback(err);
          return;
        }

        if (isTarget) {
          this.push(doc);
        }
        else {
          skipped++;
        }
        callback();
      },
    });

    let batchBuffer = [];
    const batchingStream = new Transform({
      objectMode: true,
      transform(doc, encoding, callback) {
        batchBuffer.push(doc);

        if (batchBuffer.length >= BULK_REINDEX_SIZE) {
          this.push(batchBuffer);
          batchBuffer = [];
        }

        callback();
      },
      final(callback) {
        // hand over the last, partially filled batch
        if (batchBuffer.length > 0) {
          this.push(batchBuffer);
        }
        callback();
      },
    });

    const appendBookmarkCountStream = new Transform({
      objectMode: true,
      async transform(chunk, encoding, callback) {
        try {
          const pageIds = chunk.map(doc => doc._id);
          const idToCountMap = await Bookmark.getPageIdToCountMap(pageIds);

          // append count from idToCountMap to the documents that have one
          chunk.forEach((doc) => {
            const id = doc._id.toString();
            if (hasOwn(idToCountMap, id)) {
              doc.bookmarkCount = idToCountMap[id];
            }
          });
        }
        catch (err) {
          // report the failure to the stream instead of leaving the callback uncalled
          callback(err);
          return;
        }

        callback(null, chunk);
      },
    });

    const appendTagNamesStream = new Transform({
      objectMode: true,
      async transform(chunk, encoding, callback) {
        try {
          const pageIds = chunk.map(doc => doc._id);
          const idToTagNamesMap = await PageTagRelation.getIdToTagNamesMap(pageIds);

          // append tagNames from idToTagNamesMap to the documents that have some
          chunk.forEach((doc) => {
            const id = doc._id.toString();
            if (hasOwn(idToTagNamesMap, id)) {
              doc.tagNames = idToTagNamesMap[id];
            }
          });
        }
        catch (err) {
          callback(err);
          return;
        }

        callback(null, chunk);
      },
    });

    let count = 0;
    const writeStream = new Writable({
      objectMode: true,
      async write(batch, encoding, callback) {
        try {
          // building the body happens inside the try block so that a malformed page
          // is logged like any other bulk failure instead of stalling the stream
          const body = [];
          batch.forEach(doc => prepareBodyForCreate(body, doc));

          const res = await bulkWrite({
            body,
            requestTimeout: Infinity,
          });

          count += (res.items || []).length;

          logger.info(`Adding pages progressing: (count=${count}, errors=${res.errors}, took=${res.took}ms)`);

          if (isEmittingProgressEvent) {
            searchEvent.emit('addPageProgress', totalCount, count, skipped);
          }
        }
        catch (err) {
          logger.error('addAllPages error on add anyway: ', err);
        }

        callback();
      },
      final(callback) {
        logger.info(`Adding pages has terminated: (totalCount=${totalCount}, skipped=${skipped})`);

        if (isEmittingProgressEvent) {
          searchEvent.emit('finishAddPage', totalCount, count, skipped);
        }
        callback();
      },
    });

    // start listening before any data flows
    const completion = streamToPromise(writeStream);

    // pipe() does not propagate errors downstream, so forward them to the write stream
    // whose outcome the returned promise reflects
    const upstreams = [readStream, thinOutStream, batchingStream, appendBookmarkCountStream, appendTagNamesStream];
    upstreams.forEach((stream) => {
      stream.on('error', (err) => {
        writeStream.destroy(err);
      });
    });

    readStream
      .pipe(thinOutStream)
      .pipe(batchingStream)
      .pipe(appendBookmarkCountStream)
      .pipe(appendTagNamesStream)
      .pipe(writeStream);

    return completion;
  };
  /**
   * Remove the given pages from the index with a single bulk request.
   *
   * @param {Array<object>} pages pages whose documents should be deleted
   * @returns {*} result of `client.bulk()`
   */
  SearchClient.prototype.deletePages = function(pages) {
    const body = [];
    pages.forEach((page) => {
      this.prepareBodyForDelete(body, page);
    });

    logger.debug('deletePages(): Sending Request to ES', body);

    const request = { body };
    return this.client.bulk(request);
  };
  /**
   * Execute a search request and reshape the response.
   *
   * search returning type:
   * {
   *   meta: { took: Integer, total: Integer, results: Integer },
   *   data: [ { _id, _score, _source } ... ],
   * }
   *
   * When NODE_ENV is 'development' the query is additionally sent to the validate API
   * and the returned explanations are written to the debug log.
   *
   * @param {object} query request object passed to `client.search()`
   */
  SearchClient.prototype.search = async function(query) {
    const { client } = this;

    // for debug
    const isDevelopment = process.env.NODE_ENV === 'development';
    if (isDevelopment) {
      const validation = await client.indices.validateQuery({
        explain: true,
        body: {
          query: query.body.query,
        },
      });
      logger.debug('ES returns explanations: ', validation.explanations);
    }

    const result = await client.search(query);

    // for debug
    logger.debug('ES result: ', result);

    const { took, hits } = result;
    const data = hits.hits.map(({ _id, _score, _source }) => ({ _id, _score, _source }));

    return {
      meta: {
        took,
        total: hits.total,
        results: hits.hits.length,
      },
      data,
    };
  };
  /**
   * Create a search request skeleton sorted by `updated_at` in descending order.
   *
   * @param {object} [option]
   * @param {string[]} [option.fields] source fields to return instead of the defaults
   * @returns {object} request with an empty `body.query` and the default result size applied
   */
  SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option) {
    // getting path by default is almost for debug
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const fields = (option && option.fields) || defaultFields;

    // default is only id field, sorted by updated_at
    const body = {
      sort: [{ updated_at: { order: 'desc' } }],
      query: {}, // query
      _source: fields,
    };
    const query = { index: this.aliasName, type: 'pages', body };

    this.appendResultSize(query);

    return query;
  };
  /**
   * Create a search request skeleton sorted by `_score` in descending order.
   *
   * @param {object} [option]
   * @param {string[]} [option.fields] source fields to return instead of the defaults
   * @returns {object} request with an empty `body.query` and the default result size applied
   */
  SearchClient.prototype.createSearchQuerySortedByScore = function(option) {
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const fields = (option && option.fields) || defaultFields;

    // sort by score
    const body = {
      sort: [{ _score: { order: 'desc' } }],
      query: {}, // query
      _source: fields,
    };
    const query = { index: this.aliasName, type: 'pages', body };

    this.appendResultSize(query);

    return query;
  };
  /**
   * Set the paging window (`from` / `size`) on a search request.
   *
   * @param {object} query request object to modify
   * @param {number} [from] offset; any falsy value falls back to DEFAULT_OFFSET
   * @param {number} [size] limit; any falsy value (including 0) falls back to DEFAULT_LIMIT
   */
  SearchClient.prototype.appendResultSize = function(query, from, size) {
    const { DEFAULT_OFFSET, DEFAULT_LIMIT } = this;
    Object.assign(query, {
      from: from || DEFAULT_OFFSET,
      size: size || DEFAULT_LIMIT,
    });
  };
  /**
   * Make sure `query.body.query.bool` exists and that its `filter`, `must` and `must_not`
   * clauses are arrays. Clauses that already are arrays are kept as they are.
   *
   * @param {object} query created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
   * @returns {object} the same query object
   */
  SearchClient.prototype.initializeBoolQuery = function(query) {
    const root = query.body.query;
    if (!root.bool) {
      root.bool = {};
    }

    const { bool } = root;
    ['filter', 'must', 'must_not'].forEach((clause) => {
      if (!Array.isArray(bool[clause])) {
        bool[clause] = [];
      }
    });

    return query;
  };
  /**
   * Translate a user-entered query string into bool query clauses and add them to the query.
   *
   * - plain words        -> one `multi_match` in `must` / `must_not`
   * - quoted phrases     -> one phrase `multi_match` per phrase in `must` / `must_not`
   * - prefix: / -prefix: -> `prefix` queries on `path.raw` inside `filter`
   * - tag: / -tag:       -> `term` queries on `tag_names` inside `filter`
   *
   * Phrase queries are added as individual clauses. Pushing the whole list as one element
   * would nest an array inside `must` / `must_not`, and a clause list has to consist of
   * query objects.
   *
   * @param {object} query search request; modified in place
   * @param {string} queryString raw query string, parsed by parseQueryString()
   */
  SearchClient.prototype.appendCriteriaForQueryString = function(query, queryString) {
    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign
    const { bool } = query.body.query;

    // parse
    const parsedKeywords = this.parseQueryString(queryString);

    const toPhraseQuery = (phrase) => {
      return {
        multi_match: {
          query: phrase, // each phrase is a quoted string
          type: 'phrase',
          fields: [
            // Not use "*.ja" fields here, because we want to analyze (parse) search words
            'path.raw^2',
            'body',
          ],
        },
      };
    };
    const toPrefixQuery = (path) => {
      return { prefix: { 'path.raw': path } };
    };
    const toTagQuery = (tag) => {
      return { term: { tag_names: tag } };
    };

    if (parsedKeywords.match.length > 0) {
      bool.must.push({
        multi_match: {
          query: parsedKeywords.match.join(' '),
          type: 'most_fields',
          fields: ['path.ja^2', 'path.en^2', 'body.ja', 'body.en'],
        },
      });
    }

    if (parsedKeywords.not_match.length > 0) {
      bool.must_not.push({
        multi_match: {
          query: parsedKeywords.not_match.join(' '),
          fields: ['path.ja', 'path.en', 'body.ja', 'body.en'],
          operator: 'or',
        },
      });
    }

    if (parsedKeywords.phrase.length > 0) {
      bool.must.push(...parsedKeywords.phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.not_phrase.length > 0) {
      bool.must_not.push(...parsedKeywords.not_phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.prefix.length > 0) {
      bool.filter.push({ bool: { should: parsedKeywords.prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.not_prefix.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.tag.length > 0) {
      bool.filter.push({ bool: { must: parsedKeywords.tag.map(toTagQuery) } });
    }

    if (parsedKeywords.not_tag.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_tag.map(toTagQuery) } });
    }
  };
  /**
   * Add a filter so that only pages the viewer is allowed to find are returned.
   *
   * Public pages always match. Restricted pages match only for users listed in
   * `granted_users`. Pages restricted by owner / user group match for everyone unless the
   * corresponding "hide" config is set; when hidden, they match only for granted users or
   * for members of the granted group.
   *
   * @param {object} query search request; modified in place
   * @param {?object} user viewer, or null/undefined for a guest
   * @param {?Array<object>} userGroups groups the viewer belongs to
   */
  SearchClient.prototype.filterPagesByViewer = async function(query, user, userGroups) {
    const { configManager } = this;
    const showPagesRestrictedByOwner = !configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByOwner');
    const showPagesRestrictedByGroup = !configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByGroup');

    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign

    const Page = this.crowi.model('Page');
    const {
      GRANT_PUBLIC, GRANT_RESTRICTED, GRANT_SPECIFIED, GRANT_OWNER, GRANT_USER_GROUP,
    } = Page;

    const isLoggedIn = user != null;
    const hasUserGroups = userGroups != null && userGroups.length > 0;

    // matches every page having the grant
    const grantIs = grant => ({ term: { grant } });
    // matches pages having the grant only when the viewer is one of the granted users
    const grantedToViewer = grant => ({
      bool: {
        must: [
          grantIs(grant),
          { term: { granted_users: user._id.toString() } },
        ],
      },
    });

    const grantConditions = [grantIs(GRANT_PUBLIC)];

    // ensure to hit GRANT_RESTRICTED pages that the viewer has been granted
    if (isLoggedIn) {
      grantConditions.push(grantedToViewer(GRANT_RESTRICTED));
    }

    if (showPagesRestrictedByOwner) {
      grantConditions.push(grantIs(GRANT_SPECIFIED), grantIs(GRANT_OWNER));
    }
    else if (isLoggedIn) {
      grantConditions.push(grantedToViewer(GRANT_SPECIFIED), grantedToViewer(GRANT_OWNER));
    }

    if (showPagesRestrictedByGroup) {
      grantConditions.push(grantIs(GRANT_USER_GROUP));
    }
    else if (hasUserGroups) {
      const userGroupIds = userGroups.map(group => group._id.toString());
      grantConditions.push({
        bool: {
          must: [
            grantIs(GRANT_USER_GROUP),
            { terms: { granted_group: userGroupIds } },
          ],
        },
      });
    }

    query.body.query.bool.filter.push({ bool: { should: grantConditions } });
  };
  /**
   * Restrict the query to pages matching the PORTAL path query, excluding those matching
   * the USER path query.
   *
   * @param {object} query search request; modified in place
   */
  SearchClient.prototype.filterPortalPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PORTAL } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PORTAL);
  };
  /**
   * Restrict the query to pages matching the PUBLIC path query, excluding those matching
   * the USER path query.
   *
   * @param {object} query search request; modified in place
   */
  SearchClient.prototype.filterPublicPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PUBLIC } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PUBLIC);
  };
  /**
   * Restrict the query to pages matching the USER path query.
   *
   * @param {object} query search request; modified in place
   */
  SearchClient.prototype.filterUserPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER } = this.queries;

    bool.filter.push(USER);
  };
  /**
   * Apply the page-type filter that corresponds to `type`.
   *
   * @param {object} query search request; modified in place by the selected filter
   * @param {*} type one of the Page.TYPE_* constants; any other value applies no filter
   * @returns {*} the selected filter's return value, or `query` itself when no filter applies
   */
  SearchClient.prototype.filterPagesByType = function(query, type) {
    const Page = this.crowi.model('Page');

    if (type === Page.TYPE_PORTAL) {
      return this.filterPortalPages(query);
    }
    if (type === Page.TYPE_PUBLIC) {
      return this.filterPublicPages(query);
    }
    if (type === Page.TYPE_USER) {
      return this.filterUserPages(query);
    }
    return query;
  };
  /**
   * Wrap the current query in a `function_score` query that adds a bookmark-count based
   * boost to the relevance score.
   *
   * The boost factor is `10000 / userCount`. The number of users has to be passed in as a
   * plain number: deriving it here from `User.count({})` without awaiting the result would
   * divide by a non-number and produce a NaN factor.
   *
   * @param {object} query search request; `body.query` is replaced
   * @param {string} queryString raw query string (only used for the debug output)
   * @param {number} [userCount] number of users; anything but a positive finite number is treated as 1
   */
  SearchClient.prototype.appendFunctionScore = function(query, queryString, userCount) {
    const count = (Number.isFinite(userCount) && userCount > 0) ? userCount : 1;

    const minScore = queryString.length * 0.1 - 1; // increase with length
    logger.debug('min_score: ', minScore);

    query.body.query = {
      function_score: {
        query: { ...query.body.query },
        // // disable min_score -- 2019.02.28 Yuki Takei
        // // more precise adjustment is needed...
        // min_score: minScore,
        field_value_factor: {
          field: 'bookmark_count',
          modifier: 'log1p',
          factor: 10000 / count,
          missing: 0,
        },
        boost_mode: 'sum',
      },
    };
  };

  /**
   * Search pages by a user-entered query string, restricted to what the viewer may see.
   *
   * @param {string} queryString raw query string
   * @param {?object} user viewer, or null for a guest
   * @param {?Array<object>} userGroups groups the viewer belongs to
   * @param {object} [option]
   * @param {number} [option.offset] paging offset
   * @param {number} [option.limit] paging limit
   * @param {*} [option.type] page type handed to filterPagesByType()
   * @returns {Promise<object>} result of search()
   */
  SearchClient.prototype.searchKeyword = async function(queryString, user, userGroups, option) {
    // a missing option object means "use the defaults"
    const { offset, limit, type: pageType } = option || {};
    const from = offset || null;
    const size = limit || null;
    const type = pageType || null;

    const query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForQueryString(query, queryString);

    this.filterPagesByType(query, type);
    await this.filterPagesByViewer(query, user, userGroups);

    this.appendResultSize(query, from, size);

    // awaiting works both when count() returns a number and when it returns a thenable
    const User = this.crowi.model('User');
    const userCount = await User.count({});
    this.appendFunctionScore(query, queryString, userCount);

    return this.search(query);
  };
  /**
   * Split a user-entered query string into keyword categories.
   *
   * Recognised forms (a leading `-` negates each of them):
   *   word          -> match / not_match
   *   "some phrase" -> phrase / not_phrase (the surrounding double quotes are kept)
   *   prefix:/path  -> prefix / not_prefix
   *   tag:name      -> tag / not_tag
   *
   * @param {string} queryString
   * @returns {{match: string[], not_match: string[], phrase: string[], not_phrase: string[],
   *   prefix: string[], not_prefix: string[], tag: string[], not_tag: string[]}}
   */
  SearchClient.prototype.parseQueryString = function(queryString) {
    const parsed = {
      match: [],
      not_match: [],
      phrase: [],
      not_phrase: [],
      prefix: [],
      not_prefix: [],
      tag: [],
      not_tag: [],
    };

    // collapse every run of whitespace into a single space
    let remaining = queryString.replace(/\s+/g, ' ');

    // First: Parse phrase keywords
    const phrasePattern = /(-?"[^"]+")/g;
    const phrases = remaining.match(phrasePattern);
    if (phrases !== null) {
      remaining = remaining.replace(phrasePattern, '');

      for (const phrase of phrases) {
        if (phrase.startsWith('-')) {
          parsed.not_phrase.push(phrase.slice(1));
        }
        else {
          parsed.phrase.push(phrase);
        }
      }
    }

    // Second: Parse other keywords (include minus keywords)
    const categoryOf = { 'prefix:': 'prefix', 'tag:': 'tag' };
    for (const word of remaining.split(' ')) {
      if (word === '') {
        continue;
      }

      // https://regex101.com/r/pN9XfK/1
      const negative = word.match(/^-(prefix:|tag:)?(.+)$/);
      const isNegative = negative != null;
      // https://regex101.com/r/3qw9FQ/1
      const matched = isNegative ? negative : word.match(/^(prefix:|tag:)?(.+)$/);
      if (matched == null) {
        continue;
      }

      const [, operator, value] = matched;
      const category = categoryOf[operator] || 'match';
      parsed[isNegative ? `not_${category}` : category].push(value);
    }

    return parsed;
  };
  /**
   * Event handler for page creation / update: index the page, or remove it from the index
   * when it should not be indexed.
   *
   * @param {object} page the created / updated page
   * @param {object} [user] not used by this handler
   * @returns {Promise<*>} result of updateOrInsertPageById(), or undefined after a removal
   */
  SearchClient.prototype.syncPageUpdated = async function(page, user) {
    logger.debug('SearchClient.syncPageUpdated', page.path);

    if (this.shouldIndexed(page)) {
      return this.updateOrInsertPageById(page._id);
    }

    // delete if page should not be indexed; a failure is logged only
    try {
      await this.deletePages([page]);
    }
    catch (err) {
      logger.error('deletePages:ES Error', err);
    }
  };
  /**
   * Event handler for page deletion: remove the page from the index.
   *
   * Logs through `logger.debug` like the other sync handlers do.
   * A failing delete request is logged and not re-thrown.
   *
   * @param {object} page the deleted page
   * @param {object} [user] not used by this handler
   * @returns {Promise<*>} result of deletePages(), or undefined when the request failed
   */
  SearchClient.prototype.syncPageDeleted = async function(page, user) {
    logger.debug('SearchClient.syncPageDeleted', page.path);

    try {
      return await this.deletePages([page]);
    }
    catch (err) {
      logger.error('deletePages:ES Error', err);
    }
  };
  /**
   * Event handler for bookmark creation / deletion: re-index the affected page.
   *
   * @param {*} pageId id of the page whose bookmarks changed
   * @returns {Promise<*>} result of updateOrInsertPageById()
   */
  SearchClient.prototype.syncBookmarkChanged = async function(pageId) {
    logger.debug('SearchClient.syncBookmarkChanged', pageId);

    const reindexing = this.updateOrInsertPageById(pageId);
    return reindexing;
  };
  /**
   * Event handler for tag updates: re-index the affected page.
   *
   * @param {object} page page whose tags changed
   * @returns {Promise<*>} result of updateOrInsertPageById()
   */
  SearchClient.prototype.syncTagChanged = async function(page) {
    const { path, _id: pageId } = page;
    logger.debug('SearchClient.syncTagChanged', path);

    return this.updateOrInsertPageById(pageId);
  };
  787. module.exports = SearchClient;