/**
 * search.js
 *
 * Search
 */
  4. const elasticsearch = require('elasticsearch');
  5. const debug = require('debug')('growi:lib:search');
  6. const logger = require('@alias/logger')('growi:lib:search');
  7. const { URL } = require('url');
  8. const {
  9. Writable, Transform,
  10. } = require('stream');
  11. const streamToPromise = require('stream-to-promise');
  12. const { createBatchStream } = require('./batch-stream');
  // Batch size handed to createBatchStream() when pages are (re)indexed.
  const BULK_REINDEX_SIZE = 100;
  /**
   * Search client backed by Elasticsearch.
   *
   * Stores defaults and connection settings on the instance, then tries to create the
   * Elasticsearch client and to subscribe to page/bookmark/tag events. Errors thrown
   * during that setup are logged and are not propagated to the caller.
   *
   * @param {object} crowi application object; `event()`, `configManager` and `resourceDir` are read here
   * @param {string} esUri Elasticsearch URI, consumed by initClient()
   */
  function SearchClient(crowi, esUri) {
    // In Elasticsearch regexp queries the ^ and $ anchors are not needed.
    // Ref: https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-regexp-query.html#_standard_operators
    const pageTypeQueries = {
      PORTAL: { regexp: { 'path.raw': '.*/' } },
      PUBLIC: { regexp: { 'path.raw': '.*[^/]' } },
      USER: { prefix: { 'path.raw': '/user/' } },
    };

    Object.assign(this, {
      // paging defaults used by appendResultSize()
      DEFAULT_OFFSET: 0,
      DEFAULT_LIMIT: 50,
      // node information, populated by checkESVersion()
      esNodeName: '-',
      esNodeNames: [],
      esVersion: 'unknown',
      esVersions: [],
      esPlugin: [],
      esPlugins: [],
      esUri,
      crowi,
      searchEvent: crowi.event('search'),
      configManager: crowi.configManager,
      queries: pageTypeQueries,
      // assigned by initClient()
      client: null,
      indexName: null,
      aliasName: null,
      mappingFile: `${crowi.resourceDir}search/mappings.json`,
    });

    try {
      this.initClient();
      this.registerUpdateEvent();
    }
    catch (err) {
      logger.error(err);
    }
  }
  /**
   * Create the Elasticsearch client from `this.esUri` and derive the index and alias names.
   *
   * When the URI has a non-root path, that path (without its leading slash) becomes the
   * index name and the host is rebuilt from the URI parts; otherwise the URI is used as
   * the host unchanged and the index name stays 'crowi'.
   *
   * Sets `this.client`, `this.indexName` and `this.aliasName`.
   *
   * @throws {TypeError} when `this.esUri` cannot be parsed by `new URL()`
   */
  SearchClient.prototype.initClient = function() {
    let indexName = 'crowi';
    let host = this.esUri;
    let httpAuth = '';

    // credentials in a WHATWG URL are percent-encoded; fall back to the raw value
    // when it is not valid percent-encoding
    const decode = (value) => {
      try {
        return decodeURIComponent(value);
      }
      catch (err) {
        return value;
      }
    };

    const isSearchboxSsl = this.configManager.getConfig('crowi', 'app:searchboxSslUrl') != null;
    const url = new URL(this.esUri);

    if (url.pathname !== '/') {
      // WHATWG URL objects expose `username` / `password`; they have no `auth` property
      const hasCredentials = url.username !== '' || url.password !== '';
      const authPrefix = hasCredentials ? `${url.username}:${url.password}@` : '';

      host = isSearchboxSsl
        ? `${url.protocol}//${authPrefix}${url.hostname}:443` // use 443 when Searchbox
        : `${url.protocol}//${url.host}`;
      indexName = url.pathname.substring(1); // omit heading slash

      if (hasCredentials) {
        httpAuth = `${decode(url.username)}:${decode(url.password)}`;
      }
    }

    this.client = new elasticsearch.Client({
      host,
      httpAuth,
      requestTimeout: 5000,
      // log: 'debug',
    });
    this.indexName = indexName;
    this.aliasName = `${this.indexName}-alias`;
  };
  /**
   * Request general information from Elasticsearch.
   *
   * @returns whatever `client.info({})` returns
   */
  SearchClient.prototype.getInfo = function() {
    const { client } = this;
    return client.info({});
  };
  /**
   * Query node information and record node names, versions and plugins on the instance.
   *
   * `esNodeNames` / `esVersions` / `esPlugins` are rebuilt on every successful call so that
   * repeated calls (e.g. one per index rebuild) do not accumulate duplicate entries.
   * `esNodeName` / `esVersion` / `esPlugin` hold the values of the last node listed.
   *
   * Errors are logged and swallowed; in that case the previously stored values are kept.
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.checkESVersion = async function() {
    try {
      const nodes = await this.client.nodes.info();
      if (!nodes._nodes || !nodes.nodes) {
        throw new Error('no nodes info');
      }

      const entries = Object.entries(nodes.nodes);

      this.esNodeNames = entries.map(([nodeName]) => nodeName);
      this.esVersions = entries.map(([, nodeInfo]) => nodeInfo.version);
      this.esPlugins = entries.map(([, nodeInfo]) => nodeInfo.plugins);

      if (entries.length > 0) {
        const [lastNodeName, lastNodeInfo] = entries[entries.length - 1];
        this.esNodeName = lastNodeName;
        this.esVersion = lastNodeInfo.version;
        this.esPlugin = lastNodeInfo.plugins;
      }
    }
    catch (error) {
      logger.error('es check version error:', error);
    }
  };
  /**
   * Subscribe the sync* handlers to the 'page', 'bookmark' and 'tag' events of crowi,
   * each bound to this instance.
   */
  SearchClient.prototype.registerUpdateEvent = function() {
    const subscriptions = [
      ['page', [
        ['create', 'syncPageUpdated'],
        ['update', 'syncPageUpdated'],
        ['delete', 'syncPageDeleted'],
      ]],
      ['bookmark', [
        ['create', 'syncBookmarkChanged'],
        ['delete', 'syncBookmarkChanged'],
      ]],
      ['tag', [
        ['update', 'syncTagChanged'],
      ]],
    ];

    subscriptions.forEach(([eventName, handlers]) => {
      const emitter = this.crowi.event(eventName);
      handlers.forEach(([action, methodName]) => {
        emitter.on(action, this[methodName].bind(this));
      });
    });
  };
  /**
   * Tell whether a page belongs in the index: it needs a creator and a revision,
   * and must not be a redirect.
   *
   * @param {object} page
   * @returns {boolean}
   */
  SearchClient.prototype.shouldIndexed = function(page) {
    if (page.creator == null) {
      return false;
    }
    if (page.revision == null) {
      return false;
    }
    return page.redirectTo == null;
  };
  /**
   * Bring the indices into their expected initial state:
   * refresh node info, drop a leftover "-tmp" index, and create the main index
   * and its alias when they do not exist yet.
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.initIndices = async function() {
    await this.checkESVersion();

    const es = this.client;
    const index = this.indexName;
    const alias = this.aliasName;
    const tmpIndex = `${index}-tmp`;

    // remove tmp index
    const hasTmpIndex = await es.indices.exists({ index: tmpIndex });
    if (hasTmpIndex) {
      await es.indices.delete({ index: tmpIndex });
    }

    // create index
    const hasIndex = await es.indices.exists({ index });
    if (!hasIndex) {
      await this.createIndex(index);
    }

    // create alias
    const hasAlias = await es.indices.existsAlias({ name: alias, index });
    if (!hasAlias) {
      await es.indices.putAlias({ name: alias, index });
    }
  };
  /**
   * Create an index, using the contents of `this.mappingFile` as the request body.
   *
   * @param {string} index name of the index to create
   * @returns {Promise} result of `client.indices.create()`
   */
  SearchClient.prototype.createIndex = async function(index) {
    const mappings = require(this.mappingFile);
    const request = { index, body: mappings };
    return this.client.indices.create(request);
  };
  /**
   * Rebuild the index. The Searchbox variant is used when the
   * 'app:searchboxSslUrl' config value is set, the default variant otherwise.
   *
   * @returns {Promise}
   */
  SearchClient.prototype.buildIndex = async function() {
    await this.initIndices();

    const searchboxSslUrl = this.configManager.getConfig('crowi', 'app:searchboxSslUrl');
    if (searchboxSslUrl != null) {
      return this.buildIndexForSearchbox();
    }
    return this.buildIndexDefault();
  };
  /**
   * Rebuild the main index while the alias temporarily points at a "-tmp" copy.
   *
   * Steps: create the tmp index and start a reindex into it (not waiting for completion),
   * point the alias at tmp, drop and recreate the main index, add all pages,
   * point the alias back at the main index and finally drop tmp.
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.buildIndexDefault = async function() {
    const es = this.client;
    const mainIndex = this.indexName;
    const alias = this.aliasName;
    const tmpIndex = `${mainIndex}-tmp`;

    // moves the alias from one index to the other in a single request
    const switchAlias = (toIndex, fromIndex) => {
      return es.indices.updateAliases({
        body: {
          actions: [
            { add: { alias, index: toIndex } },
            { remove: { alias, index: fromIndex } },
          ],
        },
      });
    };

    // reindex to tmp index
    await this.createIndex(tmpIndex);
    await es.reindex({
      waitForCompletion: false,
      body: {
        source: { index: mainIndex },
        dest: { index: tmpIndex },
      },
    });

    // update alias
    await switchAlias(tmpIndex, mainIndex);

    // flush index
    await es.indices.delete({ index: mainIndex });
    await this.createIndex(mainIndex);
    await this.addAllPages();

    // update alias
    await switchAlias(mainIndex, tmpIndex);

    // remove tmp index
    await es.indices.delete({ index: tmpIndex });
  };
  /**
   * Rebuild the main index in place (no tmp index): drop it, recreate it,
   * add all pages and (re-)attach the alias.
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.buildIndexForSearchbox = async function() {
    const es = this.client;
    const index = this.indexName;
    const alias = this.aliasName;

    // flush index
    await es.indices.delete({ index });
    await this.createIndex(index);
    await this.addAllPages();

    // update alias
    const actions = [
      { add: { alias, index } },
    ];
    await es.indices.updateAliases({ body: { actions } });
  };
  /**
   * Build the part of an index document that is derived from page.grant*.
   *
   * Granted users / the granted group may be given either as ids or as objects
   * carrying an `_id`; in both cases the id is stringified.
   *
   * @param {object} page
   * @returns {{grant: *, granted_users: (string[]|null), granted_group: (string|null)}}
   */
  function generateDocContentsRelatedToRestriction(page) {
    const toIdString = (ref) => {
      const id = (ref._id == null) ? ref : ref._id;
      return id.toString();
    };

    const hasGrantedUsers = page.grantedUsers != null && page.grantedUsers.length > 0;
    const grantedUserIds = hasGrantedUsers
      ? page.grantedUsers.map(user => toIdString(user))
      : null;

    const grantedGroupId = (page.grantedGroup != null)
      ? toIdString(page.grantedGroup)
      : null;

    return {
      grant: page.grant,
      granted_users: grantedUserIds,
      granted_group: grantedGroupId,
    };
  }
  /**
   * Append the bulk "index" command and the document for one page to a bulk request body.
   *
   * @param {Array} body bulk request body; two entries (command, document) are pushed onto it
   * @param {object} page page with `creator` and `revision` available
   *   (`creator.username` and `revision.body` are read)
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForCreate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const command = {
      index: {
        _index: this.indexName,
        _type: 'pages',
        _id: page._id.toString(),
      },
    };

    const document = {
      path: page.path,
      body: page.revision.body,
      username: page.creator.username,
      comment_count: page.commentCount,
      bookmark_count: page.bookmarkCount || 0,
      // `liker` may be absent on the page object; count it as zero likes instead of throwing
      like_count: (page.liker || []).length,
      created_at: page.createdAt,
      updated_at: page.updatedAt,
      tag_names: page.tagNames,
      ...generateDocContentsRelatedToRestriction(page),
    };

    body.push(command);
    body.push(document);
  };
  /**
   * Append the bulk "delete" command for one page to a bulk request body.
   *
   * @param {Array} body bulk request body; one entry is pushed onto it
   * @param {object} page page whose `_id` identifies the document
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForDelete = function(body, page) {
    const isArrayBody = Array.isArray(body);
    if (!isArrayBody) {
      throw new Error('Body must be an array.');
    }

    const target = {
      _index: this.indexName,
      _type: 'pages',
      _id: page._id.toString(),
    };
    body.push({ delete: target });
  };
  /**
   * Index every page found by `Page.find()`, emitting progress events.
   *
   * @returns {Promise} result of updateOrInsertPages()
   */
  SearchClient.prototype.addAllPages = async function() {
    const PageModel = this.crowi.model('Page');
    const findEveryPage = () => PageModel.find();
    return this.updateOrInsertPages(findEveryPage, true);
  };
  /**
   * Index the single page found by `Page.findById(pageId)`.
   *
   * @param {*} pageId id passed to `Page.findById()`
   * @returns {Promise} result of updateOrInsertPages()
   */
  SearchClient.prototype.updateOrInsertPageById = async function(pageId) {
    const PageModel = this.crowi.model('Page');
    const findThatPage = () => PageModel.findById(pageId);
    return this.updateOrInsertPages(findThatPage);
  };
  /**
   * Index (create or overwrite) the pages selected by a query, streaming them through:
   * cursor -> drop pages that should not be indexed -> batch -> append bookmark counts
   * -> append tag names -> bulk write.
   *
   * A failing bulk request (or a failure while building its body) is logged and the next
   * batch is processed. A failure in any other stage destroys the write stream, so the
   * returned promise rejects instead of never settling.
   *
   * @param {function} queryFactory factory method to generate a Mongoose Query instance
   *   (invoked twice: once for the find query, once for the count query)
   * @param {boolean} [isEmittingProgressEvent=false] when true, 'addPageProgress' and
   *   'finishAddPage' are emitted on the search event emitter
   * @returns {Promise} promise created by streamToPromise() for the write stream
   */
  SearchClient.prototype.updateOrInsertPages = async function(queryFactory, isEmittingProgressEvent = false) {
    const Page = this.crowi.model('Page');
    const { PageQueryBuilder } = Page;
    const Bookmark = this.crowi.model('Bookmark');
    const PageTagRelation = this.crowi.model('PageTagRelation');

    const searchEvent = this.searchEvent;

    // prepare functions invoked from custom streams
    const prepareBodyForCreate = this.prepareBodyForCreate.bind(this);
    const shouldIndexed = this.shouldIndexed.bind(this);
    const bulkWrite = this.client.bulk.bind(this.client);

    const findQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;
    const countQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;

    const totalCount = await countQuery.count();

    const readStream = findQuery
      // populate data which will be referenced by prepareBodyForCreate()
      .populate([
        { path: 'creator', model: 'User', select: 'username' },
        { path: 'revision', model: 'Revision', select: 'body' },
      ])
      .snapshot()
      .lean()
      .cursor();

    let skipped = 0;
    const thinOutStream = new Transform({
      objectMode: true,
      transform(doc, encoding, callback) {
        if (shouldIndexed(doc)) {
          this.push(doc);
        }
        else {
          skipped++;
        }
        callback();
      },
    });

    const batchStream = createBatchStream(BULK_REINDEX_SIZE);

    // Creates a transform that looks up a { pageId: value } map for each batch and stores
    // the value on every document that has an entry in the map.
    const createAppendStream = (fetchIdToValueMap, fieldName) => {
      return new Transform({
        objectMode: true,
        async transform(chunk, encoding, callback) {
          try {
            const pageIds = chunk.map(doc => doc._id);
            const idToValueMap = await fetchIdToValueMap(pageIds);

            chunk.forEach((doc) => {
              const id = doc._id.toString();
              if (Object.prototype.hasOwnProperty.call(idToValueMap, id)) {
                doc[fieldName] = idToValueMap[id];
              }
            });

            callback(null, chunk);
          }
          catch (err) {
            // report the failure to the stream; otherwise callback would never be invoked
            callback(err);
          }
        },
      });
    };

    const appendBookmarkCountStream = createAppendStream(
      pageIds => Bookmark.getPageIdToCountMap(pageIds),
      'bookmarkCount',
    );
    const appendTagNamesStream = createAppendStream(
      pageIds => PageTagRelation.getIdToTagNamesMap(pageIds),
      'tagNames',
    );

    let count = 0;
    const writeStream = new Writable({
      objectMode: true,
      async write(batch, encoding, callback) {
        try {
          const body = [];
          batch.forEach(doc => prepareBodyForCreate(body, doc));

          const res = await bulkWrite({
            body,
            requestTimeout: Infinity,
          });

          count += (res.items || []).length;

          logger.info(`Adding pages progressing: (count=${count}, errors=${res.errors}, took=${res.took}ms)`);

          if (isEmittingProgressEvent) {
            searchEvent.emit('addPageProgress', totalCount, count, skipped);
          }
        }
        catch (err) {
          logger.error('addAllPages error on add anyway: ', err);
        }

        callback();
      },
      final(callback) {
        logger.info(`Adding pages has terminated: (totalCount=${totalCount}, skipped=${skipped})`);

        if (isEmittingProgressEvent) {
          searchEvent.emit('finishAddPage', totalCount, count, skipped);
        }
        callback();
      },
    });

    // pipe() does not forward errors downstream; hand them to the write stream explicitly
    [readStream, thinOutStream, batchStream, appendBookmarkCountStream, appendTagNamesStream].forEach((stream) => {
      stream.on('error', (err) => {
        writeStream.destroy(err);
      });
    });

    readStream
      .pipe(thinOutStream)
      .pipe(batchStream)
      .pipe(appendBookmarkCountStream)
      .pipe(appendTagNamesStream)
      .pipe(writeStream);

    return streamToPromise(writeStream);
  };
  /**
   * Remove the documents of the given pages with one bulk request.
   *
   * @param {Array<object>} pages pages to remove (each needs an `_id`)
   * @returns result of `client.bulk()`
   */
  SearchClient.prototype.deletePages = function(pages) {
    const body = [];
    pages.forEach((page) => {
      this.prepareBodyForDelete(body, page);
    });

    logger.debug('deletePages(): Sending Request to ES', body);

    return this.client.bulk({ body });
  };
  /**
   * Run a search request and reduce the response to meta information plus hits.
   *
   * search returning type:
   * {
   *   meta: { took: Integer, total: Integer, results: Integer },
   *   data: [ { _id, _score, _source } ... ],
   * }
   *
   * When NODE_ENV is 'development' the query is sent to validateQuery first and the
   * returned explanations are written to the debug log.
   *
   * @param {object} query request object passed to `client.search()`
   */
  SearchClient.prototype.search = async function(query) {
    const isDevelopment = process.env.NODE_ENV === 'development';

    // for debug
    if (isDevelopment) {
      const validation = await this.client.indices.validateQuery({
        explain: true,
        body: {
          query: query.body.query,
        },
      });
      logger.debug('ES returns explanations: ', validation.explanations);
    }

    const response = await this.client.search(query);

    // for debug
    logger.debug('ES result: ', response);

    const meta = {
      took: response.took,
      total: response.hits.total,
      results: response.hits.hits.length,
    };
    const data = response.hits.hits.map(({ _id, _score, _source }) => ({ _id, _score, _source }));

    return { meta, data };
  };
  /**
   * Create a search request skeleton sorted by `updated_at` (newest first) with the
   * default offset/limit applied. `body.query` is left empty for the caller to fill.
   *
   * @param {object} [option] `option.fields` overrides the returned `_source` fields
   * @returns {object} request object for `client.search()`
   */
  SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option) {
    // getting path by default is almost for debug
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const sourceFields = (option && option.fields) || defaultFields;

    const request = {
      index: this.aliasName,
      type: 'pages',
      body: {
        sort: [{ updated_at: { order: 'desc' } }],
        query: {}, // query
        _source: sourceFields,
      },
    };
    this.appendResultSize(request);

    return request;
  };
  /**
   * Create a search request skeleton sorted by `_score` (highest first) with the
   * default offset/limit applied. `body.query` is left empty for the caller to fill.
   *
   * @param {object} [option] `option.fields` overrides the returned `_source` fields
   * @returns {object} request object for `client.search()`
   */
  SearchClient.prototype.createSearchQuerySortedByScore = function(option) {
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const sourceFields = (option && option.fields) || defaultFields;

    // sort by score
    const request = {
      index: this.aliasName,
      type: 'pages',
      body: {
        sort: [{ _score: { order: 'desc' } }],
        query: {}, // query
        _source: sourceFields,
      },
    };
    this.appendResultSize(request);

    return request;
  };
  /**
   * Set paging on a search request. Falsy values fall back to
   * DEFAULT_OFFSET / DEFAULT_LIMIT.
   *
   * @param {object} query search request to modify
   * @param {number} [from] offset of the first hit
   * @param {number} [size] maximum number of hits
   */
  SearchClient.prototype.appendResultSize = function(query, from, size) {
    const offset = from || this.DEFAULT_OFFSET;
    const limit = size || this.DEFAULT_LIMIT;
    Object.assign(query, { from: offset, size: limit });
  };
  /**
   * Make sure `query.body.query.bool` exists and that its `filter`, `must` and
   * `must_not` clauses are arrays. Existing arrays are kept as they are.
   *
   * @param {object} query created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
   * @returns {object} the same `query` object
   */
  SearchClient.prototype.initializeBoolQuery = function(query) {
    const container = query.body.query;
    if (!container.bool) {
      container.bool = {};
    }

    ['filter', 'must', 'must_not'].forEach((clause) => {
      if (!Array.isArray(container.bool[clause])) {
        container.bool[clause] = [];
      }
    });

    return query;
  };
  /**
   * Parse a search string with parseQueryString() and append the resulting
   * clauses to the bool query of `query`:
   *  - plain words      -> must / must_not (multi_match)
   *  - quoted phrases   -> must / must_not (one phrase multi_match per phrase)
   *  - prefix: / tag:   -> filter
   *
   * Each phrase query is pushed as its own clause; the clause lists never contain
   * nested arrays.
   *
   * @param {object} query search request to modify
   * @param {string} queryString search string
   */
  SearchClient.prototype.appendCriteriaForQueryString = function(query, queryString) {
    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign
    const { bool } = query.body.query;

    // parse
    const parsedKeywords = this.parseQueryString(queryString);

    const toPhraseQuery = (phrase) => {
      return {
        multi_match: {
          query: phrase, // each phrase is quoted words
          type: 'phrase',
          fields: [
            // Not use "*.ja" fields here, because we want to analyze (parse) search words
            'path.raw^2',
            'body',
          ],
        },
      };
    };
    const toPrefixQuery = path => ({ prefix: { 'path.raw': path } });
    const toTagQuery = tag => ({ term: { tag_names: tag } });

    if (parsedKeywords.match.length > 0) {
      bool.must.push({
        multi_match: {
          query: parsedKeywords.match.join(' '),
          type: 'most_fields',
          fields: ['path.ja^2', 'path.en^2', 'body.ja', 'body.en'],
        },
      });
    }

    if (parsedKeywords.not_match.length > 0) {
      bool.must_not.push({
        multi_match: {
          query: parsedKeywords.not_match.join(' '),
          fields: ['path.ja', 'path.en', 'body.ja', 'body.en'],
          operator: 'or',
        },
      });
    }

    if (parsedKeywords.phrase.length > 0) {
      // spread: every phrase becomes a separate clause
      bool.must.push(...parsedKeywords.phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.not_phrase.length > 0) {
      bool.must_not.push(...parsedKeywords.not_phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.prefix.length > 0) {
      bool.filter.push({ bool: { should: parsedKeywords.prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.not_prefix.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.tag.length > 0) {
      bool.filter.push({ bool: { must: parsedKeywords.tag.map(toTagQuery) } });
    }

    if (parsedKeywords.not_tag.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_tag.map(toTagQuery) } });
    }
  };
  /**
   * Append a filter that limits hits to pages the viewer may see in lists.
   *
   * Public pages always match. Restricted pages match only when the viewer is one of the
   * granted users. Owner/specified pages and user-group pages match for everybody unless
   * the corresponding 'security:list-policy:hideRestrictedBy*' config value is set, in
   * which case they match only for granted users / the viewer's groups.
   *
   * @param {object} query search request to modify
   * @param {object} [user] viewer; its `_id` is compared with `granted_users`
   * @param {Array<object>} [userGroups] groups of the viewer; their `_id`s are compared with `granted_group`
   * @returns {Promise<void>}
   */
  SearchClient.prototype.filterPagesByViewer = async function(query, user, userGroups) {
    const showPagesRestrictedByOwner = !this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByOwner');
    const showPagesRestrictedByGroup = !this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByGroup');

    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign

    const Page = this.crowi.model('Page');
    const {
      GRANT_PUBLIC, GRANT_RESTRICTED, GRANT_SPECIFIED, GRANT_OWNER, GRANT_USER_GROUP,
    } = Page;

    const isViewerKnown = user != null;

    // matches pages having the given grant
    const byGrant = grant => ({ term: { grant } });
    // matches pages having the given grant AND listing the viewer as granted user
    const byGrantForViewer = (grant) => {
      return {
        bool: {
          must: [
            byGrant(grant),
            { term: { granted_users: user._id.toString() } },
          ],
        },
      };
    };

    const grantConditions = [byGrant(GRANT_PUBLIC)];

    // ensure to hit to GRANT_RESTRICTED pages that the user specified at own
    if (isViewerKnown) {
      grantConditions.push(byGrantForViewer(GRANT_RESTRICTED));
    }

    if (showPagesRestrictedByOwner) {
      grantConditions.push(byGrant(GRANT_SPECIFIED), byGrant(GRANT_OWNER));
    }
    else if (isViewerKnown) {
      grantConditions.push(byGrantForViewer(GRANT_SPECIFIED), byGrantForViewer(GRANT_OWNER));
    }

    if (showPagesRestrictedByGroup) {
      grantConditions.push(byGrant(GRANT_USER_GROUP));
    }
    else if (userGroups != null && userGroups.length > 0) {
      const userGroupIds = userGroups.map(group => group._id.toString());
      grantConditions.push({
        bool: {
          must: [
            byGrant(GRANT_USER_GROUP),
            { terms: { granted_group: userGroupIds } },
          ],
        },
      });
    }

    query.body.query.bool.filter.push({ bool: { should: grantConditions } });
  };
  /**
   * Restrict the query to portal pages: exclude `queries.USER`, require `queries.PORTAL`.
   *
   * @param {object} query search request to modify
   */
  SearchClient.prototype.filterPortalPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    bool.must_not.push(this.queries.USER);
    bool.filter.push(this.queries.PORTAL);
  };
  /**
   * Restrict the query to public pages: exclude `queries.USER`, require `queries.PUBLIC`.
   *
   * @param {object} query search request to modify
   */
  SearchClient.prototype.filterPublicPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    bool.must_not.push(this.queries.USER);
    bool.filter.push(this.queries.PUBLIC);
  };
  /**
   * Restrict the query to user pages by requiring `queries.USER`.
   *
   * @param {object} query search request to modify
   */
  SearchClient.prototype.filterUserPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    bool.filter.push(this.queries.USER);
  };
  /**
   * Apply the page-type filter matching `type` (one of the Page.TYPE_* constants).
   *
   * @param {object} query search request to modify
   * @param {*} type page type; unknown values leave the query untouched
   * @returns the result of the selected filter method, or `query` itself for an unknown type
   */
  SearchClient.prototype.filterPagesByType = function(query, type) {
    const Page = this.crowi.model('Page');

    if (type === Page.TYPE_PORTAL) {
      return this.filterPortalPages(query);
    }
    if (type === Page.TYPE_PUBLIC) {
      return this.filterPublicPages(query);
    }
    if (type === Page.TYPE_USER) {
      return this.filterUserPages(query);
    }
    return query;
  };
  /**
   * Wrap the current query in a function_score query that adds a bonus derived from
   * `bookmark_count`, scaled by the number of users.
   *
   * The user count is awaited before it is used; without awaiting, the result of
   * `User.count({})` was used directly in the division and the factor became NaN.
   * (Assumes `User.count()` resolves to a number, like the awaited `count()` call in
   * updateOrInsertPages — a plain number works as well.)
   *
   * @param {object} query search request to modify (`query.body.query` is replaced)
   * @param {string} queryString search string (only used for the debug log of min_score)
   * @returns {Promise<void>} resolves once `query` has been modified
   */
  SearchClient.prototype.appendFunctionScore = async function(query, queryString) {
    const User = this.crowi.model('User');
    const count = (await User.count({})) || 1; // avoid division by zero

    const minScore = queryString.length * 0.1 - 1; // increase with length
    logger.debug('min_score: ', minScore);

    query.body.query = {
      function_score: {
        query: { ...query.body.query },
        // // disable min_score -- 2019.02.28 Yuki Takei
        // // more precise adjustment is needed...
        // min_score: minScore,
        field_value_factor: {
          field: 'bookmark_count',
          modifier: 'log1p',
          factor: 10000 / count,
          missing: 0,
        },
        boost_mode: 'sum',
      },
    };
  };

  /**
   * Search pages by keyword for the given viewer.
   *
   * @param {string} queryString search string, see parseQueryString() for the syntax
   * @param {object} [user] viewer
   * @param {Array<object>} [userGroups] groups of the viewer
   * @param {object} [option] `offset`, `limit` and `type` (page type) are read; may be omitted
   * @returns {Promise<object>} result of search()
   */
  SearchClient.prototype.searchKeyword = async function(queryString, user, userGroups, option) {
    const { offset, limit, type: pageType } = option || {};
    const from = offset || null;
    const size = limit || null;
    const type = pageType || null;

    const query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForQueryString(query, queryString);

    this.filterPagesByType(query, type);
    await this.filterPagesByViewer(query, user, userGroups);

    this.appendResultSize(query, from, size);

    // must be awaited: the function score is applied asynchronously
    await this.appendFunctionScore(query, queryString);

    return this.search(query);
  };
  /**
   * Split a search string into keyword groups.
   *
   * Syntax (a leading "-" negates an item):
   *  - "quoted words"  -> phrase / not_phrase (the surrounding quotes are kept)
   *  - prefix:PATH     -> prefix / not_prefix
   *  - tag:NAME        -> tag / not_tag
   *  - anything else   -> match / not_match
   *
   * @param {string} queryString search string
   * @returns {{match: string[], not_match: string[], phrase: string[], not_phrase: string[],
   *   prefix: string[], not_prefix: string[], tag: string[], not_tag: string[]}}
   */
  SearchClient.prototype.parseQueryString = function(queryString) {
    const matchWords = [];
    const notMatchWords = [];
    const phraseWords = [];
    const notPhraseWords = [];
    const prefixPaths = [];
    const notPrefixPaths = [];
    const tags = [];
    const notTags = [];

    const phrasePattern = /(-?"[^"]+")/g;

    // trim and collapse whitespace (the trimmed result must be kept; trim() does not mutate)
    const normalized = queryString.trim().replace(/\s+/g, ' ');

    // First: Parse phrase keywords
    const phrases = normalized.match(phrasePattern) || [];
    phrases.forEach((phrase) => {
      if (phrase.startsWith('-')) {
        notPhraseWords.push(phrase.replace(/^-/, ''));
      }
      else {
        phraseWords.push(phrase);
      }
    });
    const withoutPhrases = normalized.replace(phrasePattern, '');

    // Second: Parse other keywords (include minus keywords)
    withoutPhrases.split(' ').forEach((word) => {
      if (word === '') {
        return;
      }

      // https://regex101.com/r/pN9XfK/1
      const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
      // https://regex101.com/r/3qw9FQ/1
      const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);

      if (matchNegative != null) {
        if (matchNegative[1] === 'prefix:') {
          notPrefixPaths.push(matchNegative[2]);
        }
        else if (matchNegative[1] === 'tag:') {
          notTags.push(matchNegative[2]);
        }
        else {
          notMatchWords.push(matchNegative[2]);
        }
      }
      else if (matchPositive != null) {
        if (matchPositive[1] === 'prefix:') {
          prefixPaths.push(matchPositive[2]);
        }
        else if (matchPositive[1] === 'tag:') {
          tags.push(matchPositive[2]);
        }
        else {
          matchWords.push(matchPositive[2]);
        }
      }
    });

    return {
      match: matchWords,
      not_match: notMatchWords,
      phrase: phraseWords,
      not_phrase: notPhraseWords,
      prefix: prefixPaths,
      not_prefix: notPrefixPaths,
      tag: tags,
      not_tag: notTags,
    };
  };
  /**
   * Event handler for page creation/update: re-index the page, or remove it from the
   * index when it should not be indexed. Errors of the removal are logged and swallowed.
   *
   * @param {object} page created/updated page
   * @param {object} user not used
   */
  SearchClient.prototype.syncPageUpdated = async function(page, user) {
    logger.debug('SearchClient.syncPageUpdated', page.path);

    if (this.shouldIndexed(page)) {
      return this.updateOrInsertPageById(page._id);
    }

    // delete if page should not indexed
    try {
      await this.deletePages([page]);
    }
    catch (err) {
      logger.error('deletePages:ES Error', err);
    }
  };
  /**
   * Event handler for page deletion: remove the page from the index.
   * Errors are logged and swallowed (the promise then resolves with undefined).
   *
   * @param {object} page deleted page
   * @param {object} user not used
   */
  SearchClient.prototype.syncPageDeleted = async function(page, user) {
    debug('SearchClient.syncPageDeleted', page.path);

    let bulkResult;
    try {
      bulkResult = await this.deletePages([page]);
    }
    catch (err) {
      logger.error('deletePages:ES Error', err);
      return;
    }
    return bulkResult;
  };
  /**
   * Event handler for bookmark creation/deletion: re-index the bookmarked page.
   *
   * @param {*} pageId id of the page whose bookmarks changed
   * @returns {Promise} result of updateOrInsertPageById()
   */
  SearchClient.prototype.syncBookmarkChanged = async function(pageId) {
    logger.debug('SearchClient.syncBookmarkChanged', pageId);

    const reindexing = this.updateOrInsertPageById(pageId);
    return reindexing;
  };
  /**
   * Event handler for tag updates: re-index the page.
   *
   * @param {object} page page whose tags changed
   * @returns {Promise} result of updateOrInsertPageById()
   */
  SearchClient.prototype.syncTagChanged = async function(page) {
    logger.debug('SearchClient.syncTagChanged', page.path);

    const reindexing = this.updateOrInsertPageById(page._id);
    return reindexing;
  };
  // The module exports the SearchClient constructor itself.
  module.exports = SearchClient;