search.js 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919
  1. /**
  2. * Search
  3. */
  4. const elasticsearch = require('elasticsearch');
  5. const debug = require('debug')('growi:lib:search');
  6. const logger = require('@alias/logger')('growi:lib:search');
  7. const {
  8. Writable, Transform,
  9. } = require('stream');
  10. const streamToPromise = require('stream-to-promise');
  11. const { createBatchStream } = require('./batch-stream');
  12. const BULK_REINDEX_SIZE = 100;
  /**
   * Search client backed by Elasticsearch.
   *
   * Stores paging defaults, placeholders for node information, the canned path
   * queries, and an Elasticsearch client built from the given URI. It also
   * subscribes to application events via registerUpdateEvent().
   *
   * @param {object} crowi application context; event(), configManager and resourceDir are read here
   * @param {string} esUri Elasticsearch URI, handed to parseUri() to obtain host and index name
   */
  function SearchClient(crowi, esUri) {
    // paging defaults
    this.DEFAULT_OFFSET = 0;
    this.DEFAULT_LIMIT = 50;

    // node information; placeholders until checkESVersion() has run
    this.esNodeName = '-';
    this.esNodeNames = [];
    this.esVersion = 'unknown';
    this.esVersions = [];
    this.esPlugin = [];
    this.esPlugins = [];

    this.esUri = esUri;
    this.crowi = crowi;
    this.searchEvent = crowi.event('search');
    this.configManager = crowi.configManager;

    // In Elasticsearch regexp queries the pattern is always anchored, so ^ and $ are not needed.
    // Ref: https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-regexp-query.html#_standard_operators
    const pathField = 'path.raw';
    this.queries = {
      PORTAL: { regexp: { [pathField]: '.*/' } },
      PUBLIC: { regexp: { [pathField]: '.*[^/]' } },
      USER: { prefix: { [pathField]: '/user/' } },
    };

    const { host, indexName } = this.parseUri(this.esUri);
    this.host = host;
    this.indexName = indexName;
    this.aliasName = `${indexName}-alias`;

    const clientOptions = {
      host,
      requestTimeout: 5000,
    };
    this.client = new elasticsearch.Client(clientOptions);

    this.registerUpdateEvent();

    this.mappingFile = `${crowi.resourceDir}search/mappings.json`;
  }
  /**
   * Ask the Elasticsearch client for its info.
   *
   * @returns the value returned by `client.info({})`
   */
  SearchClient.prototype.getInfo = function() {
    const { client } = this;
    return client.info({});
  };
  /**
   * Query node information and cache node names, versions and plugins on this instance.
   *
   * The plural fields (esNodeNames / esVersions / esPlugins) are rebuilt on every
   * successful call, so calling this repeatedly (initIndices() does so each time it
   * runs) no longer accumulates duplicate entries. The singular fields hold the
   * values of the last node that was iterated.
   *
   * Failures are logged and not rethrown; cached values are then left as they were.
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.checkESVersion = async function() {
    try {
      const nodes = await this.client.nodes.info();
      if (!nodes._nodes || !nodes.nodes) {
        throw new Error('no nodes info');
      }

      // collect into fresh arrays and publish them only after the whole response was read
      const nodeNames = [];
      const versions = [];
      const plugins = [];
      for (const [nodeName, nodeInfo] of Object.entries(nodes.nodes)) {
        this.esNodeName = nodeName;
        this.esVersion = nodeInfo.version;
        this.esPlugin = nodeInfo.plugins;
        nodeNames.push(nodeName);
        versions.push(nodeInfo.version);
        plugins.push(nodeInfo.plugins);
      }
      this.esNodeNames = nodeNames;
      this.esVersions = versions;
      this.esPlugins = plugins;
    }
    catch (error) {
      logger.error('es check version error:', error);
    }
  };
  /**
   * Subscribe the sync* handlers to the 'page', 'bookmark' and 'tag' event emitters
   * obtained from crowi.event(). Every handler is bound to this instance.
   */
  SearchClient.prototype.registerUpdateEvent = function() {
    const subscribe = (emitter, eventNames, handler) => {
      eventNames.forEach((eventName) => {
        emitter.on(eventName, handler.bind(this));
      });
    };

    const pageEvent = this.crowi.event('page');
    subscribe(pageEvent, ['create', 'update'], this.syncPageUpdated);
    subscribe(pageEvent, ['delete'], this.syncPageDeleted);

    subscribe(this.crowi.event('bookmark'), ['create', 'delete'], this.syncBookmarkChanged);

    subscribe(this.crowi.event('tag'), ['update'], this.syncTagChanged);
  };
  /**
   * Decide whether a page belongs in the index.
   *
   * @param {object} page page document
   * @returns {boolean} true when creator and revision are set and redirectTo is not
   */
  SearchClient.prototype.shouldIndexed = function(page) {
    const { creator, revision, redirectTo } = page;
    if (creator == null || revision == null) {
      return false;
    }
    return redirectTo == null;
  };
  /**
   * Split an Elasticsearch URI into host and index name.
   *
   * BONSAI_URL has the following format:
   *   => https://{ID}:{PASSWORD}@{HOST}
   *
   * When the URI has the shape "http(s)://{authority}/{rest}", {rest} is taken as
   * the index name. Otherwise the whole URI is the host and the index name is 'crowi'.
   *
   * @param {string} uri
   * @returns {{host: string, indexName: string}}
   */
  SearchClient.prototype.parseUri = function(uri) {
    const matched = uri.match(/^(https?:\/\/[^/]+)\/(.+)$/);
    if (matched == null) {
      return { host: uri, indexName: 'crowi' };
    }

    const [, host, indexName] = matched;
    return { host, indexName };
  };
  /**
   * Bring the indices into their expected initial state:
   * refresh node information, drop a leftover "<index>-tmp" index, create the
   * main index when it is missing, and create the alias when it is missing.
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.initIndices = async function() {
    await this.checkESVersion();

    const { indices } = this.client;
    const index = this.indexName;
    const name = this.aliasName;
    const tmpIndex = `${index}-tmp`;

    // remove tmp index
    if (await indices.exists({ index: tmpIndex })) {
      await indices.delete({ index: tmpIndex });
    }

    // create index
    const hasIndex = await indices.exists({ index });
    if (!hasIndex) {
      await this.createIndex(index);
    }

    // create alias
    const hasAlias = await indices.existsAlias({ name, index });
    if (!hasAlias) {
      await indices.putAlias({ name, index });
    }
  };
  /**
   * Create an index whose request body is the content of the mapping file.
   *
   * @param {string} index name of the index to create
   * @returns {Promise} result of `client.indices.create()`
   */
  SearchClient.prototype.createIndex = async function(index) {
    const mappings = require(this.mappingFile);
    const params = { index, body: mappings };
    return this.client.indices.create(params);
  };
  /**
   * Rebuild the main index from scratch while the alias keeps answering from a
   * temporary copy.
   *
   * Steps: initIndices() -> copy the main index into "<index>-tmp" -> point the
   * alias at the copy -> delete, recreate and refill the main index via
   * addAllPages() -> point the alias back -> delete the copy.
   *
   * The reindex request now waits for completion. Previously it was fired with
   * `waitForCompletion: false` and the source index was deleted right afterwards,
   * so the temporary index could be incomplete while the alias pointed at it.
   * The per-request timeout is lifted because copying may outlast the client default.
   *
   * @param {string} [uri] unused; kept for backward compatibility
   * @returns {Promise<void>}
   */
  SearchClient.prototype.buildIndex = async function(uri) {
    await this.initIndices();

    const { client, indexName } = this;
    const aliasName = `${indexName}-alias`;
    const tmpIndexName = `${indexName}-tmp`;

    // moves the alias from one index to the other in a single request
    const switchAlias = (fromIndex, toIndex) => {
      return client.indices.updateAliases({
        body: {
          actions: [
            { add: { alias: aliasName, index: toIndex } },
            { remove: { alias: aliasName, index: fromIndex } },
          ],
        },
      });
    };

    // reindex to tmp index and wait until the copy is complete
    await this.createIndex(tmpIndexName);
    await client.reindex({
      waitForCompletion: true,
      requestTimeout: Infinity,
      body: {
        source: { index: indexName },
        dest: { index: tmpIndexName },
      },
    });

    // update alias
    await switchAlias(indexName, tmpIndexName);

    // flush index
    await client.indices.delete({
      index: indexName,
    });
    await this.createIndex(indexName);
    await this.addAllPages();

    // update alias
    await switchAlias(tmpIndexName, indexName);

    // remove tmp index
    await client.indices.delete({ index: tmpIndexName });
  };
  /**
   * Build the part of an index document that is derived from page.grant*.
   *
   * Granted users and the granted group may be given either as objects carrying
   * an `_id` or as the id values themselves; both are normalised with toString().
   *
   * @param {object} page
   * @returns {{grant: *, granted_users: (string[]|null), granted_group: (string|null)}}
   */
  function generateDocContentsRelatedToRestriction(page) {
    const toIdString = (ref) => {
      const id = (ref._id == null) ? ref : ref._id;
      return id.toString();
    };

    const { grantedUsers, grantedGroup } = page;
    const hasGrantedUsers = grantedUsers != null && grantedUsers.length > 0;

    return {
      grant: page.grant,
      granted_users: hasGrantedUsers ? grantedUsers.map(ref => toIdString(ref)) : null,
      granted_group: (grantedGroup != null) ? toIdString(grantedGroup) : null,
    };
  }
  /**
   * Append the bulk "index" command and the document for one page to `body`.
   *
   * `page.creator` and `page.revision` are dereferenced unconditionally, so both
   * must be populated. `page.liker` may be absent: updateOrInsertPages() reads
   * pages with lean(), which does not apply schema defaults, so a missing liker
   * list is counted as 0 instead of throwing.
   *
   * @param {Array} body bulk request body; two entries are pushed onto it
   * @param {object} page page document
   * @throws {Error} when body is not an array
   */
  SearchClient.prototype.prepareBodyForCreate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const command = {
      index: {
        _index: this.indexName,
        _type: 'pages',
        _id: page._id.toString(),
      },
    };

    const bookmarkCount = page.bookmarkCount || 0;
    const likeCount = (page.liker != null && page.liker.length) || 0;

    const document = {
      path: page.path,
      body: page.revision.body,
      username: page.creator.username,
      comment_count: page.commentCount,
      bookmark_count: bookmarkCount,
      like_count: likeCount,
      created_at: page.createdAt,
      updated_at: page.updatedAt,
      tag_names: page.tagNames,
    };
    Object.assign(document, generateDocContentsRelatedToRestriction(page));

    body.push(command);
    body.push(document);
  };
  /**
   * Append the bulk "delete" command for one page to `body`.
   *
   * @param {Array} body bulk request body; one entry is pushed onto it
   * @param {object} page page document; only `_id` is read
   * @throws {Error} when body is not an array
   */
  SearchClient.prototype.prepareBodyForDelete = function(body, page) {
    const isArray = Array.isArray(body);
    if (!isArray) {
      throw new Error('Body must be an array.');
    }

    const target = {
      _index: this.indexName,
      _type: 'pages',
      _id: page._id.toString(),
    };
    body.push({ delete: target });
  };
  /**
   * Index every page, with progress events enabled.
   *
   * @returns {Promise} result of updateOrInsertPages()
   */
  SearchClient.prototype.addAllPages = async function() {
    const Page = this.crowi.model('Page');
    const findAllPages = () => Page.find();
    return this.updateOrInsertPages(findAllPages, true);
  };
  /**
   * Index (or re-index) the single page with the given id.
   *
   * @param {*} pageId id passed to Page.findById()
   * @returns {Promise} result of updateOrInsertPages()
   */
  SearchClient.prototype.updateOrInsertPageById = async function(pageId) {
    const Page = this.crowi.model('Page');
    const findTargetPage = () => Page.findById(pageId);
    return this.updateOrInsertPages(findTargetPage);
  };
  /**
   * Stream pages from the database and write them to Elasticsearch in bulk.
   *
   * Pipeline: cursor -> drop pages rejected by shouldIndexed() -> group into
   * batches of BULK_REINDEX_SIZE -> attach bookmark counts -> attach tag names
   * -> bulk write.
   *
   * Error handling:
   *  - a failing bulk request (or a failure while building its body) is logged and
   *    the next batch is still processed ("add anyway", as before);
   *  - a failure anywhere upstream (cursor, count/tag lookups, ...) is passed to the
   *    stream callback and forwarded to the writable stream, so the promise handed
   *    to streamToPromise() sees the error instead of waiting forever.
   *
   * @param {function} queryFactory factory method to generate a Mongoose Query instance
   * @param {boolean} [isEmittingProgressEvent=false] when true, 'addPageProgress' and
   *   'finishAddPage' are emitted on the search event with (totalCount, count, skipped)
   * @returns {Promise} the value of streamToPromise() for the writable stream
   */
  SearchClient.prototype.updateOrInsertPages = async function(queryFactory, isEmittingProgressEvent = false) {
    const Page = this.crowi.model('Page');
    const { PageQueryBuilder } = Page;
    const Bookmark = this.crowi.model('Bookmark');
    const PageTagRelation = this.crowi.model('PageTagRelation');

    const searchEvent = this.searchEvent;

    // prepare functions invoked from custom streams
    const prepareBodyForCreate = this.prepareBodyForCreate.bind(this);
    const shouldIndexed = this.shouldIndexed.bind(this);
    const bulkWrite = this.client.bulk.bind(this.client);

    // the factory is invoked twice: one query is used for counting, the other for the cursor
    const findQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;
    const countQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;

    const totalCount = await countQuery.count();

    const readStream = findQuery
      // populate data which will be referenced by prepareBodyForCreate()
      .populate([
        { path: 'creator', model: 'User', select: 'username' },
        { path: 'revision', model: 'Revision', select: 'body' },
      ])
      .snapshot()
      .lean()
      .cursor();

    let skipped = 0;
    const thinOutStream = new Transform({
      objectMode: true,
      transform(doc, encoding, callback) {
        try {
          if (shouldIndexed(doc)) {
            this.push(doc);
          }
          else {
            skipped++;
          }
          callback();
        }
        catch (err) {
          callback(err);
        }
      },
    });

    const batchStream = createBatchStream(BULK_REINDEX_SIZE);

    // builds a transform that copies `map[doc._id]` to `doc[fieldName]` for every doc of a batch
    // that has an entry in the map returned by `fetchMap(pageIds)`
    const createAppendStream = (fetchMap, fieldName) => {
      return new Transform({
        objectMode: true,
        async transform(chunk, encoding, callback) {
          try {
            const pageIds = chunk.map(doc => doc._id);
            const idToValueMap = await fetchMap(pageIds);

            chunk.forEach((doc) => {
              const id = doc._id.toString();
              // direct key lookup instead of scanning the key list for every doc
              if (Object.prototype.hasOwnProperty.call(idToValueMap, id)) {
                doc[fieldName] = idToValueMap[id];
              }
            });

            callback(null, chunk);
          }
          catch (err) {
            // without this the rejection would be unhandled and the pipeline would stall
            callback(err);
          }
        },
      });
    };

    const appendBookmarkCountStream = createAppendStream(
      pageIds => Bookmark.getPageIdToCountMap(pageIds), 'bookmarkCount',
    );
    const appendTagNamesStream = createAppendStream(
      pageIds => PageTagRelation.getIdToTagNamesMap(pageIds), 'tagNames',
    );

    let count = 0;
    const writeStream = new Writable({
      objectMode: true,
      async write(batch, encoding, callback) {
        try {
          const body = [];
          batch.forEach(doc => prepareBodyForCreate(body, doc));

          const res = await bulkWrite({
            body,
            requestTimeout: Infinity,
          });

          count += (res.items || []).length;

          logger.info(`Adding pages progressing: (count=${count}, errors=${res.errors}, took=${res.took}ms)`);

          if (isEmittingProgressEvent) {
            searchEvent.emit('addPageProgress', totalCount, count, skipped);
          }
        }
        catch (err) {
          // best effort: report and go on with the next batch
          logger.error('addAllPages error on add anyway: ', err);
        }

        callback();
      },
      final(callback) {
        logger.info(`Adding pages has terminated: (totalCount=${totalCount}, skipped=${skipped})`);

        if (isEmittingProgressEvent) {
          searchEvent.emit('finishAddPage', totalCount, count, skipped);
        }
        callback();
      },
    });

    // pipe() does not forward errors downstream; hand them to the stream the caller is waiting on
    [readStream, thinOutStream, batchStream, appendBookmarkCountStream, appendTagNamesStream].forEach((stream) => {
      stream.on('error', err => writeStream.destroy(err));
    });

    readStream
      .pipe(thinOutStream)
      .pipe(batchStream)
      .pipe(appendBookmarkCountStream)
      .pipe(appendTagNamesStream)
      .pipe(writeStream);

    return streamToPromise(writeStream);
  };
  /**
   * Remove the given pages from the index with one bulk request.
   *
   * @param {Array<object>} pages page documents; each is handed to prepareBodyForDelete()
   * @returns the value returned by `client.bulk()`
   */
  SearchClient.prototype.deletePages = function(pages) {
    const body = [];
    pages.forEach((page) => {
      this.prepareBodyForDelete(body, page);
    });

    logger.debug('deletePages(): Sending Request to ES', body);

    return this.client.bulk({ body });
  };
  /**
   * Run a search request and reduce the response to its essentials.
   *
   * Returned shape:
   *   {
   *     meta: { took: Integer, total: Integer, results: Integer },
   *     data: [ { _id, _score, _source }, ... ],
   *   }
   *
   * When NODE_ENV is 'development', the query is first sent to the validate API
   * with `explain: true` and the explanations are logged.
   *
   * @param {object} query request passed to `client.search()`
   * @returns {Promise<object>}
   */
  SearchClient.prototype.search = async function(query) {
    // for debug
    const isDevelopment = process.env.NODE_ENV === 'development';
    if (isDevelopment) {
      const validation = await this.client.indices.validateQuery({
        explain: true,
        body: { query: query.body.query },
      });
      logger.debug('ES returns explanations: ', validation.explanations);
    }

    const response = await this.client.search(query);

    // for debug
    logger.debug('ES result: ', response);

    const { took, hits } = response;
    const data = hits.hits.map(({ _id, _score, _source }) => ({ _id, _score, _source }));

    return {
      meta: {
        took,
        total: hits.total,
        results: hits.hits.length,
      },
      data,
    };
  };
  /**
   * Create a search request skeleton sorted by updated_at (newest first).
   * The query part is left empty and the default paging is applied.
   *
   * @param {object} [option] `option.fields` overrides the returned _source fields
   * @returns {object} request object for the alias, type 'pages'
   */
  SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option) {
    // returning path by default is mostly for debugging
    const fields = (option && option.fields) || ['path', 'bookmark_count', 'tag_names'];

    const body = {
      sort: [{ updated_at: { order: 'desc' } }],
      query: {}, // filled in by the append*/filter* methods
      _source: fields,
    };
    const query = { index: this.aliasName, type: 'pages', body };

    this.appendResultSize(query);

    return query;
  };
  /**
   * Create a search request skeleton sorted by _score (highest first).
   * The query part is left empty and the default paging is applied.
   *
   * @param {object} [option] `option.fields` overrides the returned _source fields
   * @returns {object} request object for the alias, type 'pages'
   */
  SearchClient.prototype.createSearchQuerySortedByScore = function(option) {
    const fields = (option && option.fields) || ['path', 'bookmark_count', 'tag_names'];

    const body = {
      sort: [{ _score: { order: 'desc' } }],
      query: {}, // filled in by the append*/filter* methods
      _source: fields,
    };
    const query = { index: this.aliasName, type: 'pages', body };

    this.appendResultSize(query);

    return query;
  };
  /**
   * Set paging on a search request. Falsy values fall back to DEFAULT_OFFSET / DEFAULT_LIMIT.
   *
   * @param {object} query request object, modified in place
   * @param {number} [from] offset
   * @param {number} [size] page size
   */
  SearchClient.prototype.appendResultSize = function(query, from, size) {
    const offset = from || this.DEFAULT_OFFSET;
    const limit = size || this.DEFAULT_LIMIT;
    Object.assign(query, { from: offset, size: limit });
  };
  /**
   * Make sure `query.body.query.bool` exists and that its `filter`, `must` and
   * `must_not` clauses are arrays. Existing arrays are kept as they are.
   *
   * @param {object} query request created by createSearchQuerySortedByScore() or
   *   createSearchQuerySortedByUpdatedAt(); modified in place
   * @returns {object} the same query object
   */
  SearchClient.prototype.initializeBoolQuery = function(query) {
    const container = query.body.query;
    if (!container.bool) {
      container.bool = {};
    }

    const { bool } = container;
    ['filter', 'must', 'must_not'].forEach((clause) => {
      if (!Array.isArray(bool[clause])) {
        bool[clause] = [];
      }
    });

    return query;
  };
  /**
   * Translate a user query string into bool clauses and append them to `query`.
   *
   *  - plain words        -> one multi_match in `must`
   *  - negated words      -> one multi_match in `must_not`
   *  - phrases            -> one phrase multi_match per phrase in `must`
   *  - negated phrases    -> one phrase multi_match per phrase in `must_not`
   *  - prefix / tag terms -> nested bool queries in `filter`
   *
   * Phrase clauses are pushed one by one. They used to be pushed as a single
   * nested array, which put an array where a query object is expected.
   *
   * @param {object} query request object, modified in place
   * @param {string} queryString raw user input, parsed by parseQueryString()
   */
  SearchClient.prototype.appendCriteriaForQueryString = function(query, queryString) {
    const { bool } = this.initializeBoolQuery(query).body.query;

    // parse
    const parsedKeywords = this.parseQueryString(queryString);

    const toPhraseQuery = (phrase) => {
      return {
        multi_match: {
          query: phrase, // each phrase is a quoted string
          type: 'phrase',
          fields: [
            // "*.ja" fields are not used here, because we want to analyze (parse) search words
            'path.raw^2',
            'body',
          ],
        },
      };
    };
    const toPrefixQuery = (path) => {
      return { prefix: { 'path.raw': path } };
    };
    const toTagQuery = (tag) => {
      return { term: { tag_names: tag } };
    };

    if (parsedKeywords.match.length > 0) {
      bool.must.push({
        multi_match: {
          query: parsedKeywords.match.join(' '),
          type: 'most_fields',
          fields: ['path.ja^2', 'path.en^2', 'body.ja', 'body.en'],
        },
      });
    }

    if (parsedKeywords.not_match.length > 0) {
      bool.must_not.push({
        multi_match: {
          query: parsedKeywords.not_match.join(' '),
          fields: ['path.ja', 'path.en', 'body.ja', 'body.en'],
          operator: 'or',
        },
      });
    }

    if (parsedKeywords.phrase.length > 0) {
      bool.must.push(...parsedKeywords.phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.not_phrase.length > 0) {
      bool.must_not.push(...parsedKeywords.not_phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.prefix.length > 0) {
      bool.filter.push({ bool: { should: parsedKeywords.prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.not_prefix.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.tag.length > 0) {
      bool.filter.push({ bool: { must: parsedKeywords.tag.map(toTagQuery) } });
    }

    if (parsedKeywords.not_tag.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_tag.map(toTagQuery) } });
    }
  };
  /**
   * Append a filter that limits hits by the page's grant.
   *
   * Public pages always match. GRANT_RESTRICTED pages match only when the user is
   * listed in granted_users. Pages restricted to an owner / specified users / a
   * user group match unconditionally unless the corresponding "hideRestrictedBy*"
   * config is set; in that case they match only for the granted user or for one of
   * the given user groups.
   *
   * @param {object} query request object, modified in place
   * @param {object} [user] viewer; `_id` is read
   * @param {Array<object>} [userGroups] groups of the viewer; `_id` of each is read
   * @returns {Promise<void>}
   */
  SearchClient.prototype.filterPagesByViewer = async function(query, user, userGroups) {
    const readListPolicy = key => this.configManager.getConfig('crowi', key);
    const showPagesRestrictedByOwner = !readListPolicy('security:list-policy:hideRestrictedByOwner');
    const showPagesRestrictedByGroup = !readListPolicy('security:list-policy:hideRestrictedByGroup');

    const { bool } = this.initializeBoolQuery(query).body.query;

    const {
      GRANT_PUBLIC, GRANT_RESTRICTED, GRANT_SPECIFIED, GRANT_OWNER, GRANT_USER_GROUP,
    } = this.crowi.model('Page');

    const byGrant = grant => ({ term: { grant } });
    // only called when user is not null
    const byGrantAndViewer = grant => ({
      bool: {
        must: [
          byGrant(grant),
          { term: { granted_users: user._id.toString() } },
        ],
      },
    });

    const hasUser = user != null;
    const grantConditions = [byGrant(GRANT_PUBLIC)];

    // ensure GRANT_RESTRICTED pages granted to the viewer are hit
    if (hasUser) {
      grantConditions.push(byGrantAndViewer(GRANT_RESTRICTED));
    }

    if (showPagesRestrictedByOwner) {
      grantConditions.push(byGrant(GRANT_SPECIFIED), byGrant(GRANT_OWNER));
    }
    else if (hasUser) {
      grantConditions.push(byGrantAndViewer(GRANT_SPECIFIED), byGrantAndViewer(GRANT_OWNER));
    }

    if (showPagesRestrictedByGroup) {
      grantConditions.push(byGrant(GRANT_USER_GROUP));
    }
    else if (userGroups != null && userGroups.length > 0) {
      const userGroupIds = userGroups.map(group => group._id.toString());
      grantConditions.push({
        bool: {
          must: [
            byGrant(GRANT_USER_GROUP),
            { terms: { granted_group: userGroupIds } },
          ],
        },
      });
    }

    bool.filter.push({ bool: { should: grantConditions } });
  };
  /**
   * Restrict hits to portal pages: exclude the USER query, require the PORTAL query.
   *
   * @param {object} query request object, modified in place
   */
  SearchClient.prototype.filterPortalPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PORTAL } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PORTAL);
  };
  /**
   * Restrict hits to public pages: exclude the USER query, require the PUBLIC query.
   *
   * @param {object} query request object, modified in place
   */
  SearchClient.prototype.filterPublicPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PUBLIC } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PUBLIC);
  };
  /**
   * Restrict hits to user pages by requiring the USER query.
   *
   * @param {object} query request object, modified in place
   */
  SearchClient.prototype.filterUserPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER } = this.queries;

    bool.filter.push(USER);
  };
  /**
   * Apply the filter that corresponds to a page type.
   *
   * @param {object} query request object
   * @param {*} type compared strictly with Page.TYPE_PORTAL / TYPE_PUBLIC / TYPE_USER
   * @returns the return value of the selected filter method, or `query` itself
   *   when the type matches none of them
   */
  SearchClient.prototype.filterPagesByType = function(query, type) {
    const Page = this.crowi.model('Page');

    if (type === Page.TYPE_PORTAL) {
      return this.filterPortalPages(query);
    }
    if (type === Page.TYPE_PUBLIC) {
      return this.filterPublicPages(query);
    }
    if (type === Page.TYPE_USER) {
      return this.filterUserPages(query);
    }
    return query;
  };
  /**
   * Wrap the current query in a function_score query that adds a bookmark_count
   * based boost (log1p of bookmark_count, factor 10000 / number of users).
   *
   * The user count comes from `User.count({})`, which is awaited here. Without
   * awaiting, `count` was not a number and `factor` evaluated to NaN. Because of
   * that this method is asynchronous: callers must await it before sending the query.
   *
   * @param {object} query request object, modified in place
   * @param {string} queryString only used to log the (currently disabled) min_score
   * @returns {Promise<void>}
   */
  SearchClient.prototype.appendFunctionScore = async function(query, queryString) {
    const User = this.crowi.model('User');
    // fall back to 1 so that an empty user collection does not divide by zero
    const count = (await User.count({})) || 1;

    const minScore = queryString.length * 0.1 - 1; // increase with length

    logger.debug('min_score: ', minScore);

    query.body.query = {
      function_score: {
        query: { ...query.body.query },
        // min_score is disabled -- 2019.02.28 Yuki Takei
        // more precise adjustment is needed before `min_score: minScore` can be used
        field_value_factor: {
          field: 'bookmark_count',
          modifier: 'log1p',
          factor: 10000 / count,
          missing: 0,
        },
        boost_mode: 'sum',
      },
    };
  };

  /**
   * Search pages by a user query string, restricted to what the viewer may see.
   *
   * @param {string} queryString raw user input
   * @param {object} [user] viewer
   * @param {Array<object>} [userGroups] groups of the viewer
   * @param {object} [option] `offset`, `limit` and `type`; a missing option object
   *   is treated as empty
   * @returns {Promise<object>} result of search()
   */
  SearchClient.prototype.searchKeyword = async function(queryString, user, userGroups, option) {
    const opts = option || {};
    const from = opts.offset || null;
    const size = opts.limit || null;
    const type = opts.type || null;

    const query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForQueryString(query, queryString);

    this.filterPagesByType(query, type);
    await this.filterPagesByViewer(query, user, userGroups);

    this.appendResultSize(query, from, size);

    // must be awaited: the function score is appended asynchronously
    await this.appendFunctionScore(query, queryString);

    return this.search(query);
  };
  /**
   * Split a user query string into keyword groups.
   *
   * Syntax:
   *   word          -> match          -word          -> not_match
   *   "a phrase"    -> phrase         -"a phrase"    -> not_phrase
   *   prefix:/path  -> prefix         -prefix:/path  -> not_prefix
   *   tag:name      -> tag            -tag:name      -> not_tag
   *
   * Phrases keep their surrounding double quotes. Whitespace is trimmed and
   * collapsed first; the trim results used to be discarded. null / undefined is
   * treated as an empty query.
   *
   * @param {string} queryString
   * @returns {{match: string[], not_match: string[], phrase: string[], not_phrase: string[],
   *   prefix: string[], not_prefix: string[], tag: string[], not_tag: string[]}}
   */
  SearchClient.prototype.parseQueryString = function(queryString) {
    const parsed = {
      match: [],
      not_match: [],
      phrase: [],
      not_phrase: [],
      prefix: [],
      not_prefix: [],
      tag: [],
      not_tag: [],
    };

    const source = (queryString == null) ? '' : String(queryString);
    let rest = source.trim().replace(/\s+/g, ' ');

    // First: Parse phrase keywords
    const phraseRegExp = /(-?"[^"]+")/g;
    const phrases = rest.match(phraseRegExp);

    if (phrases !== null) {
      rest = rest.replace(phraseRegExp, '');

      phrases.forEach((phrase) => {
        if (phrase.startsWith('-')) {
          parsed.not_phrase.push(phrase.slice(1));
        }
        else {
          parsed.phrase.push(phrase);
        }
      });
    }

    // Second: Parse other keywords (include minus keywords)
    rest.split(' ').forEach((word) => {
      if (word === '') {
        return;
      }

      // https://regex101.com/r/pN9XfK/1
      const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
      // https://regex101.com/r/3qw9FQ/1
      const matched = (matchNegative != null) ? matchNegative : word.match(/^(prefix:|tag:)?(.+)$/);
      if (matched == null) {
        return;
      }

      const [, operator, value] = matched;

      let group = 'match';
      if (operator === 'prefix:') {
        group = 'prefix';
      }
      else if (operator === 'tag:') {
        group = 'tag';
      }

      const key = (matchNegative != null) ? `not_${group}` : group;
      parsed[key].push(value);
    });

    return parsed;
  };
  /**
   * Event handler for page creation / update.
   *
   * A page accepted by shouldIndexed() is (re-)indexed. Any other page is removed
   * from the index, and a failing removal is logged, not thrown.
   *
   * @param {object} page
   * @param {object} [user] unused
   * @returns {Promise} result of updateOrInsertPageById(), or undefined after a removal
   */
  SearchClient.prototype.syncPageUpdated = async function(page, user) {
    logger.debug('SearchClient.syncPageUpdated', page.path);

    if (this.shouldIndexed(page)) {
      return this.updateOrInsertPageById(page._id);
    }

    // the page should not be indexed: delete it
    try {
      await this.deletePages([page]);
    }
    catch (err) {
      logger.error('deletePages:ES Error', err);
    }
  };
  /**
   * Event handler for page deletion: remove the page from the index.
   * A failing removal is logged, not thrown.
   *
   * @param {object} page
   * @param {object} [user] unused
   * @returns {Promise} result of deletePages(), or undefined when it failed
   */
  SearchClient.prototype.syncPageDeleted = async function(page, user) {
    debug('SearchClient.syncPageDeleted', page.path);

    let result;
    try {
      result = await this.deletePages([page]);
    }
    catch (err) {
      logger.error('deletePages:ES Error', err);
    }
    return result;
  };
  /**
   * Event handler for bookmark creation / deletion: re-index the bookmarked page.
   *
   * @param {*} pageId id of the page whose bookmarks changed
   * @returns {Promise} result of updateOrInsertPageById()
   */
  SearchClient.prototype.syncBookmarkChanged = async function(pageId) {
    logger.debug('SearchClient.syncBookmarkChanged', pageId);

    const result = this.updateOrInsertPageById(pageId);
    return result;
  };
  /**
   * Event handler for tag updates: re-index the page.
   *
   * @param {object} page page whose tags changed; `path` and `_id` are read
   * @returns {Promise} result of updateOrInsertPageById()
   */
  SearchClient.prototype.syncTagChanged = async function(page) {
    const { path, _id: pageId } = page;
    logger.debug('SearchClient.syncTagChanged', path);

    return this.updateOrInsertPageById(pageId);
  };
  772. module.exports = SearchClient;