// search.js
  1. /**
  2. * Search
  3. */
  4. const elasticsearch = require('elasticsearch');
  5. const debug = require('debug')('growi:lib:search');
  6. const logger = require('@alias/logger')('growi:lib:search');
  7. const BULK_REINDEX_SIZE = 100;
  /**
   * Search client backed by Elasticsearch.
   *
   * Splits `esUri` into a host and an index name (see parseUri()), creates the
   * Elasticsearch client and subscribes to the application events that keep
   * the index in sync (see registerUpdateEvent()).
   *
   * @param {object} crowi application context; `event()`, `configManager` and `resourceDir` are read here
   * @param {string} esUri Elasticsearch URI handed to parseUri()
   */
  function SearchClient(crowi, esUri) {
    // paging defaults used by appendResultSize()
    this.DEFAULT_OFFSET = 0;
    this.DEFAULT_LIMIT = 50;

    // node information; placeholders until checkESVersion() fills them in
    this.esNodeName = '-';
    this.esNodeNames = [];
    this.esVersion = 'unknown';
    this.esVersions = [];
    this.esPlugin = [];
    this.esPlugins = [];

    this.esUri = esUri;
    this.crowi = crowi;
    this.searchEvent = crowi.event('search');
    this.configManager = this.crowi.configManager;

    // Elasticsearch regexps are implicitly anchored, so ^ and $ are not needed.
    // Ref: https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-regexp-query.html#_standard_operators
    const portalQuery = { regexp: { 'path.raw': '.*/' } };
    const publicQuery = { regexp: { 'path.raw': '.*[^/]' } };
    const userQuery = { prefix: { 'path.raw': '/user/' } };
    this.queries = {
      PORTAL: portalQuery,
      PUBLIC: publicQuery,
      USER: userQuery,
    };

    const { host, indexName } = this.parseUri(this.esUri);
    this.host = host;
    this.indexName = indexName;
    this.aliasName = `${this.indexName}-alias`;

    const clientOptions = {
      host: this.host,
      requestTimeout: 5000,
      // log: 'debug',
    };
    this.client = new elasticsearch.Client(clientOptions);

    this.registerUpdateEvent();

    this.mappingFile = `${crowi.resourceDir}search/mappings.json`;
  }
  /**
   * Ask the Elasticsearch client for its `info()`.
   *
   * @returns {*} whatever `client.info({})` returns
   */
  SearchClient.prototype.getInfo = function() {
    const { client } = this;
    return client.info({});
  };
  /**
   * Read node name, version and plugins of every node from `client.nodes.info()`
   * and store them on this instance.
   *
   * The list properties (`esNodeNames`, `esVersions`, `esPlugins`) are rebuilt
   * on every call, so calling this method repeatedly does not accumulate
   * duplicate entries. The singular properties hold the values of the last
   * node returned.
   *
   * Failures are logged and swallowed; the previously stored values are kept.
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.checkESVersion = async function() {
    try {
      const nodes = await this.client.nodes.info();
      if (!nodes._nodes || !nodes.nodes) {
        throw new Error('no nodes info');
      }

      const entries = Object.entries(nodes.nodes);

      // build fresh lists first, assign afterwards: a malformed entry must not leave half-updated state
      const nodeNames = entries.map(([nodeName]) => nodeName);
      const versions = entries.map(([, nodeInfo]) => nodeInfo.version);
      const plugins = entries.map(([, nodeInfo]) => nodeInfo.plugins);

      this.esNodeNames = nodeNames;
      this.esVersions = versions;
      this.esPlugins = plugins;

      if (entries.length > 0) {
        const last = entries.length - 1;
        this.esNodeName = nodeNames[last];
        this.esVersion = versions[last];
        this.esPlugin = plugins[last];
      }
    }
    catch (error) {
      logger.error('es check version error:', error);
    }
  };
  /**
   * Subscribe the sync* handlers of this client to the 'page', 'bookmark' and
   * 'tag' event emitters obtained from `crowi.event()`.
   */
  SearchClient.prototype.registerUpdateEvent = function() {
    // emitter name -> event name -> handler (bound to this client on registration)
    const subscriptions = {
      page: {
        create: this.syncPageCreated,
        update: this.syncPageUpdated,
        updateTag: this.syncPageUpdated,
        delete: this.syncPageDeleted,
      },
      bookmark: {
        create: this.syncBookmarkChanged,
        delete: this.syncBookmarkChanged,
      },
      tag: {
        update: this.syncTagChanged,
      },
    };

    Object.entries(subscriptions).forEach(([emitterName, handlers]) => {
      const emitter = this.crowi.event(emitterName);
      Object.entries(handlers).forEach(([eventName, handler]) => {
        emitter.on(eventName, handler.bind(this));
      });
    });
  };
  /**
   * Tell whether a page belongs in the index: only pages whose `redirectTo`
   * is null or undefined do.
   *
   * @param {object} page
   * @returns {boolean}
   */
  SearchClient.prototype.shouldIndexed = function(page) {
    const { redirectTo } = page;
    return redirectTo === null || redirectTo === undefined;
  };
  /**
   * Split an Elasticsearch URI into the host part and the index name.
   *
   * A URI of the form `http(s)://{authority}/{indexName}` yields both parts;
   * anything else is used as the host unchanged with the default index name
   * 'crowi'.
   *
   * BONSAI_URL has the following format:
   *   => https://{ID}:{PASSWORD}@{HOST}
   *
   * @param {string} uri
   * @returns {{host: string, indexName: string}}
   */
  SearchClient.prototype.parseUri = function(uri) {
    const matched = uri.match(/^(https?:\/\/[^/]+)\/(.+)$/);
    if (!matched) {
      return { host: uri, indexName: 'crowi' };
    }

    const [, host, indexName] = matched;
    return { host, indexName };
  };
  /**
   * Bring the indices into their expected initial state:
   *   1. refresh the node information (checkESVersion())
   *   2. delete a leftover `{indexName}-tmp` index if one exists
   *   3. create the main index if it does not exist
   *   4. put the alias on the main index if it is not there
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.initIndices = async function() {
    await this.checkESVersion();

    const { client, indexName, aliasName } = this;
    const tmpIndexName = `${indexName}-tmp`;

    // remove tmp index
    const tmpIndexExists = await client.indices.exists({ index: tmpIndexName });
    if (tmpIndexExists) {
      await client.indices.delete({ index: tmpIndexName });
    }

    // create index
    const indexExists = await client.indices.exists({ index: indexName });
    if (!indexExists) {
      await this.createIndex(indexName);
    }

    // create alias
    const aliasTarget = { name: aliasName, index: indexName };
    const aliasExists = await client.indices.existsAlias({ name: aliasName, index: indexName });
    if (!aliasExists) {
      await client.indices.putAlias(aliasTarget);
    }
  };
  /**
   * Create an index, using the content of `this.mappingFile` as request body.
   *
   * @param {string} index name of the index to create
   * @returns {Promise<*>} result of `client.indices.create()`
   */
  SearchClient.prototype.createIndex = async function(index) {
    const mappingDefinition = require(this.mappingFile);
    const request = { index, body: mappingDefinition };
    return this.client.indices.create(request);
  };
  /**
   * Rebuild the main index from scratch while the alias keeps pointing at a
   * populated index:
   *   1. copy the main index into `{indexName}-tmp` and wait for the copy
   *   2. point the alias at the tmp index
   *   3. delete and recreate the main index, then re-add all pages
   *   4. point the alias back at the main index and delete the tmp index
   *
   * The reindex request waits for completion. Started in the background, the
   * following steps would switch the alias to a partially filled tmp index
   * and delete the source index while it is still being copied. Because the
   * copy may take longer than the client's default timeout, the request
   * timeout is disabled for this call (as addAllPages() does for bulk
   * requests).
   *
   * @param {string} [uri] unused; kept for backward compatibility
   * @returns {Promise<void>}
   */
  SearchClient.prototype.buildIndex = async function(uri) {
    await this.initIndices();

    const { client, indexName } = this;
    const aliasName = `${indexName}-alias`;
    const tmpIndexName = `${indexName}-tmp`;

    // atomically move the alias from one index to another
    const switchAlias = (toIndex, fromIndex) => {
      return client.indices.updateAliases({
        body: {
          actions: [
            { add: { alias: aliasName, index: toIndex } },
            { remove: { alias: aliasName, index: fromIndex } },
          ],
        },
      });
    };

    // reindex to tmp index
    await this.createIndex(tmpIndexName);
    await client.reindex({
      waitForCompletion: true,
      requestTimeout: Infinity,
      body: {
        source: { index: indexName },
        dest: { index: tmpIndexName },
      },
    });

    // update alias
    await switchAlias(tmpIndexName, indexName);

    // flush index
    await client.indices.delete({
      index: indexName,
    });
    await this.createIndex(indexName);
    await this.addAllPages();

    // update alias
    await switchAlias(indexName, tmpIndexName);

    // remove tmp index
    await client.indices.delete({ index: tmpIndexName });
  };
  /**
   * Generate the part of an index document that is related to page.grant*.
   *
   * Granted users and the granted group may each be given either as an object
   * carrying `_id` or as the id itself; both are converted with `toString()`.
   *
   * @param {object} page
   * @returns {{grant: *, granted_users: (string[]|null), granted_group: (string|null)}}
   *   `granted_users` is null when the page has no (or an empty) `grantedUsers`,
   *   `granted_group` is null when the page has no `grantedGroup`
   */
  function generateDocContentsRelatedToRestriction(page) {
    // accept both a populated reference and a bare id
    const stringifyId = (ref) => {
      const id = (ref._id == null) ? ref : ref._id;
      return id.toString();
    };

    const { grantedUsers, grantedGroup } = page;
    const hasGrantedUsers = grantedUsers != null && grantedUsers.length > 0;

    return {
      grant: page.grant,
      granted_users: hasGrantedUsers ? grantedUsers.map((user) => { return stringifyId(user) }) : null,
      granted_group: (grantedGroup != null) ? stringifyId(grantedGroup) : null,
    };
  }
  /**
   * Append the two bulk entries (action line + partial document) that upsert
   * `page` through the alias.
   *
   * @param {Array} body bulk request body; mutated in place
   * @param {object} page page with `revision`, `liker` and `tagNames` available
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForUpdate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const action = {
      update: {
        _index: this.aliasName,
        _type: 'pages',
        _id: page._id.toString(),
      },
    };

    const doc = {
      path: page.path,
      body: page.revision.body,
      comment_count: page.commentCount,
      bookmark_count: page.bookmarkCount || 0,
      like_count: page.liker.length || 0,
      updated_at: page.updatedAt,
      tag_names: page.tagNames,
      ...generateDocContentsRelatedToRestriction(page),
    };

    // doc_as_upsert: the document is created when it is not indexed yet
    body.push(action, { doc, doc_as_upsert: true });
  };
  /**
   * Append the two bulk entries (action line + full document) that index
   * `page`. The action addresses `this.indexName` directly, not the alias.
   *
   * @param {Array} body bulk request body; mutated in place
   * @param {object} page page with `revision`, `creator`, `liker` and `tagNames` available
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForCreate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const action = {
      index: {
        _index: this.indexName,
        _type: 'pages',
        _id: page._id.toString(),
      },
    };

    const doc = {
      path: page.path,
      body: page.revision.body,
      username: page.creator.username,
      comment_count: page.commentCount,
      bookmark_count: page.bookmarkCount || 0,
      like_count: page.liker.length || 0,
      created_at: page.createdAt,
      updated_at: page.updatedAt,
      tag_names: page.tagNames,
      ...generateDocContentsRelatedToRestriction(page),
    };

    body.push(action, doc);
  };
  /**
   * Append the bulk action line that deletes `page` through the alias.
   *
   * @param {Array} body bulk request body; mutated in place
   * @param {object} page only `_id` is read
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForDelete = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const target = {
      _index: this.aliasName,
      _type: 'pages',
      _id: page._id.toString(),
    };
    body.push({ delete: target });
  };
  /**
   * Index the given pages with a single bulk request.
   *
   * For each page the bookmark count and the tag names are looked up and
   * written onto the page object (`bookmarkCount`, `tagNames`) before the
   * bulk entries are generated by prepareBodyForCreate().
   *
   * @param {object[]} pages pages to index; each one is mutated
   * @returns {Promise<*>} result of `client.bulk()`
   */
  SearchClient.prototype.addPages = async function(pages) {
    const { crowi, client } = this;
    const Bookmark = crowi.model('Bookmark');
    const PageTagRelation = crowi.model('PageTagRelation');

    const bulkBody = [];

    /* eslint-disable no-await-in-loop */
    for (const page of pages) {
      page.bookmarkCount = await Bookmark.countByPageId(page._id);

      const relations = await PageTagRelation.find({ relatedPage: page._id }).populate('relatedTag');
      page.tagNames = relations.map(({ relatedTag }) => { return relatedTag.name });

      this.prepareBodyForCreate(bulkBody, page);
    }
    /* eslint-enable no-await-in-loop */

    logger.debug('addPages(): Sending Request to ES', bulkBody);
    return client.bulk({ body: bulkBody });
  };
  /**
   * Update (upsert) the given pages with a single bulk request.
   *
   * For each page the tag names are looked up and written onto the page
   * object (`tagNames`) before the bulk entries are generated by
   * prepareBodyForUpdate().
   *
   * @param {object[]} pages pages to update; each one is mutated
   * @returns {Promise<*>} result of `client.bulk()`
   */
  SearchClient.prototype.updatePages = async function(pages) {
    const PageTagRelation = this.crowi.model('PageTagRelation');

    const bulkBody = [];

    /* eslint-disable no-await-in-loop */
    for (const page of pages) {
      const relations = await PageTagRelation.find({ relatedPage: page._id }).populate('relatedTag');
      page.tagNames = relations.map(({ relatedTag }) => { return relatedTag.name });

      this.prepareBodyForUpdate(bulkBody, page);
    }
    /* eslint-enable no-await-in-loop */

    logger.debug('updatePages(): Sending Request to ES', bulkBody);
    return this.client.bulk({ body: bulkBody });
  };
  /**
   * Remove the given pages from the index with a single bulk request.
   *
   * @param {object[]} pages pages to delete; only `_id` is read (see prepareBodyForDelete())
   * @returns {*} result of `client.bulk()`
   */
  SearchClient.prototype.deletePages = function(pages) {
    const body = [];

    // plain iteration: the entries are collected in `body`, no mapped array is needed
    for (const page of pages) {
      this.prepareBodyForDelete(body, page);
    }

    logger.debug('deletePages(): Sending Request to ES', body);
    return this.client.bulk({
      body,
    });
  };
  /**
   * Index every page returned by `Page.getStreamOfFindAll()`.
   *
   * Pages without creator or revision, and pages rejected by shouldIndexed(),
   * are skipped. The remaining ones are sent in bulk requests of
   * `BULK_REINDEX_SIZE` documents; a failing bulk request is logged and does
   * not abort the run ("add anyway").
   *
   * Every bulk request is awaited, so the returned promise settles only after
   * all data has been handed to Elasticsearch, and 'finishAddPage' is emitted
   * after the last request finished. An empty remainder is not sent.
   *
   * Emits on `this.searchEvent`:
   *   - 'addPageProgress' (allPageCount, total, skipped) after each full bulk
   *   - 'finishAddPage'   (allPageCount, total, skipped) at the end
   *
   * @returns {Promise<void>} rejects with the error raised while iterating the cursor
   */
  SearchClient.prototype.addAllPages = async function() {
    const Page = this.crowi.model('Page');
    const allPageCount = await Page.allPageCount();
    const Bookmark = this.crowi.model('Bookmark');
    const PageTagRelation = this.crowi.model('PageTagRelation');
    const cursor = Page.getStreamOfFindAll();

    let body = [];
    let sent = 0;
    let skipped = 0;
    let total = 0;

    // send one bulk request and wait for it; errors are logged, never thrown
    const bulkSend = async(bulkBody) => {
      if (bulkBody.length === 0) {
        return;
      }

      try {
        const res = await this.client.bulk({
          body: bulkBody,
          requestTimeout: Infinity,
        });
        logger.info('addAllPages add anyway (items, errors, took): ', (res.items || []).length, res.errors, res.took, 'ms');
      }
      catch (err) {
        logger.error('addAllPages error on add anyway: ', err);
      }
    };

    try {
      await cursor.eachAsync(async(doc) => {
        if (!doc.creator || !doc.revision || !this.shouldIndexed(doc)) {
          skipped++;
          return;
        }
        total++;

        const bookmarkCount = await Bookmark.countByPageId(doc._id);
        const tagRelations = await PageTagRelation.find({ relatedPage: doc._id }).populate('relatedTag');
        const page = { ...doc, bookmarkCount, tagNames: tagRelations.map((relation) => { return relation.relatedTag.name }) };
        this.prepareBodyForCreate(body, page);

        // send each `BULK_REINDEX_SIZE` docs. (body has 2 elements for each data)
        if (body.length >= BULK_REINDEX_SIZE * 2) {
          // detach the chunk before awaiting so that the buffer can be refilled safely
          const chunk = body;
          body = [];

          sent++;
          logger.debug('Sending request (seq, total, skipped)', sent, total, skipped);
          await bulkSend(chunk);
          this.searchEvent.emit('addPageProgress', allPageCount, total, skipped);
        }
      });

      // send all remaining data on body[]
      logger.debug('Sending last body of bulk operation:', body.length);
      await bulkSend(body);
      this.searchEvent.emit('finishAddPage', allPageCount, total, skipped);
    }
    catch (e) {
      logger.error('Error wile iterating cursor.eachAsync()', e);
      throw e;
    }
  };
  /**
   * Execute a search request and reduce the response to:
   * {
   *   meta: { took: Integer, total: Integer, results: Integer },
   *   data: [ { _id, _score, _source } ... ],
   * }
   *
   * When NODE_ENV is 'development' the query is first sent to
   * `indices.validateQuery` and the explanations are written to the debug log.
   *
   * @param {object} query request as built by the createSearchQuery*() methods
   * @returns {Promise<object>}
   */
  SearchClient.prototype.search = async function(query) {
    // for debug
    if (process.env.NODE_ENV === 'development') {
      const validation = await this.client.indices.validateQuery({
        explain: true,
        body: {
          query: query.body.query,
        },
      });
      logger.debug('ES returns explanations: ', validation.explanations);
    }

    const response = await this.client.search(query);

    // for debug
    logger.debug('ES result: ', response);

    const { took, hits } = response;
    const meta = {
      took,
      total: hits.total,
      results: hits.hits.length,
    };
    const data = hits.hits.map(({ _id, _score, _source }) => {
      return { _id, _score, _source };
    });

    return { meta, data };
  };
  /**
   * Create an empty search request against the alias, sorted by `updated_at`
   * in descending order, with the default result size applied.
   *
   * @param {object} [option]
   * @param {string[]} [option.fields] `_source` fields to return; defaults to path, bookmark_count and tag_names
   * @returns {object} request whose `body.query` is still empty
   */
  SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option) {
    // getting path by default is almost for debug
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const fields = (option && option.fields) || defaultFields;

    const body = {
      sort: [{ updated_at: { order: 'desc' } }],
      query: {}, // filled in by the append*/filter* methods
      _source: fields,
    };
    const query = { index: this.aliasName, type: 'pages', body };

    this.appendResultSize(query);

    return query;
  };
  /**
   * Create an empty search request against the alias, sorted by `_score` in
   * descending order, with the default result size applied.
   *
   * @param {object} [option]
   * @param {string[]} [option.fields] `_source` fields to return; defaults to path, bookmark_count and tag_names
   * @returns {object} request whose `body.query` is still empty
   */
  SearchClient.prototype.createSearchQuerySortedByScore = function(option) {
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const fields = (option && option.fields) || defaultFields;

    // sort by score
    const body = {
      sort: [{ _score: { order: 'desc' } }],
      query: {}, // filled in by the append*/filter* methods
      _source: fields,
    };
    const query = { index: this.aliasName, type: 'pages', body };

    this.appendResultSize(query);

    return query;
  };
  /**
   * Set the paging parameters on the request object itself (`query.from`,
   * `query.size`). Falsy arguments fall back to DEFAULT_OFFSET / DEFAULT_LIMIT.
   *
   * @param {object} query request to mutate
   * @param {number} [from] offset of the first hit
   * @param {number} [size] maximum number of hits
   */
  SearchClient.prototype.appendResultSize = function(query, from, size) {
    const offset = from || this.DEFAULT_OFFSET;
    const limit = size || this.DEFAULT_LIMIT;

    query.from = offset;
    query.size = limit;
  };
  /**
   * Make sure `query.body.query.bool` exists and that its `filter`, `must` and
   * `must_not` clauses are arrays. Clauses that already are arrays are kept.
   *
   * @param {object} query request created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
   * @returns {object} the same (mutated) request
   */
  SearchClient.prototype.initializeBoolQuery = function(query) {
    const root = query.body.query;
    if (!root.bool) {
      root.bool = {};
    }

    const { bool } = root;
    ['filter', 'must', 'must_not'].forEach((clause) => {
      if (!Array.isArray(bool[clause])) {
        bool[clause] = [];
      }
    });

    return query;
  };
  /**
   * Parse `queryString` (see parseQueryString()) and add the corresponding
   * clauses to the bool query of `query`:
   *
   *   - plain words              -> one `multi_match` in `must` / `must_not`
   *   - quoted phrases           -> one phrase `multi_match` per phrase in `must` / `must_not`
   *   - `prefix:` / `-prefix:`   -> `path.raw` prefix filters
   *   - `tag:` / `-tag:`         -> `tag_names` term filters
   *
   * Each phrase query is pushed as its own clause. Pushing the collected
   * array as a single element would nest an array inside `must` / `must_not`,
   * which is not a valid clause.
   *
   * @param {object} query request to mutate
   * @param {string} queryString raw search string
   */
  SearchClient.prototype.appendCriteriaForQueryString = function(query, queryString) {
    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign
    const { bool } = query.body.query;

    // parse
    const parsedKeywords = this.parseQueryString(queryString);

    // each phrase is quoted words
    const toPhraseQuery = (phrase) => {
      return {
        multi_match: {
          query: phrase,
          type: 'phrase',
          fields: [
            // Not use "*.ja" fields here, because we want to analyze (parse) search words
            'path.raw^2',
            'body',
          ],
        },
      };
    };
    const toPrefixQuery = (path) => { return { prefix: { 'path.raw': path } } };
    const toTagQuery = (tag) => { return { term: { tag_names: tag } } };

    if (parsedKeywords.match.length > 0) {
      bool.must.push({
        multi_match: {
          query: parsedKeywords.match.join(' '),
          type: 'most_fields',
          fields: ['path.ja^2', 'path.en^2', 'body.ja', 'body.en'],
        },
      });
    }

    if (parsedKeywords.not_match.length > 0) {
      bool.must_not.push({
        multi_match: {
          query: parsedKeywords.not_match.join(' '),
          fields: ['path.ja', 'path.en', 'body.ja', 'body.en'],
          operator: 'or',
        },
      });
    }

    if (parsedKeywords.phrase.length > 0) {
      bool.must.push(...parsedKeywords.phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.not_phrase.length > 0) {
      bool.must_not.push(...parsedKeywords.not_phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.prefix.length > 0) {
      bool.filter.push({ bool: { should: parsedKeywords.prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.not_prefix.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.tag.length > 0) {
      bool.filter.push({ bool: { must: parsedKeywords.tag.map(toTagQuery) } });
    }

    if (parsedKeywords.not_tag.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_tag.map(toTagQuery) } });
    }
  };
  /**
   * Restrict the hits to pages the viewer is allowed to see, by adding one
   * `should` filter built from the grant conditions below.
   *
   *   - GRANT_PUBLIC pages always match
   *   - GRANT_RESTRICTED pages match when `user` is one of the granted users
   *   - GRANT_SPECIFIED / GRANT_OWNER pages match unconditionally unless
   *     'security:list-policy:hideRestrictedByOwner' is set; if it is set they
   *     match only when `user` is one of the granted users
   *   - GRANT_USER_GROUP pages match unconditionally unless
   *     'security:list-policy:hideRestrictedByGroup' is set; if it is set they
   *     match only when the granted group is one of `userGroups`
   *
   * @param {object} query request to mutate
   * @param {object} [user] viewer; null/undefined for a guest
   * @param {object[]} [userGroups] groups of the viewer
   * @returns {Promise<void>}
   */
  SearchClient.prototype.filterPagesByViewer = async function(query, user, userGroups) {
    const hideRestrictedByOwner = this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByOwner');
    const hideRestrictedByGroup = this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByGroup');

    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign

    const Page = this.crowi.model('Page');
    const {
      GRANT_PUBLIC, GRANT_RESTRICTED, GRANT_SPECIFIED, GRANT_OWNER, GRANT_USER_GROUP,
    } = Page;

    const hasUser = user != null;
    const hasUserGroups = userGroups != null && userGroups.length > 0;

    const byGrant = (grant) => { return { term: { grant } } };
    // pages of the given grant type on which the viewer is listed as granted user
    const byGrantForViewer = (grant) => {
      return {
        bool: {
          must: [
            byGrant(grant),
            { term: { granted_users: user._id.toString() } },
          ],
        },
      };
    };

    const grantConditions = [byGrant(GRANT_PUBLIC)];

    // ensure to hit GRANT_RESTRICTED pages that name the viewer explicitly
    if (hasUser) {
      grantConditions.push(byGrantForViewer(GRANT_RESTRICTED));
    }

    if (!hideRestrictedByOwner) {
      grantConditions.push(byGrant(GRANT_SPECIFIED), byGrant(GRANT_OWNER));
    }
    else if (hasUser) {
      grantConditions.push(byGrantForViewer(GRANT_SPECIFIED), byGrantForViewer(GRANT_OWNER));
    }

    if (!hideRestrictedByGroup) {
      grantConditions.push(byGrant(GRANT_USER_GROUP));
    }
    else if (hasUserGroups) {
      const userGroupIds = userGroups.map((group) => { return group._id.toString() });
      grantConditions.push({
        bool: {
          must: [
            byGrant(GRANT_USER_GROUP),
            { terms: { granted_group: userGroupIds } },
          ],
        },
      });
    }

    query.body.query.bool.filter.push({ bool: { should: grantConditions } });
  };
  /**
   * Restrict the hits to portal pages: exclude everything matched by
   * `queries.USER` and keep only what `queries.PORTAL` matches.
   *
   * @param {object} query request to mutate
   */
  SearchClient.prototype.filterPortalPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PORTAL } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PORTAL);
  };
  /**
   * Restrict the hits to public pages: exclude everything matched by
   * `queries.USER` and keep only what `queries.PUBLIC` matches.
   *
   * @param {object} query request to mutate
   */
  SearchClient.prototype.filterPublicPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PUBLIC } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PUBLIC);
  };
  /**
   * Restrict the hits to what `queries.USER` matches.
   *
   * @param {object} query request to mutate
   */
  SearchClient.prototype.filterUserPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    bool.filter.push(this.queries.USER);
  };
  /**
   * Apply the page-type filter that belongs to `type`.
   *
   * @param {object} query request to mutate
   * @param {*} type one of Page.TYPE_PORTAL, Page.TYPE_PUBLIC, Page.TYPE_USER; anything else applies no filter
   * @returns {*} the result of the selected filter method, or `query` itself when no filter applies
   */
  SearchClient.prototype.filterPagesByType = function(query, type) {
    const Page = this.crowi.model('Page');

    if (type === Page.TYPE_PORTAL) {
      return this.filterPortalPages(query);
    }
    if (type === Page.TYPE_PUBLIC) {
      return this.filterPublicPages(query);
    }
    if (type === Page.TYPE_USER) {
      return this.filterUserPages(query);
    }

    return query;
  };
  /**
   * Wrap the current query in a `function_score` query that adds a boost
   * derived from `bookmark_count` (log1p, scaled by 10000 / number of users).
   *
   * The user count is awaited: `User.count({})` is not a plain number at call
   * time, so dividing by the un-awaited value produced NaN, which serialises
   * to `null`. Awaiting works for a promise/thenable as well as for a plain
   * number. As a consequence this method is asynchronous and must be awaited
   * before the query is sent.
   *
   * @param {object} query request to mutate
   * @param {string} queryString raw search string (only used for the debug log of the disabled min_score)
   * @returns {Promise<void>}
   */
  SearchClient.prototype.appendFunctionScore = async function(query, queryString) {
    const User = this.crowi.model('User');
    // fall back to 1 to avoid a division by zero when there is no user
    const count = (await User.count({})) || 1;

    const minScore = queryString.length * 0.1 - 1; // increase with length
    logger.debug('min_score: ', minScore);

    query.body.query = {
      function_score: {
        query: { ...query.body.query },
        // // disable min_score -- 2019.02.28 Yuki Takei
        // // more precise adjustment is needed...
        // min_score: minScore,
        field_value_factor: {
          field: 'bookmark_count',
          modifier: 'log1p',
          factor: 10000 / count,
          missing: 0,
        },
        boost_mode: 'sum',
      },
    };
  };

  /**
   * Search pages by keyword, sorted by score.
   *
   * Builds the request from the query string, the page type and the viewer's
   * permissions, applies paging and the bookmark boost, then executes it.
   *
   * @param {string} queryString raw search string
   * @param {object} [user] viewer; null/undefined for a guest
   * @param {object[]} [userGroups] groups of the viewer
   * @param {object} [option]
   * @param {number} [option.offset] offset of the first hit
   * @param {number} [option.limit] maximum number of hits
   * @param {*} [option.type] page type handed to filterPagesByType()
   * @returns {Promise<object>} result of search()
   */
  SearchClient.prototype.searchKeyword = async function(queryString, user, userGroups, option) {
    // tolerate a missing option object
    const { offset, limit, type: pageType } = option || {};
    const from = offset || null;
    const size = limit || null;
    const type = pageType || null;

    const query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForQueryString(query, queryString);

    this.filterPagesByType(query, type);
    await this.filterPagesByViewer(query, user, userGroups);

    this.appendResultSize(query, from, size);

    // must be awaited: the query is only wrapped once the user count is known
    await this.appendFunctionScore(query, queryString);

    return this.search(query);
  };
  /**
   * Split a raw search string into its keyword categories.
   *
   * Syntax (a leading `-` negates the item):
   *   - `"some words"`   phrase (the surrounding quotes are kept in the result)
   *   - `prefix:/path`   path prefix
   *   - `tag:name`       tag
   *   - anything else    plain word
   *
   * @param {string} queryString
   * @returns {{match: string[], not_match: string[], phrase: string[], not_phrase: string[],
   *            prefix: string[], not_prefix: string[], tag: string[], not_tag: string[]}}
   */
  SearchClient.prototype.parseQueryString = function(queryString) {
    const matchWords = [];
    const notMatchWords = [];
    const phraseWords = [];
    const notPhraseWords = [];
    const prefixPaths = [];
    const notPrefixPaths = [];
    const tags = [];
    const notTags = [];

    // strings are immutable: the trimmed / normalized value has to be kept
    const normalized = queryString.trim().replace(/\s+/g, ' ');

    // First: Parse phrase keywords
    const phraseRegExp = /(-?"[^"]+")/g;
    const phrases = normalized.match(phraseRegExp) || [];
    phrases.forEach((rawPhrase) => {
      const phrase = rawPhrase.trim();
      if (phrase.startsWith('-')) {
        notPhraseWords.push(phrase.replace(/^-/, ''));
      }
      else {
        phraseWords.push(phrase);
      }
    });
    const remainder = normalized.replace(phraseRegExp, '');

    // Second: Parse other keywords (include minus keywords)
    const negativeBuckets = { 'prefix:': notPrefixPaths, 'tag:': notTags };
    const positiveBuckets = { 'prefix:': prefixPaths, 'tag:': tags };

    remainder.split(' ').forEach((word) => {
      if (word === '') {
        return;
      }

      // https://regex101.com/r/pN9XfK/1
      const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
      if (matchNegative != null) {
        const [, operator, value] = matchNegative;
        (negativeBuckets[operator] || notMatchWords).push(value);
        return;
      }

      // https://regex101.com/r/3qw9FQ/1
      const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);
      if (matchPositive != null) {
        const [, operator, value] = matchPositive;
        (positiveBuckets[operator] || matchWords).push(value);
      }
    });

    return {
      match: matchWords,
      not_match: notMatchWords,
      phrase: phraseWords,
      not_phrase: notPhraseWords,
      prefix: prefixPaths,
      not_prefix: notPrefixPaths,
      tag: tags,
      not_tag: notTags,
    };
  };
  /**
   * Handler for the page 'create' event: index the new page unless
   * shouldIndexed() rejects it. The request runs in the background; its
   * outcome is only logged.
   *
   * @param {object} page created page; `bookmarkCount` is written onto it
   * @param {object} user unused
   * @param {number} [bookmarkCount=0]
   */
  SearchClient.prototype.syncPageCreated = function(page, user, bookmarkCount = 0) {
    debug('SearchClient.syncPageCreated', page.path);

    const isIndexable = this.shouldIndexed(page);
    if (!isIndexable) {
      return;
    }

    page.bookmarkCount = bookmarkCount;

    const onSuccess = (res) => { debug('ES Response', res) };
    const onFailure = (err) => { logger.error('ES Error', err) };
    this.addPages([page]).then(onSuccess).catch(onFailure);
  };
  /**
   * Handler for the page 'update' and 'updateTag' events.
   *
   * A page rejected by shouldIndexed() is removed from the index; any other
   * page is upserted. The request runs in the background; its outcome is only
   * logged.
   *
   * NOTE(review): `bookmarkCount` defaults to 0 and is written to the page
   * before the update, so the indexed bookmark count is presumably reset to 0
   * whenever the emitter does not pass it -- confirm against the emitters.
   *
   * @param {object} page updated page; `bookmarkCount` is written onto it when it is indexed
   * @param {object} user unused
   * @param {number} [bookmarkCount=0]
   */
  SearchClient.prototype.syncPageUpdated = function(page, user, bookmarkCount = 0) {
    debug('SearchClient.syncPageUpdated', page.path);

    const isIndexable = this.shouldIndexed(page);

    // TODO delete
    if (!isIndexable) {
      const onDeleted = (res) => { debug('deletePages: ES Response', res) };
      const onDeleteFailed = (err) => { logger.error('deletePages:ES Error', err) };
      this.deletePages([page]).then(onDeleted).catch(onDeleteFailed);
      return;
    }

    page.bookmarkCount = bookmarkCount;

    const onUpdated = (res) => { debug('ES Response', res) };
    const onUpdateFailed = (err) => { logger.error('ES Error', err) };
    this.updatePages([page]).then(onUpdated).catch(onUpdateFailed);
  };
  /**
   * Handler for the page 'delete' event: remove the page from the index. The
   * request runs in the background; its outcome is only logged.
   *
   * @param {object} page deleted page
   * @param {object} user unused
   */
  SearchClient.prototype.syncPageDeleted = function(page, user) {
    debug('SearchClient.syncPageDeleted', page.path);

    const onDeleted = (res) => { debug('deletePages: ES Response', res) };
    const onDeleteFailed = (err) => { logger.error('deletePages:ES Error', err) };
    this.deletePages([page]).then(onDeleted).catch(onDeleteFailed);
  };
  /**
   * Handler for the bookmark 'create' and 'delete' events: re-read the page
   * and its bookmark count and push the update to the index.
   *
   * This function is registered as an event handler, so nobody observes the
   * returned promise. Every failure (lookup or update) is therefore caught and
   * logged here instead of surfacing as an unhandled rejection, and a page
   * that can no longer be found is skipped.
   *
   * @param {*} pageId id of the page whose bookmarks changed
   * @returns {Promise<void>} never rejects
   */
  SearchClient.prototype.syncBookmarkChanged = async function(pageId) {
    const Page = this.crowi.model('Page');
    const Bookmark = this.crowi.model('Bookmark');

    try {
      const page = await Page.findById(pageId);
      if (page == null) {
        // nothing to update for a page that does not exist (anymore)
        debug('SearchClient.syncBookmarkChanged: page not found', pageId);
        return;
      }

      page.bookmarkCount = await Bookmark.countByPageId(pageId);

      const res = await this.updatePages([page]);
      debug('ES Response', res);
    }
    catch (err) {
      logger.error('ES Error', err);
    }
  };
  /**
   * Handler for the tag 'update' event: push the page to the index again. The
   * request runs in the background; its outcome is only logged.
   *
   * @param {object} page page whose tags changed
   * @returns {Promise<void>} resolves without waiting for the update request
   */
  SearchClient.prototype.syncTagChanged = async function(page) {
    const onUpdated = (res) => { return debug('ES Response', res) };
    const onUpdateFailed = (err) => { return logger.error('ES Error', err) };

    this.updatePages([page]).then(onUpdated).catch(onUpdateFailed);
  };
  797. module.exports = SearchClient;