search.js 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931
  1. /**
  2. * Search
  3. */
  4. const elasticsearch = require('elasticsearch');
  5. const debug = require('debug')('growi:lib:search');
  6. const logger = require('@alias/logger')('growi:lib:search');
  /**
   * Search client wrapping an Elasticsearch connection.
   *
   * @param {object} crowi application context; this constructor reads `event()`, `configManager` and `resourceDir` from it
   * @param {string} esUri Elasticsearch URI, handed to parseUri() to derive host and index name
   */
  function SearchClient(crowi, esUri) {
    // paging defaults used by appendResultSize()
    this.DEFAULT_OFFSET = 0;
    this.DEFAULT_LIMIT = 50;

    // node information; filled in by checkESVersion()
    this.esNodeName = '-';
    this.esNodeNames = [];
    this.esVersion = 'unknown';
    this.esVersions = [];
    this.esPlugin = [];
    this.esPlugins = [];

    this.esUri = esUri;
    this.crowi = crowi;
    this.searchEvent = crowi.event('search');
    this.configManager = crowi.configManager;

    // In Elasticsearch RegExp, we don't need to use ^ and $.
    // Ref: https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-regexp-query.html#_standard_operators
    const PORTAL = { regexp: { 'path.raw': '.*/' } };
    const PUBLIC = { regexp: { 'path.raw': '.*[^/]' } };
    const USER = { prefix: { 'path.raw': '/user/' } };
    this.queries = { PORTAL, PUBLIC, USER };

    const { host, indexName } = this.parseUri(this.esUri);
    this.host = host;
    this.indexName = indexName;
    this.aliasName = `${indexName}-alias`;

    const clientOptions = {
      host,
      requestTimeout: 5000,
      // log: 'debug',
    };
    this.client = new elasticsearch.Client(clientOptions);

    this.registerUpdateEvent();

    this.mappingFile = `${crowi.resourceDir}search/mappings.json`;
  }
  /**
   * Ask the Elasticsearch client for cluster information.
   *
   * @returns {*} the return value of `client.info({})`
   */
  SearchClient.prototype.getInfo = function() {
    const { client } = this;
    const params = {};
    return client.info(params);
  };
  /**
   * Query node information from Elasticsearch and record node names, versions and plugins.
   *
   * The list properties (esNodeNames, esVersions, esPlugins) are rebuilt on every call.
   * Previously they were appended to, so repeated calls (initIndices() invokes this each
   * time) accumulated duplicate entries.
   * The scalar properties (esNodeName, esVersion, esPlugin) hold the values of the last
   * node iterated and are left untouched when no node is reported.
   *
   * Failures are logged and swallowed; the recorded state is left unchanged in that case.
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.checkESVersion = async function() {
    try {
      const nodes = await this.client.nodes.info();
      if (!nodes._nodes || !nodes.nodes) {
        throw new Error('no nodes info');
      }

      const nodeNames = [];
      const versions = [];
      const plugins = [];

      for (const [nodeName, nodeInfo] of Object.entries(nodes.nodes)) {
        this.esNodeName = nodeName;
        nodeNames.push(nodeName);
        this.esVersion = nodeInfo.version;
        versions.push(nodeInfo.version);
        this.esPlugin = nodeInfo.plugins;
        plugins.push(nodeInfo.plugins);
      }

      // replace (not extend) the lists so that they always mirror the latest response
      this.esNodeNames = nodeNames;
      this.esVersions = versions;
      this.esPlugins = plugins;
    }
    catch (error) {
      logger.error('es check version error:', error);
    }
  };
  /**
   * Subscribe the sync* handlers of this client to the 'page', 'bookmark' and 'tag'
   * event emitters obtained from crowi.event().
   */
  SearchClient.prototype.registerUpdateEvent = function() {
    // every subscription gets its own handler bound to this client
    const listen = (emitter, eventName, handler) => {
      emitter.on(eventName, handler.bind(this));
    };

    const pages = this.crowi.event('page');
    listen(pages, 'create', this.syncPageCreated);
    listen(pages, 'update', this.syncPageUpdated);
    listen(pages, 'updateTag', this.syncPageUpdated);
    listen(pages, 'delete', this.syncPageDeleted);

    const bookmarks = this.crowi.event('bookmark');
    listen(bookmarks, 'create', this.syncBookmarkChanged);
    listen(bookmarks, 'delete', this.syncBookmarkChanged);

    const tags = this.crowi.event('tag');
    listen(tags, 'update', this.syncTagChanged);
  };
  /**
   * Tell whether a page belongs in the search index.
   *
   * @param {object} page
   * @returns {boolean} true when `page.redirectTo` is null or undefined
   */
  SearchClient.prototype.shouldIndexed = function(page) {
    const { redirectTo } = page;
    return redirectTo === null || redirectTo === undefined;
  };
  // BONSAI_URL has the following format:
  // => https://{ID}:{PASSWORD}@{HOST}
  /**
   * Split an Elasticsearch URI into the host part and the index name.
   *
   * When the URI has a non-empty path after the host, that path is the index name;
   * otherwise the whole URI is returned as the host and the index name is 'crowi'.
   *
   * @param {string} uri
   * @returns {{host: string, indexName: string}}
   */
  SearchClient.prototype.parseUri = function(uri) {
    const matched = uri.match(/^(https?:\/\/[^/]+)\/(.+)$/);
    if (!matched) {
      return { host: uri, indexName: 'crowi' };
    }

    const [, host, indexName] = matched;
    return { host, indexName };
  };
  /**
   * Prepare the indices used by this client:
   * refresh node info, drop a left-over "<index>-tmp" index,
   * create the main index if it does not exist and attach the alias to it if missing.
   *
   * @returns {Promise<void>}
   */
  SearchClient.prototype.initIndices = async function() {
    await this.checkESVersion();

    const { indices } = this.client;
    const index = this.indexName;
    const name = this.aliasName;
    const tmpIndex = `${index}-tmp`;

    // remove tmp index
    if (await indices.exists({ index: tmpIndex })) {
      await indices.delete({ index: tmpIndex });
    }

    // create index
    const hasIndex = await indices.exists({ index });
    if (!hasIndex) {
      await this.createIndex(index);
    }

    // create alias
    const hasAlias = await indices.existsAlias({ name, index });
    if (!hasAlias) {
      await indices.putAlias({ name, index });
    }
  };
  /**
   * Create an index, using the contents of the mapping file as request body.
   *
   * @param {string} index name of the index to create
   * @returns {Promise<*>} result of `client.indices.create()`
   */
  SearchClient.prototype.createIndex = async function(index) {
    const mappings = require(this.mappingFile);
    const request = { index, body: mappings };
    return this.client.indices.create(request);
  };
  /**
   * Rebuild the main index from scratch.
   *
   * Sequence: initIndices() -> copy the current index into "<index>-tmp" -> point the alias
   * at the tmp index -> delete and recreate the main index -> addAllPages() -> point the
   * alias back at the main index -> delete the tmp index.
   *
   * @param {string} uri not used by this method
   * @returns {Promise<void>}
   */
  SearchClient.prototype.buildIndex = async function(uri) {
    await this.initIndices();

    const { client, indexName } = this;
    const alias = `${indexName}-alias`;
    const tmpIndex = `${indexName}-tmp`;

    // switch the alias from one index to another in a single updateAliases request
    const moveAlias = (to, from) => {
      return client.indices.updateAliases({
        body: {
          actions: [
            { add: { alias, index: to } },
            { remove: { alias, index: from } },
          ],
        },
      });
    };

    // reindex to tmp index
    await this.createIndex(tmpIndex);
    await client.reindex({
      body: {
        source: { index: indexName },
        dest: { index: tmpIndex },
      },
    });

    // update alias
    await moveAlias(tmpIndex, indexName);

    // flush index
    await client.indices.delete({ index: indexName });
    await this.createIndex(indexName);
    await this.addAllPages();

    // update alias
    await moveAlias(indexName, tmpIndex);

    // remove tmp index
    await client.indices.delete({ index: tmpIndex });
  };
  /**
   * Build the part of an index document that is derived from page.grant*.
   *
   * Entries of `grantedUsers` and `grantedGroup` may be either objects carrying `_id`
   * or the id values themselves; both are converted with toString().
   *
   * @param {object} page
   * @returns {{grant: *, granted_users: (string[]|null), granted_group: (string|null)}}
   */
  function generateDocContentsRelatedToRestriction(page) {
    // accept both a populated object and a bare id
    const toIdString = (ref) => {
      const id = (ref._id == null) ? ref : ref._id;
      return id.toString();
    };

    const { grantedUsers, grantedGroup } = page;
    const hasGrantedUsers = grantedUsers != null && grantedUsers.length > 0;

    return {
      grant: page.grant,
      granted_users: hasGrantedUsers ? grantedUsers.map((user) => { return toIdString(user) }) : null,
      granted_group: (grantedGroup != null) ? toIdString(grantedGroup) : null,
    };
  }
  /**
   * Append a bulk "update" action (as upsert) for the given page to `body`.
   *
   * @param {Array} body bulk request body being assembled; receives two new elements
   * @param {object} page page to take the document fields from
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForUpdate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const action = {
      update: {
        _index: this.aliasName,
        _type: 'pages',
        _id: page._id.toString(),
      },
    };

    const doc = {
      path: page.path,
      body: page.revision.body,
      comment_count: page.commentCount,
      bookmark_count: page.bookmarkCount || 0,
      like_count: page.liker.length || 0,
      updated_at: page.updatedAt,
      tag_names: page.tagNames,
      ...generateDocContentsRelatedToRestriction(page),
    };

    body.push(action, { doc, doc_as_upsert: true });
  };
  /**
   * Append a bulk "index" action and the full document for the given page to `body`.
   *
   * @param {Array} body bulk request body being assembled; receives two new elements
   * @param {object} page page to take the document fields from
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForCreate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const action = {
      index: {
        _index: this.aliasName,
        _type: 'pages',
        _id: page._id.toString(),
      },
    };

    const doc = {
      path: page.path,
      body: page.revision.body,
      username: page.creator.username,
      comment_count: page.commentCount,
      bookmark_count: page.bookmarkCount || 0,
      like_count: page.liker.length || 0,
      created_at: page.createdAt,
      updated_at: page.updatedAt,
      tag_names: page.tagNames,
      ...generateDocContentsRelatedToRestriction(page),
    };

    body.push(action, doc);
  };
  /**
   * Append a bulk "delete" action for the given page to `body`.
   *
   * @param {Array} body bulk request body being assembled; receives one new element
   * @param {object} page page whose `_id` identifies the document
   * @throws {Error} when `body` is not an array
   */
  SearchClient.prototype.prepareBodyForDelete = function(body, page) {
    const isArray = Array.isArray(body);
    if (!isArray) {
      throw new Error('Body must be an array.');
    }

    const target = {
      _index: this.aliasName,
      _type: 'pages',
      _id: page._id.toString(),
    };
    body.push({ delete: target });
  };
  /**
   * Index the given pages with a single bulk request.
   *
   * For every page the bookmark count and the tag names are looked up and stored on the
   * page object (`bookmarkCount`, `tagNames`) before the document is built.
   *
   * @param {object[]} pages
   * @returns {Promise<*>} result of `client.bulk()`
   */
  SearchClient.prototype.addPages = async function(pages) {
    const Bookmark = this.crowi.model('Bookmark');
    const PageTagRelation = this.crowi.model('PageTagRelation');
    const bulkBody = [];

    /* eslint-disable no-await-in-loop */
    for (const page of pages) {
      const pageId = page._id;
      page.bookmarkCount = await Bookmark.countByPageId(pageId);
      const relations = await PageTagRelation.find({ relatedPage: page._id }).populate('relatedTag');
      page.tagNames = relations.map(({ relatedTag }) => { return relatedTag.name });
      this.prepareBodyForCreate(bulkBody, page);
    }
    /* eslint-enable no-await-in-loop */

    logger.debug('addPages(): Sending Request to ES', bulkBody);
    return this.client.bulk({ body: bulkBody });
  };
  /**
   * Update (upsert) the documents of the given pages with a single bulk request.
   *
   * The tag names of every page are looked up and stored on the page object (`tagNames`)
   * before the update document is built.
   *
   * @param {object[]} pages
   * @returns {Promise<*>} result of `client.bulk()`
   */
  SearchClient.prototype.updatePages = async function(pages) {
    const PageTagRelation = this.crowi.model('PageTagRelation');
    const bulkBody = [];

    /* eslint-disable no-await-in-loop */
    for (const page of pages) {
      const relations = await PageTagRelation.find({ relatedPage: page._id }).populate('relatedTag');
      page.tagNames = relations.map(({ relatedTag }) => { return relatedTag.name });
      this.prepareBodyForUpdate(bulkBody, page);
    }
    /* eslint-enable no-await-in-loop */

    logger.debug('updatePages(): Sending Request to ES', bulkBody);
    return this.client.bulk({ body: bulkBody });
  };
  /**
   * Remove the documents of the given pages with a single bulk request.
   *
   * @param {object[]} pages
   * @returns {*} the return value of `client.bulk()`
   */
  SearchClient.prototype.deletePages = function(pages) {
    const bulkBody = [];
    pages.forEach((page) => {
      this.prepareBodyForDelete(bulkBody, page);
    });

    logger.debug('deletePages(): Sending Request to ES', bulkBody);
    return this.client.bulk({ body: bulkBody });
  };
  /**
   * Index every page delivered by `Page.getStreamOfFindAll()`, in bulk requests of
   * up to 2000 documents.
   *
   * Pages without creator or revision, and pages rejected by shouldIndexed(), are skipped.
   * Progress is reported through the 'addPageProgress' and 'finishAddPage' search events
   * with the arguments (allPageCount, total, skipped).
   *
   * Every bulk request is awaited, so the returned promise settles (and 'finishAddPage'
   * is emitted) only after Elasticsearch has answered all of them. Previously the requests
   * were fired without waiting, which let callers such as buildIndex() continue while
   * documents were still being written. An empty trailing batch is no longer sent.
   *
   * A failing bulk request is logged and does not abort the run (best effort, as before).
   *
   * @returns {Promise<void>} rejects when iterating the cursor fails
   */
  SearchClient.prototype.addAllPages = async function() {
    const Page = this.crowi.model('Page');
    const allPageCount = await Page.allPageCount();
    const Bookmark = this.crowi.model('Bookmark');
    const PageTagRelation = this.crowi.model('PageTagRelation');
    const cursor = Page.getStreamOfFindAll();

    // body has 2 elements for each document, so this means 2000 documents per request
    const MAX_BODY_LENGTH = 4000;

    let body = [];
    let sent = 0;
    let skipped = 0;
    let total = 0;

    // send one batch and wait for the response; errors are logged, not propagated
    const bulkSend = async(bulkBody) => {
      try {
        const res = await this.client.bulk({
          body: bulkBody,
          requestTimeout: Infinity,
        });
        logger.info('addAllPages add anyway (items, errors, took): ', (res.items || []).length, res.errors, res.took, 'ms');
      }
      catch (err) {
        logger.error('addAllPages error on add anyway: ', err);
      }
    };

    try {
      await cursor.eachAsync(async(doc) => {
        if (!doc.creator || !doc.revision || !this.shouldIndexed(doc)) {
          skipped++;
          return;
        }
        total++;

        const bookmarkCount = await Bookmark.countByPageId(doc._id);
        const tagRelations = await PageTagRelation.find({ relatedPage: doc._id }).populate('relatedTag');
        const page = { ...doc, bookmarkCount, tagNames: tagRelations.map((relation) => { return relation.relatedTag.name }) };
        this.prepareBodyForCreate(body, page);

        if (body.length >= MAX_BODY_LENGTH) {
          sent++;
          logger.debug('Sending request (seq, total, skipped)', sent, total, skipped);
          // detach the full batch before awaiting so that the next documents start a fresh one
          const batch = body;
          body = [];
          await bulkSend(batch);
          this.searchEvent.emit('addPageProgress', allPageCount, total, skipped);
        }
      });
    }
    catch (e) {
      logger.error('Error wile iterating cursor.eachAsync()', e);
      throw e;
    }

    // send all remaining data on body[]
    logger.debug('Sending last body of bulk operation:', body.length);
    if (body.length > 0) {
      await bulkSend(body);
    }
    this.searchEvent.emit('finishAddPage', allPageCount, total, skipped);
  };
  /**
   * Execute a search request and reshape the response.
   *
   * When NODE_ENV is 'development' the query is additionally sent to validateQuery
   * and the explanations are written to the debug log.
   *
   * search returning type:
   * {
   *   meta: { took: Integer, total: Integer, results: Integer },
   *   data: [ { _id, _score, _source } ... ],
   * }
   *
   * @param {object} query request passed as-is to `client.search()`
   * @returns {Promise<{meta: object, data: object[]}>}
   */
  SearchClient.prototype.search = async function(query) {
    // for debug
    const isDevelopment = process.env.NODE_ENV === 'development';
    if (isDevelopment) {
      const validation = await this.client.indices.validateQuery({
        explain: true,
        body: { query: query.body.query },
      });
      logger.debug('ES returns explanations: ', validation.explanations);
    }

    const result = await this.client.search(query);

    // for debug
    logger.debug('ES result: ', result);

    const { took, hits } = result;
    const meta = {
      took,
      total: hits.total,
      results: hits.hits.length,
    };
    const data = hits.hits.map(({ _id, _score, _source }) => {
      return { _id, _score, _source };
    });

    return { meta, data };
  };
  /**
   * Create a search request skeleton sorted by `updated_at` (descending).
   * The `body.query` part is left empty for the caller to fill.
   *
   * @param {object} [option] `option.fields` overrides the returned _source fields
   * @returns {object} search request with default from/size applied
   */
  SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option) {
    // getting path by default is almost for debug
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const fields = (option && option.fields) || defaultFields;

    const byUpdatedAtDesc = { updated_at: { order: 'desc' } };
    const query = {
      index: this.aliasName,
      type: 'pages',
      body: {
        sort: [byUpdatedAtDesc],
        query: {}, // filled in later
        _source: fields,
      },
    };

    this.appendResultSize(query);
    return query;
  };
  /**
   * Create a search request skeleton sorted by `_score` (descending).
   * The `body.query` part is left empty for the caller to fill.
   *
   * @param {object} [option] `option.fields` overrides the returned _source fields
   * @returns {object} search request with default from/size applied
   */
  SearchClient.prototype.createSearchQuerySortedByScore = function(option) {
    const defaultFields = ['path', 'bookmark_count', 'tag_names'];
    const fields = (option && option.fields) || defaultFields;

    // sort by score
    const byScoreDesc = { _score: { order: 'desc' } };
    const query = {
      index: this.aliasName,
      type: 'pages',
      body: {
        sort: [byScoreDesc],
        query: {}, // filled in later
        _source: fields,
      },
    };

    this.appendResultSize(query);
    return query;
  };
  /**
   * Set paging parameters on a search request.
   * Falsy values (including 0) fall back to DEFAULT_OFFSET / DEFAULT_LIMIT.
   *
   * @param {object} query search request to modify in place
   * @param {number} [from] offset of the first hit
   * @param {number} [size] maximum number of hits
   */
  SearchClient.prototype.appendResultSize = function(query, from, size) {
    const offset = from || this.DEFAULT_OFFSET;
    const limit = size || this.DEFAULT_LIMIT;
    Object.assign(query, { from: offset, size: limit });
  };
  /**
   * Make sure `query.body.query.bool` exists and that its `filter`, `must` and `must_not`
   * clauses are arrays. Existing arrays are kept; anything else is replaced by an empty array.
   *
   * @param {object} query request created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
   * @returns {object} the same `query` object
   */
  SearchClient.prototype.initializeBoolQuery = function(query) {
    const root = query.body.query;
    if (!root.bool) {
      root.bool = {};
    }

    const { bool } = root;
    ['filter', 'must', 'must_not'].forEach((clause) => {
      if (!Array.isArray(bool[clause])) {
        bool[clause] = [];
      }
    });

    return query;
  };
  /**
   * Translate a user-entered query string into bool-query clauses and append them to `query`.
   *
   * Mapping of the parts returned by parseQueryString():
   *   match / phrase                -> bool.must
   *   not_match / not_phrase        -> bool.must_not
   *   prefix, not_prefix, tag, not_tag -> bool.filter (each wrapped in a nested bool)
   *
   * Phrase clauses are appended one by one. The previous implementation pushed the whole
   * array of phrase clauses as a single element, which produced a nested array inside
   * `must` / `must_not` instead of a flat list of query objects.
   *
   * @param {object} query search request; modified in place
   * @param {string} queryString raw query string
   */
  SearchClient.prototype.appendCriteriaForQueryString = function(query, queryString) {
    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign

    const { bool } = query.body.query;

    // parse
    const parsedKeywords = this.parseQueryString(queryString);

    // each phrase is a quoted run of words
    const toPhraseQuery = (phrase) => {
      return {
        multi_match: {
          query: phrase,
          type: 'phrase',
          fields: [
            // Not use "*.ja" fields here, because we want to analyze (parse) search words
            'path.raw^2',
            'body',
          ],
        },
      };
    };
    const toPrefixQuery = (path) => { return { prefix: { 'path.raw': path } } };
    const toTagQuery = (tag) => { return { term: { tag_names: tag } } };

    if (parsedKeywords.match.length > 0) {
      bool.must.push({
        multi_match: {
          query: parsedKeywords.match.join(' '),
          type: 'most_fields',
          fields: ['path.ja^2', 'path.en^2', 'body.ja', 'body.en'],
        },
      });
    }

    if (parsedKeywords.not_match.length > 0) {
      bool.must_not.push({
        multi_match: {
          query: parsedKeywords.not_match.join(' '),
          fields: ['path.ja', 'path.en', 'body.ja', 'body.en'],
          operator: 'or',
        },
      });
    }

    if (parsedKeywords.phrase.length > 0) {
      // spread: every phrase becomes its own clause of the flat `must` array
      bool.must.push(...parsedKeywords.phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.not_phrase.length > 0) {
      bool.must_not.push(...parsedKeywords.not_phrase.map(toPhraseQuery));
    }

    if (parsedKeywords.prefix.length > 0) {
      bool.filter.push({ bool: { should: parsedKeywords.prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.not_prefix.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_prefix.map(toPrefixQuery) } });
    }

    if (parsedKeywords.tag.length > 0) {
      bool.filter.push({ bool: { must: parsedKeywords.tag.map(toTagQuery) } });
    }

    if (parsedKeywords.not_tag.length > 0) {
      bool.filter.push({ bool: { must_not: parsedKeywords.not_tag.map(toTagQuery) } });
    }
  };
  /**
   * Append a filter that limits hits to pages the viewer is allowed to see in lists.
   *
   * Public pages always match. Restricted pages match only when the user is among
   * `granted_users`. Pages with GRANT_SPECIFIED / GRANT_OWNER and GRANT_USER_GROUP match
   * unconditionally unless the corresponding "hideRestrictedBy*" config is set, in which
   * case they match only for granted users / groups.
   *
   * @param {object} query search request; modified in place
   * @param {object} [user] viewer; `user._id` is compared against `granted_users`
   * @param {object[]} [userGroups] groups of the viewer; their `_id` is compared against `granted_group`
   * @returns {Promise<void>}
   */
  SearchClient.prototype.filterPagesByViewer = async function(query, user, userGroups) {
    const hideRestrictedByOwner = this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByOwner');
    const hideRestrictedByGroup = this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByGroup');

    query = this.initializeBoolQuery(query); // eslint-disable-line no-param-reassign

    const Page = this.crowi.model('Page');
    const {
      GRANT_PUBLIC, GRANT_RESTRICTED, GRANT_SPECIFIED, GRANT_OWNER, GRANT_USER_GROUP,
    } = Page;

    const hasUser = user != null;
    const byGrant = (grant) => { return { term: { grant } } };
    // matches pages of the given grant that list the viewer in granted_users
    const grantedToUser = (grant) => {
      return {
        bool: {
          must: [
            byGrant(grant),
            { term: { granted_users: user._id.toString() } },
          ],
        },
      };
    };

    const grantConditions = [byGrant(GRANT_PUBLIC)];

    // ensure to hit to GRANT_RESTRICTED pages that the user specified at own
    if (hasUser) {
      grantConditions.push(grantedToUser(GRANT_RESTRICTED));
    }

    if (!hideRestrictedByOwner) {
      grantConditions.push(byGrant(GRANT_SPECIFIED), byGrant(GRANT_OWNER));
    }
    else if (hasUser) {
      grantConditions.push(grantedToUser(GRANT_SPECIFIED), grantedToUser(GRANT_OWNER));
    }

    if (!hideRestrictedByGroup) {
      grantConditions.push(byGrant(GRANT_USER_GROUP));
    }
    else if (userGroups != null && userGroups.length > 0) {
      const userGroupIds = userGroups.map((group) => { return group._id.toString() });
      grantConditions.push({
        bool: {
          must: [
            byGrant(GRANT_USER_GROUP),
            { terms: { granted_group: userGroupIds } },
          ],
        },
      });
    }

    query.body.query.bool.filter.push({ bool: { should: grantConditions } });
  };
  /**
   * Restrict a search request to portal pages: exclude the USER query and
   * filter by the PORTAL query.
   *
   * @param {object} query search request; modified in place
   */
  SearchClient.prototype.filterPortalPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PORTAL } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PORTAL);
  };
  /**
   * Restrict a search request to public pages: exclude the USER query and
   * filter by the PUBLIC query.
   *
   * @param {object} query search request; modified in place
   */
  SearchClient.prototype.filterPublicPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    const { USER, PUBLIC } = this.queries;

    bool.must_not.push(USER);
    bool.filter.push(PUBLIC);
  };
  /**
   * Restrict a search request to user pages by filtering with the USER query.
   *
   * @param {object} query search request; modified in place
   */
  SearchClient.prototype.filterUserPages = function(query) {
    const { bool } = this.initializeBoolQuery(query).body.query;
    bool.filter.push(this.queries.USER);
  };
  /**
   * Apply the page-type filter matching `type`.
   *
   * @param {object} query search request; modified in place for known types
   * @param {*} type one of Page.TYPE_PORTAL, Page.TYPE_PUBLIC, Page.TYPE_USER
   * @returns {*} the return value of the selected filter method, or `query` itself for any other type
   */
  SearchClient.prototype.filterPagesByType = function(query, type) {
    const Page = this.crowi.model('Page');

    if (type === Page.TYPE_PORTAL) {
      return this.filterPortalPages(query);
    }
    if (type === Page.TYPE_PUBLIC) {
      return this.filterPublicPages(query);
    }
    if (type === Page.TYPE_USER) {
      return this.filterUserPages(query);
    }
    return query;
  };
  /**
   * Wrap `query.body.query` in a function_score query that boosts pages by bookmark count.
   * The boost factor is `10000 / <number of users>`.
   *
   * The user count is awaited. Before, the un-awaited return value of `User.count({})` was
   * used directly in the division, so `factor` was NaN whenever `count()` returns a
   * promise/query object rather than a number.
   *
   * @param {object} query search request; `query.body.query` is replaced
   * @param {string} queryString raw query string (only used for the debug log)
   * @returns {Promise<void>} resolves once the query has been rewritten; callers must await it
   */
  SearchClient.prototype.appendFunctionScore = async function(query, queryString) {
    const User = this.crowi.model('User');
    // fall back to 1 to avoid a division by zero when there is no user yet
    const count = (await User.count({})) || 1;

    const minScore = queryString.length * 0.1 - 1; // increase with length
    logger.debug('min_score: ', minScore);

    query.body.query = {
      function_score: {
        query: { ...query.body.query },
        // min_score is intentionally not sent -- disabled 2019.02.28 by Yuki Takei,
        // more precise adjustment is needed
        field_value_factor: {
          field: 'bookmark_count',
          modifier: 'log1p',
          factor: 10000 / count,
          missing: 0,
        },
        boost_mode: 'sum',
      },
    };
  };

  /**
   * Search pages by a query string on behalf of a viewer.
   *
   * @param {string} queryString raw query string
   * @param {object} [user] viewer, see filterPagesByViewer()
   * @param {object[]} [userGroups] groups of the viewer, see filterPagesByViewer()
   * @param {object} [option] `offset`, `limit` and `type` are read; a missing option behaves like `{}`
   * @returns {Promise<object>} result of search()
   */
  SearchClient.prototype.searchKeyword = async function(queryString, user, userGroups, option) {
    const { offset, limit, type } = option || {};
    const from = offset || null;
    const size = limit || null;

    const query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForQueryString(query, queryString);

    this.filterPagesByType(query, type || null);
    await this.filterPagesByViewer(query, user, userGroups);

    this.appendResultSize(query, from, size);

    // must be awaited: the query is rewritten only after the user count is known
    await this.appendFunctionScore(query, queryString);

    return this.search(query);
  };
  /**
   * Split a query string into categorized keywords.
   *
   * - quoted phrases (`"foo bar"`, `-"foo bar"`) are extracted first; the quotes are kept,
   *   a leading `-` is removed and puts the phrase into `not_phrase`
   * - the remaining words are split on whitespace; `prefix:` and `tag:` select the
   *   prefix / tag lists, a leading `-` selects the negated variant of a list
   *
   * @param {string} queryString
   * @returns {{match: string[], not_match: string[], phrase: string[], not_phrase: string[],
   *            prefix: string[], not_prefix: string[], tag: string[], not_tag: string[]}}
   */
  SearchClient.prototype.parseQueryString = function(queryString) {
    const parsed = {
      match: [],
      not_match: [],
      phrase: [],
      not_phrase: [],
      prefix: [],
      not_prefix: [],
      tag: [],
      not_tag: [],
    };

    const phrasePattern = /(-?"[^"]+")/g;
    const normalized = queryString.replace(/\s+/g, ' ');

    // First: Parse phrase keywords
    const quoted = normalized.match(phrasePattern) || [];
    for (const phrase of quoted) {
      if (phrase.match(/^-/)) {
        parsed.not_phrase.push(phrase.replace(/^-/, ''));
      }
      else {
        parsed.phrase.push(phrase);
      }
    }
    const remainder = normalized.replace(phrasePattern, '');

    // Second: Parse other keywords (include minus keywords)
    for (const word of remainder.split(' ')) {
      if (word === '') {
        continue;
      }

      // https://regex101.com/r/pN9XfK/1
      const negative = word.match(/^-(prefix:|tag:)?(.+)$/);
      // https://regex101.com/r/3qw9FQ/1
      const matched = negative || word.match(/^(prefix:|tag:)?(.+)$/);
      if (matched == null) {
        continue;
      }

      const [, operator, term] = matched;
      let kind = 'match';
      if (operator === 'prefix:') {
        kind = 'prefix';
      }
      else if (operator === 'tag:') {
        kind = 'tag';
      }

      const bucket = (negative != null) ? `not_${kind}` : kind;
      parsed[bucket].push(term);
    }

    return parsed;
  };
  /**
   * Event handler: add a newly created page to the index.
   * Pages rejected by shouldIndexed() are ignored. The bulk request is not awaited;
   * its outcome is only logged.
   *
   * @param {object} page created page; `bookmarkCount` is set on it
   * @param {object} user not used by this handler
   * @param {number} [bookmarkCount=0]
   */
  SearchClient.prototype.syncPageCreated = function(page, user, bookmarkCount = 0) {
    debug('SearchClient.syncPageCreated', page.path);

    if (this.shouldIndexed(page)) {
      page.bookmarkCount = bookmarkCount;

      const onResponse = (res) => { debug('ES Response', res) };
      const onError = (err) => { logger.error('ES Error', err) };
      this.addPages([page]).then(onResponse).catch(onError);
    }
  };
  /**
   * Event handler: propagate a page update to the index.
   * A page rejected by shouldIndexed() is removed from the index instead of being updated.
   * Requests are not awaited; their outcome is only logged.
   *
   * @param {object} page updated page; `bookmarkCount` is set on it when it is (re)indexed
   * @param {object} user not used by this handler
   * @param {number} [bookmarkCount=0]
   */
  SearchClient.prototype.syncPageUpdated = function(page, user, bookmarkCount = 0) {
    debug('SearchClient.syncPageUpdated', page.path);

    const isIndexable = this.shouldIndexed(page);

    // TODO delete
    if (!isIndexable) {
      const onDeleted = (res) => { debug('deletePages: ES Response', res) };
      const onDeleteError = (err) => { logger.error('deletePages:ES Error', err) };
      this.deletePages([page]).then(onDeleted).catch(onDeleteError);
      return;
    }

    page.bookmarkCount = bookmarkCount;

    const onUpdated = (res) => { debug('ES Response', res) };
    const onUpdateError = (err) => { logger.error('ES Error', err) };
    this.updatePages([page]).then(onUpdated).catch(onUpdateError);
  };
  /**
   * Event handler: remove a deleted page from the index.
   * The request is not awaited; its outcome is only logged.
   *
   * @param {object} page deleted page
   * @param {object} user not used by this handler
   */
  SearchClient.prototype.syncPageDeleted = function(page, user) {
    debug('SearchClient.syncPageDeleted', page.path);

    const onDeleted = (res) => { debug('deletePages: ES Response', res) };
    const onError = (err) => { logger.error('deletePages:ES Error', err) };
    this.deletePages([page]).then(onDeleted).catch(onError);
  };
  /**
   * Event handler: refresh the indexed bookmark count of a page after a bookmark
   * was created or deleted.
   *
   * The handler never rejects: it is registered as an event listener, so nobody would
   * handle a rejection. A page that cannot be found is skipped, and lookup or update
   * failures are logged. Previously a missing page caused a TypeError that surfaced as
   * an unhandled promise rejection.
   *
   * @param {*} pageId id of the bookmarked page
   * @returns {Promise<void>}
   */
  SearchClient.prototype.syncBookmarkChanged = async function(pageId) {
    const Page = this.crowi.model('Page');
    const Bookmark = this.crowi.model('Bookmark');

    try {
      const page = await Page.findById(pageId);
      if (page == null) {
        debug('SearchClient.syncBookmarkChanged: page not found', pageId);
        return;
      }

      page.bookmarkCount = await Bookmark.countByPageId(pageId);

      const res = await this.updatePages([page]);
      debug('ES Response', res);
    }
    catch (err) {
      logger.error('ES Error', err);
    }
  };
  /**
   * Event handler: re-send the index document of a page whose tags changed.
   * The update request is not awaited; its outcome is only logged.
   *
   * @param {object} page page whose tags were updated
   * @returns {Promise<void>}
   */
  SearchClient.prototype.syncTagChanged = async function(page) {
    const onResponse = (res) => { return debug('ES Response', res) };
    const onError = (err) => { return logger.error('ES Error', err) };

    this.updatePages([page]).then(onResponse).catch(onError);
  };
  795. module.exports = SearchClient;