// elasticsearch.js
  const logger = require('@alias/logger')('growi:service:search-delegator:elasticsearch');
  const elasticsearch = require('elasticsearch');
  const mongoose = require('mongoose');
  const { URL } = require('url');
  const {
    Writable, Transform, pipeline,
  } = require('stream');
  const streamToPromise = require('stream-to-promise');
  const { createBatchStream } = require('@server/util/batch-stream');
  // Offset applied by appendResultSize() when the caller does not give one.
  const DEFAULT_OFFSET = 0;
  // Number of hits requested by appendResultSize() when the caller does not give a limit.
  const DEFAULT_LIMIT = 50;
  // Number of page documents grouped into one bulk request by updateOrInsertPages().
  const BULK_REINDEX_SIZE = 100;
  /**
   * Search delegator that talks to Elasticsearch.
   *
   * It maintains the index and its alias, synchronizes page documents with
   * bulk requests, and builds / executes search queries.
   */
  class ElasticsearchDelegator {

    /**
     * @param {object} configManager used through `getConfig(namespace, key)`
     * @param {object} socketIoService used through `getAdminSocket()`
     */
    constructor(configManager, socketIoService) {
      this.configManager = configManager;
      this.socketIoService = socketIoService;

      this.client = null;

      // In Elasticsearch RegExp, we don't need to use ^ and $.
      // Ref: https://www.elastic.co/guide/en/elasticsearch/reference/5.6/query-dsl-regexp-query.html#_standard_operators
      this.queries = {
        PORTAL: {
          regexp: {
            'path.raw': '.*/',
          },
        },
        PUBLIC: {
          regexp: {
            'path.raw': '.*[^/]',
          },
        },
        USER: {
          prefix: {
            'path.raw': '/user/',
          },
        },
      };

      this.initClient();
    }

    /**
     * Name of the alias that search queries are sent to.
     */
    get aliasName() {
      return `${this.indexName}-alias`;
    }

    /**
     * @param {object} page
     * @return {boolean} true when the page has a revision and is not a redirect
     */
    shouldIndexed(page) {
      return page.revision != null && page.redirectTo == null;
    }

    /**
     * Create the Elasticsearch client and remember the index name.
     */
    initClient() {
      const { host, httpAuth, indexName } = this.getConnectionInfo();

      this.client = new elasticsearch.Client({
        host,
        httpAuth,
        requestTimeout: this.configManager.getConfig('crowi', 'app:elasticsearchRequestTimeout'),
      });
      this.indexName = indexName;
    }

    /**
     * return information object to connect to ES
     *
     * The host is always derived from the configured URI. The first path
     * segment (if any) overrides the default index name 'crowi', and the
     * credentials are used only when the URI actually contains them.
     *
     * @return {object} { host, httpAuth, indexName}
     */
    getConnectionInfo() {
      const elasticsearchUri = this.configManager.getConfig('crowi', 'app:elasticsearchUri');
      const url = new URL(elasticsearchUri);

      const host = `${url.protocol}//${url.host}`;

      let indexName = 'crowi';
      const pathWithoutHeadingSlash = url.pathname.substring(1);
      if (pathWithoutHeadingSlash !== '') {
        indexName = pathWithoutHeadingSlash;
      }

      // URL#username / URL#password are empty strings (never null) when absent
      let httpAuth = '';
      if (url.username !== '' || url.password !== '') {
        httpAuth = `${url.username}:${url.password}`;
      }

      return {
        host,
        httpAuth,
        indexName,
      };
    }

    /**
     * Normalize indices so that the delegator is ready to use.
     */
    async init() {
      return this.normalizeIndices();
    }

    /**
     * return Nodes Info
     * `cluster:monitor/nodes/info` privilege is required on ES
     * @return {object} `{ esVersion, esNodeInfos }`
     *
     * @see https://www.elastic.co/guide/en/elasticsearch/reference/6.6/cluster-nodes-info.html
     */
    async getInfo() {
      const info = await this.client.nodes.info();

      if (!info._nodes || !info.nodes) {
        throw new Error('There is no nodes');
      }

      // the version of the last enumerated node wins
      let esVersion = 'unknown';
      const esNodeInfos = {};

      for (const [nodeName, nodeInfo] of Object.entries(info.nodes)) {
        esVersion = nodeInfo.version;

        esNodeInfos[nodeName] = {
          name: nodeInfo.name,
          version: nodeInfo.version,
          // tolerate a node entry without plugin information
          plugins: (nodeInfo.plugins || []).map((pluginInfo) => {
            return {
              name: pluginInfo.name,
              version: pluginInfo.version,
            };
          }),
        };
      }

      return { esVersion, esNodeInfos };
    }

    /**
     * return Cluster Health
     * `cluster:monitor/health` privilege is required on ES
     * @return {object} `{ esClusterHealth }`
     *
     * @see https://www.elastic.co/guide/en/elasticsearch/reference/6.6/cluster-health.html
     */
    async getInfoForHealth() {
      const esClusterHealth = await this.client.cluster.health();
      return { esClusterHealth };
    }

    /**
     * Return information for Admin Full Text Search Management page
     * @return {object} `{ indices, aliases, isNormalized }`
     */
    async getInfoForAdmin() {
      const { client, indexName, aliasName } = this;
      const tmpIndexName = `${indexName}-tmp`;

      // check existence
      const isExistsMainIndex = await client.indices.exists({ index: indexName });
      const isExistsTmpIndex = await client.indices.exists({ index: tmpIndexName });

      // create indices name list
      const existingIndices = [];
      if (isExistsMainIndex) { existingIndices.push(indexName) }
      if (isExistsTmpIndex) { existingIndices.push(tmpIndexName) }

      // results when there is no indices
      if (existingIndices.length === 0) {
        return {
          indices: [],
          aliases: [],
          isNormalized: false,
        };
      }

      const { indices } = await client.indices.stats({ index: existingIndices, ignore_unavailable: true, metric: ['docs', 'store', 'indexing'] });
      const aliases = await client.indices.getAlias({ index: existingIndices });

      // true when the response lists the alias for the given index
      const hasAlias = (name) => {
        const entry = aliases[name];
        return entry != null && entry.aliases != null && entry.aliases[aliasName] != null;
      };

      const isMainIndexHasAlias = isExistsMainIndex && hasAlias(indexName);
      const isTmpIndexHasAlias = isExistsTmpIndex && hasAlias(tmpIndexName);

      const isNormalized = isExistsMainIndex && isMainIndexHasAlias && !isExistsTmpIndex && !isTmpIndexHasAlias;

      return {
        indices,
        aliases,
        isNormalized,
      };
    }

    /**
     * rebuild index
     *
     * Copies the current index to a temporary one, points the alias to it,
     * recreates the main index and adds all pages again. Indices are
     * normalized afterwards regardless of the outcome; on failure a
     * 'rebuildingFailed' event is emitted and the error is rethrown.
     */
    async rebuildIndex() {
      const { client, indexName, aliasName } = this;
      const tmpIndexName = `${indexName}-tmp`;

      try {
        // reindex to tmp index
        await this.createIndex(tmpIndexName);
        // NOTE(review): with waitForCompletion: false this call does not wait for the copy,
        // so the alias switch and the deletion below may run before it has finished -- confirm this is intended
        await client.reindex({
          waitForCompletion: false,
          body: {
            source: { index: indexName },
            dest: { index: tmpIndexName },
          },
        });

        // update alias
        await client.indices.updateAliases({
          body: {
            actions: [
              { add: { alias: aliasName, index: tmpIndexName } },
              { remove: { alias: aliasName, index: indexName } },
            ],
          },
        });

        // flush index
        await client.indices.delete({
          index: indexName,
        });
        await this.createIndex(indexName);
        await this.addAllPages();
      }
      catch (error) {
        logger.warn('An error occured while \'rebuildIndex\', normalize indices anyway.');

        const socket = this.socketIoService.getAdminSocket();
        socket.emit('rebuildingFailed', { error: error.message });

        throw error;
      }
      finally {
        await this.normalizeIndices();
      }
    }

    /**
     * Remove the temporary index if it exists, and make sure both the main
     * index and its alias exist.
     */
    async normalizeIndices() {
      const { client, indexName, aliasName } = this;
      const tmpIndexName = `${indexName}-tmp`;

      // remove tmp index
      const isExistsTmpIndex = await client.indices.exists({ index: tmpIndexName });
      if (isExistsTmpIndex) {
        await client.indices.delete({ index: tmpIndexName });
      }

      // create index
      const isExistsIndex = await client.indices.exists({ index: indexName });
      if (!isExistsIndex) {
        await this.createIndex(indexName);
      }

      // create alias
      const isExistsAlias = await client.indices.existsAlias({ name: aliasName, index: indexName });
      if (!isExistsAlias) {
        await client.indices.putAlias({
          name: aliasName,
          index: indexName,
        });
      }
    }

    /**
     * Create an index whose body is loaded from the mappings resource file.
     * @param {string} index name of the index to create
     */
    async createIndex(index) {
      const body = require('@root/resource/search/mappings.json');
      return this.client.indices.create({ index, body });
    }

    /**
     * generate object that is related to page.grant*
     *
     * Granted users and the granted group may be given either as documents
     * (having `_id`) or as plain ids; both are converted to strings.
     *
     * @param {object} page
     * @return {object} `{ grant, granted_users, granted_group }`
     */
    generateDocContentsRelatedToRestriction(page) {
      let grantedUserIds = null;
      if (page.grantedUsers != null && page.grantedUsers.length > 0) {
        grantedUserIds = page.grantedUsers.map((user) => {
          const userId = (user._id == null) ? user : user._id;
          return userId.toString();
        });
      }

      let grantedGroupId = null;
      if (page.grantedGroup != null) {
        const groupId = (page.grantedGroup._id == null) ? page.grantedGroup : page.grantedGroup._id;
        grantedGroupId = groupId.toString();
      }

      return {
        grant: page.grant,
        granted_users: grantedUserIds,
        granted_group: grantedGroupId,
      };
    }

    /**
     * Append an 'index' command and its document for the page to a bulk body.
     * @param {Array} body bulk request body (mutated)
     * @param {object} page page with `revision` (and optionally `creator`) populated
     * @throws {Error} when body is not an array
     */
    prepareBodyForCreate(body, page) {
      if (!Array.isArray(body)) {
        throw new Error('Body must be an array.');
      }

      const command = {
        index: {
          _index: this.indexName,
          _type: 'pages',
          _id: page._id.toString(),
        },
      };

      const document = {
        path: page.path,
        body: page.revision.body,
        username: page.creator != null ? page.creator.username : null,
        comment_count: page.commentCount,
        bookmark_count: page.bookmarkCount || 0,
        // a page document may come without the liker array
        like_count: page.liker != null ? (page.liker.length || 0) : 0,
        created_at: page.createdAt,
        updated_at: page.updatedAt,
        tag_names: page.tagNames,
        ...this.generateDocContentsRelatedToRestriction(page),
      };

      body.push(command);
      body.push(document);
    }

    /**
     * Append a 'delete' command for the page to a bulk body.
     * @param {Array} body bulk request body (mutated)
     * @param {object} page
     * @throws {Error} when body is not an array
     */
    prepareBodyForDelete(body, page) {
      if (!Array.isArray(body)) {
        throw new Error('Body must be an array.');
      }

      body.push({
        delete: {
          _index: this.indexName,
          _type: 'pages',
          _id: page._id.toString(),
        },
      });
    }

    /**
     * Index all pages, emitting progress events and invoking GC per batch.
     */
    addAllPages() {
      const Page = mongoose.model('Page');
      return this.updateOrInsertPages(() => Page.find(), { isEmittingProgressEvent: true, invokeGarbageCollection: true });
    }

    /**
     * Index the page that has the given id.
     */
    updateOrInsertPageById(pageId) {
      const Page = mongoose.model('Page');
      return this.updateOrInsertPages(() => Page.findById(pageId));
    }

    /**
     * Index the given page together with its descendants (selected by path).
     */
    updateOrInsertDescendantsPagesById(page, user) {
      const Page = mongoose.model('Page');
      const { PageQueryBuilder } = Page;

      // build a fresh query on every call because updateOrInsertPages() invokes the factory twice
      return this.updateOrInsertPages(() => {
        const builder = new PageQueryBuilder(Page.find());
        builder.addConditionToListWithDescendants(page.path);
        return builder.query;
      });
    }

    /**
     * Stream the pages selected by the query into Elasticsearch with bulk requests.
     *
     * @param {function} queryFactory factory method to generate a Mongoose Query instance
     * @param {object} [option]
     * @param {boolean} [option.isEmittingProgressEvent=false] emit 'addPageProgress' / 'finishAddPage' to the admin socket
     * @param {boolean} [option.invokeGarbageCollection=false] call global.gc() after each batch
     * @return {Promise} resolved when all batches have been processed, rejected when a stream fails
     */
    async updateOrInsertPages(queryFactory, option = {}) {
      const { isEmittingProgressEvent = false, invokeGarbageCollection = false } = option;

      const Page = mongoose.model('Page');
      const { PageQueryBuilder } = Page;
      const Bookmark = mongoose.model('Bookmark');
      const PageTagRelation = mongoose.model('PageTagRelation');

      const socket = this.socketIoService.getAdminSocket();

      // prepare functions invoked from custom streams
      const prepareBodyForCreate = this.prepareBodyForCreate.bind(this);
      const shouldIndexed = this.shouldIndexed.bind(this);
      const bulkWrite = this.client.bulk.bind(this.client);

      const findQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;
      const countQuery = new PageQueryBuilder(queryFactory()).addConditionToExcludeRedirect().query;

      const totalCount = await countQuery.count();

      const readStream = findQuery
        // populate data which will be referenced by prepareBodyForCreate()
        .populate([
          { path: 'creator', model: 'User', select: 'username' },
          { path: 'revision', model: 'Revision', select: 'body' },
        ])
        .lean()
        .cursor();

      // drop documents that should not be indexed
      let skipped = 0;
      const thinOutStream = new Transform({
        objectMode: true,
        transform(doc, encoding, callback) {
          if (shouldIndexed(doc)) {
            this.push(doc);
          }
          else {
            skipped++;
          }
          callback();
        },
      });

      const batchStream = createBatchStream(BULK_REINDEX_SIZE);

      // create a stream that sets `propertyName` of each doc in a batch from an id-keyed map
      const createAppendingStream = (fetchIdToValueMap, propertyName) => {
        return new Transform({
          objectMode: true,
          async transform(chunk, encoding, callback) {
            try {
              const idToValueMap = await fetchIdToValueMap(chunk.map(doc => doc._id));
              chunk.forEach((doc) => {
                const id = doc._id.toString();
                if (Object.prototype.hasOwnProperty.call(idToValueMap, id)) {
                  doc[propertyName] = idToValueMap[id];
                }
              });
            }
            catch (err) {
              // hand the failure to the stream instead of leaving a rejected promise behind
              callback(err);
              return;
            }
            callback(null, chunk);
          },
        });
      };

      const appendBookmarkCountStream = createAppendingStream(pageIds => Bookmark.getPageIdToCountMap(pageIds), 'bookmarkCount');
      const appendTagNamesStream = createAppendingStream(pageIds => PageTagRelation.getIdToTagNamesMap(pageIds), 'tagNames');

      let count = 0;
      const writeStream = new Writable({
        objectMode: true,
        async write(batch, encoding, callback) {
          // a failed batch is logged and skipped so that the remaining batches are still processed
          try {
            const body = [];
            batch.forEach(doc => prepareBodyForCreate(body, doc));

            const res = await bulkWrite({
              body,
              requestTimeout: Infinity,
            });

            count += (res.items || []).length;

            logger.info(`Adding pages progressing: (count=${count}, errors=${res.errors}, took=${res.took}ms)`);

            if (isEmittingProgressEvent) {
              socket.emit('addPageProgress', { totalCount, count, skipped });
            }
          }
          catch (err) {
            logger.error('addAllPages error on add anyway: ', err);
          }

          if (invokeGarbageCollection) {
            try {
              // First aid to prevent unexplained memory leaks
              global.gc();
            }
            catch (err) {
              logger.error('fail garbage collection: ', err);
            }
          }

          callback();
        },
        final(callback) {
          logger.info(`Adding pages has completed: (totalCount=${totalCount}, skipped=${skipped})`);

          if (isEmittingProgressEvent) {
            socket.emit('finishAddPage', { totalCount, count, skipped });
          }
          callback();
        },
      });

      // pipeline() reports an error raised by any of the streams,
      // so the returned promise always settles
      return new Promise((resolve, reject) => {
        pipeline(
          readStream,
          thinOutStream,
          batchStream,
          appendBookmarkCountStream,
          appendTagNamesStream,
          writeStream,
          (err) => {
            if (err != null) {
              reject(err);
              return;
            }
            resolve();
          },
        );
      });
    }

    /**
     * Delete the documents of the given pages with one bulk request.
     * @param {Array} pages
     */
    deletePages(pages) {
      const body = [];
      pages.forEach(page => this.prepareBodyForDelete(body, page));

      logger.debug('deletePages(): Sending Request to ES', body);
      return this.client.bulk({
        body,
      });
    }

    /**
     * search returning type:
     * {
     *   meta: { took: Integer, total: Integer, results: Integer},
     *   data: [ pages ...],
     * }
     */
    async search(query) {
      // for debug
      if (process.env.NODE_ENV === 'development') {
        const validation = await this.client.indices.validateQuery({
          explain: true,
          body: {
            query: query.body.query,
          },
        });
        logger.debug('ES returns explanations: ', validation.explanations);
      }

      const result = await this.client.search(query);

      // for debug
      logger.debug('ES result: ', result);

      return {
        meta: {
          took: result.took,
          total: result.hits.total,
          results: result.hits.hits.length,
        },
        data: result.hits.hits.map((elm) => {
          return { _id: elm._id, _score: elm._score, _source: elm._source };
        }),
      };
    }

    /**
     * Create a query skeleton sorted by updated_at (descending).
     * @param {object} [option] `option.fields` overrides the returned source fields
     */
    createSearchQuerySortedByUpdatedAt(option) {
      // getting path by default is almost for debug
      let fields = ['path', 'bookmark_count', 'tag_names'];
      if (option) {
        fields = option.fields || fields;
      }

      const query = {
        index: this.aliasName,
        type: 'pages',
        body: {
          sort: [{ updated_at: { order: 'desc' } }],
          query: {}, // query
          _source: fields,
        },
      };
      this.appendResultSize(query);

      return query;
    }

    /**
     * Create a query skeleton sorted by score (descending).
     * @param {object} [option] `option.fields` overrides the returned source fields
     */
    createSearchQuerySortedByScore(option) {
      let fields = ['path', 'bookmark_count', 'tag_names'];
      if (option) {
        fields = option.fields || fields;
      }

      const query = {
        index: this.aliasName,
        type: 'pages',
        body: {
          sort: [{ _score: { order: 'desc' } }],
          query: {}, // query
          _source: fields,
        },
      };
      this.appendResultSize(query);

      return query;
    }

    /**
     * Set paging parameters on the query; falsy values fall back to the defaults.
     */
    appendResultSize(query, from, size) {
      query.from = from || DEFAULT_OFFSET;
      query.size = size || DEFAULT_LIMIT;
    }

    /**
     * Make sure `query.body.query.bool` has `filter`, `must` and `must_not` arrays.
     * @param {object} query created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
     * @return {object} the same query object
     */
    initializeBoolQuery(query) {
      if (!query.body.query.bool) {
        query.body.query.bool = {};
      }

      const { bool } = query.body.query;
      ['filter', 'must', 'must_not'].forEach((clause) => {
        if (!Array.isArray(bool[clause])) {
          bool[clause] = [];
        }
      });

      return query;
    }

    /**
     * Translate the query string into bool clauses and append them to the query.
     * @param {object} query (mutated)
     * @param {string} queryString
     */
    appendCriteriaForQueryString(query, queryString) {
      this.initializeBoolQuery(query);
      const { bool } = query.body.query;

      // parse
      const parsedKeywords = this.parseQueryString(queryString);

      if (parsedKeywords.match.length > 0) {
        bool.must.push({
          multi_match: {
            query: parsedKeywords.match.join(' '),
            type: 'most_fields',
            fields: ['path.ja^2', 'path.en^2', 'body.ja', 'body.en'],
          },
        });
      }

      if (parsedKeywords.not_match.length > 0) {
        bool.must_not.push({
          multi_match: {
            query: parsedKeywords.not_match.join(' '),
            fields: ['path.ja', 'path.en', 'body.ja', 'body.en'],
            operator: 'or',
          },
        });
      }

      const toPhraseQuery = (phrase) => {
        return {
          multi_match: {
            query: phrase, // each phrase is quoted words
            type: 'phrase',
            fields: [
              // Not use "*.ja" fields here, because we want to analyze (parse) search words
              'path.raw^2',
              'body',
            ],
          },
        };
      };

      // each phrase query is appended as its own clause (not as one nested array)
      if (parsedKeywords.phrase.length > 0) {
        bool.must.push(...parsedKeywords.phrase.map(toPhraseQuery));
      }

      if (parsedKeywords.not_phrase.length > 0) {
        bool.must_not.push(...parsedKeywords.not_phrase.map(toPhraseQuery));
      }

      const toPrefixQuery = (path) => { return { prefix: { 'path.raw': path } } };
      const toTagQuery = (tag) => { return { term: { tag_names: tag } } };

      if (parsedKeywords.prefix.length > 0) {
        bool.filter.push({ bool: { should: parsedKeywords.prefix.map(toPrefixQuery) } });
      }

      if (parsedKeywords.not_prefix.length > 0) {
        bool.filter.push({ bool: { must_not: parsedKeywords.not_prefix.map(toPrefixQuery) } });
      }

      if (parsedKeywords.tag.length > 0) {
        bool.filter.push({ bool: { must: parsedKeywords.tag.map(toTagQuery) } });
      }

      if (parsedKeywords.not_tag.length > 0) {
        bool.filter.push({ bool: { must_not: parsedKeywords.not_tag.map(toTagQuery) } });
      }
    }

    /**
     * Append a filter so that only pages matching the viewer's grants are hit.
     * @param {object} query (mutated)
     * @param {object} user viewer (may be null)
     * @param {Array} userGroups groups of the viewer (may be null)
     */
    async filterPagesByViewer(query, user, userGroups) {
      const showPagesRestrictedByOwner = !this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByOwner');
      const showPagesRestrictedByGroup = !this.configManager.getConfig('crowi', 'security:list-policy:hideRestrictedByGroup');

      this.initializeBoolQuery(query);

      const Page = mongoose.model('Page');
      const {
        GRANT_PUBLIC, GRANT_RESTRICTED, GRANT_SPECIFIED, GRANT_OWNER, GRANT_USER_GROUP,
      } = Page;

      // condition for pages of the given grant that list the viewer in granted_users
      const grantedToViewer = (grant) => {
        return {
          bool: {
            must: [
              { term: { grant } },
              { term: { granted_users: user._id.toString() } },
            ],
          },
        };
      };

      const grantConditions = [
        { term: { grant: GRANT_PUBLIC } },
      ];

      // ensure to hit to GRANT_RESTRICTED pages that the user specified at own
      if (user != null) {
        grantConditions.push(grantedToViewer(GRANT_RESTRICTED));
      }

      if (showPagesRestrictedByOwner) {
        grantConditions.push(
          { term: { grant: GRANT_SPECIFIED } },
          { term: { grant: GRANT_OWNER } },
        );
      }
      else if (user != null) {
        grantConditions.push(
          grantedToViewer(GRANT_SPECIFIED),
          grantedToViewer(GRANT_OWNER),
        );
      }

      if (showPagesRestrictedByGroup) {
        grantConditions.push(
          { term: { grant: GRANT_USER_GROUP } },
        );
      }
      else if (userGroups != null && userGroups.length > 0) {
        const userGroupIds = userGroups.map((group) => { return group._id.toString() });
        grantConditions.push(
          {
            bool: {
              must: [
                { term: { grant: GRANT_USER_GROUP } },
                { terms: { granted_group: userGroupIds } },
              ],
            },
          },
        );
      }

      query.body.query.bool.filter.push({ bool: { should: grantConditions } });
    }

    /**
     * Restrict the query to portal pages (path ends with '/') outside '/user/'.
     */
    filterPortalPages(query) {
      this.initializeBoolQuery(query);

      query.body.query.bool.must_not.push(this.queries.USER);
      query.body.query.bool.filter.push(this.queries.PORTAL);
    }

    /**
     * Restrict the query to pages whose path does not end with '/' and that are outside '/user/'.
     */
    filterPublicPages(query) {
      this.initializeBoolQuery(query);

      query.body.query.bool.must_not.push(this.queries.USER);
      query.body.query.bool.filter.push(this.queries.PUBLIC);
    }

    /**
     * Restrict the query to pages under '/user/'.
     */
    filterUserPages(query) {
      this.initializeBoolQuery(query);

      query.body.query.bool.filter.push(this.queries.USER);
    }

    /**
     * Apply the filter that corresponds to the page type; unknown types leave the query untouched.
     */
    filterPagesByType(query, type) {
      const Page = mongoose.model('Page');

      switch (type) {
        case Page.TYPE_PORTAL:
          return this.filterPortalPages(query);
        case Page.TYPE_PUBLIC:
          return this.filterPublicPages(query);
        case Page.TYPE_USER:
          return this.filterUserPages(query);
        default:
          return query;
      }
    }

    /**
     * Wrap the current query with a function_score that boosts by bookmark_count.
     * The factor is 10000 divided by the number of users (1 when there is none).
     *
     * @param {object} query (mutated)
     * @param {string} queryString
     * @return {Promise} resolved after the query has been wrapped
     */
    async appendFunctionScore(query, queryString) {
      const User = mongoose.model('User');
      // the count must be awaited; dividing by the pending query object would give NaN
      const count = (await User.count({})) || 1;

      const minScore = queryString.length * 0.1 - 1; // increase with length
      logger.debug('min_score: ', minScore);

      query.body.query = {
        function_score: {
          query: { ...query.body.query },
          // // disable min_score -- 2019.02.28 Yuki Takei
          // // more precise adjustment is needed...
          // min_score: minScore,
          field_value_factor: {
            field: 'bookmark_count',
            modifier: 'log1p',
            factor: 10000 / count,
            missing: 0,
          },
          boost_mode: 'sum',
        },
      };
    }

    /**
     * Search pages by keyword as the given viewer.
     * @param {string} queryString
     * @param {object} user viewer (may be null)
     * @param {Array} userGroups groups of the viewer (may be null)
     * @param {object} [option] `{ offset, limit, type }`
     * @return {Promise<object>} result of search()
     */
    async searchKeyword(queryString, user, userGroups, option) {
      const searchOption = option || {};
      const from = searchOption.offset || null;
      const size = searchOption.limit || null;
      const type = searchOption.type || null;

      const query = this.createSearchQuerySortedByScore();
      this.appendCriteriaForQueryString(query, queryString);

      this.filterPagesByType(query, type);
      await this.filterPagesByViewer(query, user, userGroups);

      this.appendResultSize(query, from, size);

      await this.appendFunctionScore(query, queryString);

      return this.search(query);
    }

    /**
     * Split a query string into keyword groups.
     *
     * Quoted phrases are extracted first (they keep their double quotes),
     * then the remaining words are classified by the optional leading '-'
     * and the 'prefix:' / 'tag:' markers.
     *
     * @param {string} queryString
     * @return {object} `{ match, not_match, phrase, not_phrase, prefix, not_prefix, tag, not_tag }`
     */
    parseQueryString(queryString) {
      const matchWords = [];
      const notMatchWords = [];
      const phraseWords = [];
      const notPhraseWords = [];
      const prefixPaths = [];
      const notPrefixPaths = [];
      const tags = [];
      const notTags = [];

      let remaining = queryString.trim().replace(/\s+/g, ' ');

      // First: Parse phrase keywords
      const phraseRegExp = /(-?"[^"]+")/g;
      const phrases = remaining.match(phraseRegExp);

      if (phrases !== null) {
        remaining = remaining.replace(phraseRegExp, '');

        phrases.forEach((phrase) => {
          if (phrase.startsWith('-')) {
            notPhraseWords.push(phrase.substring(1));
          }
          else {
            phraseWords.push(phrase);
          }
        });
      }

      // Second: Parse other keywords (include minus keywords)
      remaining.split(' ').forEach((word) => {
        if (word === '') {
          return;
        }

        // https://regex101.com/r/pN9XfK/1
        const matchNegative = word.match(/^-(prefix:|tag:)?(.+)$/);
        // https://regex101.com/r/3qw9FQ/1
        const matchPositive = word.match(/^(prefix:|tag:)?(.+)$/);

        if (matchNegative != null) {
          if (matchNegative[1] === 'prefix:') {
            notPrefixPaths.push(matchNegative[2]);
          }
          else if (matchNegative[1] === 'tag:') {
            notTags.push(matchNegative[2]);
          }
          else {
            notMatchWords.push(matchNegative[2]);
          }
        }
        else if (matchPositive != null) {
          if (matchPositive[1] === 'prefix:') {
            prefixPaths.push(matchPositive[2]);
          }
          else if (matchPositive[1] === 'tag:') {
            tags.push(matchPositive[2]);
          }
          else {
            matchWords.push(matchPositive[2]);
          }
        }
      });

      return {
        match: matchWords,
        not_match: notMatchWords,
        phrase: phraseWords,
        not_phrase: notPhraseWords,
        prefix: prefixPaths,
        not_prefix: notPrefixPaths,
        tag: tags,
        not_tag: notTags,
      };
    }

    /**
     * Re-index the page, or delete its document when it should not be indexed.
     */
    async syncPageUpdated(page, user) {
      logger.debug('SearchClient.syncPageUpdated', page.path);

      // delete if page should not indexed
      if (!this.shouldIndexed(page)) {
        try {
          await this.deletePages([page]);
        }
        catch (err) {
          logger.error('deletePages:ES Error', err);
        }
        return;
      }

      return this.updateOrInsertPageById(page._id);
    }

    /**
     * Remove the documents of the pages which should not be indexed.
     */
    async syncPagesUpdated(pages, user) {
      const pagesToDelete = [];
      pages.forEach((page) => {
        logger.debug('SearchClient.syncPageUpdated', page.path);
        if (!this.shouldIndexed(page)) {
          pagesToDelete.push(page);
        }
      });

      // delete if page should not indexed
      try {
        if (pagesToDelete.length !== 0) {
          await this.deletePages(pagesToDelete);
        }
      }
      catch (err) {
        logger.error('deletePages:ES Error', err);
      }
    }

    /**
     * Re-index the parent page and its descendants.
     */
    async syncDescendantsPagesUpdated(parentPage, user) {
      return this.updateOrInsertDescendantsPagesById(parentPage, user);
    }

    /**
     * Delete the documents of the given pages; failures are logged, not thrown.
     */
    async syncPagesDeletedCompletely(pages, user) {
      pages.forEach((page) => {
        logger.debug('SearchClient.syncPageDeleted', page.path);
      });

      try {
        return await this.deletePages(pages);
      }
      catch (err) {
        logger.error('deletePages:ES Error', err);
      }
    }

    /**
     * Delete the document of the given page; failures are logged, not thrown.
     */
    async syncPageDeleted(page, user) {
      logger.debug('SearchClient.syncPageDeleted', page.path);

      try {
        return await this.deletePages([page]);
      }
      catch (err) {
        logger.error('deletePages:ES Error', err);
      }
    }

    /**
     * Re-index the page whose bookmarks have changed.
     */
    async syncBookmarkChanged(pageId) {
      logger.debug('SearchClient.syncBookmarkChanged', pageId);

      return this.updateOrInsertPageById(pageId);
    }

    /**
     * Re-index the page whose tags have changed.
     */
    async syncTagChanged(page) {
      logger.debug('SearchClient.syncTagChanged', page.path);

      return this.updateOrInsertPageById(page._id);
    }

  }
  // Expose the delegator class as the module's export.
  module.exports = ElasticsearchDelegator;