search.js 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. /**
  2. * Search
  3. */
  4. var elasticsearch = require('elasticsearch'),
  5. debug = require('debug')('crowi:lib:search');
  6. function SearchClient(crowi, esUri) {
  7. this.DEFAULT_OFFSET = 0;
  8. this.DEFAULT_LIMIT = 50;
  9. this.esUri = esUri;
  10. this.crowi = crowi;
  11. var uri = this.parseUri(this.esUri);
  12. this.host = uri.host;
  13. this.index_name = uri.index_name;
  14. this.client = new elasticsearch.Client({
  15. host: this.host,
  16. requestTimeout: 5000,
  17. //log: 'debug',
  18. });
  19. this.registerUpdateEvent();
  20. this.mappingFile = crowi.resourceDir + 'search/mappings.json';
  21. }
  22. SearchClient.prototype.checkESVersion = function() {
  23. // TODO
  24. };
  25. SearchClient.prototype.registerUpdateEvent = function() {
  26. var pageEvent = this.crowi.event('page');
  27. pageEvent.on('create', this.syncPageCreated.bind(this))
  28. pageEvent.on('update', this.syncPageUpdated.bind(this))
  29. pageEvent.on('delete', this.syncPageDeleted.bind(this))
  30. };
  31. SearchClient.prototype.shouldIndexed = function(page) {
  32. // FIXME: Magic Number
  33. if (page.grant !== 1) {
  34. return false;
  35. }
  36. if (page.redirectTo !== null) {
  37. return false;
  38. }
  39. if (page.isDeleted()) {
  40. return false;
  41. }
  42. return true;
  43. };
  44. // BONSAI_URL is following format:
  45. // => https://{ID}:{PASSWORD}@{HOST}
  46. SearchClient.prototype.parseUri = function(uri) {
  47. var index_name = 'crowi';
  48. var host = uri;
  49. if (m = uri.match(/^(https?:\/\/[^\/]+)\/(.+)$/)) {
  50. host = m[1];
  51. index_name = m[2];
  52. }
  53. return {
  54. host,
  55. index_name,
  56. };
  57. };
  58. SearchClient.prototype.buildIndex = function(uri) {
  59. return this.client.indices.create({
  60. index: this.index_name,
  61. body: require(this.mappingFile)
  62. });
  63. };
  64. SearchClient.prototype.deleteIndex = function(uri) {
  65. return this.client.indices.delete({
  66. index: this.index_name,
  67. });
  68. };
  69. SearchClient.prototype.prepareBodyForUpdate = function(body, page) {
  70. if (!Array.isArray(body)) {
  71. throw new Error('Body must be an array.');
  72. }
  73. var command = {
  74. update: {
  75. _index: this.index_name,
  76. _type: 'pages',
  77. _id: page._id.toString(),
  78. }
  79. };
  80. var document = {
  81. doc: {
  82. path: page.path,
  83. body: page.revision.body,
  84. comment_count: page.commentCount,
  85. bookmark_count: 0, // todo
  86. like_count: page.liker.length || 0,
  87. updated_at: page.updatedAt,
  88. },
  89. doc_as_upsert: true,
  90. };
  91. body.push(command);
  92. body.push(document);
  93. };
  94. SearchClient.prototype.prepareBodyForCreate = function(body, page) {
  95. if (!Array.isArray(body)) {
  96. throw new Error('Body must be an array.');
  97. }
  98. var command = {
  99. index: {
  100. _index: this.index_name,
  101. _type: 'pages',
  102. _id: page._id.toString(),
  103. }
  104. };
  105. var document = {
  106. path: page.path,
  107. body: page.revision.body,
  108. username: page.creator.username,
  109. comment_count: page.commentCount,
  110. bookmark_count: 0, // todo
  111. like_count: page.liker.length || 0,
  112. created_at: page.createdAt,
  113. updated_at: page.updatedAt,
  114. };
  115. body.push(command);
  116. body.push(document);
  117. };
  118. SearchClient.prototype.prepareBodyForDelete = function(body, page) {
  119. if (!Array.isArray(body)) {
  120. throw new Error('Body must be an array.');
  121. }
  122. var command = {
  123. delete: {
  124. _index: this.index_name,
  125. _type: 'pages',
  126. _id: page._id.toString(),
  127. }
  128. };
  129. body.push(command);
  130. };
  131. SearchClient.prototype.addPages = function(pages)
  132. {
  133. var self = this;
  134. var body = [];
  135. pages.map(function(page) {
  136. self.prepareBodyForCreate(body, page);
  137. });
  138. debug('addPages(): Sending Request to ES', body);
  139. return this.client.bulk({
  140. body: body,
  141. });
  142. };
  143. SearchClient.prototype.updatePages = function(pages)
  144. {
  145. var self = this;
  146. var body = [];
  147. pages.map(function(page) {
  148. self.prepareBodyForUpdate(body, page);
  149. });
  150. debug('updatePages(): Sending Request to ES', body);
  151. return this.client.bulk({
  152. body: body,
  153. });
  154. };
  155. SearchClient.prototype.deletePages = function(pages)
  156. {
  157. var self = this;
  158. var body = [];
  159. pages.map(function(page) {
  160. self.prepareBodyForDelete(body, page);
  161. });
  162. debug('deletePages(): Sending Request to ES', body);
  163. return this.client.bulk({
  164. body: body,
  165. });
  166. };
  167. SearchClient.prototype.addAllPages = function()
  168. {
  169. var self = this;
  170. var offset = 0;
  171. var Page = this.crowi.model('Page');
  172. var cursor = Page.getStreamOfFindAll();
  173. var body = [];
  174. return new Promise(function(resolve, reject) {
  175. cursor.on('data', function (doc) {
  176. if (!doc.creator || !doc.revision || !self.shouldIndexed(doc)) {
  177. debug('Skipped', doc.path);
  178. return ;
  179. }
  180. self.prepareBodyForCreate(body, doc);
  181. }).on('error', function (err) {
  182. // TODO: handle err
  183. debug('Error cursor:', err);
  184. }).on('close', function () {
  185. // all done
  186. // 最後に送信
  187. self.client.bulk({
  188. body: body,
  189. requestTimeout: Infinity,
  190. })
  191. .then(function(res) {
  192. debug('Reponse from es:', res);
  193. return resolve(res);
  194. }).catch(function(err) {
  195. debug('Err from es:', err);
  196. return reject(err);
  197. });
  198. });
  199. });
  200. };
  201. /**
  202. * search returning type:
  203. * {
  204. * meta: { total: Integer, results: Integer},
  205. * data: [ pages ...],
  206. * }
  207. */
  208. SearchClient.prototype.search = function(query)
  209. {
  210. var self = this;
  211. return new Promise(function(resolve, reject) {
  212. self.client.search(query)
  213. .then(function(data) {
  214. var result = {
  215. meta: {
  216. took: data.took,
  217. total: data.hits.total,
  218. results: data.hits.hits.length,
  219. },
  220. data: data.hits.hits.map(function(elm) {
  221. return {_id: elm._id, _score: elm._score};
  222. })
  223. };
  224. resolve(result);
  225. }).catch(function(err) {
  226. reject(err);
  227. });
  228. });
  229. };
  230. SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option)
  231. {
  232. // getting path by default is almost for debug
  233. var fields = ['path'];
  234. if (option) {
  235. fields = option.fields || fields;
  236. }
  237. // default is only id field, sorted by updated_at
  238. var query = {
  239. index: this.index_name,
  240. type: 'pages',
  241. body: {
  242. sort: [{ updated_at: { order: 'desc'}}],
  243. query: {}, // query
  244. _source: fields,
  245. }
  246. };
  247. this.appendResultSize(query);
  248. return query;
  249. };
  250. SearchClient.prototype.createSearchQuerySortedByScore = function(option)
  251. {
  252. var fields = ['path'];
  253. if (option) {
  254. fields = option.fields || fields;
  255. }
  256. // sort by score
  257. var query = {
  258. index: this.index_name,
  259. type: 'pages',
  260. body: {
  261. sort: [ {_score: { order: 'desc'} }],
  262. query: {}, // query
  263. _source: fields,
  264. }
  265. };
  266. this.appendResultSize(query);
  267. return query;
  268. };
  269. SearchClient.prototype.appendResultSize = function(query, from, size)
  270. {
  271. query.from = from || this.DEFAULT_OFFSET;
  272. query.size = size || this.DEFAULT_LIMIT;
  273. };
  274. SearchClient.prototype.appendCriteriaForKeywordContains = function(query, keyword)
  275. {
  276. // query is created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
  277. if (!query.body.query.bool) {
  278. query.body.query.bool = {};
  279. }
  280. if (!query.body.query.bool.must || !Array.isArray(query.body.query.must)) {
  281. query.body.query.bool.must = [];
  282. }
  283. query.body.query.bool.must.push({
  284. multi_match: {
  285. query: keyword,
  286. // TODO: By user's i18n setting, change boost or search target fields
  287. fields: [
  288. "path_ja^2",
  289. "body_ja",
  290. // "path_en",
  291. // "body_en",
  292. ],
  293. operator: "and"
  294. }
  295. });
  296. };
  297. SearchClient.prototype.appendCriteriaForPathFilter = function(query, path)
  298. {
  299. // query is created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
  300. if (!query.body.query.bool) {
  301. query.body.query.bool = {};
  302. }
  303. if (!query.body.query.bool.filter || !Array.isArray(query.body.query.bool.filter)) {
  304. query.body.query.bool.filter = [];
  305. }
  306. if (path.match(/\/$/)) {
  307. path = path.substr(0, path.length - 1);
  308. }
  309. query.body.query.bool.filter.push({
  310. wildcard: {
  311. "path": path + "/*"
  312. }
  313. });
  314. };
  315. SearchClient.prototype.searchKeyword = function(keyword, option)
  316. {
  317. var from = option.offset || null;
  318. var query = this.createSearchQuerySortedByScore();
  319. this.appendCriteriaForKeywordContains(query, keyword);
  320. return this.search(query);
  321. };
  322. SearchClient.prototype.searchByPath = function(keyword, prefix)
  323. {
  324. // TODO path 名だけから検索
  325. };
  326. SearchClient.prototype.searchKeywordUnderPath = function(keyword, path, option)
  327. {
  328. var from = option.offset || null;
  329. var query = this.createSearchQuerySortedByScore();
  330. this.appendCriteriaForKeywordContains(query, keyword);
  331. this.appendCriteriaForPathFilter(query, path);
  332. if (from) {
  333. this.appendResultSize(query, from);
  334. }
  335. return this.search(query);
  336. };
  337. SearchClient.prototype.syncPageCreated = function(page, user)
  338. {
  339. debug('SearchClient.syncPageCreated', page.path);
  340. if (!this.shouldIndexed(page)) {
  341. return ;
  342. }
  343. this.addPages([page])
  344. .then(function(res) {
  345. debug('ES Response', res);
  346. })
  347. .catch(function(err){
  348. debug('ES Error', err);
  349. });
  350. };
  351. SearchClient.prototype.syncPageUpdated = function(page, user)
  352. {
  353. debug('SearchClient.syncPageUpdated', page.path);
  354. // TODO delete
  355. if (!this.shouldIndexed(page)) {
  356. this.deletePages([page])
  357. .then(function(res) {
  358. debug('deletePages: ES Response', res);
  359. })
  360. .catch(function(err){
  361. debug('deletePages:ES Error', err);
  362. });
  363. return ;
  364. }
  365. this.updatePages([page])
  366. .then(function(res) {
  367. debug('ES Response', res);
  368. })
  369. .catch(function(err){
  370. debug('ES Error', err);
  371. });
  372. };
  373. SearchClient.prototype.syncPageDeleted = function(page, user)
  374. {
  375. debug('SearchClient.syncPageDeleted', page.path);
  376. this.deletePages([page])
  377. .then(function(res) {
  378. debug('deletePages: ES Response', res);
  379. })
  380. .catch(function(err){
  381. debug('deletePages:ES Error', err);
  382. });
  383. return ;
  384. };
  385. module.exports = SearchClient;