search.js 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. /**
  2. * Search
  3. */
  4. var elasticsearch = require('elasticsearch'),
  5. debug = require('debug')('crowi:lib:search');
  /**
   * Search client wrapping an Elasticsearch connection for Crowi pages.
   *
   * On construction it parses the given URI into host and index name, creates
   * the Elasticsearch client, and subscribes to page events.
   *
   * @param {Object} crowi - application object; event() and resourceDir are used here.
   * @param {string} esUri - Elasticsearch URI, optionally followed by "/<index name>".
   */
  function SearchClient(crowi, esUri) {
    this.DEFAULT_OFFSET = 0;
    this.DEFAULT_LIMIT = 50;

    this.esUri = esUri;
    this.crowi = crowi;

    var parsed = this.parseUri(this.esUri);
    this.host = parsed.host;
    this.index_name = parsed.index_name;

    var clientConfig = {
      host: this.host,
      requestTimeout: 5000,
    };
    this.client = new elasticsearch.Client(clientConfig);

    // Event handlers are registered before mappingFile is set, as in the original order.
    this.registerUpdateEvent();

    this.mappingFile = crowi.resourceDir + 'search/mappings.json';
  }
  /**
   * Placeholder for an Elasticsearch version check. Currently a no-op.
   */
  SearchClient.prototype.checkESVersion = function() {
    // TODO: not implemented yet
  };
  /**
   * Subscribes this client to the 'page' event emitter so that create, update
   * and delete events are forwarded to the matching sync* method.
   */
  SearchClient.prototype.registerUpdateEvent = function() {
    var self = this;
    var pageEvent = self.crowi.event('page');

    // [event name, name of the handler method on this instance]
    var bindings = [
      ['create', 'syncPageCreated'],
      ['update', 'syncPageUpdated'],
      ['delete', 'syncPageDeleted'],
    ];

    bindings.forEach(function(binding) {
      // bind() keeps `this` pointing at the client when the emitter invokes the handler
      pageEvent.on(binding[0], self[binding[1]].bind(self));
    });
  };
  /**
   * Decides whether a page belongs in the search index.
   *
   * A page is indexed only when its grant is 1, it has no redirect target
   * (redirectTo is exactly null), and it is not deleted.
   *
   * @param {Object} page - page exposing grant, redirectTo and isDeleted().
   * @returns {boolean} true when the page should be indexed.
   */
  SearchClient.prototype.shouldIndexed = function(page) {
    // FIXME: Magic Number (1 is presumably the "public" grant -- confirm against the Page model)
    return page.grant === 1 && page.redirectTo === null && !page.isDeleted();
  };
  /**
   * Splits an Elasticsearch URI into a host part and an index name.
   *
   * BONSAI_URL has the following format:
   *   => https://{ID}:{PASSWORD}@{HOST}
   *
   * When the URI has a non-empty path after the authority
   * ("http(s)://authority/<index>"), that path becomes the index name and the
   * rest becomes the host. Otherwise the whole URI is the host and the index
   * name defaults to 'crowi'.
   *
   * @param {string} uri - Elasticsearch URI.
   * @returns {{host: string, index_name: string}} parsed parts.
   */
  SearchClient.prototype.parseUri = function(uri) {
    var index_name = 'crowi';
    var host = uri;

    // Declared locally: the previous bare `m = ...` assignment created an
    // implicit global and throws a ReferenceError in strict mode.
    var matched = uri.match(/^(https?:\/\/[^\/]+)\/(.+)$/);
    if (matched) {
      host = matched[1];
      index_name = matched[2];
    }

    return {
      host,
      index_name,
    };
  };
  /**
   * Creates the index named by this.index_name, using the contents of the
   * mapping file as the request body.
   *
   * @param {*} uri - not used by this method.
   * @returns {*} whatever client.indices.create() returns.
   */
  SearchClient.prototype.buildIndex = function(uri) {
    var indices = this.client.indices;

    return indices.create({
      index: this.index_name,
      // the mapping file is loaded with require(), so it is read at call time
      body: require(this.mappingFile)
    });
  };
  /**
   * Deletes the index named by this.index_name.
   *
   * @param {*} uri - not used by this method.
   * @returns {*} whatever client.indices.delete() returns.
   */
  SearchClient.prototype.deleteIndex = function(uri) {
    var indices = this.client.indices;
    var request = { index: this.index_name };

    return indices.delete(request);
  };
  /**
   * Appends a bulk "update" action for the given page to `body`.
   *
   * Two entries are pushed: the action line (index, type, id) and the partial
   * document with doc_as_upsert enabled.
   *
   * @param {Array} body - bulk request body; mutated in place.
   * @param {Object} page - page providing _id, path, revision.body, commentCount, liker and updatedAt.
   * @throws {Error} when body is not an array.
   */
  SearchClient.prototype.prepareBodyForUpdate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    var action = {
      update: {
        _index: this.index_name,
        _type: 'pages',
        _id: page._id.toString(),
      }
    };

    var fields = {
      path: page.path,
      body: page.revision.body,
      comment_count: page.commentCount,
      bookmark_count: 0, // todo
      like_count: page.liker.length || 0,
      updated_at: page.updatedAt,
    };

    body.push(action, { doc: fields, doc_as_upsert: true });
  };
  /**
   * Appends a bulk "index" action for the given page to `body`.
   *
   * Two entries are pushed: the action line (index, type, id) and the full
   * document to store.
   *
   * @param {Array} body - bulk request body; mutated in place.
   * @param {Object} page - page providing _id, path, revision.body, creator.username,
   *   commentCount, liker, createdAt and updatedAt.
   * @throws {Error} when body is not an array.
   */
  SearchClient.prototype.prepareBodyForCreate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    var action = {
      index: {
        _index: this.index_name,
        _type: 'pages',
        _id: page._id.toString(),
      }
    };

    var source = {
      path: page.path,
      body: page.revision.body,
      username: page.creator.username,
      comment_count: page.commentCount,
      bookmark_count: 0, // todo
      like_count: page.liker.length || 0,
      created_at: page.createdAt,
      updated_at: page.updatedAt,
    };

    body.push(action, source);
  };
  /**
   * Appends a bulk "delete" action for the given page to `body`.
   *
   * @param {Array} body - bulk request body; mutated in place.
   * @param {Object} page - page providing _id.
   * @throws {Error} when body is not an array.
   */
  SearchClient.prototype.prepareBodyForDelete = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    var target = {
      _index: this.index_name,
      _type: 'pages',
      _id: page._id.toString(),
    };

    body.push({ delete: target });
  };
  /**
   * Sends one bulk request that indexes every given page.
   *
   * @param {Array} pages - pages to index.
   * @returns {*} whatever client.bulk() returns.
   */
  SearchClient.prototype.addPages = function(pages) {
    var bulkBody = [];

    pages.forEach((page) => {
      this.prepareBodyForCreate(bulkBody, page);
    });

    debug('addPages(): Sending Request to ES', bulkBody);
    return this.client.bulk({ body: bulkBody });
  };
  /**
   * Sends one bulk request that updates (upserts) every given page.
   *
   * @param {Array} pages - pages to update.
   * @returns {*} whatever client.bulk() returns.
   */
  SearchClient.prototype.updatePages = function(pages) {
    var bulkBody = [];

    pages.forEach((page) => {
      this.prepareBodyForUpdate(bulkBody, page);
    });

    debug('updatePages(): Sending Request to ES', bulkBody);
    return this.client.bulk({ body: bulkBody });
  };
  /**
   * Sends one bulk request that removes every given page from the index.
   *
   * @param {Array} pages - pages to delete.
   * @returns {*} whatever client.bulk() returns.
   */
  SearchClient.prototype.deletePages = function(pages) {
    var bulkBody = [];

    pages.forEach((page) => {
      this.prepareBodyForDelete(bulkBody, page);
    });

    debug('deletePages(): Sending Request to ES', bulkBody);
    return this.client.bulk({ body: bulkBody });
  };
  /**
   * Streams every page from the Page model and indexes the eligible ones with
   * a single bulk request once the stream closes.
   *
   * Pages without a creator or revision, or for which shouldIndexed() returns
   * false, are skipped.
   *
   * @returns {Promise} resolves with the bulk response; rejects when the page
   *   stream reports an error or when the bulk request fails.
   */
  SearchClient.prototype.addAllPages = function()
  {
    var self = this;
    var Page = this.crowi.model('Page');
    var stream = Page.getStreamOfFindAll();
    var body = [];
    // Set once the stream errors so a later 'close' does not send a partial bulk.
    var streamFailed = false;

    return new Promise(function(resolve, reject) {
      stream.on('data', function(doc) {
        if (!doc.creator || !doc.revision || !self.shouldIndexed(doc)) {
          debug('Skipped', doc.path);
          return;
        }

        self.prepareBodyForCreate(body, doc);
      }).on('error', function(err) {
        // Previously the error was only logged, leaving the caller unaware of
        // the failure (and the promise pending if 'close' never followed).
        debug('Error stream:', err);
        streamFailed = true;
        reject(err);
      }).on('close', function() {
        if (streamFailed) {
          return;
        }

        // all done: send everything collected in one request
        self.client.bulk({ body: body, })
          .then(function(res) {
            debug('Reponse from es:', res);
            return resolve(res);
          }).catch(function(err) {
            debug('Err from es:', err);
            return reject(err);
          });
      });
    });
  };
  /**
   * Runs the given query and reduces the response to a compact result:
   * {
   *   meta: { took: response.took, total: response.hits.total, results: number of hits returned },
   *   data: [ { _id, _score }, ... ],
   * }
   *
   * @param {Object} query - request object passed unchanged to client.search().
   * @returns {Promise<Object>} resolves with the result above; rejects with the
   *   client's error (or any error raised while shaping the response).
   */
  SearchClient.prototype.search = function(query)
  {
    var client = this.client;

    // Calling the client inside the executor turns a synchronous throw into a rejection.
    return new Promise(function(resolve) {
      resolve(client.search(query));
    }).then(function(response) {
      var hits = response.hits.hits;

      return {
        meta: {
          took: response.took,
          total: response.hits.total,
          results: hits.length,
        },
        data: hits.map(function(hit) {
          return {_id: hit._id, _score: hit._score};
        })
      };
    });
  };
  /**
   * Builds a search request skeleton for the 'pages' type, sorted by
   * updated_at descending, with an empty query and the default result window.
   *
   * @param {Object} [option] - optional settings.
   * @param {Array} [option.fields] - fields to request; defaults to ['path', '_id'].
   * @returns {Object} the request object.
   */
  SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option)
  {
    // returning path by default is mostly for debugging
    var requestedFields = (option && option.fields) || ['path', '_id'];

    var searchBody = {
      fields: requestedFields,
      sort: [{ updated_at: { order: 'desc'}}],
      query: {}, // filled in later by the appendCriteria* helpers
    };

    var request = {
      index: this.index_name,
      type: 'pages',
      body: searchBody,
    };

    this.appendResultSize(request);
    return request;
  };
  /**
   * Builds a search request skeleton for the 'pages' type, sorted by _score
   * descending, with an empty query and the default result window.
   *
   * @param {Object} [option] - optional settings.
   * @param {Array} [option.fields] - fields to request; defaults to ['path', '_id'].
   * @returns {Object} the request object.
   */
  SearchClient.prototype.createSearchQuerySortedByScore = function(option)
  {
    var requestedFields = (option && option.fields) || ['path', '_id'];

    var searchBody = {
      fields: requestedFields,
      sort: [ {_score: { order: 'desc'} }],
      query: {}, // filled in later by the appendCriteria* helpers
    };

    var request = {
      index: this.index_name,
      type: 'pages',
      body: searchBody,
    };

    this.appendResultSize(request);
    return request;
  };
  /**
   * Sets the paging window (from / size) on a search request.
   *
   * Falsy values, including 0, fall back to DEFAULT_OFFSET and DEFAULT_LIMIT.
   *
   * @param {Object} query - request object; mutated in place.
   * @param {number} [from] - offset of the first hit.
   * @param {number} [size] - maximum number of hits.
   */
  SearchClient.prototype.appendResultSize = function(query, from, size)
  {
    var offset = from ? from : this.DEFAULT_OFFSET;
    var limit = size ? size : this.DEFAULT_LIMIT;

    query.from = offset;
    query.size = limit;
  };
  /**
   * Adds a multi_match clause for `keyword` to the request's bool.must list.
   *
   * The request is expected to come from createSearchQuerySortedByScore() or
   * createSearchQuerySortedByUpdatedAt(). Missing bool / must containers are
   * created; an existing must array is kept, so clauses accumulate.
   *
   * @param {Object} query - request object; mutated in place.
   * @param {string} keyword - text to match against the path and body fields.
   */
  SearchClient.prototype.appendCriteriaForKeywordContains = function(query, keyword)
  {
    var criteria = query.body.query;

    if (!criteria.bool) {
      criteria.bool = {};
    }

    // The guard previously inspected query.body.query.must (always undefined),
    // which reset bool.must on every call and discarded earlier clauses.
    if (!Array.isArray(criteria.bool.must)) {
      criteria.bool.must = [];
    }

    criteria.bool.must.push({
      multi_match: {
        query: keyword,
        fields: [
          "path.ja^2", // path matches are boosted (experimental)
          "body.ja"
        ],
        operator: "and"
      }
    });
  };
  /**
   * Adds a wildcard filter restricting hits to pages below `path`.
   *
   * The request is expected to come from createSearchQuerySortedByScore() or
   * createSearchQuerySortedByUpdatedAt(). Missing bool / filter containers are
   * created; an existing filter array is kept.
   *
   * @param {Object} query - request object; mutated in place.
   * @param {string} path - parent path; one trailing slash is ignored.
   */
  SearchClient.prototype.appendCriteriaForPathFilter = function(query, path)
  {
    var criteria = query.body.query;

    if (!criteria.bool) {
      criteria.bool = {};
    }

    if (!Array.isArray(criteria.bool.filter)) {
      criteria.bool.filter = [];
    }

    // Drop one trailing slash so the pattern below never contains "//".
    var parent = path.endsWith('/') ? path.slice(0, -1) : path;

    criteria.bool.filter.push({
      wildcard: {
        "path.raw": parent + "/*"
      }
    });
  };
  /**
   * Searches all indexed pages for `keyword`, best score first.
   *
   * @param {string} keyword - text to search for.
   * @param {Object} [option] - optional paging settings.
   * @param {number} [option.offset] - offset of the first hit.
   * @param {number} [option.limit] - maximum number of hits.
   * @returns {Promise<Object>} result of search().
   */
  SearchClient.prototype.searchKeyword = function(keyword, option)
  {
    // option used to be dereferenced unconditionally, so omitting it threw.
    var paging = option || {};
    var from = paging.offset || null;
    var size = paging.limit || null;

    var query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForKeywordContains(query, keyword);

    // The offset was previously read but never applied, so every call
    // returned the first page of results.
    if (from || size) {
      this.appendResultSize(query, from, size);
    }

    return this.search(query);
  };
  /**
   * Placeholder for a search that matches page paths only. Currently a no-op
   * that returns undefined.
   *
   * @param {string} keyword - currently unused.
   * @param {string} prefix - currently unused.
   */
  SearchClient.prototype.searchByPath = function(keyword, prefix) {
    // TODO: search using the path name only
  };
  /**
   * Searches pages below `path` for `keyword`, best score first.
   *
   * @param {string} keyword - text to search for.
   * @param {string} path - parent path the hits must live under.
   * @param {Object} [option] - optional paging settings.
   * @param {number} [option.offset] - offset of the first hit.
   * @param {number} [option.limit] - maximum number of hits.
   * @returns {Promise<Object>} result of search().
   */
  SearchClient.prototype.searchKeywordUnderPath = function(keyword, path, option)
  {
    // option used to be dereferenced unconditionally, so omitting it threw.
    var paging = option || {};
    var from = paging.offset || null;
    var size = paging.limit || null;

    var query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForKeywordContains(query, keyword);
    this.appendCriteriaForPathFilter(query, path);

    // Only override the default window when the caller asked for one.
    if (from || size) {
      this.appendResultSize(query, from, size);
    }

    return this.search(query);
  };
  /**
   * Handler for the page 'create' event: indexes the new page when
   * shouldIndexed() allows it. The outcome is only logged via debug.
   *
   * @param {Object} page - the created page.
   * @param {Object} user - not used by this handler.
   */
  SearchClient.prototype.syncPageCreated = function(page, user)
  {
    debug('SearchClient.syncPageCreated', page.path);

    if (this.shouldIndexed(page)) {
      var logResponse = function(res) {
        debug('ES Response', res);
      };
      var logError = function(err) {
        debug('ES Error', err);
      };

      // fire-and-forget: nothing is returned to the event emitter
      this.addPages([page]).then(logResponse).catch(logError);
    }
  };
  /**
   * Handler for the page 'update' event.
   *
   * A page that should no longer be indexed is removed from the index;
   * otherwise its document is updated. The outcome is only logged via debug.
   *
   * @param {Object} page - the updated page.
   * @param {Object} user - not used by this handler.
   */
  SearchClient.prototype.syncPageUpdated = function(page, user)
  {
    debug('SearchClient.syncPageUpdated', page.path);

    // TODO delete
    if (this.shouldIndexed(page)) {
      this.updatePages([page])
        .then(function(res) {
          debug('ES Response', res);
        })
        .catch(function(err) {
          debug('ES Error', err);
        });
    } else {
      this.deletePages([page])
        .then(function(res) {
          debug('deletePages: ES Response', res);
        })
        .catch(function(err) {
          debug('deletePages:ES Error', err);
        });
    }
  };
  /**
   * Handler for the page 'delete' event: removes the page from the index.
   * The outcome is only logged via debug.
   *
   * @param {Object} page - the deleted page.
   * @param {Object} user - not used by this handler.
   */
  SearchClient.prototype.syncPageDeleted = function(page, user)
  {
    debug('SearchClient.syncPageDeleted', page.path);

    var logResponse = function(res) {
      debug('deletePages: ES Response', res);
    };
    var logError = function(err) {
      debug('deletePages:ES Error', err);
    };

    // fire-and-forget: nothing is returned to the event emitter
    this.deletePages([page]).then(logResponse).catch(logError);
  };
  378. module.exports = SearchClient;