search.js 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /**
  2. * Search
  3. */
  4. var elasticsearch = require('elasticsearch'),
  5. debug = require('debug')('crowi:lib:search');
  /**
   * Search client that pushes page data into an Elasticsearch index and
   * runs queries against it.
   *
   * Construction parses the URI, creates the Elasticsearch client and
   * subscribes to page events (via registerUpdateEvent).
   *
   * @param {Object} crowi - Application object; this class uses its
   *   `event()`, `model()` and `resourceDir` members.
   * @param {string} esUri - Elasticsearch URI, optionally ending with
   *   `/{index name}` (see parseUri).
   */
  function SearchClient(crowi, esUri) {
    // Paging defaults consumed by appendResultSize().
    this.DEFAULT_OFFSET = 0;
    this.DEFAULT_LIMIT = 50;

    this.esUri = esUri;
    this.crowi = crowi;

    var parsed = this.parseUri(this.esUri);
    this.host = parsed.host;
    this.index_name = parsed.index_name;

    var clientOptions = {
      host: this.host,
      requestTimeout: 5000,
    };
    this.client = new elasticsearch.Client(clientOptions);

    this.registerUpdateEvent();

    // Index mapping definition loaded by buildIndex().
    this.mappingFile = crowi.resourceDir + 'search/mappings.json';
  }
  /**
   * Placeholder for an Elasticsearch version check.
   * Currently does nothing and returns undefined.
   */
  SearchClient.prototype.checkESVersion = function() {
    // TODO: not implemented yet
  };
  /**
   * Subscribes this client to the application's 'page' event emitter so the
   * index follows page creation and page updates.
   */
  SearchClient.prototype.registerUpdateEvent = function() {
    var pageEvent = this.crowi.event('page');

    // Handlers are bound so `this` inside them is this SearchClient.
    var onCreate = this.syncPageCreated.bind(this);
    pageEvent.on('create', onCreate);

    var onUpdate = this.syncPageUpdated.bind(this);
    pageEvent.on('update', onUpdate);
  };
  /**
   * Decides whether a page belongs in the search index.
   *
   * A page is indexed only when its `grant` equals 1 and it has no
   * `redirectTo` target. A missing (`undefined`) `redirectTo` is treated the
   * same as `null`: the page is not a redirect.
   *
   * @param {Object} page - Page with `grant` and `redirectTo` properties.
   * @returns {boolean} true when the page should be indexed.
   */
  SearchClient.prototype.shouldIndexed = function(page) {
    // FIXME: magic number. Presumably the "public" grant level of the Page
    // model -- confirm and replace with the model's constant.
    var INDEXABLE_GRANT = 1;

    if (page.grant !== INDEXABLE_GRANT) {
      return false;
    }

    // `== null` deliberately matches both null and undefined.
    return page.redirectTo == null;
  };
  // BONSAI_URL has the following format:
  // => https://{ID}:{PASSWORD}@{HOST}
  /**
   * Splits an Elasticsearch URI into the host part and the index name.
   *
   * When the URI has a non-empty path after the host
   * (`http(s)://host/some-index`), that path is used as the index name.
   * Otherwise the whole URI is the host and the index name defaults to
   * 'crowi'.
   *
   * @param {string} uri - Elasticsearch URI.
   * @returns {{host: string, index_name: string}}
   */
  SearchClient.prototype.parseUri = function(uri) {
    var index_name = 'crowi';
    var host = uri;

    // Declared locally: assigning to an undeclared `m` leaks a global and
    // throws a ReferenceError in strict mode.
    var m = uri.match(/^(https?:\/\/[^\/]+)\/(.+)$/);
    if (m) {
      host = m[1];
      index_name = m[2];
    }

    return {
      host,
      index_name,
    };
  };
  /**
   * Creates the index named `this.index_name`, using the mapping definition
   * loaded from `this.mappingFile`.
   *
   * @param {*} uri - Unused; kept for interface compatibility.
   * @returns {*} Whatever `client.indices.create` returns.
   */
  SearchClient.prototype.buildIndex = function(uri) {
    var indices = this.client.indices;
    var request = {
      index: this.index_name,
      body: require(this.mappingFile)
    };

    return indices.create(request);
  };
  /**
   * Deletes the index named `this.index_name`.
   *
   * @param {*} uri - Unused; kept for interface compatibility.
   * @returns {*} Whatever `client.indices.delete` returns.
   */
  SearchClient.prototype.deleteIndex = function(uri) {
    var indices = this.client.indices;
    var request = {
      index: this.index_name,
    };

    return indices.delete(request);
  };
  /**
   * Appends the two bulk-API entries (command + partial document) that
   * update -- or insert, via `doc_as_upsert` -- one page.
   *
   * @param {Array} body - Bulk request body being built; mutated in place.
   * @param {Object} page - Page providing `_id`, `path`, `revision.body`,
   *   `commentCount`, `liker` and `updatedAt`.
   * @throws {Error} When `body` is not an array.
   */
  SearchClient.prototype.prepareBodyForUpdate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    var command = {
      update: {
        _index: this.index_name,
        _type: 'pages',
        _id: page._id.toString(),
      }
    };

    // Guard `liker` itself: the previous `page.liker.length || 0` threw when
    // the page had no `liker` list, which is the case the fallback was for.
    var likeCount = (page.liker && page.liker.length) || 0;

    var document = {
      doc: {
        path: page.path,
        body: page.revision.body,
        comment_count: page.commentCount,
        bookmark_count: 0, // TODO: bookmarks are not counted yet
        like_count: likeCount,
        updated_at: page.updatedAt,
      },
      doc_as_upsert: true,
    };

    body.push(command);
    body.push(document);
  };
  /**
   * Appends the two bulk-API entries (command + full document) that index
   * one page.
   *
   * @param {Array} body - Bulk request body being built; mutated in place.
   * @param {Object} page - Page providing `_id`, `path`, `revision.body`,
   *   `creator.username`, `commentCount`, `liker`, `createdAt` and
   *   `updatedAt`.
   * @throws {Error} When `body` is not an array.
   */
  SearchClient.prototype.prepareBodyForCreate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    var command = {
      index: {
        _index: this.index_name,
        _type: 'pages',
        _id: page._id.toString(),
      }
    };

    // Guard `liker` itself: the previous `page.liker.length || 0` threw when
    // the page had no `liker` list, which is the case the fallback was for.
    var likeCount = (page.liker && page.liker.length) || 0;

    var document = {
      path: page.path,
      body: page.revision.body,
      username: page.creator.username,
      comment_count: page.commentCount,
      bookmark_count: 0, // TODO: bookmarks are not counted yet
      like_count: likeCount,
      created_at: page.createdAt,
      updated_at: page.updatedAt,
    };

    body.push(command);
    body.push(document);
  };
  /**
   * Appends the bulk-API command that removes one page from the index.
   *
   * @param {Array} body - Bulk request body being built; mutated in place.
   * @param {Object} page - Page whose `_id` identifies the document.
   * @throws {Error} When `body` is not an array.
   */
  SearchClient.prototype.prepareBodyForDelete = function(body, page) {
    var isList = Array.isArray(body);
    if (!isList) {
      throw new Error('Body must be an array.');
    }

    var target = {
      _index: this.index_name,
      _type: 'pages',
      _id: page._id.toString(),
    };

    body.push({ delete: target });
  };
  /**
   * Indexes the given pages with one bulk request.
   *
   * @param {Array<Object>} pages - Pages to index.
   * @returns {*} Whatever `client.bulk` returns (callers chain `.then`).
   */
  SearchClient.prototype.addPages = function(pages) {
    var searcher = this;
    var bulkBody = [];

    pages.forEach(function(page) {
      searcher.prepareBodyForCreate(bulkBody, page);
    });

    debug('addPages(): Sending Request to ES', bulkBody);
    var request = { body: bulkBody };
    return searcher.client.bulk(request);
  };
  /**
   * Updates (or upserts) the given pages with one bulk request.
   *
   * @param {Array<Object>} pages - Pages to update in the index.
   * @returns {*} Whatever `client.bulk` returns (callers chain `.then`).
   */
  SearchClient.prototype.updatePages = function(pages) {
    var searcher = this;
    var bulkBody = [];

    pages.forEach(function(page) {
      searcher.prepareBodyForUpdate(bulkBody, page);
    });

    debug('updatePages(): Sending Request to ES', bulkBody);
    var request = { body: bulkBody };
    return searcher.client.bulk(request);
  };
  /**
   * Removes the given pages from the index with one bulk request.
   *
   * @param {Array<Object>} pages - Pages to remove from the index.
   * @returns {*} Whatever `client.bulk` returns (callers chain `.then`).
   */
  SearchClient.prototype.deletePages = function(pages) {
    var searcher = this;
    var bulkBody = [];

    pages.forEach(function(page) {
      searcher.prepareBodyForDelete(bulkBody, page);
    });

    debug('deletePages(): Sending Request to ES', bulkBody);
    var request = { body: bulkBody };
    return searcher.client.bulk(request);
  };
  /**
   * Indexes every page from the Page model's find-all stream with one bulk
   * request, sent when the stream closes.
   *
   * Pages without a `creator` or `revision`, and pages rejected by
   * shouldIndexed(), are skipped.
   *
   * @returns {Promise<Object>} Resolves with the bulk response. Rejects when
   *   the stream reports an error (nothing is sent in that case) or when the
   *   bulk request fails.
   */
  SearchClient.prototype.addAllPages = function() {
    var self = this;
    var Page = this.crowi.model('Page');
    var stream = Page.getStreamOfFindAll();
    var body = [];

    return new Promise(function(resolve, reject) {
      // Set on stream error so a following 'close' does not send a partial
      // bulk request after the promise was already rejected.
      var failed = false;

      stream.on('data', function(doc) {
        if (!doc.creator || !doc.revision || !self.shouldIndexed(doc)) {
          debug('Skipped', doc.path);
          return;
        }

        self.prepareBodyForCreate(body, doc);
      }).on('error', function(err) {
        // Previously only logged, which could leave the promise pending.
        debug('Error stream:', err);
        failed = true;
        reject(err);
      }).on('close', function() {
        if (failed) {
          return;
        }

        // All documents collected: send them in one request at the end.
        self.client.bulk({ body: body, })
          .then(function(res) {
            debug('Reponse from es:', res);
            return resolve(res);
          }).catch(function(err) {
            debug('Err from es:', err);
            return reject(err);
          });
      });
    });
  };
  /**
   * Runs a query and reduces the Elasticsearch response to a compact result.
   *
   * Resolved value:
   * {
   *   meta: { took: <data.took>, total: <data.hits.total>, results: Integer },
   *   data: [ { _id, _score }, ... ],
   * }
   * `results` is the number of hits actually returned in this response.
   *
   * @param {Object} query - Request passed to `client.search` unchanged.
   * @returns {Promise<Object>} Resolves with the structure above; rejects
   *   with whatever error `client.search` produced.
   */
  SearchClient.prototype.search = function(query) {
    var self = this;

    // Chain instead of wrapping in `new Promise`: failures propagate through
    // the chain on their own. Starting from a resolved promise keeps a
    // synchronous throw from `client.search` as a rejection, as before.
    return Promise.resolve()
      .then(function() {
        return self.client.search(query);
      })
      .then(function(data) {
        var hits = data.hits.hits;

        return {
          meta: {
            took: data.took,
            total: data.hits.total,
            results: hits.length,
          },
          data: hits.map(function(elm) {
            return {_id: elm._id, _score: elm._score};
          })
        };
      });
  };
  /**
   * Builds a search request skeleton for the 'pages' type, sorted by
   * `updated_at` descending, with default paging applied and an empty
   * `body.query` to be filled by the appendCriteria* helpers.
   *
   * @param {Object} [option] - `option.fields` overrides the returned fields.
   * @returns {Object} The request object.
   */
  SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option) {
    // `path` is returned by default mostly as a debugging aid.
    var defaultFields = ['path', '_id'];
    var fields = (option && option.fields) || defaultFields;

    var requestBody = {
      fields: fields,
      sort: [{ updated_at: { order: 'desc'}}],
      query: {}, // criteria are appended later
    };

    var query = {
      index: this.index_name,
      type: 'pages',
      body: requestBody
    };

    this.appendResultSize(query);
    return query;
  };
  /**
   * Builds a search request skeleton for the 'pages' type, sorted by
   * `_score` descending, with default paging applied and an empty
   * `body.query` to be filled by the appendCriteria* helpers.
   *
   * @param {Object} [option] - `option.fields` overrides the returned fields.
   * @returns {Object} The request object.
   */
  SearchClient.prototype.createSearchQuerySortedByScore = function(option) {
    var defaultFields = ['path', '_id'];
    var fields = (option && option.fields) || defaultFields;

    var requestBody = {
      fields: fields,
      sort: [ {_score: { order: 'desc'} }],
      query: {}, // criteria are appended later
    };

    var query = {
      index: this.index_name,
      type: 'pages',
      body: requestBody
    };

    this.appendResultSize(query);
    return query;
  };
  /**
   * Sets paging on a request object, falling back to DEFAULT_OFFSET and
   * DEFAULT_LIMIT for falsy values.
   *
   * @param {Object} query - Request object; `from` and `size` are overwritten.
   * @param {number} [from] - Offset of the first hit.
   * @param {number} [size] - Maximum number of hits.
   */
  SearchClient.prototype.appendResultSize = function(query, from, size) {
    var offset = from ? from : this.DEFAULT_OFFSET;
    var limit = size ? size : this.DEFAULT_LIMIT;

    query.from = offset;
    query.size = limit;
  };
  /**
   * Adds a keyword criterion to `bool.must`: a `multi_match` over the
   * `path.ja` (boosted x2) and `body.ja` fields with the "and" operator.
   *
   * @param {Object} query - Request built by createSearchQuerySortedByScore()
   *   or createSearchQuerySortedByUpdatedAt(); mutated in place.
   * @param {string} keyword - Text to search for.
   */
  SearchClient.prototype.appendCriteriaForKeywordContains = function(query, keyword) {
    var criteria = query.body.query;

    if (!criteria.bool) {
      criteria.bool = {};
    }

    // The check must look at `bool.must`. Testing `query.body.query.must`
    // (always undefined) reset the list on every call and silently dropped
    // criteria added earlier.
    if (!Array.isArray(criteria.bool.must)) {
      criteria.bool.must = [];
    }

    criteria.bool.must.push({
      multi_match: {
        query: keyword,
        fields: [
          "path.ja^2", // experimental boost for path matches
          "body.ja"
        ],
        operator: "and"
      }
    });
  };
  /**
   * Adds a `bool.filter` wildcard criterion restricting hits to pages whose
   * `path.raw` starts with `path + "/"`. One trailing slash on `path` is
   * removed first.
   *
   * @param {Object} query - Request built by createSearchQuerySortedByScore()
   *   or createSearchQuerySortedByUpdatedAt(); mutated in place.
   * @param {string} path - Parent path.
   */
  SearchClient.prototype.appendCriteriaForPathFilter = function(query, path) {
    var criteria = query.body.query;

    if (!criteria.bool) {
      criteria.bool = {};
    }

    // Anything that is not already an array (including "missing") is replaced.
    if (!Array.isArray(criteria.bool.filter)) {
      criteria.bool.filter = [];
    }

    var parent = path;
    if (path.match(/\/$/)) {
      parent = path.slice(0, -1);
    }

    var pattern = parent + "/*";
    criteria.bool.filter.push({
      wildcard: {
        "path.raw": pattern
      }
    });
  };
  /**
   * Searches all indexed pages for a keyword, ordered by score.
   *
   * @param {string} keyword - Text to search for.
   * @param {Object} [option] - `option.offset` is the index of the first
   *   hit to return; omitted or falsy means the default offset.
   * @returns {Promise<Object>} Result of search().
   */
  SearchClient.prototype.searchKeyword = function(keyword, option) {
    // Tolerate a missing option object instead of throwing on `.offset`.
    var from = (option && option.offset) || null;

    var query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForKeywordContains(query, keyword);

    // The offset was previously read but never applied, so every call
    // returned the first page of results.
    if (from) {
      this.appendResultSize(query, from);
    }

    return this.search(query);
  };
  /**
   * Placeholder for a search that matches on the path name only.
   * Currently does nothing and returns undefined.
   *
   * @param {string} keyword - Unused for now.
   * @param {string} prefix - Unused for now.
   */
  SearchClient.prototype.searchByPath = function(keyword, prefix) {
    // TODO: search using the path name only
  };
  /**
   * Searches pages below a given path for a keyword, ordered by score.
   *
   * @param {string} keyword - Text to search for.
   * @param {string} path - Parent path; only pages under it are returned.
   * @param {Object} [option] - `option.offset` is the index of the first
   *   hit to return; omitted or falsy means the default offset.
   * @returns {Promise<Object>} Result of search().
   */
  SearchClient.prototype.searchKeywordUnderPath = function(keyword, path, option) {
    // Tolerate a missing option object instead of throwing on `.offset`.
    var from = (option && option.offset) || null;

    var query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForKeywordContains(query, keyword);
    this.appendCriteriaForPathFilter(query, path);

    if (from) {
      this.appendResultSize(query, from);
    }

    return this.search(query);
  };
  /**
   * 'create' page-event handler: indexes the new page when shouldIndexed()
   * accepts it. The outcome is only logged; nothing is returned.
   *
   * @param {Object} page - The created page.
   * @param {Object} user - Unused.
   */
  SearchClient.prototype.syncPageCreated = function(page, user) {
    debug('SearchClient.syncPageCreated', page);

    var indexable = this.shouldIndexed(page);
    if (!indexable) {
      return;
    }

    var logResponse = function(res) {
      debug('ES Response', res);
    };
    var logError = function(err) {
      debug('ES Error', err);
    };

    this.addPages([page])
      .then(logResponse)
      .catch(logError);
  };
  /**
   * 'update' page-event handler: updates the page in the index, or removes
   * it when shouldIndexed() no longer accepts it. The outcome is only
   * logged; nothing is returned.
   *
   * @param {Object} page - The updated page.
   * @param {Object} user - Unused.
   */
  SearchClient.prototype.syncPageUpdated = function(page, user) {
    debug('SearchClient.syncPageUpdated', page);

    var indexable = this.shouldIndexed(page);

    if (indexable) {
      this.updatePages([page])
        .then(function(res) {
          debug('ES Response', res);
        })
        .catch(function(err) {
          debug('ES Error', err);
        });
      return;
    }

    // TODO delete
    // The page must not be searchable any more: drop it from the index.
    this.deletePages([page])
      .then(function(res) {
        debug('deletePages: ES Response', res);
      })
      .catch(function(err) {
        debug('deletePages:ES Error', err);
      });
  };
  362. module.exports = SearchClient;