search.js 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. /**
  2. * Search
  3. */
  4. var elasticsearch = require('elasticsearch'),
  5. debug = require('debug')('crowi:lib:search');
  6. function SearchClient(crowi, esUri) {
  7. this.DEFAULT_OFFSET = 0;
  8. this.DEFAULT_LIMIT = 50;
  9. this.esUri = esUri;
  10. this.crowi = crowi;
  11. var uri = this.parseUri(this.esUri);
  12. this.host = uri.host;
  13. this.index_name = uri.index_name;
  14. this.client = new elasticsearch.Client({
  15. host: this.host,
  16. requestTimeout: 5000,
  17. });
  18. this.registerUpdateEvent();
  19. this.mappingFile = crowi.resourceDir + 'search/mappings.json';
  20. }
  21. SearchClient.prototype.registerUpdateEvent = function() {
  22. var pageEvent = this.crowi.event('page');
  23. pageEvent.on('create', this.syncPageCreated.bind(this))
  24. pageEvent.on('update', this.syncPageUpdated.bind(this))
  25. };
  26. SearchClient.prototype.parseUri = function(uri) {
  27. if (!(m = uri.match(/^elasticsearch:\/\/([^:]+):([^\/]+)\/(.+)$/))) {
  28. throw new Error('Invalid ELASTICSEARCH_URI format. Should be elasticsearch://host:port/index_name');
  29. }
  30. return {
  31. host: m[1] + ':' + m[2],
  32. index_name: m[3],
  33. };
  34. };
  35. SearchClient.prototype.buildIndex = function(uri) {
  36. return this.client.indices.create({
  37. index: this.index_name,
  38. body: require(this.mappingFile)
  39. });
  40. };
  41. SearchClient.prototype.deleteIndex = function(uri) {
  42. return this.client.indices.delete({
  43. index: this.index_name,
  44. });
  45. };
  46. SearchClient.prototype.prepareBodyForUpdate = function(body, page) {
  47. if (!Array.isArray(body)) {
  48. throw new Error('Body must be an array.');
  49. }
  50. var command = {
  51. update: {
  52. _index: this.index_name,
  53. _type: 'pages',
  54. _id: page._id.toString(),
  55. }
  56. };
  57. var document = {
  58. doc: {
  59. path: page.path,
  60. body: page.revision.body,
  61. comment_count: page.commentCount,
  62. bookmark_count: 0, // todo
  63. like_count: page.liker.length || 0,
  64. updated_at: page.updatedAt,
  65. }
  66. };
  67. body.push(command);
  68. body.push(document);
  69. };
  70. SearchClient.prototype.prepareBodyForCreate = function(body, page) {
  71. if (!Array.isArray(body)) {
  72. throw new Error('Body must be an array.');
  73. }
  74. var command = {
  75. index: {
  76. _index: this.index_name,
  77. _type: 'pages',
  78. _id: page._id.toString(),
  79. }
  80. };
  81. var document = {
  82. path: page.path,
  83. body: page.revision.body,
  84. username: page.creator.username,
  85. comment_count: page.commentCount,
  86. bookmark_count: 0, // todo
  87. like_count: page.liker.length || 0,
  88. created_at: page.createdAt,
  89. updated_at: page.updatedAt,
  90. };
  91. body.push(command);
  92. body.push(document);
  93. };
  94. SearchClient.prototype.addPages = function(pages)
  95. {
  96. var self = this;
  97. var body = [];
  98. pages.map(function(page) {
  99. self.prepareBodyForCreate(body, page);
  100. });
  101. debug('addPages(): Sending Request to ES', body);
  102. return this.client.bulk({
  103. body: body,
  104. });
  105. };
  106. SearchClient.prototype.updatePages = function(pages)
  107. {
  108. var self = this;
  109. var body = [];
  110. pages.map(function(page) {
  111. self.prepareBodyForUpdate(body, page);
  112. });
  113. debug('updatePages(): Sending Request to ES', body);
  114. return this.client.bulk({
  115. body: body,
  116. });
  117. };
  118. SearchClient.prototype.addAllPages = function()
  119. {
  120. var self = this;
  121. var offset = 0;
  122. var Page = this.crowi.model('Page');
  123. var stream = Page.getStreamOfFindAll();
  124. var body = [];
  125. return new Promise(function(resolve, reject) {
  126. stream.on('data', function (doc) {
  127. if (!doc.creator || !doc.revision) {
  128. debug('Skipped', doc.path);
  129. return ;
  130. }
  131. self.prepareBodyForCreate(body, doc);
  132. }).on('error', function (err) {
  133. // TODO: handle err
  134. debug('Error stream:', err);
  135. }).on('close', function () {
  136. // all done
  137. // 最後に送信
  138. self.client.bulk({ body: body, })
  139. .then(function(res) {
  140. debug('Reponse from es:', res);
  141. return resolve(res);
  142. }).catch(function(err) {
  143. debug('Err from es:', err);
  144. return reject(err);
  145. });
  146. });
  147. });
  148. };
  149. /**
  150. * search returning type:
  151. * {
  152. * meta: { total: Integer, results: Integer},
  153. * data: [ pages ...],
  154. * }
  155. */
  156. SearchClient.prototype.search = function(query)
  157. {
  158. var self = this;
  159. return new Promise(function(resolve, reject) {
  160. self.client.search(query)
  161. .then(function(data) {
  162. var result = {
  163. meta: {
  164. took: data.took,
  165. total: data.hits.total,
  166. results: data.hits.hits.length,
  167. },
  168. data: data.hits.hits.map(function(elm) {
  169. return {_id: elm._id, _score: elm._score};
  170. })
  171. };
  172. resolve(result);
  173. }).catch(function(err) {
  174. reject(err);
  175. });
  176. });
  177. };
  178. SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option)
  179. {
  180. // getting path by default is almost for debug
  181. var fields = ['path', '_id'];
  182. if (option) {
  183. fields = option.fields || fields;
  184. }
  185. // default is only id field, sorted by updated_at
  186. var query = {
  187. index: this.index_name,
  188. type: 'pages',
  189. body: {
  190. fields: fields,
  191. sort: [{ updated_at: { order: 'desc'}}],
  192. query: {}, // query
  193. }
  194. };
  195. this.appendResultSize(query);
  196. return query;
  197. };
  198. SearchClient.prototype.createSearchQuerySortedByScore = function(option)
  199. {
  200. var fields = ['path', '_id'];
  201. if (option) {
  202. fields = option.fields || fields;
  203. }
  204. // sort by score
  205. var query = {
  206. index: this.index_name,
  207. type: 'pages',
  208. body: {
  209. fields: fields,
  210. sort: [ {_score: { order: 'desc'} }],
  211. query: {}, // query
  212. }
  213. };
  214. this.appendResultSize(query);
  215. return query;
  216. };
  217. SearchClient.prototype.appendResultSize = function(query, from, size)
  218. {
  219. query.from = from || this.DEFAULT_OFFSET;
  220. query.size = size || this.DEFAULT_LIMIT;
  221. };
  222. SearchClient.prototype.appendCriteriaForKeywordContains = function(query, keyword)
  223. {
  224. // query is created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
  225. if (!query.body.query.bool) {
  226. query.body.query.bool = {};
  227. }
  228. if (!query.body.query.bool.must || !Array.isArray(query.body.query.must)) {
  229. query.body.query.bool.must = [];
  230. }
  231. query.body.query.bool.must.push({
  232. multi_match: {
  233. query: keyword,
  234. fields: [
  235. "path.ja^2", // ためしに。
  236. "body.ja"
  237. ],
  238. operator: "and"
  239. }
  240. });
  241. };
  242. SearchClient.prototype.appendCriteriaForPathFilter = function(query, path)
  243. {
  244. // query is created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
  245. if (!query.body.query.bool) {
  246. query.body.query.bool = {};
  247. }
  248. if (!query.body.query.bool.filter || !Array.isArray(query.body.query.bool.filter)) {
  249. query.body.query.bool.filter = [];
  250. }
  251. if (path.match(/\/$/)) {
  252. path = path.substr(0, path.length - 1);
  253. }
  254. query.body.query.bool.filter.push({
  255. wildcard: {
  256. "path.raw": path + "/*"
  257. }
  258. });
  259. };
  260. SearchClient.prototype.searchKeyword = function(keyword, option)
  261. {
  262. var from = option.offset || null;
  263. var query = this.createSearchQuerySortedByScore();
  264. this.appendCriteriaForKeywordContains(query, keyword);
  265. return this.search(query);
  266. };
  267. SearchClient.prototype.searchByPath = function(keyword, prefix)
  268. {
  269. // TODO path 名だけから検索
  270. };
  271. SearchClient.prototype.searchKeywordUnderPath = function(keyword, path, option)
  272. {
  273. var from = option.offset || null;
  274. var query = this.createSearchQuerySortedByScore();
  275. this.appendCriteriaForKeywordContains(query, keyword);
  276. this.appendCriteriaForPathFilter(query, path);
  277. if (from) {
  278. this.appendResultSize(query, from);
  279. }
  280. return this.search(query);
  281. };
  282. SearchClient.prototype.syncPageCreated = function(page, user)
  283. {
  284. this.addPages([page])
  285. .then(function(res) {
  286. debug('ES Response', res);
  287. })
  288. .catch(function(err){
  289. debug('ES Error', err);
  290. });
  291. };
  292. SearchClient.prototype.syncPageUpdated = function(page, user)
  293. {
  294. this.updatePages([page])
  295. .then(function(res) {
  296. debug('ES Response', res);
  297. })
  298. .catch(function(err){
  299. debug('ES Error', err);
  300. });
  301. };
  302. module.exports = SearchClient;