search.js 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. /**
  2. * Search
  3. */
  4. var elasticsearch = require('elasticsearch'),
  5. debug = require('debug')('crowi:lib:search');
  6. function SearchClient(crowi, esUri) {
  7. this.DEFAULT_OFFSET = 0;
  8. this.DEFAULT_LIMIT = 50;
  9. this.esUri = esUri;
  10. this.crowi = crowi;
  11. var uri = this.parseUri(this.esUri);
  12. this.host = uri.host;
  13. this.index_name = uri.index_name;
  14. this.client = new elasticsearch.Client({
  15. host: this.host,
  16. requestTimeout: 5000,
  17. });
  18. this.registerUpdateEvent();
  19. this.mappingFile = crowi.resourceDir + 'search/mappings.json';
  20. }
  21. SearchClient.prototype.registerUpdateEvent = function() {
  22. var pageEvent = this.crowi.event('page');
  23. pageEvent.on('create', this.syncPageCreated.bind(this))
  24. pageEvent.on('update', this.syncPageUpdated.bind(this))
  25. };
  26. SearchClient.prototype.shouldIndexed = function(page) {
  27. // FIXME: Magic Number
  28. if (page.grant !== 1) {
  29. return false;
  30. }
  31. if (page.redirectTo !== null) {
  32. return false;
  33. }
  34. return true;
  35. };
  36. SearchClient.prototype.parseUri = function(uri) {
  37. if (!(m = uri.match(/^elasticsearch:\/\/([^:]+):([^\/]+)\/(.+)$/))) {
  38. throw new Error('Invalid ELASTICSEARCH_URI format. Should be elasticsearch://host:port/index_name');
  39. }
  40. return {
  41. host: m[1] + ':' + m[2],
  42. index_name: m[3],
  43. };
  44. };
  45. SearchClient.prototype.buildIndex = function(uri) {
  46. return this.client.indices.create({
  47. index: this.index_name,
  48. body: require(this.mappingFile)
  49. });
  50. };
  51. SearchClient.prototype.deleteIndex = function(uri) {
  52. return this.client.indices.delete({
  53. index: this.index_name,
  54. });
  55. };
  56. SearchClient.prototype.prepareBodyForUpdate = function(body, page) {
  57. if (!Array.isArray(body)) {
  58. throw new Error('Body must be an array.');
  59. }
  60. var command = {
  61. update: {
  62. _index: this.index_name,
  63. _type: 'pages',
  64. _id: page._id.toString(),
  65. }
  66. };
  67. var document = {
  68. doc: {
  69. path: page.path,
  70. body: page.revision.body,
  71. comment_count: page.commentCount,
  72. bookmark_count: 0, // todo
  73. like_count: page.liker.length || 0,
  74. updated_at: page.updatedAt,
  75. }
  76. };
  77. body.push(command);
  78. body.push(document);
  79. };
  80. SearchClient.prototype.prepareBodyForCreate = function(body, page) {
  81. if (!Array.isArray(body)) {
  82. throw new Error('Body must be an array.');
  83. }
  84. var command = {
  85. index: {
  86. _index: this.index_name,
  87. _type: 'pages',
  88. _id: page._id.toString(),
  89. }
  90. };
  91. var document = {
  92. path: page.path,
  93. body: page.revision.body,
  94. username: page.creator.username,
  95. comment_count: page.commentCount,
  96. bookmark_count: 0, // todo
  97. like_count: page.liker.length || 0,
  98. created_at: page.createdAt,
  99. updated_at: page.updatedAt,
  100. };
  101. body.push(command);
  102. body.push(document);
  103. };
  104. SearchClient.prototype.addPages = function(pages)
  105. {
  106. var self = this;
  107. var body = [];
  108. pages.map(function(page) {
  109. self.prepareBodyForCreate(body, page);
  110. });
  111. debug('addPages(): Sending Request to ES', body);
  112. return this.client.bulk({
  113. body: body,
  114. });
  115. };
  116. SearchClient.prototype.updatePages = function(pages)
  117. {
  118. var self = this;
  119. var body = [];
  120. pages.map(function(page) {
  121. self.prepareBodyForUpdate(body, page);
  122. });
  123. debug('updatePages(): Sending Request to ES', body);
  124. return this.client.bulk({
  125. body: body,
  126. });
  127. };
  128. SearchClient.prototype.addAllPages = function()
  129. {
  130. var self = this;
  131. var offset = 0;
  132. var Page = this.crowi.model('Page');
  133. var stream = Page.getStreamOfFindAll();
  134. var body = [];
  135. return new Promise(function(resolve, reject) {
  136. stream.on('data', function (doc) {
  137. if (!doc.creator || !doc.revision || !self.shouldIndexed(doc)) {
  138. debug('Skipped', doc.path);
  139. return ;
  140. }
  141. self.prepareBodyForCreate(body, doc);
  142. }).on('error', function (err) {
  143. // TODO: handle err
  144. debug('Error stream:', err);
  145. }).on('close', function () {
  146. // all done
  147. // 最後に送信
  148. self.client.bulk({ body: body, })
  149. .then(function(res) {
  150. debug('Reponse from es:', res);
  151. return resolve(res);
  152. }).catch(function(err) {
  153. debug('Err from es:', err);
  154. return reject(err);
  155. });
  156. });
  157. });
  158. };
  159. /**
  160. * search returning type:
  161. * {
  162. * meta: { total: Integer, results: Integer},
  163. * data: [ pages ...],
  164. * }
  165. */
  166. SearchClient.prototype.search = function(query)
  167. {
  168. var self = this;
  169. return new Promise(function(resolve, reject) {
  170. self.client.search(query)
  171. .then(function(data) {
  172. var result = {
  173. meta: {
  174. took: data.took,
  175. total: data.hits.total,
  176. results: data.hits.hits.length,
  177. },
  178. data: data.hits.hits.map(function(elm) {
  179. return {_id: elm._id, _score: elm._score};
  180. })
  181. };
  182. resolve(result);
  183. }).catch(function(err) {
  184. reject(err);
  185. });
  186. });
  187. };
  188. SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option)
  189. {
  190. // getting path by default is almost for debug
  191. var fields = ['path', '_id'];
  192. if (option) {
  193. fields = option.fields || fields;
  194. }
  195. // default is only id field, sorted by updated_at
  196. var query = {
  197. index: this.index_name,
  198. type: 'pages',
  199. body: {
  200. fields: fields,
  201. sort: [{ updated_at: { order: 'desc'}}],
  202. query: {}, // query
  203. }
  204. };
  205. this.appendResultSize(query);
  206. return query;
  207. };
  208. SearchClient.prototype.createSearchQuerySortedByScore = function(option)
  209. {
  210. var fields = ['path', '_id'];
  211. if (option) {
  212. fields = option.fields || fields;
  213. }
  214. // sort by score
  215. var query = {
  216. index: this.index_name,
  217. type: 'pages',
  218. body: {
  219. fields: fields,
  220. sort: [ {_score: { order: 'desc'} }],
  221. query: {}, // query
  222. }
  223. };
  224. this.appendResultSize(query);
  225. return query;
  226. };
  227. SearchClient.prototype.appendResultSize = function(query, from, size)
  228. {
  229. query.from = from || this.DEFAULT_OFFSET;
  230. query.size = size || this.DEFAULT_LIMIT;
  231. };
  232. SearchClient.prototype.appendCriteriaForKeywordContains = function(query, keyword)
  233. {
  234. // query is created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
  235. if (!query.body.query.bool) {
  236. query.body.query.bool = {};
  237. }
  238. if (!query.body.query.bool.must || !Array.isArray(query.body.query.must)) {
  239. query.body.query.bool.must = [];
  240. }
  241. query.body.query.bool.must.push({
  242. multi_match: {
  243. query: keyword,
  244. fields: [
  245. "path.ja^2", // ためしに。
  246. "body.ja"
  247. ],
  248. operator: "and"
  249. }
  250. });
  251. };
  252. SearchClient.prototype.appendCriteriaForPathFilter = function(query, path)
  253. {
  254. // query is created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
  255. if (!query.body.query.bool) {
  256. query.body.query.bool = {};
  257. }
  258. if (!query.body.query.bool.filter || !Array.isArray(query.body.query.bool.filter)) {
  259. query.body.query.bool.filter = [];
  260. }
  261. if (path.match(/\/$/)) {
  262. path = path.substr(0, path.length - 1);
  263. }
  264. query.body.query.bool.filter.push({
  265. wildcard: {
  266. "path.raw": path + "/*"
  267. }
  268. });
  269. };
  270. SearchClient.prototype.searchKeyword = function(keyword, option)
  271. {
  272. var from = option.offset || null;
  273. var query = this.createSearchQuerySortedByScore();
  274. this.appendCriteriaForKeywordContains(query, keyword);
  275. return this.search(query);
  276. };
  277. SearchClient.prototype.searchByPath = function(keyword, prefix)
  278. {
  279. // TODO path 名だけから検索
  280. };
  281. SearchClient.prototype.searchKeywordUnderPath = function(keyword, path, option)
  282. {
  283. var from = option.offset || null;
  284. var query = this.createSearchQuerySortedByScore();
  285. this.appendCriteriaForKeywordContains(query, keyword);
  286. this.appendCriteriaForPathFilter(query, path);
  287. if (from) {
  288. this.appendResultSize(query, from);
  289. }
  290. return this.search(query);
  291. };
  292. SearchClient.prototype.syncPageCreated = function(page, user)
  293. {
  294. if (!this.shouldIndexed(page)) {
  295. return ;
  296. }
  297. this.addPages([page])
  298. .then(function(res) {
  299. debug('ES Response', res);
  300. })
  301. .catch(function(err){
  302. debug('ES Error', err);
  303. });
  304. };
  305. SearchClient.prototype.syncPageUpdated = function(page, user)
  306. {
  307. // TODO delete
  308. if (!this.shouldIndexed(page)) {
  309. return ;
  310. }
  311. this.updatePages([page])
  312. .then(function(res) {
  313. debug('ES Response', res);
  314. })
  315. .catch(function(err){
  316. debug('ES Error', err);
  317. });
  318. };
  319. module.exports = SearchClient;