/**
 * Search (search.js)
 */
  4. var elasticsearch = require('elasticsearch'),
  5. debug = require('debug')('crowi:lib:search');
  6. function SearchClient(crowi, esUri) {
  7. this.DEFAULT_OFFSET = 0;
  8. this.DEFAULT_LIMIT = 50;
  9. this.esUri = esUri;
  10. this.crowi = crowi;
  11. var uri = this.parseUri(this.esUri);
  12. this.host = uri.host;
  13. this.index_name = uri.index_name;
  14. this.client = new elasticsearch.Client({
  15. host: this.host,
  16. requestTimeout: 5000,
  17. });
  18. this.registerUpdateEvent();
  19. this.mappingFile = crowi.resourceDir + 'search/mappings.json';
  20. }
  21. SearchClient.prototype.registerUpdateEvent = function() {
  22. var pageEvent = this.crowi.event('page');
  23. pageEvent.on('create', this.syncPageCreated.bind(this))
  24. pageEvent.on('update', this.syncPageUpdated.bind(this))
  25. };
  26. SearchClient.prototype.shouldIndexed = function(page) {
  27. // FIXME: Magic Number
  28. if (page.grant !== 1) {
  29. return false;
  30. }
  31. if (page.redirectTo !== null) {
  32. return false;
  33. }
  34. return true;
  35. };
  36. // BONSAI_URL is following format:
  37. // => https://{ID}:{PASSWORD}@{HOST}
  38. SearchClient.prototype.parseUri = function(uri) {
  39. var index_name = 'crowi';
  40. var host = uri;
  41. if (m = uri.match(/^(https?:\/\/[^\/]+)\/(.+)$/)) {
  42. host = m[1];
  43. index_name = m[2];
  44. }
  45. return {
  46. host,
  47. index_name,
  48. };
  49. };
  50. SearchClient.prototype.buildIndex = function(uri) {
  51. return this.client.indices.create({
  52. index: this.index_name,
  53. body: require(this.mappingFile)
  54. });
  55. };
  56. SearchClient.prototype.deleteIndex = function(uri) {
  57. return this.client.indices.delete({
  58. index: this.index_name,
  59. });
  60. };
  61. SearchClient.prototype.prepareBodyForUpdate = function(body, page) {
  62. if (!Array.isArray(body)) {
  63. throw new Error('Body must be an array.');
  64. }
  65. var command = {
  66. update: {
  67. _index: this.index_name,
  68. _type: 'pages',
  69. _id: page._id.toString(),
  70. }
  71. };
  72. var document = {
  73. doc: {
  74. path: page.path,
  75. body: page.revision.body,
  76. comment_count: page.commentCount,
  77. bookmark_count: 0, // todo
  78. like_count: page.liker.length || 0,
  79. updated_at: page.updatedAt,
  80. }
  81. };
  82. body.push(command);
  83. body.push(document);
  84. };
  85. SearchClient.prototype.prepareBodyForCreate = function(body, page) {
  86. if (!Array.isArray(body)) {
  87. throw new Error('Body must be an array.');
  88. }
  89. var command = {
  90. index: {
  91. _index: this.index_name,
  92. _type: 'pages',
  93. _id: page._id.toString(),
  94. }
  95. };
  96. var document = {
  97. path: page.path,
  98. body: page.revision.body,
  99. username: page.creator.username,
  100. comment_count: page.commentCount,
  101. bookmark_count: 0, // todo
  102. like_count: page.liker.length || 0,
  103. created_at: page.createdAt,
  104. updated_at: page.updatedAt,
  105. };
  106. body.push(command);
  107. body.push(document);
  108. };
  109. SearchClient.prototype.addPages = function(pages)
  110. {
  111. var self = this;
  112. var body = [];
  113. pages.map(function(page) {
  114. self.prepareBodyForCreate(body, page);
  115. });
  116. debug('addPages(): Sending Request to ES', body);
  117. return this.client.bulk({
  118. body: body,
  119. });
  120. };
  121. SearchClient.prototype.updatePages = function(pages)
  122. {
  123. var self = this;
  124. var body = [];
  125. pages.map(function(page) {
  126. self.prepareBodyForUpdate(body, page);
  127. });
  128. debug('updatePages(): Sending Request to ES', body);
  129. return this.client.bulk({
  130. body: body,
  131. });
  132. };
  133. SearchClient.prototype.addAllPages = function()
  134. {
  135. var self = this;
  136. var offset = 0;
  137. var Page = this.crowi.model('Page');
  138. var stream = Page.getStreamOfFindAll();
  139. var body = [];
  140. return new Promise(function(resolve, reject) {
  141. stream.on('data', function (doc) {
  142. if (!doc.creator || !doc.revision || !self.shouldIndexed(doc)) {
  143. debug('Skipped', doc.path);
  144. return ;
  145. }
  146. self.prepareBodyForCreate(body, doc);
  147. }).on('error', function (err) {
  148. // TODO: handle err
  149. debug('Error stream:', err);
  150. }).on('close', function () {
  151. // all done
  152. // 最後に送信
  153. self.client.bulk({ body: body, })
  154. .then(function(res) {
  155. debug('Reponse from es:', res);
  156. return resolve(res);
  157. }).catch(function(err) {
  158. debug('Err from es:', err);
  159. return reject(err);
  160. });
  161. });
  162. });
  163. };
  164. /**
  165. * search returning type:
  166. * {
  167. * meta: { total: Integer, results: Integer},
  168. * data: [ pages ...],
  169. * }
  170. */
  171. SearchClient.prototype.search = function(query)
  172. {
  173. var self = this;
  174. return new Promise(function(resolve, reject) {
  175. self.client.search(query)
  176. .then(function(data) {
  177. var result = {
  178. meta: {
  179. took: data.took,
  180. total: data.hits.total,
  181. results: data.hits.hits.length,
  182. },
  183. data: data.hits.hits.map(function(elm) {
  184. return {_id: elm._id, _score: elm._score};
  185. })
  186. };
  187. resolve(result);
  188. }).catch(function(err) {
  189. reject(err);
  190. });
  191. });
  192. };
  193. SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option)
  194. {
  195. // getting path by default is almost for debug
  196. var fields = ['path', '_id'];
  197. if (option) {
  198. fields = option.fields || fields;
  199. }
  200. // default is only id field, sorted by updated_at
  201. var query = {
  202. index: this.index_name,
  203. type: 'pages',
  204. body: {
  205. fields: fields,
  206. sort: [{ updated_at: { order: 'desc'}}],
  207. query: {}, // query
  208. }
  209. };
  210. this.appendResultSize(query);
  211. return query;
  212. };
  213. SearchClient.prototype.createSearchQuerySortedByScore = function(option)
  214. {
  215. var fields = ['path', '_id'];
  216. if (option) {
  217. fields = option.fields || fields;
  218. }
  219. // sort by score
  220. var query = {
  221. index: this.index_name,
  222. type: 'pages',
  223. body: {
  224. fields: fields,
  225. sort: [ {_score: { order: 'desc'} }],
  226. query: {}, // query
  227. }
  228. };
  229. this.appendResultSize(query);
  230. return query;
  231. };
  232. SearchClient.prototype.appendResultSize = function(query, from, size)
  233. {
  234. query.from = from || this.DEFAULT_OFFSET;
  235. query.size = size || this.DEFAULT_LIMIT;
  236. };
  237. SearchClient.prototype.appendCriteriaForKeywordContains = function(query, keyword)
  238. {
  239. // query is created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
  240. if (!query.body.query.bool) {
  241. query.body.query.bool = {};
  242. }
  243. if (!query.body.query.bool.must || !Array.isArray(query.body.query.must)) {
  244. query.body.query.bool.must = [];
  245. }
  246. query.body.query.bool.must.push({
  247. multi_match: {
  248. query: keyword,
  249. fields: [
  250. "path.ja^2", // ためしに。
  251. "body.ja"
  252. ],
  253. operator: "and"
  254. }
  255. });
  256. };
  257. SearchClient.prototype.appendCriteriaForPathFilter = function(query, path)
  258. {
  259. // query is created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
  260. if (!query.body.query.bool) {
  261. query.body.query.bool = {};
  262. }
  263. if (!query.body.query.bool.filter || !Array.isArray(query.body.query.bool.filter)) {
  264. query.body.query.bool.filter = [];
  265. }
  266. if (path.match(/\/$/)) {
  267. path = path.substr(0, path.length - 1);
  268. }
  269. query.body.query.bool.filter.push({
  270. wildcard: {
  271. "path.raw": path + "/*"
  272. }
  273. });
  274. };
  275. SearchClient.prototype.searchKeyword = function(keyword, option)
  276. {
  277. var from = option.offset || null;
  278. var query = this.createSearchQuerySortedByScore();
  279. this.appendCriteriaForKeywordContains(query, keyword);
  280. return this.search(query);
  281. };
  282. SearchClient.prototype.searchByPath = function(keyword, prefix)
  283. {
  284. // TODO path 名だけから検索
  285. };
  286. SearchClient.prototype.searchKeywordUnderPath = function(keyword, path, option)
  287. {
  288. var from = option.offset || null;
  289. var query = this.createSearchQuerySortedByScore();
  290. this.appendCriteriaForKeywordContains(query, keyword);
  291. this.appendCriteriaForPathFilter(query, path);
  292. if (from) {
  293. this.appendResultSize(query, from);
  294. }
  295. return this.search(query);
  296. };
  297. SearchClient.prototype.syncPageCreated = function(page, user)
  298. {
  299. if (!this.shouldIndexed(page)) {
  300. return ;
  301. }
  302. this.addPages([page])
  303. .then(function(res) {
  304. debug('ES Response', res);
  305. })
  306. .catch(function(err){
  307. debug('ES Error', err);
  308. });
  309. };
  310. SearchClient.prototype.syncPageUpdated = function(page, user)
  311. {
  312. // TODO delete
  313. if (!this.shouldIndexed(page)) {
  314. return ;
  315. }
  316. this.updatePages([page])
  317. .then(function(res) {
  318. debug('ES Response', res);
  319. })
  320. .catch(function(err){
  321. debug('ES Error', err);
  322. });
  323. };
  324. module.exports = SearchClient;