search.js 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. /**
  2. * Search
  3. */
  4. var elasticsearch = require('elasticsearch'),
  5. debug = require('debug')('crowi:lib:search');
  /**
   * Wrapper around an Elasticsearch client bound to a single index.
   *
   * @param {Object} crowi - application object; `crowi.resourceDir` is read
   *   here and the object itself is kept on `this.crowi`.
   * @param {string} esUri - connection string handed to `parseUri()`.
   */
  function SearchClient(crowi, esUri) {
    // Paging defaults, read by appendResultSize().
    this.DEFAULT_OFFSET = 0;
    this.DEFAULT_LIMIT = 50;

    this.esUri = esUri;
    this.crowi = crowi;

    // Derive the host and index name from the connection string.
    var parsed = this.parseUri(this.esUri);
    this.host = parsed.host;
    this.index_name = parsed.index_name;

    var clientOptions = {
      host: this.host,
      requestTimeout: 5000,
    };
    this.client = new elasticsearch.Client(clientOptions);

    // Index definition loaded by buildIndex().
    this.mappingFile = crowi.resourceDir + 'search/mappings.json';
  }
  /**
   * Split an Elasticsearch connection string into host and index name.
   *
   * @param {string} uri - expected form: elasticsearch://host:port/index_name
   * @returns {{host: string, index_name: string}} `host` is "host:port".
   * @throws {Error} when `uri` is not a string in the expected form.
   */
  SearchClient.prototype.parseUri = function(uri) {
    // Keep the match result local: an undeclared `m` would leak a global
    // variable and throws a ReferenceError in strict mode.
    var m = null;
    if (typeof uri === 'string') {
      m = uri.match(/^elasticsearch:\/\/([^:]+):([^\/]+)\/(.+)$/);
    }

    if (!m) {
      throw new Error('Invalid ELASTICSEARCH_URI format. Should be elasticsearch://host:port/index_name');
    }

    return {
      host: m[1] + ':' + m[2],
      index_name: m[3],
    };
  };
  /**
   * Create the index named `this.index_name`, sending the module loaded from
   * `this.mappingFile` as the request body.
   *
   * @param {*} uri - not used by this method.
   * @returns {*} whatever `client.indices.create` returns.
   */
  SearchClient.prototype.buildIndex = function(uri) {
    var indices = this.client.indices;
    var params = {
      index: this.index_name,
      body: require(this.mappingFile)
    };

    return indices.create(params);
  };
  /**
   * Delete the index named `this.index_name`.
   *
   * @param {*} uri - not used by this method.
   * @returns {*} whatever `client.indices.delete` returns.
   */
  SearchClient.prototype.deleteIndex = function(uri) {
    var indices = this.client.indices;
    var params = {
      index: this.index_name,
    };

    return indices.delete(params);
  };
  /**
   * Append one bulk "update" operation for `page` to `body`.
   *
   * Two entries are pushed: the action line and the partial document. The
   * Elasticsearch bulk API expects the fields of an update action to be
   * wrapped in a `doc` object, so the fields are nested accordingly.
   *
   * @param {Array} body - bulk request body under construction; mutated in place.
   * @param {Object} page - page providing `_id`, `path`, `revision.body`,
   *   `creator.username`, `commentCount`, `liker` and `updatedAt`.
   * @throws {Error} when `body` is not an array.
   */
  SearchClient.prototype.prepareBodyForUpdate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    var command = {
      update: {
        _index: this.index_name,
        _type: 'pages',
        _id: page._id.toString(),
      }
    };

    // A page without a `liker` list counts as zero likes instead of throwing.
    var likeCount = (page.liker && page.liker.length) || 0;

    var document = {
      doc: {
        path: page.path,
        body: page.revision.body,
        username: page.creator.username,
        comment_count: page.commentCount,
        like_count: likeCount,
        updated_at: page.updatedAt,
      }
    };

    body.push(command);
    body.push(document);
  };
  /**
   * Append one bulk "index" operation for `page` to `body`.
   *
   * Two entries are pushed: the action line and the document to index.
   *
   * @param {Array} body - bulk request body under construction; mutated in place.
   * @param {Object} page - page providing `_id`, `path`, `revision.body`,
   *   `creator.username`, `commentCount`, `liker`, `createdAt` and `updatedAt`.
   * @throws {Error} when `body` is not an array.
   */
  SearchClient.prototype.prepareBodyForCreate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    var command = {
      index: {
        _index: this.index_name,
        _type: 'pages',
        _id: page._id.toString(),
      }
    };

    // A page without a `liker` list counts as zero likes instead of throwing.
    var likeCount = (page.liker && page.liker.length) || 0;

    var document = {
      path: page.path,
      body: page.revision.body,
      username: page.creator.username,
      comment_count: page.commentCount,
      bookmark_count: 0, // todo
      like_count: likeCount,
      created_at: page.createdAt,
      updated_at: page.updatedAt,
    };

    body.push(command);
    body.push(document);
  };
  /**
   * Index the given pages with a single bulk request.
   *
   * @param {Array} pages - pages passed one by one to `prepareBodyForCreate()`.
   * @returns {*} whatever `client.bulk` returns.
   */
  SearchClient.prototype.addPages = function(pages)
  {
    var self = this;
    var body = [];

    // forEach rather than map: the callback is run only for its side effect
    // on `body`, so no result array is needed.
    pages.forEach(function(page) {
      self.prepareBodyForCreate(body, page);
    });

    return this.client.bulk({
      body: body,
    });
  };
  /**
   * Update the given pages in the index with a single bulk request.
   *
   * @param {Array} pages - pages passed one by one to `prepareBodyForUpdate()`.
   * @returns {*} whatever `client.bulk` returns.
   */
  SearchClient.prototype.updatePages = function(pages)
  {
    var self = this;
    var body = [];

    // forEach rather than map: the callback is run only for its side effect
    // on `body`, so no result array is needed.
    pages.forEach(function(page) {
      self.prepareBodyForUpdate(body, page);
    });

    return this.client.bulk({
      body: body,
    });
  };
  /**
   * Index every page produced by `Page.getStreamOfFindAll()` with one bulk
   * request.
   *
   * Documents without a `creator` or a `revision` are skipped. The bulk
   * request is sent once the stream emits 'close'.
   *
   * @returns {Promise} resolves with the bulk response; rejects when the
   *   stream emits 'error' or when the bulk request fails.
   */
  SearchClient.prototype.addAllPages = function()
  {
    var self = this;
    var Page = this.crowi.model('Page');
    var stream = Page.getStreamOfFindAll();
    var body = [];
    // Set once the stream has failed, so a later 'close' does not send a
    // partially collected body.
    var failed = false;

    return new Promise(function(resolve, reject) {
      stream.on('data', function (doc) {
        if (!doc.creator || !doc.revision) {
          debug('Skipped', doc.path);
          return ;
        }

        self.prepareBodyForCreate(body, doc);
      }).on('error', function (err) {
        // Surface the failure to the caller instead of leaving the promise
        // pending.
        debug('Error stream:', err);
        failed = true;
        reject(err);
      }).on('close', function () {
        if (failed) {
          return ;
        }

        // All documents have been read; send them in one request.
        self.client.bulk({ body: body, })
        .then(function(res) {
          debug('Reponse from es:', res);
          return resolve(res);
        }).catch(function(err) {
          debug('Err from es:', err);
          return reject(err);
        });
      });
    });
  };
  /**
   * Run `query` through the Elasticsearch client and reduce the response to:
   * {
   *   meta: { took: data.took, total: data.hits.total, results: Integer },
   *   data: [ { _id, _score }, ... ],
   * }
   * `results` is the number of hits in this response; `data` holds only the
   * id and score of each hit.
   *
   * @param {Object} query - parameters passed unchanged to `client.search`.
   * @returns {Promise<Object>} resolves with the structure above; rejects
   *   when the client call throws or its promise rejects.
   */
  SearchClient.prototype.search = function(query)
  {
    var pending;

    // Turn a synchronous throw from the client into a rejection so callers
    // only ever deal with a promise.
    try {
      pending = this.client.search(query);
    } catch (err) {
      return Promise.reject(err);
    }

    return Promise.resolve(pending).then(function(data) {
      var hits = data.hits.hits;

      return {
        meta: {
          took: data.took,
          total: data.hits.total,
          results: hits.length,
        },
        data: hits.map(function(elm) {
          return {_id: elm._id, _score: elm._score};
        })
      };
    });
  };
  /**
   * Build search parameters for the 'pages' type of this client's index,
   * sorted by `updated_at` descending, with an empty `body.query` for the
   * appendCriteria* methods to fill in.
   *
   * @param {Object} [option] - `option.fields` replaces the default field list.
   * @returns {Object} search parameters including `from` and `size`.
   */
  SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option)
  {
    // 'path' is requested by default mostly as a debugging aid.
    var fields = (option && option.fields) || ['path', '_id'];

    var searchBody = {
      fields: fields,
      sort: [{ updated_at: { order: 'desc'}}],
      query: {}, // filled in by the appendCriteria* methods
    };

    var query = {
      index: this.index_name,
      type: 'pages',
      body: searchBody
    };

    // Apply the default paging window.
    this.appendResultSize(query);

    return query;
  };
  /**
   * Build search parameters for the 'pages' type of this client's index,
   * sorted by `_score` descending, with an empty `body.query` for the
   * appendCriteria* methods to fill in.
   *
   * @param {Object} [option] - `option.fields` replaces the default field list.
   * @returns {Object} search parameters including `from` and `size`.
   */
  SearchClient.prototype.createSearchQuerySortedByScore = function(option)
  {
    var fields = (option && option.fields) || ['path', '_id'];

    var searchBody = {
      fields: fields,
      sort: [ {_score: { order: 'desc'} }],
      query: {}, // filled in by the appendCriteria* methods
    };

    var query = {
      index: this.index_name,
      type: 'pages',
      body: searchBody
    };

    // Apply the default paging window.
    this.appendResultSize(query);

    return query;
  };
  /**
   * Set the paging window on `query`.
   *
   * @param {Object} query - search parameters; `from` and `size` are overwritten.
   * @param {number} [from] - start offset; a falsy value selects DEFAULT_OFFSET.
   * @param {number} [size] - page size; a falsy value selects DEFAULT_LIMIT.
   */
  SearchClient.prototype.appendResultSize = function(query, from, size)
  {
    if (from) {
      query.from = from;
    } else {
      query.from = this.DEFAULT_OFFSET;
    }

    if (size) {
      query.size = size;
    } else {
      query.size = this.DEFAULT_LIMIT;
    }
  };
  /**
   * Add a `multi_match` clause for `keyword` to `query.body.query.bool.must`.
   *
   * `query` is expected to come from createSearchQuerySortedByScore() or
   * createSearchQuerySortedByUpdatedAt(). Clauses already present in `must`
   * are kept, so the method can be called more than once on the same query.
   *
   * @param {Object} query - search parameters; mutated in place.
   * @param {string} keyword - text matched against the `path.ja` (boosted x2)
   *   and `body.ja` fields with the "and" operator.
   */
  SearchClient.prototype.appendCriteriaForKeywordContains = function(query, keyword)
  {
    if (!query.body.query.bool) {
      query.body.query.bool = {};
    }

    var bool = query.body.query.bool;

    // Check bool.must itself: testing query.body.query.must (no `bool`) is
    // never an array and would wipe previously added clauses on every call.
    if (!Array.isArray(bool.must)) {
      bool.must = [];
    }

    bool.must.push({
      multi_match: {
        query: keyword,
        fields: [
          "path.ja^2", // experimental boost on the path
          "body.ja"
        ],
        operator: "and"
      }
    });
  };
  /**
   * Add a wildcard filter on `path.raw` to `query.body.query.bool.filter`,
   * using the pattern `<path>/*` (one trailing slash of `path` is dropped
   * first).
   *
   * `query` is expected to come from createSearchQuerySortedByScore() or
   * createSearchQuerySortedByUpdatedAt().
   *
   * @param {Object} query - search parameters; mutated in place.
   * @param {string} path - path prefix to filter on.
   */
  SearchClient.prototype.appendCriteriaForPathFilter = function(query, path)
  {
    var root = query.body.query;

    if (!root.bool) {
      root.bool = {};
    }
    if (!(root.bool.filter && Array.isArray(root.bool.filter))) {
      root.bool.filter = [];
    }

    // Remove a single trailing slash so the pattern never contains "//".
    var base = path.replace(/\/$/, '');

    var criterion = {
      wildcard: {
        "path.raw": base + "/*"
      }
    };
    root.bool.filter.push(criterion);
  };
  /**
   * Search all pages for `keyword`, sorted by score.
   *
   * @param {string} keyword - text to search for.
   * @param {Object} [option] - `option.offset`, when truthy, is used as the
   *   start offset of the result window.
   * @returns {Promise} result of `search()`.
   */
  SearchClient.prototype.searchKeyword = function(keyword, option)
  {
    // Tolerate a missing option object instead of throwing on `.offset`.
    var from = (option && option.offset) || null;

    var query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForKeywordContains(query, keyword);

    // Honour the requested offset, as searchKeywordUnderPath() does.
    if (from) {
      this.appendResultSize(query, from);
    }

    return this.search(query);
  };
  /**
   * Not implemented yet: the body is empty, so the call returns undefined
   * and neither parameter is used.
   */
  SearchClient.prototype.searchByPath = function(keyword, prefix)
  {
    // TODO: search using only the path name
  };
  /**
   * Search for `keyword` among pages whose path is under `path`, sorted by
   * score.
   *
   * @param {string} keyword - text to search for.
   * @param {string} path - path prefix passed to appendCriteriaForPathFilter().
   * @param {Object} [option] - `option.offset`, when truthy, is used as the
   *   start offset of the result window.
   * @returns {Promise} result of `search()`.
   */
  SearchClient.prototype.searchKeywordUnderPath = function(keyword, path, option)
  {
    // Tolerate a missing option object instead of throwing on `.offset`.
    var from = (option && option.offset) || null;

    var query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForKeywordContains(query, keyword);
    this.appendCriteriaForPathFilter(query, path);

    if (from) {
      this.appendResultSize(query, from);
    }

    return this.search(query);
  };
  275. module.exports = SearchClient;
  276. /*
  277. lib.searchPageByKeyword = function(keyword) {
  278. var queryBody = {
  279. query: {
  280. bool: {
  281. should: [
  282. {term: { path: { term: keyword, boost: 2.0 } }},
  283. {term: { body: { term: keyword } }}
  284. ]
  285. }
  286. },
  287. highlight : { fields : { body : {} } },
  288. //sort: [{ updated: { order: "desc" } } ]
  289. };
  290. return client.search({
  291. index: index_name,
  292. body: queryBody
  293. });
  294. };
  295. */