search.js 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. /**
  2. * Search
  3. */
  4. var elasticsearch = require('elasticsearch'),
  5. debug = require('debug')('crowi:lib:search');
  /**
   * Search client backed by Elasticsearch.
   *
   * Stores the given arguments, derives host / index name from `esUri` via
   * `parseUri()`, creates the Elasticsearch client and subscribes to page
   * events via `registerUpdateEvent()`.
   *
   * @param {Object} crowi - application object; `resourceDir` is read here and
   *   `event('page')` is called by `registerUpdateEvent()`.
   * @param {string} esUri - Elasticsearch URI handed to `parseUri()`.
   */
  function SearchClient(crowi, esUri) {
    // Paging defaults used by appendResultSize().
    this.DEFAULT_OFFSET = 0;
    this.DEFAULT_LIMIT = 50;

    this.esUri = esUri;
    this.crowi = crowi;

    const parsed = this.parseUri(this.esUri);
    this.host = parsed.host;
    this.index_name = parsed.index_name;

    const clientOptions = {
      host: this.host,
      requestTimeout: 5000,
    };
    this.client = new elasticsearch.Client(clientOptions);

    this.registerUpdateEvent();

    // Path of the mapping definition loaded by buildIndex().
    this.mappingFile = crowi.resourceDir + 'search/mappings.json';
  }
  /**
   * Placeholder for an Elasticsearch version check.
   * Not implemented yet: the body is empty and nothing is returned.
   */
  SearchClient.prototype.checkESVersion = function() {
    // TODO
  };
  /**
   * Subscribes this client to the 'create' and 'update' events of the page
   * event object returned by `crowi.event('page')`.
   */
  SearchClient.prototype.registerUpdateEvent = function() {
    const emitter = this.crowi.event('page');

    // Handlers are bound so that `this` is the client when the event fires.
    const onCreate = this.syncPageCreated.bind(this);
    emitter.on('create', onCreate);

    const onUpdate = this.syncPageUpdated.bind(this);
    emitter.on('update', onUpdate);
  };
  /**
   * Decides whether a page should be present in the search index.
   *
   * A page qualifies only when its `grant` is exactly 1, its `redirectTo` is
   * exactly `null`, and `isDeleted()` returns a falsy value. The checks run in
   * that order and stop at the first failure.
   *
   * @param {Object} page - object exposing `grant`, `redirectTo` and `isDeleted()`.
   * @returns {boolean} true when the page should be indexed.
   */
  SearchClient.prototype.shouldIndexed = function(page) {
    // FIXME: Magic Number (grant value 1)
    return page.grant === 1
      && page.redirectTo === null
      && !page.isDeleted();
  };
  // BONSAI_URL has the following format:
  // => https://{ID}:{PASSWORD}@{HOST}
  /**
   * Splits an Elasticsearch URI into the host part and the index name.
   *
   * When the URI has the form `http(s)://<authority>/<rest>`, `<rest>` is used
   * as the index name and everything before that slash as the host. Otherwise
   * the whole URI is the host and the index name defaults to 'crowi'.
   *
   * @param {string} uri - Elasticsearch URI.
   * @returns {{host: string, index_name: string}}
   */
  SearchClient.prototype.parseUri = function(uri) {
    let host = uri;
    let index_name = 'crowi';

    // Declared locally: the match result must not leak as an implicit global
    // (which is also a ReferenceError in strict mode).
    const matched = uri.match(/^(https?:\/\/[^\/]+)\/(.+)$/);
    if (matched) {
      host = matched[1];
      index_name = matched[2];
    }

    return {
      host,
      index_name,
    };
  };
  /**
   * Creates the index named `this.index_name`, using the content of
   * `this.mappingFile` (loaded with `require`) as the request body.
   *
   * @param {*} uri - not used by this method.
   * @returns {*} whatever `client.indices.create()` returns.
   */
  SearchClient.prototype.buildIndex = function(uri) {
    const indices = this.client.indices;
    const params = {
      index: this.index_name,
      body: require(this.mappingFile),
    };
    return indices.create(params);
  };
  /**
   * Deletes the index named `this.index_name`.
   *
   * @param {*} uri - not used by this method.
   * @returns {*} whatever `client.indices.delete()` returns.
   */
  SearchClient.prototype.deleteIndex = function(uri) {
    const params = { index: this.index_name };
    return this.client.indices.delete(params);
  };
  /**
   * Appends the two bulk entries for updating one page to `body`: an `update`
   * action line followed by a partial document with `doc_as_upsert: true`.
   *
   * @param {Array} body - bulk request body to append to (mutated).
   * @param {Object} page - page providing `_id`, `path`, `revision.body`,
   *   `commentCount`, `liker` and `updatedAt`.
   * @throws {Error} when `body` is not an array.
   */
  SearchClient.prototype.prepareBodyForUpdate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const action = {
      update: {
        _index: this.index_name,
        _type: 'pages',
        _id: page._id.toString(),
      },
    };

    const fields = {
      path: page.path,
      body: page.revision.body,
      comment_count: page.commentCount,
      bookmark_count: 0, // todo
      like_count: page.liker.length || 0,
      updated_at: page.updatedAt,
    };
    const payload = {
      doc: fields,
      doc_as_upsert: true,
    };

    body.push(action, payload);
  };
  /**
   * Appends the two bulk entries for indexing one page to `body`: an `index`
   * action line followed by the document itself.
   *
   * @param {Array} body - bulk request body to append to (mutated).
   * @param {Object} page - page providing `_id`, `path`, `revision.body`,
   *   `creator.username`, `commentCount`, `liker`, `createdAt` and `updatedAt`.
   * @throws {Error} when `body` is not an array.
   */
  SearchClient.prototype.prepareBodyForCreate = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const action = {
      index: {
        _index: this.index_name,
        _type: 'pages',
        _id: page._id.toString(),
      },
    };

    const source = {
      path: page.path,
      body: page.revision.body,
      username: page.creator.username,
      comment_count: page.commentCount,
      bookmark_count: 0, // todo
      like_count: page.liker.length || 0,
      created_at: page.createdAt,
      updated_at: page.updatedAt,
    };

    body.push(action, source);
  };
  /**
   * Appends a single bulk `delete` action for one page to `body`.
   *
   * @param {Array} body - bulk request body to append to (mutated).
   * @param {Object} page - page providing `_id`.
   * @throws {Error} when `body` is not an array.
   */
  SearchClient.prototype.prepareBodyForDelete = function(body, page) {
    if (!Array.isArray(body)) {
      throw new Error('Body must be an array.');
    }

    const target = {
      _index: this.index_name,
      _type: 'pages',
      _id: page._id.toString(),
    };

    body.push({ delete: target });
  };
  /**
   * Sends one bulk request that indexes every page in `pages`.
   *
   * @param {Array<Object>} pages - pages passed one by one to `prepareBodyForCreate()`.
   * @returns {*} whatever `client.bulk()` returns.
   */
  SearchClient.prototype.addPages = function(pages)
  {
    const bulkBody = [];

    pages.forEach((page) => {
      this.prepareBodyForCreate(bulkBody, page);
    });

    debug('addPages(): Sending Request to ES', bulkBody);
    return this.client.bulk({ body: bulkBody });
  };
  /**
   * Sends one bulk request that updates (upserts) every page in `pages`.
   *
   * @param {Array<Object>} pages - pages passed one by one to `prepareBodyForUpdate()`.
   * @returns {*} whatever `client.bulk()` returns.
   */
  SearchClient.prototype.updatePages = function(pages)
  {
    const bulkBody = [];

    pages.forEach((page) => {
      this.prepareBodyForUpdate(bulkBody, page);
    });

    debug('updatePages(): Sending Request to ES', bulkBody);
    return this.client.bulk({ body: bulkBody });
  };
  /**
   * Sends one bulk request that removes every page in `pages` from the index.
   *
   * @param {Array<Object>} pages - pages passed one by one to `prepareBodyForDelete()`.
   * @returns {*} whatever `client.bulk()` returns.
   */
  SearchClient.prototype.deletePages = function(pages)
  {
    const bulkBody = [];

    pages.forEach((page) => {
      this.prepareBodyForDelete(bulkBody, page);
    });

    debug('deletePages(): Sending Request to ES', bulkBody);
    return this.client.bulk({ body: bulkBody });
  };
  /**
   * Indexes every page emitted by `Page.getStreamOfFindAll()` with a single
   * bulk request that is sent once the stream emits 'close'.
   *
   * Documents without `creator` or `revision`, or rejected by
   * `shouldIndexed()`, are skipped. All bulk entries are collected in memory
   * before being sent.
   *
   * If the stream emits 'error', the returned promise is rejected with that
   * error and no bulk request is sent, so a partially read stream never
   * produces a partial index update.
   *
   * @returns {Promise<*>} resolves with the bulk response; rejects with the
   *   stream error or the error from `client.bulk()`.
   */
  SearchClient.prototype.addAllPages = function()
  {
    const self = this;
    const Page = this.crowi.model('Page');
    const stream = Page.getStreamOfFindAll();
    const body = [];

    return new Promise(function(resolve, reject) {
      // Remembers a stream failure so that a later 'close' does not send data.
      let streamFailed = false;

      stream.on('data', function(doc) {
        if (!doc.creator || !doc.revision || !self.shouldIndexed(doc)) {
          debug('Skipped', doc.path);
          return;
        }

        self.prepareBodyForCreate(body, doc);
      }).on('error', function(err) {
        debug('Error stream:', err);
        streamFailed = true;
        reject(err);
      }).on('close', function() {
        if (streamFailed) {
          return;
        }

        // All documents have been read: send everything in one request.
        self.client.bulk({ body: body })
          .then(function(res) {
            debug('Reponse from es:', res);
            return resolve(res);
          }).catch(function(err) {
            debug('Err from es:', err);
            return reject(err);
          });
      });
    });
  };
  /**
   * Runs `query` through the Elasticsearch client and reshapes the response.
   *
   * Resolved value:
   * {
   *   meta: { took: <data.took>, total: <data.hits.total>, results: <number of hits returned> },
   *   data: [ { _id, _score }, ... ],
   * }
   *
   * @param {Object} query - parameters passed unchanged to `client.search()`.
   * @returns {Promise<Object>} rejects with whatever error the client call or
   *   the reshaping raises.
   */
  SearchClient.prototype.search = function(query)
  {
    const client = this.client;

    // Keeps only id and score of each hit and summarises the response.
    const summarize = function(data) {
      const hits = data.hits.hits;
      return {
        meta: {
          took: data.took,
          total: data.hits.total,
          results: hits.length,
        },
        data: hits.map(function(hit) {
          return { _id: hit._id, _score: hit._score };
        }),
      };
    };

    return new Promise(function(resolve, reject) {
      client.search(query)
        .then(function(data) {
          resolve(summarize(data));
        })
        .catch(function(err) {
          reject(err);
        });
    });
  };
  /**
   * Builds a search request skeleton sorted by `updated_at` descending, with
   * an empty `query` object and the default paging from `appendResultSize()`.
   *
   * @param {Object} [option] - `option.fields`, when truthy, replaces the
   *   default field list.
   * @returns {Object} the search request.
   */
  SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option)
  {
    // Returning `path` by default is mostly for debugging.
    const defaultFields = ['path', '_id'];
    const fields = option ? (option.fields || defaultFields) : defaultFields;

    const searchBody = {
      fields: fields,
      sort: [{ updated_at: { order: 'desc' } }],
      query: {}, // filled in by the appendCriteria* methods
    };
    const request = {
      index: this.index_name,
      type: 'pages',
      body: searchBody,
    };

    this.appendResultSize(request);
    return request;
  };
  /**
   * Builds a search request skeleton sorted by `_score` descending, with an
   * empty `query` object and the default paging from `appendResultSize()`.
   *
   * @param {Object} [option] - `option.fields`, when truthy, replaces the
   *   default field list.
   * @returns {Object} the search request.
   */
  SearchClient.prototype.createSearchQuerySortedByScore = function(option)
  {
    const defaultFields = ['path', '_id'];
    const fields = option ? (option.fields || defaultFields) : defaultFields;

    const searchBody = {
      fields: fields,
      sort: [{ _score: { order: 'desc' } }],
      query: {}, // filled in by the appendCriteria* methods
    };
    const request = {
      index: this.index_name,
      type: 'pages',
      body: searchBody,
    };

    this.appendResultSize(request);
    return request;
  };
  /**
   * Sets `from` and `size` on a search request.
   *
   * Falsy arguments (including 0) fall back to `DEFAULT_OFFSET` and
   * `DEFAULT_LIMIT` respectively.
   *
   * @param {Object} query - search request to modify in place.
   * @param {number} [from] - offset of the first result.
   * @param {number} [size] - maximum number of results.
   */
  SearchClient.prototype.appendResultSize = function(query, from, size)
  {
    const offset = from ? from : this.DEFAULT_OFFSET;
    const limit = size ? size : this.DEFAULT_LIMIT;

    query.from = offset;
    query.size = limit;
  };
  /**
   * Adds a `multi_match` clause for `keyword` to `query.body.query.bool.must`,
   * creating `bool` and `must` when they do not exist yet.
   *
   * An existing `must` array is kept and appended to, so the method can be
   * combined with other criteria or called several times. A `must` value that
   * is not an array is replaced by a fresh array.
   *
   * @param {Object} query - request created by createSearchQuerySortedByScore()
   *   or createSearchQuerySortedByUpdatedAt(); modified in place.
   * @param {string} keyword - text to match against `path.ja` and `body.ja`.
   */
  SearchClient.prototype.appendCriteriaForKeywordContains = function(query, keyword)
  {
    const root = query.body.query;

    if (!root.bool) {
      root.bool = {};
    }

    // Check bool.must itself: looking at root.must (which never exists) would
    // reset the list on every call and drop clauses added earlier.
    if (!Array.isArray(root.bool.must)) {
      root.bool.must = [];
    }

    root.bool.must.push({
      multi_match: {
        query: keyword,
        fields: [
          'path.ja^2', // experimental boost on the path
          'body.ja',
        ],
        operator: 'and',
      },
    });
  };
  /**
   * Adds a wildcard filter on `path.raw` matching `<path>/*` to
   * `query.body.query.bool.filter`, creating `bool` and `filter` when needed.
   * One trailing slash of `path` is dropped before the pattern is built.
   *
   * @param {Object} query - request created by createSearchQuerySortedByScore()
   *   or createSearchQuerySortedByUpdatedAt(); modified in place.
   * @param {string} path - path prefix to filter by.
   */
  SearchClient.prototype.appendCriteriaForPathFilter = function(query, path)
  {
    const root = query.body.query;

    if (!root.bool) {
      root.bool = {};
    }

    const current = root.bool.filter;
    if (!current || !Array.isArray(current)) {
      root.bool.filter = [];
    }

    let prefix = path;
    if (path.match(/\/$/)) {
      prefix = path.slice(0, -1);
    }

    const clause = { wildcard: {} };
    clause.wildcard['path.raw'] = prefix + '/*';
    root.bool.filter.push(clause);
  };
  /**
   * Searches pages matching `keyword`, sorted by score.
   *
   * @param {string} keyword - text to search for.
   * @param {Object} [option] - `option.offset`, when truthy, is used as the
   *   offset of the first result; otherwise the default paging applies.
   * @returns {*} whatever `this.search()` returns.
   */
  SearchClient.prototype.searchKeyword = function(keyword, option)
  {
    // `option` may be omitted entirely.
    const from = (option && option.offset) || null;

    const query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForKeywordContains(query, keyword);

    // Apply the requested offset; without this, paging was silently ignored.
    if (from) {
      this.appendResultSize(query, from);
    }

    return this.search(query);
  };
  /**
   * Placeholder for a search restricted to path names.
   * Not implemented yet: the body is empty and nothing is returned.
   *
   * @param {*} keyword - currently unused.
   * @param {*} prefix - currently unused.
   */
  SearchClient.prototype.searchByPath = function(keyword, prefix)
  {
    // TODO: search using the path name only
  };
  /**
   * Searches pages matching `keyword` whose path lies under `path`, sorted by
   * score.
   *
   * @param {string} keyword - text to search for.
   * @param {string} path - path prefix passed to `appendCriteriaForPathFilter()`.
   * @param {Object} [option] - `option.offset`, when truthy, is used as the
   *   offset of the first result; otherwise the default paging applies.
   * @returns {*} whatever `this.search()` returns.
   */
  SearchClient.prototype.searchKeywordUnderPath = function(keyword, path, option)
  {
    // `option` may be omitted entirely; previously that raised a TypeError.
    const from = (option && option.offset) || null;

    const query = this.createSearchQuerySortedByScore();
    this.appendCriteriaForKeywordContains(query, keyword);
    this.appendCriteriaForPathFilter(query, path);

    if (from) {
      this.appendResultSize(query, from);
    }

    return this.search(query);
  };
  /**
   * Handler for the page 'create' event: adds the page to the index when
   * `shouldIndexed()` accepts it. The outcome of the request is only logged
   * through `debug`; nothing is returned.
   *
   * @param {Object} page - the created page.
   * @param {*} user - not used by this method.
   */
  SearchClient.prototype.syncPageCreated = function(page, user)
  {
    debug('SearchClient.syncPageCreated', page);

    if (this.shouldIndexed(page)) {
      this.addPages([page])
        .then((res) => {
          debug('ES Response', res);
        })
        .catch((err) => {
          debug('ES Error', err);
        });
    }
  };
  /**
   * Handler for the page 'update' event: updates the page in the index when
   * `shouldIndexed()` accepts it, otherwise removes it from the index. The
   * outcome of the request is only logged through `debug`; nothing is returned.
   *
   * @param {Object} page - the updated page.
   * @param {*} user - not used by this method.
   */
  SearchClient.prototype.syncPageUpdated = function(page, user)
  {
    debug('SearchClient.syncPageUpdated', page);

    if (this.shouldIndexed(page)) {
      this.updatePages([page])
        .then((res) => {
          debug('ES Response', res);
        })
        .catch((err) => {
          debug('ES Error', err);
        });
      return;
    }

    // TODO delete
    // The page must no longer be searchable, so remove it from the index.
    this.deletePages([page])
      .then((res) => {
        debug('deletePages: ES Response', res);
      })
      .catch((err) => {
        debug('deletePages:ES Error', err);
      });
  };
  // The module's export is the SearchClient constructor itself.
  module.exports = SearchClient;