| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596 |
- /**
- * Search
- */
- var elasticsearch = require('elasticsearch'),
- debug = require('debug')('crowi:lib:search');
- function SearchClient(crowi, esUri) {
- this.DEFAULT_OFFSET = 0;
- this.DEFAULT_LIMIT = 50;
- this.esUri = esUri;
- this.crowi = crowi;
- var uri = this.parseUri(this.esUri);
- this.host = uri.host;
- this.index_name = uri.index_name;
- this.client = new elasticsearch.Client({
- host: this.host,
- requestTimeout: 5000,
- //log: 'debug',
- });
- this.registerUpdateEvent();
- this.mappingFile = crowi.resourceDir + 'search/mappings.json';
- }
- SearchClient.prototype.checkESVersion = function() {
- // TODO
- };
- SearchClient.prototype.registerUpdateEvent = function() {
- var pageEvent = this.crowi.event('page');
- pageEvent.on('create', this.syncPageCreated.bind(this))
- pageEvent.on('update', this.syncPageUpdated.bind(this))
- pageEvent.on('delete', this.syncPageDeleted.bind(this))
- };
- SearchClient.prototype.shouldIndexed = function(page) {
- // FIXME: Magic Number
- if (page.grant !== 1) {
- return false;
- }
- if (page.redirectTo !== null) {
- return false;
- }
- if (page.isDeleted()) {
- return false;
- }
- return true;
- };
- // BONSAI_URL is following format:
- // => https://{ID}:{PASSWORD}@{HOST}
- SearchClient.prototype.parseUri = function(uri) {
- var index_name = 'crowi';
- var host = uri;
- if (m = uri.match(/^(https?:\/\/[^\/]+)\/(.+)$/)) {
- host = m[1];
- index_name = m[2];
- }
- return {
- host,
- index_name,
- };
- };
- SearchClient.prototype.buildIndex = function(uri) {
- return this.client.indices.create({
- index: this.index_name,
- body: require(this.mappingFile)
- });
- };
- SearchClient.prototype.deleteIndex = function(uri) {
- return this.client.indices.delete({
- index: this.index_name,
- });
- };
- SearchClient.prototype.prepareBodyForUpdate = function(body, page) {
- if (!Array.isArray(body)) {
- throw new Error('Body must be an array.');
- }
- var command = {
- update: {
- _index: this.index_name,
- _type: 'pages',
- _id: page._id.toString(),
- }
- };
- var document = {
- doc: {
- path: page.path,
- body: page.revision.body,
- comment_count: page.commentCount,
- bookmark_count: 0, // todo
- like_count: page.liker.length || 0,
- updated_at: page.updatedAt,
- },
- doc_as_upsert: true,
- };
- body.push(command);
- body.push(document);
- };
- SearchClient.prototype.prepareBodyForCreate = function(body, page) {
- if (!Array.isArray(body)) {
- throw new Error('Body must be an array.');
- }
- var command = {
- index: {
- _index: this.index_name,
- _type: 'pages',
- _id: page._id.toString(),
- }
- };
- var document = {
- path: page.path,
- body: page.revision.body,
- username: page.creator.username,
- comment_count: page.commentCount,
- bookmark_count: 0, // todo
- like_count: page.liker.length || 0,
- created_at: page.createdAt,
- updated_at: page.updatedAt,
- };
- body.push(command);
- body.push(document);
- };
- SearchClient.prototype.prepareBodyForDelete = function(body, page) {
- if (!Array.isArray(body)) {
- throw new Error('Body must be an array.');
- }
- var command = {
- delete: {
- _index: this.index_name,
- _type: 'pages',
- _id: page._id.toString(),
- }
- };
- body.push(command);
- };
- SearchClient.prototype.addPages = function(pages)
- {
- var self = this;
- var body = [];
- pages.map(function(page) {
- self.prepareBodyForCreate(body, page);
- });
- debug('addPages(): Sending Request to ES', body);
- return this.client.bulk({
- body: body,
- });
- };
- SearchClient.prototype.updatePages = function(pages)
- {
- var self = this;
- var body = [];
- pages.map(function(page) {
- self.prepareBodyForUpdate(body, page);
- });
- debug('updatePages(): Sending Request to ES', body);
- return this.client.bulk({
- body: body,
- });
- };
- SearchClient.prototype.deletePages = function(pages)
- {
- var self = this;
- var body = [];
- pages.map(function(page) {
- self.prepareBodyForDelete(body, page);
- });
- debug('deletePages(): Sending Request to ES', body);
- return this.client.bulk({
- body: body,
- });
- };
- SearchClient.prototype.addAllPages = function()
- {
- var self = this;
- var Page = this.crowi.model('Page');
- var cursor = Page.getStreamOfFindAll();
- var body = [];
- var sent = 0;
- var skipped = 0;
- return new Promise(function(resolve, reject) {
- cursor.on('data', function (doc) {
- if (!doc.creator || !doc.revision || !self.shouldIndexed(doc)) {
- //debug('Skipped', doc.path);
- skipped++;
- return ;
- }
- self.prepareBodyForCreate(body, doc);
- //debug(body.length);
- if (body.length > 2000) {
- sent++;
- debug('Sending request (seq, skipped)', sent, skipped);
- self.client.bulk({
- body: body,
- requestTimeout: Infinity,
- }).then(res => {
- debug('addAllPages add anyway (items, errors, took): ', (res.items || []).length, res.errors, res.took)
- }).catch(err => {
- debug('addAllPages error on add anyway: ', err)
- });
- body = [];
- }
- }).on('error', function (err) {
- // TODO: handle err
- debug('Error cursor:', err);
- }).on('close', function () {
- // all done
- // return if body is empty
- // see: https://github.com/weseek/crowi-plus/issues/228
- if (body.length == 0) {
- return resolve();
- }
- // 最後にすべてを送信
- self.client.bulk({
- body: body,
- requestTimeout: Infinity,
- })
- .then(function(res) {
- debug('Reponse from es (item length, errros, took):', (res.items || []).length, res.errors, res.took);
- return resolve(res);
- }).catch(function(err) {
- debug('Err from es:', err);
- return reject(err);
- });
- });
- });
- };
- /**
- * search returning type:
- * {
- * meta: { total: Integer, results: Integer},
- * data: [ pages ...],
- * }
- */
- SearchClient.prototype.search = function(query)
- {
- var self = this;
- return new Promise(function(resolve, reject) {
- self.client.search(query)
- .then(function(data) {
- var result = {
- meta: {
- took: data.took,
- total: data.hits.total,
- results: data.hits.hits.length,
- },
- data: data.hits.hits.map(function(elm) {
- return {_id: elm._id, _score: elm._score};
- })
- };
- resolve(result);
- }).catch(function(err) {
- reject(err);
- });
- });
- };
- SearchClient.prototype.createSearchQuerySortedByUpdatedAt = function(option)
- {
- // getting path by default is almost for debug
- var fields = ['path'];
- if (option) {
- fields = option.fields || fields;
- }
- // default is only id field, sorted by updated_at
- var query = {
- index: this.index_name,
- type: 'pages',
- body: {
- sort: [{ updated_at: { order: 'desc'}}],
- query: {}, // query
- _source: fields,
- }
- };
- this.appendResultSize(query);
- return query;
- };
- SearchClient.prototype.createSearchQuerySortedByScore = function(option)
- {
- var fields = ['path'];
- if (option) {
- fields = option.fields || fields;
- }
- // sort by score
- var query = {
- index: this.index_name,
- type: 'pages',
- body: {
- sort: [ {_score: { order: 'desc'} }],
- query: {}, // query
- _source: fields,
- }
- };
- this.appendResultSize(query);
- return query;
- };
- SearchClient.prototype.appendResultSize = function(query, from, size)
- {
- query.from = from || this.DEFAULT_OFFSET;
- query.size = size || this.DEFAULT_LIMIT;
- };
- SearchClient.prototype.appendCriteriaForKeywordContains = function(query, keyword)
- {
- // query is created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
- if (!query.body.query.bool) {
- query.body.query.bool = {};
- }
- if (!query.body.query.bool.must || !Array.isArray(query.body.query.must)) {
- query.body.query.bool.must = [];
- }
- if (!query.body.query.bool.must_not || !Array.isArray(query.body.query.must_not)) {
- query.body.query.bool.must_not = [];
- }
- var appendMultiMatchQuery = function(query, type, keywords) {
- var target;
- var operator = 'and';
- switch (type) {
- case 'not_match':
- target = query.body.query.bool.must_not;
- operator = 'or';
- break;
- case 'match':
- default:
- target = query.body.query.bool.must;
- }
- target.push({
- multi_match: {
- query: keywords.join(' '),
- // TODO: By user's i18n setting, change boost or search target fields
- fields: [
- "path_ja^2",
- "path_en^2",
- "body_ja",
- // "path_en",
- // "body_en",
- ],
- operator: operator,
- }
- });
- return query;
- };
- var parsedKeywords = this.getParsedKeywords(keyword);
- if (parsedKeywords.match.length > 0) {
- query = appendMultiMatchQuery(query, 'match', parsedKeywords.match);
- }
- if (parsedKeywords.not_match.length > 0) {
- query = appendMultiMatchQuery(query, 'not_match', parsedKeywords.not_match);
- }
- if (parsedKeywords.phrase.length > 0) {
- var phraseQueries = [];
- parsedKeywords.phrase.forEach(function(phrase) {
- phraseQueries.push({
- multi_match: {
- query: phrase, // each phrase is quoteted words
- type: 'phrase',
- fields: [ // Not use "*.ja" fields here, because we want to analyze (parse) search words
- "path_raw^2",
- "body_raw",
- ],
- }
- });
- });
- query.body.query.bool.must.push(phraseQueries);
- }
- if (parsedKeywords.not_phrase.length > 0) {
- var notPhraseQueries = [];
- parsedKeywords.not_phrase.forEach(function(phrase) {
- notPhraseQueries.push({
- multi_match: {
- query: phrase, // each phrase is quoteted words
- type: 'phrase',
- fields: [ // Not use "*.ja" fields here, because we want to analyze (parse) search words
- "path_raw^2",
- "body_raw",
- ],
- }
- });
- });
- query.body.query.bool.must_not.push(notPhraseQueries);
- }
- };
- SearchClient.prototype.appendCriteriaForPathFilter = function(query, path)
- {
- // query is created by createSearchQuerySortedByScore() or createSearchQuerySortedByUpdatedAt()
- if (!query.body.query.bool) {
- query.body.query.bool = {};
- }
- if (!query.body.query.bool.filter || !Array.isArray(query.body.query.bool.filter)) {
- query.body.query.bool.filter = [];
- }
- if (path.match(/\/$/)) {
- path = path.substr(0, path.length - 1);
- }
- query.body.query.bool.filter.push({
- wildcard: {
- "path": path + "/*"
- }
- });
- };
- SearchClient.prototype.searchKeyword = function(keyword, option)
- {
- var from = option.offset || null;
- var query = this.createSearchQuerySortedByScore();
- this.appendCriteriaForKeywordContains(query, keyword);
- return this.search(query);
- };
- SearchClient.prototype.searchByPath = function(keyword, prefix)
- {
- // TODO path 名だけから検索
- };
- SearchClient.prototype.searchKeywordUnderPath = function(keyword, path, option)
- {
- var from = option.offset || null;
- var query = this.createSearchQuerySortedByScore();
- this.appendCriteriaForKeywordContains(query, keyword);
- this.appendCriteriaForPathFilter(query, path);
- if (from) {
- this.appendResultSize(query, from);
- }
- return this.search(query);
- };
- SearchClient.prototype.getParsedKeywords = function(keyword)
- {
- var matchWords = [];
- var notMatchWords = [];
- var phraseWords = [];
- var notPhraseWords = [];
- keyword.trim();
- keyword = keyword.replace(/\s+/g, ' ');
- // First: Parse phrase keywords
- var phraseRegExp = new RegExp(/(-?"[^"]+")/g);
- var phrases = keyword.match(phraseRegExp);
- if (phrases !== null) {
- keyword = keyword.replace(phraseRegExp, '');
- phrases.forEach(function(phrase) {
- phrase.trim();
- if (phrase.match(/^\-/)) {
- notPhraseWords.push(phrase.replace(/^\-/, ''));
- } else {
- phraseWords.push(phrase);
- }
- });
- }
- // Second: Parse other keywords (include minus keywords)
- keyword.split(' ').forEach(function(word) {
- if (word === '') {
- return;
- }
- if (word.match(/^\-(.+)$/)) {
- notMatchWords.push((RegExp.$1));
- } else {
- matchWords.push(word);
- }
- });
- return {
- match: matchWords,
- not_match: notMatchWords,
- phrase: phraseWords,
- not_phrase: notPhraseWords,
- };
- }
- SearchClient.prototype.syncPageCreated = function(page, user)
- {
- debug('SearchClient.syncPageCreated', page.path);
- if (!this.shouldIndexed(page)) {
- return ;
- }
- this.addPages([page])
- .then(function(res) {
- debug('ES Response', res);
- })
- .catch(function(err){
- debug('ES Error', err);
- });
- };
- SearchClient.prototype.syncPageUpdated = function(page, user)
- {
- debug('SearchClient.syncPageUpdated', page.path);
- // TODO delete
- if (!this.shouldIndexed(page)) {
- this.deletePages([page])
- .then(function(res) {
- debug('deletePages: ES Response', res);
- })
- .catch(function(err){
- debug('deletePages:ES Error', err);
- });
- return ;
- }
- this.updatePages([page])
- .then(function(res) {
- debug('ES Response', res);
- })
- .catch(function(err){
- debug('ES Error', err);
- });
- };
- SearchClient.prototype.syncPageDeleted = function(page, user)
- {
- debug('SearchClient.syncPageDeleted', page.path);
- this.deletePages([page])
- .then(function(res) {
- debug('deletePages: ES Response', res);
- })
- .catch(function(err){
- debug('deletePages:ES Error', err);
- });
- return ;
- };
- module.exports = SearchClient;
|