Răsfoiți Sursa

Merge pull request #182 from crowi/compat-es-2-and-5-add-search-operators

Support search operators (minus and phrase) with ES v2.x and v5.x
Sotaro KARASAWA 9 ani în urmă
părinte
comite
d3b1e75444

+ 125 - 13
lib/util/search.js

@@ -331,24 +331,88 @@ SearchClient.prototype.appendCriteriaForKeywordContains = function(query, keywor
   if (!query.body.query.bool) {
     query.body.query.bool = {};
   }
-
   if (!query.body.query.bool.must || !Array.isArray(query.body.query.must)) {
     query.body.query.bool.must = [];
   }
+  if (!query.body.query.bool.must_not || !Array.isArray(query.body.query.must_not)) {
+    query.body.query.bool.must_not = [];
+  }
 
-  query.body.query.bool.must.push({
-    multi_match: {
-      query: keyword,
-      // TODO: By user's i18n setting, change boost or search target fields
-      fields: [
-        "path_ja^2",
-        "body_ja",
-        // "path_en",
-        // "body_en",
-      ],
-      operator: "and"
+  var appendMultiMatchQuery = function(query, type, keywords) {
+    var target;
+    var operator = 'and';
+    switch (type) {
+      case 'not_match':
+        target = query.body.query.bool.must_not;
+        operator = 'or';
+        break;
+      case 'match':
+      default:
+        target = query.body.query.bool.must;
     }
-  });
+
+    target.push({
+      multi_match: {
+        query: keywords.join(' '),
+        // TODO: By user's i18n setting, change boost or search target fields
+        fields: [
+          "path_ja^2",
+          "body_ja",
+          // "path_en",
+          // "body_en",
+        ],
+        operator: operator,
+      }
+    });
+
+    return query;
+  };
+
+  var parsedKeywords = this.getParsedKeywords(keyword);
+
+  if (parsedKeywords.match.length > 0) {
+    query = appendMultiMatchQuery(query, 'match', parsedKeywords.match);
+  }
+
+  if (parsedKeywords.not_match.length > 0) {
+    query = appendMultiMatchQuery(query, 'not_match', parsedKeywords.not_match);
+  }
+
+  if (parsedKeywords.phrase.length > 0) {
+    var phraseQueries = [];
+    parsedKeywords.phrase.forEach(function(phrase) {
+      phraseQueries.push({
+        multi_match: {
+          query: phrase, // each phrase is quoteted words
+          type: 'phrase',
+          fields: [ // Not use "*.ja" fields here, because we want to analyze (parse) search words
+            "path_raw^2",
+            "body_raw",
+          ],
+        }
+      });
+    });
+
+    query.body.query.bool.must.push(phraseQueries);
+  }
+
+  if (parsedKeywords.not_phrase.length > 0) {
+    var notPhraseQueries = [];
+    parsedKeywords.not_phrase.forEach(function(phrase) {
+      notPhraseQueries.push({
+        multi_match: {
+          query: phrase, // each phrase is quoteted words
+          type: 'phrase',
+          fields: [ // Not use "*.ja" fields here, because we want to analyze (parse) search words
+            "path_raw^2",
+            "body_raw",
+          ],
+        }
+      });
+    });
+
+    query.body.query.bool.must_not.push(notPhraseQueries);
+  }
 };
 
 SearchClient.prototype.appendCriteriaForPathFilter = function(query, path)
@@ -400,6 +464,54 @@ SearchClient.prototype.searchKeywordUnderPath = function(keyword, path, option)
   return this.search(query);
 };
 
+/**
+ * Split a raw search string into plain words, excluded words, phrases and
+ * excluded phrases.
+ *
+ * Recognized syntax:
+ *   word        -> match
+ *   -word       -> not_match  (the leading minus is stripped)
+ *   "a phrase"  -> phrase     (the surrounding double quotes are kept)
+ *   -"a phrase" -> not_phrase (the leading minus is stripped, quotes are kept)
+ *
+ * @param {string} keyword raw search input; null/undefined is treated as ''
+ * @return {{match: string[], not_match: string[], phrase: string[], not_phrase: string[]}}
+ */
+SearchClient.prototype.getParsedKeywords = function(keyword)
+{
+  var matchWords = [];
+  var notMatchWords = [];
+  var phraseWords = [];
+  var notPhraseWords = [];
+
+  // Strings are immutable: trim()/replace() return new strings, so the
+  // normalized value has to be assigned rather than discarded.
+  var text = (keyword == null ? '' : String(keyword)).trim().replace(/\s+/g, ' ');
+
+  // First: Parse phrase keywords (quoted, optionally prefixed by a minus)
+  var phraseRegExp = /-?"[^"]+"/g;
+  var phrases = text.match(phraseRegExp) || [];
+
+  // Remove the phrases so that their words are not parsed again below
+  text = text.replace(phraseRegExp, '');
+
+  phrases.forEach(function(phrase) {
+    if (phrase.charAt(0) === '-') {
+      notPhraseWords.push(phrase.slice(1));
+    } else {
+      phraseWords.push(phrase);
+    }
+  });
+
+  // Second: Parse other keywords (include minus keywords)
+  text.split(' ').forEach(function(word) {
+    if (word === '') {
+      return;
+    }
+
+    // A lone "-" has nothing to exclude, so it stays an ordinary word.
+    // Slicing the word avoids the deprecated, globally shared RegExp.$1.
+    if (word.length > 1 && word.charAt(0) === '-') {
+      notMatchWords.push(word.slice(1));
+    } else {
+      matchWords.push(word);
+    }
+  });
+
+  return {
+    match: matchWords,
+    not_match: notMatchWords,
+    phrase: phraseWords,
+    not_phrase: notPhraseWords,
+  };
+};
+
+
 SearchClient.prototype.syncPageCreated = function(page, user)
 {
   debug('SearchClient.syncPageCreated', page.path);

+ 6 - 2
resource/js/components/SearchPage/SearchResultList.js

@@ -14,7 +14,12 @@ export default class SearchResultList extends React.Component {
     let returnBody = body;
 
     this.props.searchingKeyword.split(' ').forEach((keyword) => {
-      const k = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+      if (keyword === '') {
+        return;
+      }
+      const k = keyword
+            .replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
+            .replace(/(^"|"$)/g, ''); // for phrase (quoted) keyword
       const keywordExp = new RegExp(`(${k}(?!(.*?\]|.*?\\)|.*?"|.*?>)))`, 'ig');
       returnBody = returnBody.replace(keywordExp, '<em class="highlighted">$&</em>');
     });
@@ -53,4 +58,3 @@ SearchResultList.defaultProps = {
   pages: [],
   searchingKeyword: '',
 };
-

+ 12 - 2
resource/search/mappings.json

@@ -49,10 +49,15 @@
       "properties" : {
         "path": {
           "type": "string",
-          "copy_to": ["path_ja", "path_en"],
+          "copy_to": ["path_raw", "path_ja", "path_en"],
           "include_in_all": false,
           "index": "not_analyzed"
         },
+        "path_raw": {
+          "type": "string",
+          "analyzer": "standard",
+          "include_in_all": false
+        },
         "path_ja": {
           "type": "string",
           "analyzer": "kuromoji",
@@ -65,10 +70,15 @@
         },
         "body": {
           "type": "string",
-          "copy_to": ["body_ja", "body_en"],
+          "copy_to": ["body_raw", "body_ja", "body_en"],
           "include_in_all": false,
           "index": "not_analyzed"
         },
+        "body_raw": {
+          "type": "string",
+          "analyzer": "standard",
+          "include_in_all": false
+        },
         "body_ja": {
           "type": "string",
           "analyzer": "kuromoji",