Browse Source

Merge pull request #849 from weseek/imprv/ngram

Imprv/ngram
Yuki Takei 7 years ago
parent
commit
50439df044
3 changed files with 24 additions and 23 deletions
  1. 1 0
      config/logger/config.dev.js
  2. 10 18
      resource/search/mappings.json
  3. 13 5
      src/server/util/search.js

+ 1 - 0
config/logger/config.dev.js

@@ -11,6 +11,7 @@ module.exports = {
   // 'growi:routes:login': 'debug',
   // 'growi:routes:login': 'debug',
   'growi:routes:login-passport': 'debug',
   'growi:routes:login-passport': 'debug',
   'growi:service:PassportService': 'debug',
   'growi:service:PassportService': 'debug',
+  'growi:lib:search': 'debug',
   // 'growi:service:GlobalNotification': 'debug',
   // 'growi:service:GlobalNotification': 'debug',
   // 'growi:lib:importer': 'debug',
   // 'growi:lib:importer': 'debug',
   // 'growi:routes:page': 'debug',
   // 'growi:routes:page': 'debug',

+ 10 - 18
resource/search/mappings.json

@@ -5,21 +5,13 @@
         "english_stop": {
         "english_stop": {
           "type":       "stop",
           "type":       "stop",
           "stopwords":  "_english_"
           "stopwords":  "_english_"
-        },
-        "english_stemmer": {
-          "type":       "stemmer",
-          "language":   "english"
-        },
-        "english_possessive_stemmer": {
-          "type":       "stemmer",
-          "language":   "possessive_english"
         }
         }
       },
       },
       "tokenizer": {
       "tokenizer": {
-        "ngram_tokenizer": {
-          "type": "ngram",
+        "edge_ngram_tokenizer": {
+          "type": "edge_ngram",
           "min_gram": 2,
           "min_gram": 2,
-          "max_gram": 3,
+          "max_gram": 20,
           "token_chars": ["letter", "digit"]
           "token_chars": ["letter", "digit"]
         }
         }
       },
       },
@@ -28,13 +20,11 @@
           "tokenizer": "kuromoji_tokenizer",
           "tokenizer": "kuromoji_tokenizer",
           "char_filter" : ["icu_normalizer"]
           "char_filter" : ["icu_normalizer"]
         },
         },
-        "english": {
-          "tokenizer": "ngram_tokenizer",
+        "english_edge_ngram": {
+          "tokenizer": "edge_ngram_tokenizer",
           "filter": [
           "filter": [
-            "english_possessive_stemmer",
             "lowercase",
             "lowercase",
-            "english_stop",
-            "english_stemmer"
+            "english_stop"
           ]
           ]
         }
         }
       }
       }
@@ -56,7 +46,8 @@
             },
             },
             "en": {
             "en": {
               "type": "text",
               "type": "text",
-              "analyzer": "english"
+              "analyzer": "english_edge_ngram",
+              "search_analyzer": "standard"
             }
             }
           }
           }
         },
         },
@@ -69,7 +60,8 @@
             },
             },
             "en": {
             "en": {
               "type": "text",
               "type": "text",
-              "analyzer": "english"
+              "analyzer": "english_edge_ngram",
+              "search_analyzer": "standard"
             }
             }
           }
           }
         },
         },

+ 13 - 5
src/server/util/search.js

@@ -351,11 +351,14 @@ SearchClient.prototype.search = async function(query) {
         query: query.body.query
         query: query.body.query
       },
       },
     });
     });
-    logger.info('ES returns explanations: ', result.explanations);
+    logger.debug('ES returns explanations: ', result.explanations);
   }
   }
 
 
   const result = await this.client.search(query);
   const result = await this.client.search(query);
 
 
+  // for debug
+  logger.debug('ES result: ', result);
+
   return {
   return {
     meta: {
     meta: {
       took: result.took,
       took: result.took,
@@ -447,6 +450,7 @@ SearchClient.prototype.appendCriteriaForQueryString = function(query, queryStrin
     const q = {
     const q = {
       multi_match: {
       multi_match: {
         query: parsedKeywords.match.join(' '),
         query: parsedKeywords.match.join(' '),
+        type: 'most_fields',
         fields: ['path.ja^2', 'path.en^2', 'body.ja', 'body.en'],
         fields: ['path.ja^2', 'path.en^2', 'body.ja', 'body.en'],
       },
       },
     };
     };
@@ -457,7 +461,7 @@ SearchClient.prototype.appendCriteriaForQueryString = function(query, queryStrin
     const q = {
     const q = {
       multi_match: {
       multi_match: {
         query: parsedKeywords.not_match.join(' '),
         query: parsedKeywords.not_match.join(' '),
-        fields: ['path.ja^2', 'path.en^2', 'body.ja', 'body.en'],
+        fields: ['path.ja', 'path.en', 'body.ja', 'body.en'],
         operator: 'or'
         operator: 'or'
       },
       },
     };
     };
@@ -624,13 +628,17 @@ SearchClient.prototype.filterPagesByType = function(query, type) {
   }
   }
 };
 };
 
 
-SearchClient.prototype.appendFunctionScore = function(query) {
+SearchClient.prototype.appendFunctionScore = function(query, queryString) {
   const User = this.crowi.model('User');
   const User = this.crowi.model('User');
   const count = User.count({}) || 1;
   const count = User.count({}) || 1;
-  // newScore = oldScore + log(1 + factor * 'bookmark_count')
+  const minScore = queryString.length * 0.33;   // increase with length
+
+  logger.debug('min_score: ', minScore);
+
   query.body.query = {
   query.body.query = {
     function_score: {
     function_score: {
       query: { ...query.body.query },
       query: { ...query.body.query },
+      min_score: minScore,
       field_value_factor: {
       field_value_factor: {
         field: 'bookmark_count',
         field: 'bookmark_count',
         modifier: 'log1p',
         modifier: 'log1p',
@@ -654,7 +662,7 @@ SearchClient.prototype.searchKeyword = async function(queryString, user, userGro
 
 
   this.appendResultSize(query, from, size);
   this.appendResultSize(query, from, size);
 
 
-  this.appendFunctionScore(query);
+  this.appendFunctionScore(query, queryString);
 
 
   return this.search(query);
   return this.search(query);
 };
 };