2017-04-10 21 views

中国語のデータを対象に、Elasticsearchで簡単なデモを実装しようとしています。しかし、検索結果の関連性(スコアリング)にいくつか問題があります。(質問タイトル:Elasticsearchの検索結果の関連性についての質問)

マッピングを指定して新しいインデックスを作成しました。インデックスには、name が "中国" のレコードと "美国" のレコードの2件が登録されています。

    "query" : { 
     "bool" : { 
      "must" : { 
       "multi_match" : { 
        "query" : "美国", 
        "fields" : [ "name", "synonyms" ] 
      "filter" : { 
       "term" : { 
        "status" : 2 


    "tag": { 
     "mappings": { 
      "tag": { 
       "properties": { 
        "name": { 
         "type": "text", 
         "analyzer": "standard" 
        "note": { 
         "type": "text", 
         "analyzer": "standard" 
        "status": { 
         "type": "integer" 
        "synonyms": { 
         "type": "text", 
         "analyzer": "standard" 

そして、リクエストボディで "美国" を検索しました。しかし、「中国」のレコードのほうが高いスコアになりました。レスポンスJSONは以下の通りです:

    "took": 2, 
    "timed_out": false, 
    "_shards": { 
     "total": 5, 
     "successful": 5, 
     "failed": 0 
    "hits": { 
     "total": 2, 
     "max_score": 0.7373906, 
     "hits": [ { 
      "_index": "tag", 
      "_type": "tag", 
      "_id": "5482361185636870", 
      "_score": 0.7373906, 
      "_source": { 
       "status": 2, 
       "name": "中国", 
       "note": "", 
       "synonyms": [] 
     }, { 
      "_index": "tag", 
      "_type": "tag", 
      "_id": "5474649504748034", 
      "_score": 0.53484553, 
      "_source": { 
       "status": 2, 
       "name": "美国", 
       "note": "", 
       "synonyms": [] 
     } ] 



    "hits": [ 
     "_shard": "[tag][0]", 
     "_node": "Wh9qH0bcTAaVNrsP1Aiyxg", 
     "_index": "tag", 
     "_type": "tag", 
     "_id": "5482361185636870", 
     "_score": 0.7373906, 
     "_source": { 
     "status": 2, 
     "name": "中国", 
     "note": "", 
     "synonyms": [] 
     "_explanation": { 
     "value": 0.73739064, 
     "description": "sum of:", 
     "details": [ 
      "value": 0.73739064, 
      "description": "sum of:", 
      "details": [ 
       "value": 0.73739064, 
       "description": "max of:", 
       "details": [ 
        "value": 0.73739064, 
        "description": "sum of:", 
        "details": [ 
         "value": 0.73739064, 
         "description": "weight(name:国 in 0) [PerFieldSimilarity], result of:", 
         "details": [ 
          "value": 0.73739064, 
          "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:", 
          "details": [ 
           "value": 0.6931472, 
           "description": "idf, computed as log(1 + (docCount - docFreq + 0.5)/(docFreq + 0.5)) from:", 
           "details": [ 
            "value": 1, 
            "description": "docFreq", 
            "details": [] 
            "value": 2, 
            "description": "docCount", 
            "details": [] 
           "value": 1.0638298, 
           "description": "tfNorm, computed as (freq * (k1 + 1))/(freq + k1 * (1 - b + b * fieldLength/avgFieldLength)) from:", 
           "details": [ 
            "value": 1, 
            "description": "termFreq=1.0", 
            "details": [] 
            "value": 1.2, 
            "description": "parameter k1", 
            "details": [] 
            "value": 0.75, 
            "description": "parameter b", 
            "details": [] 
            "value": 3, 
            "description": "avgFieldLength", 
            "details": [] 
            "value": 2.56, 
            "description": "fieldLength", 
            "details": [] 
       "value": 0, 
       "description": "match on required clause, product of:", 
       "details": [ 
        "value": 0, 
        "description": "# clause", 
        "details": [] 
        "value": 1, 
        "description": "status:[2 TO 2], product of:", 
        "details": [ 
         "value": 1, 
         "description": "boost", 
         "details": [] 
         "value": 1, 
         "description": "queryNorm", 
         "details": [] 
      "value": 0, 
      "description": "match on required clause, product of:", 
      "details": [ 
       "value": 0, 
       "description": "# clause", 
       "details": [] 
       "value": 1, 
       "description": "*:*, product of:", 
       "details": [ 
        "value": 1, 
        "description": "boost", 
        "details": [] 
        "value": 1, 
        "description": "queryNorm", 
        "details": [] 
     "_shard": "[tag][4]", 
     "_node": "Wh9qH0bcTAaVNrsP1Aiyxg", 
     "_index": "tag", 
     "_type": "tag", 
     "_id": "5474649504748034", 
     "_score": 0.51623213, 
     "_source": { 
     "status": 2, 
     "name": "美国", 
     "note": "", 
     "synonyms": [] 
     "_explanation": { 
     "value": 0.51623213, 
     "description": "sum of:", 
     "details": [ 
      "value": 0.51623213, 
      "description": "sum of:", 
      "details": [ 
       "value": 0.51623213, 
       "description": "max of:", 
       "details": [ 
        "value": 0.51623213, 
        "description": "sum of:", 
        "details": [ 
         "value": 0.25811607, 
         "description": "weight(name:美 in 0) [PerFieldSimilarity], result of:", 
         "details": [ 
          "value": 0.25811607, 
          "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:", 
          "details": [ 
           "value": 0.2876821, 
           "description": "idf, computed as log(1 + (docCount - docFreq + 0.5)/(docFreq + 0.5)) from:", 
           "details": [ 
            "value": 1, 
            "description": "docFreq", 
            "details": [] 
            "value": 1, 
            "description": "docCount", 
            "details": [] 
           "value": 0.89722675, 
           "description": "tfNorm, computed as (freq * (k1 + 1))/(freq + k1 * (1 - b + b * fieldLength/avgFieldLength)) from:", 
           "details": [ 
            "value": 1, 
            "description": "termFreq=1.0", 
            "details": [] 
            "value": 1.2, 
            "description": "parameter k1", 
            "details": [] 
            "value": 0.75, 
            "description": "parameter b", 
            "details": [] 
            "value": 2, 
            "description": "avgFieldLength", 
            "details": [] 
            "value": 2.56, 
            "description": "fieldLength", 
            "details": [] 
         "value": 0.25811607, 
         "description": "weight(name:国 in 0) [PerFieldSimilarity], result of:", 
         "details": [ 
          "value": 0.25811607, 
          "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:", 
          "details": [ 
           "value": 0.2876821, 
           "description": "idf, computed as log(1 + (docCount - docFreq + 0.5)/(docFreq + 0.5)) from:", 
           "details": [ 
            "value": 1, 
            "description": "docFreq", 
            "details": [] 
            "value": 1, 
            "description": "docCount", 
            "details": [] 
           "value": 0.89722675, 
           "description": "tfNorm, computed as (freq * (k1 + 1))/(freq + k1 * (1 - b + b * fieldLength/avgFieldLength)) from:", 
           "details": [ 
            "value": 1, 
            "description": "termFreq=1.0", 
            "details": [] 
            "value": 1.2, 
            "description": "parameter k1", 
            "details": [] 
            "value": 0.75, 
            "description": "parameter b", 
            "details": [] 
            "value": 2, 
            "description": "avgFieldLength", 
            "details": [] 
            "value": 2.56, 
            "description": "fieldLength", 
            "details": [] 
       "value": 0, 
       "description": "match on required clause, product of:", 
       "details": [ 
        "value": 0, 
        "description": "# clause", 
        "details": [] 
        "value": 1, 
        "description": "status:[2 TO 2], product of:", 
        "details": [ 
         "value": 1, 
         "description": "boost", 
         "details": [] 
         "value": 1, 
         "description": "queryNorm", 
         "details": [] 
      "value": 0, 
      "description": "match on required clause, product of:", 
      "details": [ 
       "value": 0, 
       "description": "# clause", 
       "details": [] 
       "value": 1, 
       "description": "*:*, product of:", 
       "details": [ 
        "value": 1, 
        "description": "boost", 
        "details": [] 
        "value": 1, 
        "description": "queryNorm", 
        "details": [] 




    "search_type": "dfs_query_then_fetch", 
    "query": { 
     "bool": { 
      "must": { 
       "multi_match": { 
        "query": "美国", 
        "fields": [ 
      "filter": { 
       "term": { 
        "status": 2 



ありがとうございます。その記事を読んでみます。dfs_query_then_fetch(DFS Query Then Fetch)はパフォーマンスの低下を招く可能性があることが分かったので、設定やマッピングの変更など、別の方法でこの問題を解決することはできますか? – LCB


インデックスが小さく、スケーラビリティが必要ない場合は、単一のシャードでインデックスを作成できます。インデックスが大きい場合は、シャード間でターム(単語)の出現頻度に大きな差が生じにくいため、通常この問題に遭遇することはありません – Random
