Elasticsearchの検索結果の関連性についての質問

中国語のデータを使って、Elasticsearchで簡単なデモを実装しようとしています。しかし、検索結果の関連性（スコアリング）にいくつか問題があります。

マッピングを指定して新しいインデックスを作成し、「中国」と「美国」という2件のレコードを登録しました。そして、次のリクエストボディで「美国」を検索しています：

{ 
    "query" : { 
     "bool" : { 
      "must" : { 
       "multi_match" : { 
        "query" : "美国", 
        "fields" : [ "name", "synonyms" ] 
       } 
      }, 
      "filter" : { 
       "term" : { 
        "status" : 2 
       } 
      } 
     } 
    } 
}

インデックスのマッピングは次のとおりです：

{ 
    "tag": { 
     "mappings": { 
      "tag": { 
       "properties": { 
        "name": { 
         "type": "text", 
         "analyzer": "standard" 
        }, 
        "note": { 
         "type": "text", 
         "analyzer": "standard" 
        }, 
        "status": { 
         "type": "integer" 
        }, 
        "synonyms": { 
         "type": "text", 
         "analyzer": "standard" 
        } 
       } 
      } 
     } 
    } 
}

このように「美国」で検索したにもかかわらず、レコード「中国」の方が高いスコアになりました。レスポンスJSONは以下の通りです：

{ 
    "took": 2, 
    "timed_out": false, 
    "_shards": { 
     "total": 5, 
     "successful": 5, 
     "failed": 0 
    }, 
    "hits": { 
     "total": 2, 
     "max_score": 0.7373906, 
     "hits": [ { 
      "_index": "tag", 
      "_type": "tag", 
      "_id": "5482361185636870", 
      "_score": 0.7373906, 
      "_source": { 
       "status": 2, 
       "name": "中国", 
       "note": "", 
       "synonyms": [] 
      } 
     }, { 
      "_index": "tag", 
      "_type": "tag", 
      "_id": "5474649504748034", 
      "_score": 0.53484553, 
      "_source": { 
       "status": 2, 
       "name": "美国", 
       "note": "", 
       "synonyms": [] 
      } 
     } ] 
    } 
}

「中国」のレコードのスコアは0.7373906でしたが、「美国」のレコードは0.53484553しか得られませんでした。

explainの結果：

{ 
    "hits": [ 
    { 
     "_shard": "[tag][0]", 
     "_node": "Wh9qH0bcTAaVNrsP1Aiyxg", 
     "_index": "tag", 
     "_type": "tag", 
     "_id": "5482361185636870", 
     "_score": 0.7373906, 
     "_source": { 
     "status": 2, 
     "name": "中国", 
     "note": "", 
     "synonyms": [] 
     }, 
     "_explanation": { 
     "value": 0.73739064, 
     "description": "sum of:", 
     "details": [ 
      { 
      "value": 0.73739064, 
      "description": "sum of:", 
      "details": [ 
       { 
       "value": 0.73739064, 
       "description": "max of:", 
       "details": [ 
        { 
        "value": 0.73739064, 
        "description": "sum of:", 
        "details": [ 
         { 
         "value": 0.73739064, 
         "description": "weight(name:国 in 0) [PerFieldSimilarity], result of:", 
         "details": [ 
          { 
          "value": 0.73739064, 
          "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:", 
          "details": [ 
           { 
           "value": 0.6931472, 
           "description": "idf, computed as log(1 + (docCount - docFreq + 0.5)/(docFreq + 0.5)) from:", 
           "details": [ 
            { 
            "value": 1, 
            "description": "docFreq", 
            "details": [] 
            }, 
            { 
            "value": 2, 
            "description": "docCount", 
            "details": [] 
            } 
           ] 
           }, 
           { 
           "value": 1.0638298, 
           "description": "tfNorm, computed as (freq * (k1 + 1))/(freq + k1 * (1 - b + b * fieldLength/avgFieldLength)) from:", 
           "details": [ 
            { 
            "value": 1, 
            "description": "termFreq=1.0", 
            "details": [] 
            }, 
            { 
            "value": 1.2, 
            "description": "parameter k1", 
            "details": [] 
            }, 
            { 
            "value": 0.75, 
            "description": "parameter b", 
            "details": [] 
            }, 
            { 
            "value": 3, 
            "description": "avgFieldLength", 
            "details": [] 
            }, 
            { 
            "value": 2.56, 
            "description": "fieldLength", 
            "details": [] 
            } 
           ] 
           } 
          ] 
          } 
         ] 
         } 
        ] 
        } 
       ] 
       }, 
       { 
       "value": 0, 
       "description": "match on required clause, product of:", 
       "details": [ 
        { 
        "value": 0, 
        "description": "# clause", 
        "details": [] 
        }, 
        { 
        "value": 1, 
        "description": "status:[2 TO 2], product of:", 
        "details": [ 
         { 
         "value": 1, 
         "description": "boost", 
         "details": [] 
         }, 
         { 
         "value": 1, 
         "description": "queryNorm", 
         "details": [] 
         } 
        ] 
        } 
       ] 
       } 
      ] 
      }, 
      { 
      "value": 0, 
      "description": "match on required clause, product of:", 
      "details": [ 
       { 
       "value": 0, 
       "description": "# clause", 
       "details": [] 
       }, 
       { 
       "value": 1, 
       "description": "*:*, product of:", 
       "details": [ 
        { 
        "value": 1, 
        "description": "boost", 
        "details": [] 
        }, 
        { 
        "value": 1, 
        "description": "queryNorm", 
        "details": [] 
        } 
       ] 
       } 
      ] 
      } 
     ] 
     } 
    }, 
    { 
     "_shard": "[tag][4]", 
     "_node": "Wh9qH0bcTAaVNrsP1Aiyxg", 
     "_index": "tag", 
     "_type": "tag", 
     "_id": "5474649504748034", 
     "_score": 0.51623213, 
     "_source": { 
     "status": 2, 
     "name": "美国", 
     "note": "", 
     "synonyms": [] 
     }, 
     "_explanation": { 
     "value": 0.51623213, 
     "description": "sum of:", 
     "details": [ 
      { 
      "value": 0.51623213, 
      "description": "sum of:", 
      "details": [ 
       { 
       "value": 0.51623213, 
       "description": "max of:", 
       "details": [ 
        { 
        "value": 0.51623213, 
        "description": "sum of:", 
        "details": [ 
         { 
         "value": 0.25811607, 
         "description": "weight(name:美 in 0) [PerFieldSimilarity], result of:", 
         "details": [ 
          { 
          "value": 0.25811607, 
          "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:", 
          "details": [ 
           { 
           "value": 0.2876821, 
           "description": "idf, computed as log(1 + (docCount - docFreq + 0.5)/(docFreq + 0.5)) from:", 
           "details": [ 
            { 
            "value": 1, 
            "description": "docFreq", 
            "details": [] 
            }, 
            { 
            "value": 1, 
            "description": "docCount", 
            "details": [] 
            } 
           ] 
           }, 
           { 
           "value": 0.89722675, 
           "description": "tfNorm, computed as (freq * (k1 + 1))/(freq + k1 * (1 - b + b * fieldLength/avgFieldLength)) from:", 
           "details": [ 
            { 
            "value": 1, 
            "description": "termFreq=1.0", 
            "details": [] 
            }, 
            { 
            "value": 1.2, 
            "description": "parameter k1", 
            "details": [] 
            }, 
            { 
            "value": 0.75, 
            "description": "parameter b", 
            "details": [] 
            }, 
            { 
            "value": 2, 
            "description": "avgFieldLength", 
            "details": [] 
            }, 
            { 
            "value": 2.56, 
            "description": "fieldLength", 
            "details": [] 
            } 
           ] 
           } 
          ] 
          } 
         ] 
         }, 
         { 
         "value": 0.25811607, 
         "description": "weight(name:国 in 0) [PerFieldSimilarity], result of:", 
         "details": [ 
          { 
          "value": 0.25811607, 
          "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:", 
          "details": [ 
           { 
           "value": 0.2876821, 
           "description": "idf, computed as log(1 + (docCount - docFreq + 0.5)/(docFreq + 0.5)) from:", 
           "details": [ 
            { 
            "value": 1, 
            "description": "docFreq", 
            "details": [] 
            }, 
            { 
            "value": 1, 
            "description": "docCount", 
            "details": [] 
            } 
           ] 
           }, 
           { 
           "value": 0.89722675, 
           "description": "tfNorm, computed as (freq * (k1 + 1))/(freq + k1 * (1 - b + b * fieldLength/avgFieldLength)) from:", 
           "details": [ 
            { 
            "value": 1, 
            "description": "termFreq=1.0", 
            "details": [] 
            }, 
            { 
            "value": 1.2, 
            "description": "parameter k1", 
            "details": [] 
            }, 
            { 
            "value": 0.75, 
            "description": "parameter b", 
            "details": [] 
            }, 
            { 
            "value": 2, 
            "description": "avgFieldLength", 
            "details": [] 
            }, 
            { 
            "value": 2.56, 
            "description": "fieldLength", 
            "details": [] 
            } 
           ] 
           } 
          ] 
          } 
         ] 
         } 
        ] 
        } 
       ] 
       }, 
       { 
       "value": 0, 
       "description": "match on required clause, product of:", 
       "details": [ 
        { 
        "value": 0, 
        "description": "# clause", 
        "details": [] 
        }, 
        { 
        "value": 1, 
        "description": "status:[2 TO 2], product of:", 
        "details": [ 
         { 
         "value": 1, 
         "description": "boost", 
         "details": [] 
         }, 
         { 
         "value": 1, 
         "description": "queryNorm", 
         "details": [] 
         } 
        ] 
        } 
       ] 
       } 
      ] 
      }, 
      { 
      "value": 0, 
      "description": "match on required clause, product of:", 
      "details": [ 
       { 
       "value": 0, 
       "description": "# clause", 
       "details": [] 
       }, 
       { 
       "value": 1, 
       "description": "*:*, product of:", 
       "details": [ 
        { 
        "value": 1, 
        "description": "boost", 
        "details": [] 
        }, 
        { 
        "value": 1, 
        "description": "queryNorm", 
        "details": [] 
        } 
       ] 
       } 
      ] 
      } 
     ] 
     } 
    } 
    ] 
}

出典

2017-04-10 LCB

インデックスに含まれるドキュメントがごくわずかで、それらが別々のシャードに分散しているようです。各シャードはそれぞれ独自の単語頻度（term frequency）を持っており、デフォルトではElasticsearchはこのシャードごとのローカルな値を使ってスコアを計算します。ただし、クエリ文字列パラメータ search_type=dfs_query_then_fetch を指定することで、この動作を変更できます。または、この

{ 
    "search_type": "dfs_query_then_fetch", 
    "query": { 
     "bool": { 
      "must": { 
       "multi_match": { 
        "query": "美国", 
        "fields": [ 
         "name", 
         "synonyms" 
        ] 
       } 
      }, 
      "filter": { 
       "term": { 
        "status": 2 
       } 
      } 
     } 
    } 
}

のように、対応するフィールドをリクエストボディに追加することもできます。参考：https://www.elastic.co/blog/understanding-query-then-fetch-vs-dfs-query-then-fetch

出典

2017-04-10 21:13:11 Random

ありがとうございます、この記事を読んでみます。DFS Query Then Fetchはパフォーマンスの低下を招く可能性があるとのことなので、設定やマッピングなどを変更することでこの問題を解決する別の方法はありますか？ – LCB

インデックスが小さく、スケーラビリティが必要ない場合は、単一のシャードでインデックスを作成できます。インデックスが大きい場合は、シャード間で単語頻度に大きな差は生じないため、通常この問題に遭遇することはありません – Random

検索結果のElasticsearch関連性についての質問

答えて

関連する問題