目的に合った振る舞いをする Similarity を実装する必要があります。Similarity を直接継承して実装することもできますが、ClassicSimilarity(バージョン 5.4 より前は DefaultSimilarity)をコピーし、スコアに影響させたくない要素を無効化する(つまり、定数を返すようにする)方が簡単です。
たとえば、クエリ内の用語の出現頻度だけを返す単純な実装を次に示します。
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.BytesRef;
/**
 * A {@link TFIDFSimilarity} in which every scoring factor except {@link #tf(float)}
 * is replaced by a constant, so that term frequency is the only factor that
 * varies with its input.
 *
 * <p>The Javadoc on each method first recalls what that factor does in the
 * <em>standard</em> implementation and then states what this class returns
 * instead (for most of them: a constant, i.e. nothing at all).
 *
 * <p>Norms should be disabled when using this similarity: they are useless to
 * it and would just be wasted space.
 */
public class SimpleSimilarity extends TFIDFSimilarity {

    /** Constant returned by every float-valued factor that is switched off. */
    private static final float NEUTRAL = 1f;

    /** Constant stored as the encoded norm; its value is never interpreted. */
    private static final long NEUTRAL_NORM = 1L;

    /** Creates the similarity; there is no state to initialise. */
    public SimpleSimilarity() {
    }

    /**
     * Standard role: boosts results which match more query terms.
     *
     * @return always {@code 1}, regardless of the arguments
     */
    @Override
    public float coord(int overlap, int maxOverlap) {
        return NEUTRAL;
    }

    /**
     * Standard role: a per-query constant that normalizes scores somewhat
     * based on the query.
     *
     * @return always {@code 1}, regardless of the argument
     */
    @Override
    public float queryNorm(float sumOfSquaredWeights) {
        return NEUTRAL;
    }

    /**
     * Encodes a norm value. The input is ignored because norms play no part
     * in this similarity.
     *
     * @return always {@code 1L}
     */
    @Override
    public final long encodeNormValue(float f) {
        return NEUTRAL_NORM;
    }

    /**
     * Decodes a stored norm. The stored value is ignored.
     *
     * @return always {@code 1}
     */
    @Override
    public final float decodeNormValue(long norm) {
        return NEUTRAL;
    }

    /**
     * Standard role: weighs shorter fields more heavily.
     *
     * @return always {@code 1}, regardless of the field state
     */
    @Override
    public float lengthNorm(FieldInvertState state) {
        return NEUTRAL;
    }

    /**
     * Standard role: higher-frequency terms (more matches) are scored higher;
     * the standard implementation returns {@code (float) Math.sqrt(freq)}.
     *
     * @param freq the term frequency
     * @return {@code freq} unchanged, i.e. the raw frequency without the
     *     square-root damping
     */
    @Override
    public float tf(float freq) {
        final float rawFrequency = freq;
        return rawFrequency;
    }

    /**
     * Standard role: scores closer matches higher when using a sloppy phrase
     * query.
     *
     * @return always {@code 1}, regardless of the distance
     */
    @Override
    public float sloppyFreq(int distance) {
        return NEUTRAL;
    }

    /**
     * Payload factor. ClassicSimilarity doesn't really do much with payloads,
     * so this is left unmodified from it.
     *
     * @return always {@code 1}
     */
    @Override
    public float scorePayload(int doc, int start, int end, BytesRef payload) {
        return NEUTRAL;
    }

    /**
     * Standard role: weighs matches on rarer terms more heavily.
     *
     * @return always {@code 1}, regardless of the document statistics
     */
    @Override
    public float idf(long docFreq, long numDocs) {
        return NEUTRAL;
    }

    /** Returns the fixed name of this similarity. */
    @Override
    public String toString() {
        return "SimpleSimilarity";
    }
}