Source code for pyterrier_anserini._similarity
from enum import Enum
from typing import Dict, Optional
from pyterrier_anserini import J
# Fallback similarity arguments. ``AnseriniSimilarity.to_lucene_sim`` starts
# from these values and overlays whatever the caller passes in ``sim_args``.
# Keys are namespaced as ``<similarity>.<parameter>``.
DEFAULT_WMODEL_ARGS: Dict[str, float] = {
    'bm25.k1': 0.9,
    'bm25.b': 0.4,
    'qld.mu': 1000.0,
}
[docs]
class AnseriniSimilarity(Enum):
    """An enum representing the similarity functions available in Anserini."""

    bm25 = 'BM25'
    qld = 'QLD'
    tfidf = 'TFIDF'
    impact = 'Impact'

    def to_lucene_sim(self, sim_args: Optional[Dict[str, float]] = None):
        """Provide a Lucene similarity object for this similarity function, including the provided arguments.

        Args:
            sim_args: The arguments of this similarity function. Default values
                (from ``DEFAULT_WMODEL_ARGS``) are used for any that are not provided.

        Returns:
            A ``pyjnius`` binding to a ``org.apache.lucene.search.similarities.Similarity`` object.

        Raises:
            ValueError: If this member has no corresponding similarity binding.
        """
        # Copy the defaults so neither the module-level dict nor the caller's
        # dict is mutated; caller-supplied values take precedence.
        args = dict(DEFAULT_WMODEL_ARGS)
        if sim_args is not None:
            args.update(sim_args)

        # Enum members are singletons, so identity comparison is sufficient.
        if self is AnseriniSimilarity.bm25:
            return J.BM25Similarity(args['bm25.k1'], args['bm25.b'])
        elif self is AnseriniSimilarity.qld:
            return J.LMDirichletSimilarity(args['qld.mu'])
        elif self is AnseriniSimilarity.tfidf:
            return J.ClassicSimilarity()
        elif self is AnseriniSimilarity.impact:
            return J.ImpactSimilarity()
        raise ValueError(f"similarity {self} is not supported")

    def __repr__(self):
        """Return the repr of the member's string value (e.g. ``'BM25'``)."""
        return repr(self.value)