"""Module providing the Rank Biased Overlap (RBO) measure."""fromtypingimportCallable,Iterable,Optional,Tupleimportir_measuresimportpandasaspd
def RBO(  # noqa: N802
    other: pd.DataFrame,
    p: float = 0.99,
    *,
    name: Optional[str] = None,
) -> ir_measures.Measure:
    """Build an ``ir_measures`` measure that scores a run by its RBO against ``other``.

    The scoring function is produced by ``_rbo_wrapper(other, p=p)`` and then
    registered through ``ir_measures.define``.

    Args:
        other: Reference rankings. Must provide a ``score`` column, a query
            column (``query_id`` or ``qid``) and a document column
            (``doc_id`` or ``docno``).
        p: Persistence parameter; the weight given to depth ``i`` is ``p ** i``.
        name: Name for the resulting measure. When empty or ``None`` the
            measure is named ``RBO(p=<p>)``.

    Returns:
        The measure object returned by ``ir_measures.define``.

    .. versionadded:: 0.3.0
    .. versionchanged:: 0.3.1
        Fixed bug where ``p`` wasn't honored.
    """
    scorer = _rbo_wrapper(other, p=p)
    measure_name = name or f'RBO(p={p})'
    return ir_measures.define(scorer, name=measure_name)
def_rbo_wrapper(a:pd.DataFrame,p:float=0.99)->Callable:# adapted from https://github.com/terrierteam/ir_measures/blob/main/ir_measures/providers/compat_provider.pya_q_col='query_id'if'query_id'ina.columnselse'qid'a_d_col='doc_id'if'doc_id'ina.columnselse'docno'a=a.sort_values(by=[a_q_col,'score'],ascending=False)a=dict(iter(a.groupby(a_q_col)))definner(qrels:pd.DataFrame,b:pd.DataFrame)->Iterable[Tuple[str,float]]:# qrels ignoredb_q_col='query_id'if'query_id'inb.columnselse'qid'b_d_col='doc_id'if'doc_id'inb.columnselse'docno'res={}b=b.sort_values(by=[b_q_col,'score'],ascending=False)b=dict(iter(b.groupby(b_q_col)))forqidinset(a.keys())|set(b.keys()):ranking=list(a[qid][a_d_col])ifqidinaelse[]ideal=list(b[qid][b_d_col])ifqidinbelse[]ranking_set=set()ideal_set=set()score=0.0normalizer=0.0weight=1.0foriinrange(1000):ifi<len(ranking):ranking_set.add(ranking[i])ifi<len(ideal):ideal_set.add(ideal[i])score+=weight*len(ideal_set.intersection(ranking_set))/(i+1)normalizer+=weightweight*=pres[qid]=score/normalizerreturnres.items()returninner
def rbo(a: pd.DataFrame, b: pd.DataFrame, p: float = 0.99) -> Iterable[Tuple[str, float]]:
    """Compute the Rank Biased Overlap of two sets of rankings, query by query.

    Args:
        a: First rankings frame; needs a ``score`` column, a query column
            (``query_id`` or ``qid``) and a document column (``doc_id`` or
            ``docno``).
        b: Second rankings frame, with the same column requirements.
        p: Persistence parameter; the weight given to depth ``i`` is ``p ** i``.

    Returns:
        ``(query_id, score)`` pairs, one for each query found in either frame.

    .. versionadded:: 0.3.0
    .. versionchanged:: 0.12.5
        Fixed bug where b wasn't passed properly
    """
    score_against_a = _rbo_wrapper(a, p)
    # The scorer's first argument (qrels) is unused, so None is passed.
    return score_against_a(None, b)