Source code for pyterrier_alpha.rbo

"""Module providing the Rank Biased Overlap (RBO) measure."""

from typing import Callable, Iterable, Optional, Tuple

import ir_measures
import pandas as pd


def RBO(other: pd.DataFrame, p: float = 0.99, *, name: Optional[str] = None) -> ir_measures.Measure:  # noqa: N802
    """Create an RBO measure from a dataframe of rankings.

    The returned measure scores a run by its Rank Biased Overlap with ``other``,
    computed per query.

    Args:
        other: The rankings to compare against. The query column may be named
            ``query_id`` or ``qid``, the document column ``doc_id`` or ``docno``,
            and a ``score`` column is used for ordering.
        p: Factor by which the weight of each successive rank is multiplied.
        name: Name to register the measure under. When omitted (or empty), a
            name of the form ``RBO(p=<p>)`` is used.

    Returns:
        The measure object produced by ``ir_measures.define``.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.3.1
        Fixed bug where ``p`` wasn't honored.
    """
    # Falsy names (None or '') fall back to the generated label.
    measure_name = name if name else f'RBO(p={p})'
    scorer = _rbo_wrapper(other, p=p)
    return ir_measures.define(scorer, name=measure_name)
def _rbo_wrapper(a: pd.DataFrame, p: float = 0.99) -> Callable:
    """Build a scoring function that computes Rank Biased Overlap against ``a``.

    Args:
        a: Rankings to compare against. Must have a query column (``query_id``
            or ``qid``), a document column (``doc_id`` or ``docno``) and a
            ``score`` column used to order documents within each query.
        p: Factor by which the rank weight is multiplied after every depth.

    Returns:
        A function ``inner(qrels, b)`` that returns ``(query id, score)`` pairs
        for every query present in either ``a`` or ``b``. ``qrels`` is ignored.
    """
    # adapted from https://github.com/terrierteam/ir_measures/blob/main/ir_measures/providers/compat_provider.py
    a_q_col = 'query_id' if 'query_id' in a.columns else 'qid'
    a_d_col = 'doc_id' if 'doc_id' in a.columns else 'docno'
    # Sort once up front so each per-query group is already in descending score order.
    a_sorted = a.sort_values(by=[a_q_col, 'score'], ascending=False)
    a_by_query = dict(iter(a_sorted.groupby(a_q_col)))

    def inner(qrels: pd.DataFrame, b: pd.DataFrame) -> Iterable[Tuple[str, float]]:
        """Score ``b`` against the captured rankings; ``qrels`` is ignored."""
        b_q_col = 'query_id' if 'query_id' in b.columns else 'qid'
        b_d_col = 'doc_id' if 'doc_id' in b.columns else 'docno'
        res = {}
        b_sorted = b.sort_values(by=[b_q_col, 'score'], ascending=False)
        b_by_query = dict(iter(b_sorted.groupby(b_q_col)))
        for qid in set(a_by_query) | set(b_by_query):
            # A query missing from one side is treated as an empty ranking there.
            ranking = list(a_by_query[qid][a_d_col]) if qid in a_by_query else []
            ideal = list(b_by_query[qid][b_d_col]) if qid in b_by_query else []
            ranking_len = len(ranking)
            ideal_len = len(ideal)
            ranking_seen = set()
            ideal_seen = set()
            # Running size of the intersection of the two prefixes. Updating it
            # as documents are first seen avoids recomputing a full set
            # intersection at every depth.
            overlap = 0
            score = 0.0
            normalizer = 0.0
            weight = 1.0
            # Evaluation depth is fixed at 1000; once both rankings are
            # exhausted the overlap stays constant while the depth keeps growing.
            for i in range(1000):
                if i < ranking_len:
                    doc = ranking[i]
                    if doc not in ranking_seen:
                        ranking_seen.add(doc)
                        if doc in ideal_seen:
                            overlap += 1
                if i < ideal_len:
                    doc = ideal[i]
                    if doc not in ideal_seen:
                        ideal_seen.add(doc)
                        if doc in ranking_seen:
                            overlap += 1
                score += weight * overlap / (i + 1)
                normalizer += weight
                weight *= p
            res[qid] = score / normalizer
        return res.items()

    return inner
def rbo(a: pd.DataFrame, b: pd.DataFrame, p: float = 0.99) -> Iterable[Tuple[str, float]]:
    """Calculate the Rank Biased Overlap between two rankings.

    Args:
        a: The first set of rankings.
        b: The second set of rankings.
        p: Factor by which the weight of each successive rank is multiplied.

    Returns:
        ``(query id, RBO score)`` pairs.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.12.5
        Fixed bug where b wasn't passed properly
    """
    scorer = _rbo_wrapper(a, p)
    # The scorer takes (qrels, run); qrels are not used for RBO, so pass None.
    return scorer(None, b)