# Source code for pyterrier_alpha.transform
"""Decorators over transform functions."""
import functools
from typing import Callable, Dict, Iterable, Optional, Union
from warnings import warn
import pandas as pd
import pyterrier as pt
from packaging.version import Version
# Signature of a DataFrame-based transform function: takes a DataFrame and returns a DataFrame.
T_TRANSFORM_FN = Callable[[pd.DataFrame], pd.DataFrame]
# Signature of an iterator-based transform function: takes an iterable of dicts and returns an iterable of dicts.
T_TRANSFORM_ITER_FN = Callable[[Iterable[Dict]], Iterable[Dict]]
# [docs]
def by_query(*,
    add_ranks: bool = True,
    batch_size: Optional[int] = None,
    verbose: Optional[bool] = None,
) -> Union[Callable[[T_TRANSFORM_FN], T_TRANSFORM_FN], Callable[[T_TRANSFORM_ITER_FN], T_TRANSFORM_ITER_FN]]:
    """Decorates a function to transform a DataFrame query-by-query. Arguments match those in pt.apply closely.

    Args:
        verbose(bool): Whether to print progress bar. Default (``None``) is to inspect the transformer
            instance on every call for a ``verbose`` member variable that is True.
        add_ranks(bool): Whether to add ranks. Must be False when decorating ``transform_iter``.
        batch_size(int): whether to apply fn on batches of rows or all that are received.

    Returns:
        A decorator that wraps a ``transform`` (DataFrame) or ``transform_iter`` (iterable of dicts)
        method so that it is applied through ``pt.apply.by_query``.

    Raises:
        AssertionError: at decoration time, if ``add_ranks`` is True and the decorated function is
            named ``transform_iter``.

    Example::

        class MyTransformer(pt.Transformer):
            @pta.transform.by_query()
            def transform(self, inp: pd.DataFrame) -> pd.DataFrame:
                # inp only contains a single query at a time.
                return inp

    It can also decorate ``transform_iter``, which is identified by the function name.

    Example::

        class MyIterTransformer(pt.Transformer):
            @pta.transform.by_query(add_ranks=False)
            def transform_iter(self, inp: Iterable[Dict]) -> Iterable[Dict]:
                # inp only contains a single query at a time.
                return inp

    .. versionchanged:: 0.12.0 added support for ``transform_iter``
    .. versionchanged:: 0.12.3 supports verbose kwarg
    .. versionchanged:: 0.12.4 inspect the passed transformer for a verbose variable
    """
    def _wrapper(
        fn: Union[T_TRANSFORM_FN, T_TRANSFORM_ITER_FN],
    ) -> Union[T_TRANSFORM_FN, T_TRANSFORM_ITER_FN]:
        # pt.apply.by_query only accepts verbose together with iter=True from pyterrier 0.12.1 onwards.
        apply_iter_supports_verbose = Version(pt.__version__) >= Version('0.12.1')
        # The iterator flavour is recognised purely by the decorated function's name.
        is_iter = fn.__name__ == 'transform_iter'

        if is_iter:
            # Kept as an assertion so the exception type seen by existing callers is unchanged.
            assert not add_ranks, "add_ranks not supported for by_query with transform_iter; set add_ranks=False"

            @functools.wraps(fn)
            def _transform_iter(self: pt.Transformer, inp: Iterable[Dict]) -> Iterable[Dict]:
                kwargs = {}
                if verbose:
                    if apply_iter_supports_verbose:
                        kwargs['verbose'] = verbose
                    else:
                        warn(f'verbose ignored for pyterrier version {pt.__version__} (minimum 0.12.1 required)')
                elif (verbose is None and apply_iter_supports_verbose
                        and getattr(self, 'verbose', False)):
                    # No explicit setting: fall back to the instance's own verbose flag.
                    kwargs['verbose'] = True
                return pt.apply.by_query(
                    functools.partial(fn, self),
                    batch_size=batch_size,
                    iter=True,
                    **kwargs,
                )(inp)
            return _transform_iter

        @functools.wraps(fn)
        def _transform(self: pt.Transformer, inp: pd.DataFrame) -> pd.DataFrame:
            # Resolve verbosity per call into a local. The decorator closure is shared by every
            # instance of the class, so writing the result back to the enclosing ``verbose``
            # would freeze the first instance's setting for all later instances and calls.
            effective_verbose = verbose
            if effective_verbose is None:
                effective_verbose = getattr(self, 'verbose', False)
            return pt.apply.by_query(
                functools.partial(fn, self),
                add_ranks=add_ranks,
                batch_size=batch_size,
                iter=False,
                verbose=effective_verbose,
            )(inp)
        return _transform

    return _wrapper