Source code for suiteeval.context

import tempfile
from typing import Union, List, Literal, Optional
import pyterrier as pt


class DatasetContext:
    """
    Pair a PyTerrier Dataset with a filesystem location.

    The location is meant for on-disk artefacts such as indexes and caches.
    After construction the instance exposes two attributes: `dataset` (the
    wrapped dataset object) and `path` (the directory associated with it).
    """

    def __init__(
        self,
        dataset: pt.datasets.Dataset,
        path: Optional[str] = None,
    ):
        """
        Args:
            dataset: The PyTerrier Dataset to wrap. Its `_irds_id` attribute
                is read only when `path` is not supplied.
            path: Filesystem path to associate with the dataset. It is stored
                exactly as given. When `None`, a new temporary directory is
                created with `tempfile.mkdtemp` and its path is used instead.
        """
        self.dataset = dataset

        # Caller chose the location: keep it untouched and stop here.
        if path is not None:
            self.path = path
            return

        # The dataset id is embedded in the temp directory name; "/" is
        # swapped for "-" (presumably so the id stays a single path
        # component rather than introducing sub-directories).
        safe_id = self.dataset._irds_id.replace("/", "-")
        self.path = tempfile.mkdtemp(suffix=f"-{safe_id}")

    def text_loader(self, fields: Union[List[str], str, Literal["*"]] = "*"):
        """
        Obtain a text loader for retrieving document texts.

        This simply forwards to the wrapped dataset's `text_loader`.

        Args:
            fields: Which fields to load: a list of field names, a single
                field name, or "*" for all fields.

        Returns:
            The object returned by `self.dataset.text_loader` (described
            upstream as an IRDSTextLoader instance).
        """
        loader = self.dataset.text_loader(fields=fields)
        return loader

    def get_corpus_iter(self, **iter_kwargs):
        """
        Obtain an iterator over the corpus documents.

        Args:
            **iter_kwargs: Keyword arguments forwarded unchanged to the
                wrapped dataset's `get_corpus_iter`.

        Returns:
            Whatever the wrapped dataset's `get_corpus_iter` returns.
        """
        corpus_iter = self.dataset.get_corpus_iter(**iter_kwargs)
        return corpus_iter
# Public API of this module: `from <module> import *` exports only DatasetContext.
__all__ = ["DatasetContext"]