Source code for suiteeval.context
import tempfile
from typing import Union, List, Literal, Optional
import pyterrier as pt
[docs]
class DatasetContext:
    """
    Pairs a PyTerrier Dataset with a filesystem location (for indexes,
    caches, etc.) so both can be passed around as a single object.

    Instances expose two attributes, both assigned in ``__init__``:
    ``dataset`` (the wrapped dataset) and ``path`` (the working directory).
    """

    def __init__(self, dataset: pt.datasets.Dataset, path: Optional[str] = None):
        """
        Args:
            dataset: The pyterrier Dataset instance to wrap. Its `_irds_id`
                attribute is read when no ``path`` is supplied.
            path: Optional filesystem path to use as-is. When omitted, a
                fresh temporary directory is created and used instead.
        """
        self.dataset = dataset

        # Caller picked a location: store it untouched and stop here.
        if path is not None:
            self.path = path
            return

        # No location given: create a temp dir whose name ends with the
        # dataset id, with "/" swapped for "-" so the id cannot introduce
        # extra path components. mkdtemp does not delete the directory
        # afterwards; it stays on disk until someone removes it.
        irds_id = self.dataset._irds_id
        flattened = irds_id.replace("/", "-")
        self.path = tempfile.mkdtemp(suffix="-{}".format(flattened))

    def text_loader(self, fields: Union[List[str], str, Literal["*"]] = "*"):
        """
        Returns the wrapped dataset's text loader (an IRDSTextLoader
        instance) for retrieving document texts.

        Args:
            fields: Forwarded unchanged to the dataset's ``text_loader``;
                defaults to ``"*"``.
        """
        loader = self.dataset.text_loader(fields=fields)
        return loader

    def get_corpus_iter(self):
        """
        Returns an iterator over the corpus documents, obtained from the
        wrapped dataset's ``get_corpus_iter``.
        """
        corpus_iter = self.dataset.get_corpus_iter()
        return corpus_iter
# Names exported by `from <this module> import *`: only DatasetContext.
__all__ = ["DatasetContext"]