Skip to main content

Retriever

The Retriever class is the entry point for loading your conversation data into Fair Forge. Every evaluation requires a custom retriever that implements load_dataset(). Fair Forge supports three iteration strategies that control how data is consumed — from loading everything upfront to yielding individual QA pairs on demand.

Iteration Modes

Choose the mode that fits your dataset size and processing requirements:
| Mode | Value | Returns | Use When |
| --- | --- | --- | --- |
| `FULL_DATASET` | `"full_dataset"` | `list[Dataset]` | Default. Dataset fits comfortably in memory |
| `STREAM_SESSIONS` | `"stream_sessions"` | `Iterator[Dataset]` | Many sessions, avoid loading all at once |
| `STREAM_BATCHES` | `"stream_batches"` | `Iterator[StreamedBatch]` | QA pairs arrive independently (e.g. from a queue) |
The default mode is FULL_DATASET. To use streaming, override the iteration_level property.

Interface

from abc import ABC, abstractmethod
from collections.abc import Iterator
from fair_forge.schemas.common import Dataset, IterationLevel, StreamedBatch

class Retriever(ABC):
    """Abstract entry point for loading conversation data into Fair Forge.

    Subclasses must implement :meth:`load_dataset`; any extra keyword
    arguments forwarded by the framework are stored on ``self.kwargs``.
    """

    def __init__(self, **kwargs):
        # Keep all forwarded keyword arguments so subclasses can declare
        # explicit parameters and pass the remainder through.
        self.kwargs = kwargs

    @property
    def iteration_level(self) -> IterationLevel:
        """How the result of load_dataset() is consumed; override to stream."""
        return IterationLevel.FULL_DATASET  # default

    @abstractmethod
    def load_dataset(self) -> list[Dataset] | Iterator[Dataset] | Iterator[StreamedBatch]:
        """Return the data; the return shape must match ``iteration_level``."""
        ...
The return type of load_dataset() must be consistent with iteration_level. Returning an Iterator with the default FULL_DATASET level will raise a ValueError at runtime.

Full Dataset Mode

The simplest and most common mode. Load all sessions into a list and return it.

Loading from JSON

import json
from pathlib import Path
from fair_forge.core.retriever import Retriever
from fair_forge.schemas.common import Dataset

class JSONRetriever(Retriever):
    """Load a list of sessions from a single JSON file.

    The file must contain a JSON array of objects, each validating
    against the ``Dataset`` schema.
    """

    def __init__(self, file_path: str = "data.json", **kwargs):
        super().__init__(**kwargs)
        self.file_path = Path(file_path)

    def load_dataset(self) -> list[Dataset]:
        # Explicit encoding: JSON is defined as UTF-8, and the platform
        # default (e.g. cp1252 on Windows) would corrupt non-ASCII text.
        with open(self.file_path, encoding="utf-8") as f:
            return [Dataset.model_validate(item) for item in json.load(f)]

# Usage — kwargs are forwarded to the retriever constructor
# (file_path is consumed by JSONRetriever; remaining kwargs such as
# group_prototypes are presumably read by the metric — confirm in the
# Toxicity documentation)
metrics = Toxicity.run(
    JSONRetriever,
    file_path="conversations.json",
    group_prototypes={...},
)

Loading from a Database

import sqlite3
from fair_forge.core.retriever import Retriever
from fair_forge.schemas.common import Dataset, Batch

class DatabaseRetriever(Retriever):
    """Load conversation sessions for one assistant from a SQLite database.

    Expects a ``conversations`` table with columns: session_id, assistant_id,
    context, language, qa_id, query, assistant, ground_truth, created_at.
    """

    def __init__(self, db_path: str, assistant_id: str, **kwargs):
        super().__init__(**kwargs)
        self.db_path = db_path
        self.assistant_id = assistant_id

    def load_dataset(self) -> list[Dataset]:
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            # One ordered query instead of one query per session (the
            # original issued an extra SELECT for every session — N+1).
            cursor.execute(
                "SELECT session_id, context, language, qa_id, query, assistant, ground_truth "
                "FROM conversations WHERE assistant_id = ? ORDER BY session_id, created_at",
                (self.assistant_id,),
            )
            # Group rows into one Dataset per session, preserving the
            # created_at ordering of QA pairs within each session.
            sessions: dict[str, Dataset] = {}
            for session_id, context, language, qa_id, query, assistant, ground_truth in cursor.fetchall():
                dataset = sessions.get(session_id)
                if dataset is None:
                    dataset = Dataset(
                        session_id=session_id,
                        assistant_id=self.assistant_id,
                        language=language,
                        context=context,
                        conversation=[],
                    )
                    sessions[session_id] = dataset
                dataset.conversation.append(
                    Batch(qa_id=qa_id, query=query, assistant=assistant, ground_truth_assistant=ground_truth)
                )
            return list(sessions.values())
        finally:
            # The original leaked the connection if a query or validation
            # raised; always release it.
            conn.close()

Loading from an API

import httpx
from fair_forge.core.retriever import Retriever
from fair_forge.schemas.common import Dataset

class APIRetriever(Retriever):
    """Fetch conversation datasets from an HTTP API using bearer-token auth."""

    def __init__(self, api_url: str, api_key: str, **kwargs):
        super().__init__(**kwargs)
        self.api_url = api_url
        self.api_key = api_key

    def load_dataset(self) -> list[Dataset]:
        with httpx.Client() as client:
            response = client.get(
                f"{self.api_url}/conversations",
                headers={"Authorization": f"Bearer {self.api_key}"},
            )
            # Fail fast on 4xx/5xx instead of trying to parse an error
            # body as a dataset list (the original silently did so).
            response.raise_for_status()
            payload = response.json()
        return [Dataset.model_validate(d) for d in payload["datasets"]]

Loading from CSV

import csv
from collections import defaultdict
from fair_forge.core.retriever import Retriever
from fair_forge.schemas.common import Dataset, Batch

class CSVRetriever(Retriever):
    """Load sessions from a flat CSV file.

    Each row is one QA pair; rows sharing a ``session_id`` are grouped into
    a single Dataset. Session-level fields (assistant_id, language, context)
    are taken from the first row of each session.
    """

    def __init__(self, file_path: str, **kwargs):
        super().__init__(**kwargs)
        self.file_path = file_path

    def load_dataset(self) -> list[Dataset]:
        # Group rows by session while preserving file order
        # (dict insertion order is stable).
        sessions: dict[str, list] = defaultdict(list)
        # newline="" is required by the csv module; explicit encoding
        # avoids platform-dependent defaults mangling non-ASCII text.
        with open(self.file_path, newline="", encoding="utf-8") as f:
            for row in csv.DictReader(f):
                sessions[row["session_id"]].append(row)

        datasets = []
        for session_id, rows in sessions.items():
            batches = [
                Batch(
                    qa_id=row["qa_id"],
                    query=row["query"],
                    assistant=row["assistant"],
                    # ground_truth is an optional column in the CSV schema.
                    ground_truth_assistant=row.get("ground_truth", ""),
                )
                for row in rows
            ]
            first = rows[0]
            datasets.append(Dataset(
                session_id=session_id,
                assistant_id=first.get("assistant_id", "unknown"),
                language=first.get("language", "english"),
                context=first.get("context", ""),
                conversation=batches,
            ))

        return datasets

Multi-Assistant Retriever (BestOf)

BestOf expects one Dataset per assistant, all answering the same questions. The retriever loads them — BestOf handles the tournament logic automatically.
import json
from pathlib import Path
from fair_forge.core.retriever import Retriever
from fair_forge.schemas.common import Dataset

class MultiAssistantRetriever(Retriever):
    """Each Dataset in the file represents one assistant answering the same questions."""

    def __init__(self, file_path: str = "dataset_bestof.json", **kwargs):
        super().__init__(**kwargs)
        self.file_path = Path(file_path)

    def load_dataset(self) -> list[Dataset]:
        # Read the whole file, then validate every entry against the schema.
        raw_entries = json.loads(self.file_path.read_text())
        return [Dataset.model_validate(entry) for entry in raw_entries]

# Usage — BestOf only needs model and criteria; the retriever provides the assistants
# NOTE(review): use_structured_output presumably requests structured (JSON)
# verdicts from the judge model — confirm against the BestOf documentation.
metrics = BestOf.run(
    MultiAssistantRetriever,
    model=judge_model,
    use_structured_output=True,
    criteria="Overall response quality, helpfulness, and clarity",
)
The JSON file must contain one entry per assistant, all sharing the same qa_id values so BestOf can pair their responses:
[
  {
    "session_id": "eval-session",
    "assistant_id": "assistant_alpha",
    "language": "english",
    "context": "",
    "conversation": [
      {"qa_id": "q1", "query": "What are the benefits of renewable energy?", "assistant": "...", "ground_truth_assistant": ""}
    ]
  },
  {
    "session_id": "eval-session",
    "assistant_id": "assistant_beta",
    "language": "english",
    "context": "",
    "conversation": [
      {"qa_id": "q1", "query": "What are the benefits of renewable energy?", "assistant": "...", "ground_truth_assistant": ""}
    ]
  }
]

Best Practices

Declare parameters explicitly for clarity and IDE support:
# Preferred
class MyRetriever(Retriever):
    """Example: explicit constructor parameters give clarity and IDE support."""

    def __init__(self, file_path: str, assistant_id: str = "default", **kwargs):
        super().__init__(**kwargs)
        self.file_path = file_path
        self.assistant_id = assistant_id
Use model_validate to leverage Pydantic’s validation and defaults:
def load_dataset(self) -> list[Dataset]:
    """Validate each JSON entry with Pydantic, filling in schema defaults."""
    with open(self.file_path) as f:
        return [Dataset.model_validate(item) for item in json.load(f)]
Fields with defaults (language="english") are filled in automatically.
If your dataset has thousands of sessions or QA pairs, prefer streaming to avoid loading everything into memory. See the Streaming page.

Next Steps

Streaming Retrievers

Handle large datasets with stream_sessions and stream_batches

Dataset & Batch

Understand the data structures used by all retrievers