Documentation Index
Fetch the complete documentation index at: https://fairforge.alquimia.ai/llms.txt
Use this file to discover all available pages before exploring further.
Generators API Reference
BaseGenerator
from fair_forge.generators import BaseGenerator
Constructor
def __init__(
self,
model: BaseChatModel,
use_structured_output: bool = False,
):
pass
generate_dataset()
async def generate_dataset(
self,
context_loader: BaseContextLoader,
source: str,
assistant_id: str,
num_queries_per_chunk: int = 3,
language: str = "english",
conversation_mode: bool = False,
selection_strategy: SelectionStrategy | None = None,
seed_examples: list[str] | None = None,
) -> list[Dataset]
Context Loaders
create_markdown_loader()
from fair_forge.generators import create_markdown_loader
def create_markdown_loader(
max_chunk_size: int = 2000,
min_chunk_size: int = 100,
header_levels: list[int] = [1, 2, 3],
) -> BaseContextLoader
BaseContextLoader
from fair_forge.generators.context_loaders.base import BaseContextLoader
class BaseContextLoader(ABC):
@abstractmethod
def load(self, source: str) -> list[ContentChunk]:
pass
ContentChunk
class ContentChunk:
chunk_id: str
content: str
metadata: dict
Selection Strategies
SequentialStrategy
from fair_forge.generators import SequentialStrategy
class SequentialStrategy:
"""Process all chunks sequentially into a single dataset."""
pass
RandomSamplingStrategy
from fair_forge.generators import RandomSamplingStrategy
class RandomSamplingStrategy:
def __init__(
self,
num_samples: int,
chunks_per_sample: int,
seed: int | None = None,
):
pass
Generated Output
Each generated batch stores its metadata in the agentic attribute:
batch.agentic = {
"difficulty": str, # "easy", "medium", "hard"
"query_type": str, # "factual", "inferential", "comparative", "application"
"chunk_id": str, # Source chunk ID
"turn_number": int, # (conversation mode) Turn number
"builds_on": str, # (conversation mode) Previous query reference
}