import asyncio

from fair_forge.generators import BaseGenerator, create_markdown_loader
from langchain_groq import ChatGroq
async def main():
    """Preview markdown chunks, then generate a synthetic dataset from them.

    Returns:
        The datasets produced by ``BaseGenerator.generate_dataset`` —
        presumably one dataset per chunk; confirm against fair_forge docs.
    """
    # Create loader: split on H1-H3 headers, cap each chunk at 2000 chars.
    loader = create_markdown_loader(
        max_chunk_size=2000,
        header_levels=[1, 2, 3],
    )

    # Preview chunks first so the chunking can be sanity-checked before
    # spending LLM calls on generation.
    chunks = loader.load("./docs/api.md")
    print(f"Will generate from {len(chunks)} chunks:")
    for chunk in chunks:
        print(f" - {chunk.chunk_id}: {len(chunk.content)} chars")

    # Use with generator: 3 queries per chunk via a Groq-hosted model.
    model = ChatGroq(model="llama-3.1-8b-instant")
    generator = BaseGenerator(model=model, use_structured_output=True)
    # NOTE: `await` is only valid inside an async function — the original
    # top-level `await` was a SyntaxError in a plain module.
    datasets = await generator.generate_dataset(
        context_loader=loader,
        source="./docs/api.md",
        assistant_id="api-assistant",
        num_queries_per_chunk=3,
    )
    return datasets


if __name__ == "__main__":
    asyncio.run(main())