You must format prompts according to your model’s requirements. The explainability module does not handle prompt formatting to avoid coupling with specific LLM formats.
Example:
messages = [
    {"role": "system", "content": "Answer concisely."},
    {"role": "user", "content": "What is photosynthesis?"},
]

# Use the tokenizer's chat template
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
from fair_forge.explainability import AttributionExplainer, Lime, Granularity

# Build the explainer once; it can be reused across many explain() calls.
explainer = AttributionExplainer(
    model=model,
    tokenizer=tokenizer,
    default_method=Lime,
    default_granularity=Granularity.WORD,
    verbose=True,
)

# `prompt` must already be formatted with the model's chat template.
result = explainer.explain(
    prompt=prompt,
    target="Photosynthesis is the process plants use to convert sunlight into energy.",
)
# Get top contributing words
print("Top 10 Most Important Words:")
for attr in result.get_top_k(10):
    print(f" '{attr.text}': {attr.score:+.4f}")

# Access all attributions
for attr in result.attributions:
    print(f"Position {attr.position}: '{attr.text}' = {attr.score:.4f}")
# Prepare batch (prompts must be pre-formatted)
items = [
    (
        tokenizer.apply_chat_template(
            [{"role": "user", "content": "What is AI?"}],
            tokenize=False,
            add_generation_prompt=True,
        ),
        "AI is artificial intelligence.",
    ),
    (
        tokenizer.apply_chat_template(
            [{"role": "user", "content": "What is ML?"}],
            tokenize=False,
            add_generation_prompt=True,
        ),
        "ML is machine learning, a subset of AI.",
    ),
]

# Process batch
batch_results = explainer.explain_batch(items)

print(f"Processed {len(batch_results)} items")
print(f"Total time: {batch_results.total_compute_time_seconds:.2f}s")

for i, result in enumerate(batch_results):
    print(f"\nItem {i+1}: Top words = {[a.text for a in result.get_top_k(3)]}")
class TokenAttribution(BaseModel):
    """One attribution entry: a text unit and its importance score."""

    text: str                       # Token/word/sentence text
    score: float                    # Attribution score (can be negative)
    position: int                   # Position in sequence
    normalized_score: float | None  # Score normalized to [0, 1]
# Get top K most important tokens
top_tokens = result.get_top_k(10)

# Get all attributions sorted by importance
sorted_attrs = result.top_attributions

# Export for visualization
viz_data = result.to_dict_for_visualization()
# Returns: {"tokens": [...], "scores": [...], "normalized_scores": [...]}

# Export as dict (JSON-serializable)
result_dict = result.model_dump()
# Get HTML string
html = explainer.visualize(result, return_html=True)

# Use in web apps, save to file, etc.
with open("attribution.html", "w") as f:
    f.write(html)
print("Analysis of attribution results:")
print("=" * 50)

# Positive contributors
positive = [a for a in result.attributions if a.score > 0]
print(f"\nPositive contributors ({len(positive)} tokens):")
for attr in sorted(positive, key=lambda x: x.score, reverse=True)[:5]:
    print(f" '{attr.text}': +{attr.score:.4f}")

# Negative contributors
negative = [a for a in result.attributions if a.score < 0]
print(f"\nNegative contributors ({len(negative)} tokens):")
for attr in sorted(negative, key=lambda x: x.score)[:5]:
    print(f" '{attr.text}': {attr.score:.4f}")