from fair_forge.metrics.humanity import Humanity
from fair_forge.core.retriever import Retriever
from fair_forge.schemas.common import Dataset, Batch
import numpy as np


class HumanityRetriever(Retriever):
    """Retriever that serves one hard-coded conversation for the Humanity example."""

    def load_dataset(self) -> list[Dataset]:
        """Return a single ``Dataset`` holding three query/answer batches.

        The batches pair a user query with an assistant reply and a
        ``ground_truth_assistant`` description: a celebratory reply (q1),
        a reassuring reply (q2) and a purely factual reply (q3).
        """
        return [
            Dataset(
                session_id="humanity-eval-001",
                assistant_id="empathetic-bot",
                language="english",
                context="You are a supportive, empathetic assistant.",
                conversation=[
                    Batch(
                        qa_id="q1",
                        query="I just got promoted at work!",
                        assistant=(
                            "That's wonderful news! Congratulations on your "
                            "well-deserved promotion. Your hard work has clearly "
                            "paid off, and I'm thrilled for your success!"
                        ),
                        ground_truth_assistant="Express joy and congratulations enthusiastically.",
                    ),
                    Batch(
                        qa_id="q2",
                        query="I'm worried about my exam tomorrow.",
                        assistant=(
                            "I understand your anxiety about the exam. It's natural "
                            "to feel nervous. Remember to take deep breaths and trust "
                            "in your preparation. You've got this!"
                        ),
                        ground_truth_assistant="Acknowledge worry, provide reassurance and support.",
                    ),
                    Batch(
                        qa_id="q3",
                        query="Tell me about machine learning algorithms.",
                        assistant=(
                            "Machine learning algorithms are mathematical methods "
                            "that learn patterns from data."
                        ),
                        ground_truth_assistant="Provide factual, educational content.",
                    ),
                ],
            )
        ]


# Run evaluation
# NOTE(review): the retriever *class* is passed, not an instance; presumably
# Humanity.run instantiates it itself -- confirm against the fair_forge API.
metrics = Humanity.run(
    HumanityRetriever,
    verbose=True,
)

# Analyze results
print("Humanity Evaluation Results")
print("=" * 60)

# Suffixes of the `humanity_assistant_<emotion>` attributes read from each metric.
emotions = ["anger", "anticipation", "disgust", "fear", "joy", "sadness", "surprise", "trust"]

for metric in metrics:
    print(f"\nQA ID: {metric.qa_id}")
    print(f"Emotional Entropy: {metric.humanity_assistant_emotional_entropy:.4f}")
    print(f"Ground Truth Spearman: {metric.humanity_ground_truth_spearman:.4f}")
    print("Emotion Distribution:")
    for emotion in emotions:
        value = getattr(metric, f"humanity_assistant_{emotion}")
        # Only emotions with a positive score are listed.
        if value > 0:
            # Text bar: 20 characters per unit of `value`
            # (assumes value lies in [0, 1] -- TODO confirm).
            bar = "█" * int(value * 20)
            print(f" {emotion.capitalize():12}: {value:.3f} {bar}")

# Summary
print("\n" + "=" * 60)
print("Summary")
print("=" * 60)

avg_entropy = np.mean([m.humanity_assistant_emotional_entropy for m in metrics])
avg_spearman = np.mean([m.humanity_ground_truth_spearman for m in metrics])

print(f"Average Emotional Entropy: {avg_entropy:.4f}")
print(f"Average Spearman Correlation: {avg_spearman:.4f}")
import matplotlib.pyplot as plt
import numpy as np

# Plot the mean value of each `humanity_assistant_<emotion>` attribute across
# all entries of `metrics` (the module-level result of the evaluation run).
emotions = ["anger", "anticipation", "disgust", "fear", "joy", "sadness", "surprise", "trust"]

# Calculate average emotion distributions
avg_emotions = {e: 0 for e in emotions}
for metric in metrics:
    for emotion in emotions:
        avg_emotions[emotion] += getattr(metric, f"humanity_assistant_{emotion}")

# Guard the division: with no metrics the sums stay at 0 instead of raising
# ZeroDivisionError, so the chart still renders (as an empty bar chart).
metric_count = len(metrics)
if metric_count:
    for emotion in emotions:
        avg_emotions[emotion] /= metric_count

# Plot
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(emotions, [avg_emotions[e] for e in emotions], color='steelblue')
ax.set_xlabel('Emotion')
ax.set_ylabel('Average Distribution')
ax.set_title('Average Emotion Distribution Across Responses')
plt.xticks(rotation=45)  # tilt the tick labels
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import numpy as np  # np.mean is used below; imported here so the snippet is self-contained

# Bar chart of `humanity_assistant_emotional_entropy` for each entry of the
# module-level `metrics`, labelled by `qa_id`, with a dashed line at the mean.
entropies = [m.humanity_assistant_emotional_entropy for m in metrics]
qa_ids = [m.qa_id for m in metrics]

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(qa_ids, entropies, color='coral')
ax.set_xlabel('QA ID')
ax.set_ylabel('Emotional Entropy')
ax.set_title('Emotional Entropy per Response')

# The mean of an empty list is nan (with a RuntimeWarning), so the reference
# line and its legend are only drawn when there is at least one response.
if entropies:
    # Computed once and reused for both the line position and its label.
    mean_entropy = np.mean(entropies)
    ax.axhline(y=mean_entropy, color='red', linestyle='--', label=f'Mean: {mean_entropy:.2f}')
    ax.legend()

plt.tight_layout()
plt.show()