"""
TraceMind-AI - Agent Evaluation Platform
Enterprise-grade AI agent evaluation with MCP integration
"""
import os
import pandas as pd
import gradio as gr
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
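# Configuration is read from a local .env file. The variables used directly in
# this module are DATA_SOURCE and JSON_DATA_PATH (see the __main__ block);
# a minimal .env might look like:
#
#   DATA_SOURCE=both
#   JSON_DATA_PATH=./sample_data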
# Import data loader and components
from data_loader import create_data_loader_from_env
from components.leaderboard_table import generate_leaderboard_html
from components.analytics_charts import (
create_trends_plot,
create_performance_heatmap,
create_speed_accuracy_scatter,
create_cost_efficiency_scatter
)
from components.report_cards import generate_leaderboard_summary_card
from utils.navigation import Navigator, Screen
# Initialize data loader
data_loader = create_data_loader_from_env()
navigator = Navigator()
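# Note: navigator is instantiated here, but screen switching in this file is
# done by toggling the visibility of gr.Column containers
# (see leaderboard_screen / run_detail_screen below).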
# Pre-load and cache the leaderboard data before building UI
print("πŸ“₯ Pre-loading leaderboard data from HuggingFace...")
leaderboard_df_cache = data_loader.load_leaderboard()
print(f"βœ… Loaded {len(leaderboard_df_cache)} evaluation runs")
# Global state (already populated)
# leaderboard_df_cache is now set
# Additional global state for navigation
current_selected_run = None
current_selected_trace = None
current_drilldown_df = None # Store currently displayed drilldown data
def load_leaderboard():
    """Load initial leaderboard data from cache"""
    # Use pre-cached data (already loaded before UI build); read-only access,
    # so no global declaration is needed
    df = leaderboard_df_cache.copy()
    html = generate_leaderboard_html(df)
    # Get filter choices (guard against a missing 'model' column)
    models = ["All Models"] + (sorted(df['model'].unique().tolist()) if 'model' in df.columns else [])
    return html, gr.update(choices=models), gr.update(choices=models)
def refresh_leaderboard():
"""Refresh leaderboard data from source (for reload button)"""
global leaderboard_df_cache
print("πŸ”„ Refreshing leaderboard data...")
df = data_loader.refresh_leaderboard() # Clears cache and reloads
leaderboard_df_cache = df.copy()
print(f"βœ… Refreshed {len(df)} evaluation runs")
html = generate_leaderboard_html(df)
models = ["All Models"] + sorted(df['model'].unique().tolist())
return html, gr.update(choices=models), gr.update(choices=models)
def apply_filters(model, provider, sort_by_col):
"""Apply filters and sorting to leaderboard"""
    df = leaderboard_df_cache.copy() if leaderboard_df_cache is not None else data_loader.load_leaderboard()
    # Apply filters
    if model != "All Models":
        df = df[df['model'] == model]
    if provider != "All":
        df = df[df['provider'] == provider]
    # Sort (ascending for cost and duration, where lower is better)
    ascending = sort_by_col in ("total_cost_usd", "avg_duration_ms")
    df = df.sort_values(by=sort_by_col, ascending=ascending)
html = generate_leaderboard_html(df, sort_by_col)
return html
def load_drilldown(agent_type, provider):
"""Load drilldown data with filters"""
global current_drilldown_df
try:
df = data_loader.load_leaderboard()
if df.empty:
current_drilldown_df = pd.DataFrame()
return pd.DataFrame()
if agent_type != "All" and 'agent_type' in df.columns:
df = df[df['agent_type'] == agent_type]
if provider != "All" and 'provider' in df.columns:
df = df[df['provider'] == provider]
# IMPORTANT: Store the FULL dataframe in global state (with ALL columns)
# This ensures the event handler has access to results_dataset, traces_dataset, etc.
current_drilldown_df = df.copy()
# Select only columns for DISPLAY
desired_columns = [
'run_id', 'model', 'agent_type', 'provider',
'success_rate', 'total_tests', 'avg_duration_ms', 'total_cost_usd'
]
# Filter to only existing columns
available_columns = [col for col in desired_columns if col in df.columns]
if not available_columns:
# If no desired columns exist, return empty dataframe
return pd.DataFrame()
display_df = df[available_columns].copy()
# Return ONLY display columns for the UI table
return display_df
except Exception as e:
print(f"[ERROR] load_drilldown: {e}")
import traceback
traceback.print_exc()
return pd.DataFrame()
def load_trends():
"""Load trends visualization"""
df = data_loader.load_leaderboard()
fig = create_trends_plot(df)
return fig
def update_analytics(viz_type):
"""Update analytics chart based on visualization type"""
df = data_loader.load_leaderboard()
if "Heatmap" in viz_type:
return create_performance_heatmap(df)
elif "Speed" in viz_type:
return create_speed_accuracy_scatter(df)
else:
return create_cost_efficiency_scatter(df)
def generate_card(top_n):
"""Generate summary card HTML"""
df = data_loader.load_leaderboard()
html = generate_leaderboard_summary_card(df, top_n)
return html
def generate_insights():
"""Generate AI insights summary"""
try:
df = data_loader.load_leaderboard()
        required_cols = {'success_rate', 'total_cost_usd', 'avg_duration_ms'}
        if df.empty or not required_cols.issubset(df.columns):
            return "## πŸ“Š Leaderboard Summary\n\nNo data available for insights."
top_model = df.loc[df['success_rate'].idxmax()]
most_cost_effective = df.loc[(df['success_rate'] / (df['total_cost_usd'] + 0.0001)).idxmax()]
fastest = df.loc[df['avg_duration_ms'].idxmin()]
insights = f"""
## πŸ“Š Leaderboard Summary
**Total Runs:** {len(df)}
**Top Performers:**
- πŸ₯‡ **Best Accuracy:** {top_model['model']} ({top_model['success_rate']:.1f}%)
- πŸ’° **Most Cost-Effective:** {most_cost_effective['model']} ({most_cost_effective['success_rate']:.1f}% @ ${most_cost_effective['total_cost_usd']:.4f})
- ⚑ **Fastest:** {fastest['model']} ({fastest['avg_duration_ms']:.0f}ms avg)
**Key Trends:**
- Average Success Rate: {df['success_rate'].mean():.1f}%
- Average Cost: ${df['total_cost_usd'].mean():.4f}
- Average Duration: {df['avg_duration_ms'].mean():.0f}ms
---
*Note: AI-powered insights will be available via MCP integration in the full version.*
"""
return insights
except Exception as e:
print(f"[ERROR] generate_insights: {e}")
import traceback
traceback.print_exc()
return f"## πŸ“Š Leaderboard Summary\n\nError generating insights: {str(e)}"
def build_run_metadata_html(run_data):
    """Render the gradient header card for the Run Detail screen.

    Shared by the HTML-table click handler, load_run_detail, and the
    DrillDown select handler.
    """
    return f"""
    <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                padding: 20px; border-radius: 10px; color: white; margin-bottom: 20px;">
        <h2 style="margin: 0 0 10px 0;">πŸ“Š Run Detail: {run_data.get('model', 'Unknown')}</h2>
        <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 20px; margin-top: 15px;">
            <div>
                <strong>Agent Type:</strong> {run_data.get('agent_type', 'N/A')}<br>
                <strong>Provider:</strong> {run_data.get('provider', 'N/A')}<br>
                <strong>Success Rate:</strong> {run_data.get('success_rate', 0):.1f}%
            </div>
            <div>
                <strong>Total Tests:</strong> {run_data.get('total_tests', 0)}<br>
                <strong>Successful:</strong> {run_data.get('successful_tests', 0)}<br>
                <strong>Failed:</strong> {run_data.get('failed_tests', 0)}
            </div>
            <div>
                <strong>Total Cost:</strong> ${run_data.get('total_cost_usd', 0):.4f}<br>
                <strong>Avg Duration:</strong> {run_data.get('avg_duration_ms', 0):.0f}ms<br>
                <strong>Submitted By:</strong> {run_data.get('submitted_by', 'Unknown')}
            </div>
        </div>
    </div>
    """
def format_results_for_display(results_df):
    """Select and pretty-format result columns for the test-cases table."""
    display_df = results_df.copy()
    display_columns = []
    if 'task_id' in display_df.columns:
        display_columns.append('task_id')
    if 'success' in display_df.columns:
        display_df['success'] = display_df['success'].apply(lambda x: "βœ…" if x else "❌")
        display_columns.append('success')
    if 'tool_called' in display_df.columns:
        display_columns.append('tool_called')
    if 'execution_time_ms' in display_df.columns:
        display_df['execution_time_ms'] = display_df['execution_time_ms'].apply(lambda x: f"{x:.0f}ms")
        display_columns.append('execution_time_ms')
    if 'total_tokens' in display_df.columns:
        display_columns.append('total_tokens')
    if 'cost_usd' in display_df.columns:
        display_df['cost_usd'] = display_df['cost_usd'].apply(lambda x: f"${x:.4f}")
        display_columns.append('cost_usd')
    if 'trace_id' in display_df.columns:
        display_columns.append('trace_id')
    if display_columns:
        display_df = display_df[display_columns]
    return display_df
def on_html_table_row_click(row_index_str):
"""Handle row click from HTML table via JavaScript (hidden textbox bridge)"""
global current_selected_run, leaderboard_df_cache
print(f"[DEBUG] on_html_table_row_click called with: '{row_index_str}'")
try:
# Parse row index from string
        if not row_index_str or not row_index_str.strip():
print("[DEBUG] Empty row index, ignoring")
return {
leaderboard_screen: gr.update(),
run_detail_screen: gr.update(),
run_metadata_html: gr.update(),
test_cases_table: gr.update(),
selected_row_index: gr.update(value="") # Clear textbox
}
selected_idx = int(row_index_str)
print(f"[DEBUG] Parsed row index: {selected_idx}")
# Get the full run data from cache
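        # Assumption: the index emitted by the HTML table refers to rows of
        # the full cached dataframe; if generate_leaderboard_html renders a
        # filtered view, it must embed each row's original index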
if leaderboard_df_cache is None or leaderboard_df_cache.empty:
print("[ERROR] Leaderboard cache is empty")
gr.Warning("Leaderboard data not loaded")
return {
leaderboard_screen: gr.update(),
run_detail_screen: gr.update(),
run_metadata_html: gr.update(),
test_cases_table: gr.update(),
selected_row_index: gr.update(value="") # Clear textbox
}
if selected_idx < 0 or selected_idx >= len(leaderboard_df_cache):
print(f"[ERROR] Invalid row index: {selected_idx}, cache size: {len(leaderboard_df_cache)}")
gr.Warning(f"Invalid row index: {selected_idx}")
return {
leaderboard_screen: gr.update(),
run_detail_screen: gr.update(),
run_metadata_html: gr.update(),
test_cases_table: gr.update(),
selected_row_index: gr.update(value="") # Clear textbox
}
run_data = leaderboard_df_cache.iloc[selected_idx].to_dict()
# Set global
current_selected_run = run_data
print(f"[DEBUG] Selected run from HTML table: {run_data.get('model', 'Unknown')} (row {selected_idx})")
# Load results for this run
results_dataset = run_data.get('results_dataset')
if not results_dataset:
gr.Warning("No results dataset found for this run")
return {
leaderboard_screen: gr.update(visible=True),
run_detail_screen: gr.update(visible=False),
run_metadata_html: gr.update(value="<h3>No results dataset found</h3>"),
test_cases_table: gr.update(value=pd.DataFrame()),
selected_row_index: gr.update(value="")
}
results_df = data_loader.load_results(results_dataset)
        # Build the header card and the display table via the shared helpers
        metadata_html = build_run_metadata_html(run_data)
        display_df = format_results_for_display(results_df)
print(f"[DEBUG] Successfully loaded run detail for: {run_data.get('model', 'Unknown')}")
return {
# Hide leaderboard, show run detail
leaderboard_screen: gr.update(visible=False),
run_detail_screen: gr.update(visible=True),
run_metadata_html: gr.update(value=metadata_html),
test_cases_table: gr.update(value=display_df),
selected_row_index: gr.update(value="") # Clear textbox
}
except Exception as e:
print(f"[ERROR] Handling HTML table row click: {e}")
import traceback
traceback.print_exc()
gr.Warning(f"Error loading run details: {str(e)}")
return {
leaderboard_screen: gr.update(visible=True), # Stay on leaderboard
run_detail_screen: gr.update(visible=False),
run_metadata_html: gr.update(),
test_cases_table: gr.update(),
selected_row_index: gr.update(value="") # Clear textbox
}
def load_run_detail(run_id):
"""Load run detail data including results dataset"""
global current_selected_run, leaderboard_df_cache
try:
# Find run in cache
df = leaderboard_df_cache
run_data = df[df['run_id'] == run_id].iloc[0].to_dict()
current_selected_run = run_data
# Load results dataset
results_dataset = run_data.get('results_dataset')
if not results_dataset:
return pd.DataFrame(), f"# Error\n\nNo results dataset found for this run", ""
results_df = data_loader.load_results(results_dataset)
        # Build the header card and the display table via the shared helpers
        metadata_html = build_run_metadata_html(run_data)
        display_df = format_results_for_display(results_df)
return display_df, metadata_html, run_data.get('run_id', '')
except Exception as e:
print(f"[ERROR] load_run_detail: {e}")
import traceback
traceback.print_exc()
return pd.DataFrame(), f"# Error\n\nError loading run detail: {str(e)}", ""
# Screen 3 (Run Detail) event handlers
def on_drilldown_select(evt: gr.SelectData, df):
"""Handle row selection from DrillDown table - EXACT COPY from MockTraceMind"""
global current_selected_run, current_drilldown_df
try:
# Get selected run - use currently displayed dataframe (filtered/sorted)
selected_idx = evt.index[0]
# Get the full run data from the displayed dataframe
# This ensures we get the correct row even after filtering/sorting
if current_drilldown_df is not None and not current_drilldown_df.empty:
if selected_idx < len(current_drilldown_df):
run_data = current_drilldown_df.iloc[selected_idx].to_dict()
            else:
                gr.Warning(f"Invalid row selection: index {selected_idx} out of bounds")
                # Return explicit no-op updates for every output component
                return {c: gr.update() for c in (leaderboard_screen, run_detail_screen, run_metadata_html, test_cases_table)}
        else:
            gr.Warning("Leaderboard data not available")
            return {c: gr.update() for c in (leaderboard_screen, run_detail_screen, run_metadata_html, test_cases_table)}
# IMPORTANT: Set global FIRST before any operations that might fail
current_selected_run = run_data
print(f"[DEBUG] Selected run: {run_data.get('model', 'Unknown')} (run_id: {run_data.get('run_id', 'N/A')[:8]}...)")
# Load results for this run
results_dataset = run_data.get('results_dataset')
if not results_dataset:
gr.Warning("No results dataset found for this run")
return {
leaderboard_screen: gr.update(visible=True),
run_detail_screen: gr.update(visible=False),
run_metadata_html: gr.update(value="<h3>No results dataset found</h3>"),
test_cases_table: gr.update(value=pd.DataFrame())
}
results_df = data_loader.load_results(results_dataset)
        # Build the header card and the display table via the shared helpers
        metadata_html = build_run_metadata_html(run_data)
        display_df = format_results_for_display(results_df)
print(f"[DEBUG] Successfully loaded run detail for: {run_data.get('model', 'Unknown')}")
return {
# Hide leaderboard, show run detail
leaderboard_screen: gr.update(visible=False),
run_detail_screen: gr.update(visible=True),
run_metadata_html: gr.update(value=metadata_html),
test_cases_table: gr.update(value=display_df)
}
except Exception as e:
print(f"[ERROR] Loading run details: {e}")
import traceback
traceback.print_exc()
gr.Warning(f"Error loading run details: {e}")
# Return updates for all output components to avoid Gradio error
return {
leaderboard_screen: gr.update(visible=True), # Stay on leaderboard
run_detail_screen: gr.update(visible=False),
run_metadata_html: gr.update(value="<h3>Error loading run detail</h3>"),
test_cases_table: gr.update(value=pd.DataFrame())
}
def go_back_to_leaderboard():
"""Navigate back to leaderboard screen"""
return {
leaderboard_screen: gr.update(visible=True),
run_detail_screen: gr.update(visible=False)
}
# Build Gradio app
# Theme configuration
theme = gr.themes.Base(
primary_hue="indigo",
secondary_hue="purple",
neutral_hue="slate",
font=gr.themes.GoogleFont("Inter"),
).set(
body_background_fill="*neutral_50",
body_background_fill_dark="*neutral_900",
button_primary_background_fill="*primary_500",
button_primary_background_fill_hover="*primary_600",
button_primary_text_color="white",
)
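# .set() overrides individual theme CSS variables; tokens like "*primary_500"
# resolve against the primary hue (indigo) chosen above.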
with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
# Top Banner
gr.HTML("""
<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 25px;
border-radius: 10px;
margin-bottom: 20px;
text-align: center;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
<h1 style="color: white !important; margin: 0; font-size: 2.5em; font-weight: bold;">
🧠 TraceMind
</h1>
<p style="color: rgba(255,255,255,0.9); margin: 10px 0 0 0; font-size: 1.2em;">
Agent Evaluation Platform
</p>
<p style="color: rgba(255,255,255,0.8); margin: 10px 0 0 0; font-size: 0.9em;">
Powered by Gradio 6 πŸš€ | HuggingFace Jobs | MCP Integration
</p>
</div>
""")
    # Main app container (wraps the Sidebar and all screens)
with gr.Column() as main_app_container:
# Sidebar Navigation
with gr.Sidebar():
gr.Markdown("## 🧠 TraceMind")
gr.Markdown("*Navigation & Controls*")
gr.Markdown("---")
# Navigation section
gr.Markdown("### 🧭 Navigation")
# Navigation buttons
leaderboard_nav_btn = gr.Button("πŸ† Leaderboard", variant="primary", size="lg")
compare_nav_btn = gr.Button("βš–οΈ Compare", variant="secondary", size="lg")
docs_nav_btn = gr.Button("πŸ“š Documentation", variant="secondary", size="lg")
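            # NOTE: the navigation buttons above are not yet wired to click
            # handlers in this file; only the data controls and filters below
            # have handlers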
gr.Markdown("---")
# Data Controls
gr.Markdown("### πŸ”„ Data Controls")
refresh_leaderboard_btn = gr.Button("πŸ”„ Refresh Data", variant="secondary", size="sm")
gr.Markdown("*Reload leaderboard from HuggingFace*")
gr.Markdown("---")
# Filters section
gr.Markdown("### πŸ” Global Filters")
sidebar_model_filter = gr.Dropdown(
choices=["All Models"],
value="All Models",
label="Model",
info="Filter evaluations by AI model"
)
sidebar_agent_type_filter = gr.Radio(
choices=["All", "tool", "code", "both"],
value="All",
label="Agent Type",
info="Tool: Function calling | Code: Code execution | Both: Hybrid"
)
# Main content area
# Screen 1: Main Leaderboard
with gr.Column(visible=True) as leaderboard_screen:
gr.Markdown("## πŸ† Agent Evaluation Leaderboard")
with gr.Tabs():
with gr.TabItem("πŸ† Leaderboard"):
# Filters
with gr.Row():
model_filter = gr.Dropdown(
choices=["All Models"],
value="All Models",
label="Filter by Model"
)
provider_filter = gr.Dropdown(
choices=["All", "litellm", "transformers"],
value="All",
label="Provider"
)
sort_by = gr.Dropdown(
choices=["success_rate", "total_cost_usd", "avg_duration_ms"],
value="success_rate",
label="Sort By"
)
apply_filters_btn = gr.Button("πŸ” Apply Filters")
# HTML table
leaderboard_by_model = gr.HTML()
with gr.TabItem("πŸ“‹ DrillDown"):
with gr.Row():
drilldown_agent_type = gr.Radio(
choices=["All", "tool", "code", "both"],
value="All",
label="Agent Type"
)
drilldown_provider = gr.Dropdown(
choices=["All", "litellm", "transformers"],
value="All",
label="Provider"
)
apply_drilldown_btn = gr.Button("πŸ” Apply")
leaderboard_table = gr.Dataframe(
headers=["Run ID", "Model", "Agent Type", "Provider", "Success Rate", "Tests", "Duration", "Cost"],
interactive=False
)
with gr.TabItem("πŸ“ˆ Trends"):
trends_plot = gr.Plot()
with gr.TabItem("πŸ“Š Analytics"):
viz_type = gr.Radio(
choices=["πŸ”₯ Performance Heatmap", "⚑ Speed vs Accuracy", "πŸ’° Cost Efficiency"],
value="πŸ”₯ Performance Heatmap",
label="Select Visualization"
)
analytics_chart = gr.Plot()
with gr.TabItem("πŸ“₯ Summary Card"):
top_n_slider = gr.Slider(1, 5, 3, step=1, label="Top N Models")
generate_card_btn = gr.Button("🎨 Generate Card")
card_preview = gr.HTML()
with gr.TabItem("πŸ€– AI Insights"):
regenerate_btn = gr.Button("πŸ”„ Regenerate")
mcp_insights = gr.Markdown("*Loading insights...*")
# Hidden textbox for row selection (JavaScript bridge)
selected_row_index = gr.Textbox(visible=False, elem_id="selected_row_index")
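            # Bridge mechanics: the leaderboard HTML (from
            # generate_leaderboard_html) is assumed to attach row onclick
            # handlers that write the clicked row index into this hidden
            # textbox and dispatch an input event, which fires the
            # selected_row_index.change() handler registered below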
# Screen 3: Run Detail
with gr.Column(visible=False) as run_detail_screen:
# Navigation
with gr.Row():
back_to_leaderboard_btn = gr.Button("⬅️ Back to Leaderboard", variant="secondary", size="sm")
# Run metadata display
run_metadata_html = gr.HTML()
# Test cases table
gr.Markdown("## πŸ“‹ Test Cases")
test_cases_table = gr.Dataframe(
headers=["Task ID", "Status", "Tool", "Duration", "Tokens", "Cost", "Trace ID"],
interactive=False,
wrap=True
)
# Event handlers
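    # Each app.load below runs once per page load, so the leaderboard, trends,
    # drilldown table, analytics chart, and AI insights all populate on startup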
app.load(
fn=load_leaderboard,
outputs=[leaderboard_by_model, model_filter, sidebar_model_filter]
)
app.load(
fn=load_trends,
outputs=[trends_plot]
)
# Load drilldown data on page load
app.load(
fn=load_drilldown,
inputs=[drilldown_agent_type, drilldown_provider],
outputs=[leaderboard_table]
)
# Refresh button handler
refresh_leaderboard_btn.click(
fn=refresh_leaderboard,
outputs=[leaderboard_by_model, model_filter, sidebar_model_filter]
)
apply_filters_btn.click(
fn=apply_filters,
inputs=[model_filter, provider_filter, sort_by],
outputs=[leaderboard_by_model]
)
apply_drilldown_btn.click(
fn=load_drilldown,
inputs=[drilldown_agent_type, drilldown_provider],
outputs=[leaderboard_table]
)
# Sidebar filter handlers
def apply_sidebar_model_filter(model, sort_by_col):
"""Apply sidebar model filter to leaderboard"""
return apply_filters(model, "All", sort_by_col), gr.update(value=model)
sidebar_model_filter.change(
fn=apply_sidebar_model_filter,
inputs=[sidebar_model_filter, sort_by],
outputs=[leaderboard_by_model, model_filter]
)
def apply_sidebar_agent_type_filter(agent_type):
"""Apply sidebar agent type filter to drilldown"""
return load_drilldown(agent_type, "All"), gr.update(value=agent_type)
sidebar_agent_type_filter.change(
fn=apply_sidebar_agent_type_filter,
inputs=[sidebar_agent_type_filter],
outputs=[leaderboard_table, drilldown_agent_type]
)
viz_type.change(
fn=update_analytics,
inputs=[viz_type],
outputs=[analytics_chart]
)
app.load(
fn=update_analytics,
inputs=[viz_type],
outputs=[analytics_chart]
)
generate_card_btn.click(
fn=generate_card,
inputs=[top_n_slider],
outputs=[card_preview]
)
app.load(
fn=generate_insights,
outputs=[mcp_insights]
)
regenerate_btn.click(
fn=generate_insights,
outputs=[mcp_insights]
)
leaderboard_table.select(
fn=on_drilldown_select,
        inputs=[leaderboard_table],  # Pass the displayed dataframe to the handler
outputs=[leaderboard_screen, run_detail_screen, run_metadata_html, test_cases_table]
)
back_to_leaderboard_btn.click(
fn=go_back_to_leaderboard,
inputs=[],
outputs=[leaderboard_screen, run_detail_screen]
)
# HTML table row click handler (JavaScript bridge via hidden textbox)
selected_row_index.change(
fn=on_html_table_row_click,
inputs=[selected_row_index],
outputs=[leaderboard_screen, run_detail_screen, run_metadata_html, test_cases_table, selected_row_index]
)
if __name__ == "__main__":
print("πŸš€ Starting TraceMind-AI...")
print(f"πŸ“Š Data Source: {os.getenv('DATA_SOURCE', 'both')}")
print(f"πŸ“ JSON Path: {os.getenv('JSON_DATA_PATH', './sample_data')}")
app.launch(
server_name="0.0.0.0",
server_port=7860,
share=False
)