kshitijthakkar committed
Commit 315aa68 · 1 Parent(s): 5930644

Wire MCP server tools to UI screens


- Add mcp_helpers.py with sync/async functions to call MCP server tools
- Wire analyze_leaderboard to Leaderboard screen AI Insights tab
- Wire debug_trace to Trace Detail screen with Q&A interface
- Wire compare_runs to Compare screen AI Insights tab
- Wire analyze_results to Run Detail screen AI Insights tab
- Fix API endpoint names to match MCP server (/run_* endpoints)
- Fix parameter names for all MCP tool calls
- Update all navigation paths to set global state for MCP tools
- Parse composite run IDs for compare_runs tool
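
All four screens follow the same wiring pattern: a Gradio click handler calls a synchronous helper from screens/mcp_helpers.py, which uses gradio_client to invoke the matching /run_* endpoint on the MCP Space. Below is a minimal, self-contained sketch of that pattern; the component names are illustrative, while the endpoint, parameters, and default repo mirror the ones used in this commit.

```python
# Minimal sketch of the button -> sync helper -> MCP endpoint pattern used in this commit.
# Assumes gradio and gradio_client are installed and the MCP Space is reachable;
# the URL and dataset repo below are the defaults from screens/mcp_helpers.py.
import gradio as gr
from gradio_client import Client

MCP_SERVER_URL = "https://mcp-1st-birthday-tracemind-mcp-server.hf.space/"

def analyze_leaderboard_sync(metric_focus: str) -> str:
    """Call the MCP server's analyze_leaderboard tool and return its markdown answer."""
    try:
        client = Client(MCP_SERVER_URL)
        return client.predict(
            repo="kshitijthakkar/smoltrace-leaderboard",
            metric=metric_focus,
            time_range="last_week",
            top_n=5,
            api_name="/run_analyze_leaderboard",
        )
    except Exception as e:
        # Surface the failure in the UI instead of raising inside the event handler
        return f"❌ Error calling analyze_leaderboard: {e}"

with gr.Blocks() as demo:
    focus = gr.Dropdown(choices=["overall", "cost", "latency"], value="overall", label="Focus")
    btn = gr.Button("Generate AI Insights")
    out = gr.Markdown()
    btn.click(fn=analyze_leaderboard_sync, inputs=[focus], outputs=[out])

if __name__ == "__main__":
    demo.launch()
```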

Files changed (3)
  1. app.py +245 -28
  2. screens/compare.py +21 -0
  3. screens/mcp_helpers.py +245 -0
app.py CHANGED

@@ -59,6 +59,12 @@ from screens.chat import (
     on_clear_chat,
     on_quick_action
 )
+from screens.mcp_helpers import (
+    call_analyze_leaderboard_sync,
+    call_debug_trace_sync,
+    call_compare_runs_sync,
+    call_analyze_results_sync
+)
 from utils.navigation import Navigator, Screen


@@ -162,7 +168,7 @@ def create_trace_metadata_html(trace_data: dict) -> str:

 def on_test_case_select(evt: gr.SelectData, df):
     """Handle test case selection in run detail - navigate to trace detail"""
-    global current_selected_run, current_selected_trace
+    global current_selected_run, current_selected_trace, _current_trace_info

     print(f"[DEBUG] on_test_case_select called with index: {evt.index}")

@@ -190,6 +196,11 @@ def on_test_case_select(evt: gr.SelectData, df):
         gr.Warning("No traces dataset found in current run")
         return {}

+    # Update global trace info for MCP debug_trace tool
+    _current_trace_info["trace_id"] = trace_id
+    _current_trace_info["traces_repo"] = traces_dataset
+    print(f"[MCP] Updated trace info for debug_trace: trace_id={trace_id}, traces_repo={traces_dataset}")
+
     trace_data = data_loader.get_trace_by_id(traces_dataset, trace_id)

     if not trace_data:
@@ -690,48 +701,187 @@ def generate_card(top_n):


 def generate_insights():
-    """Generate AI insights summary"""
+    """Generate AI insights summary using MCP server"""
     try:
+        # Load leaderboard to check if data exists
         df = data_loader.load_leaderboard()

-        if df.empty or 'success_rate' not in df.columns:
-            return "## 📊 Leaderboard Summary\n\nNo data available for insights."
-
-        top_model = df.loc[df['success_rate'].idxmax()]
-        most_cost_effective = df.loc[(df['success_rate'] / (df['total_cost_usd'] + 0.0001)).idxmax()]
-        fastest = df.loc[df['avg_duration_ms'].idxmin()]
-
-        insights = f"""
-## 📊 Leaderboard Summary
-
-**Total Runs:** {len(df)}
-
-**Top Performers:**
-- 🥇 **Best Accuracy:** {top_model['model']} ({top_model['success_rate']:.1f}%)
-- 💰 **Most Cost-Effective:** {most_cost_effective['model']} ({most_cost_effective['success_rate']:.1f}% @ ${most_cost_effective['total_cost_usd']:.4f})
-- ⚡ **Fastest:** {fastest['model']} ({fastest['avg_duration_ms']:.0f}ms avg)
-
-**Key Trends:**
-- Average Success Rate: {df['success_rate'].mean():.1f}%
-- Average Cost: ${df['total_cost_usd'].mean():.4f}
-- Average Duration: {df['avg_duration_ms'].mean():.0f}ms
-
----
-
-*Note: AI-powered insights will be available via MCP integration in the full version.*
-"""
+        if df is None or df.empty:
+            return "## 📊 AI Insights\n\nNo leaderboard data available. Please refresh the data."
+
+        # Call MCP server's analyze_leaderboard tool
+        print("[MCP] Calling analyze_leaderboard MCP tool...")
+        insights = call_analyze_leaderboard_sync(
+            leaderboard_repo="kshitijthakkar/smoltrace-leaderboard",
+            metric_focus="overall",
+            time_range="last_week",
+            top_n=5
+        )

         return insights
+
     except Exception as e:
         print(f"[ERROR] generate_insights: {e}")
         import traceback
         traceback.print_exc()
-        return f"## 📊 Leaderboard Summary\n\nError generating insights: {str(e)}"
+        return f"## 📊 AI Insights\n\n❌ **Error generating insights**: {str(e)}\n\nPlease check:\n- MCP server is running\n- Network connectivity\n- Leaderboard dataset is accessible"
+
+
+# Global variable to store current trace info for debug_trace MCP tool
+_current_trace_info = {"trace_id": None, "traces_repo": None}
+
+
+def ask_about_trace(question: str) -> str:
+    """
+    Call debug_trace MCP tool to answer questions about current trace
+
+    Args:
+        question: User's question about the trace
+
+    Returns:
+        AI-powered answer from MCP server
+    """
+    global _current_trace_info
+
+    try:
+        if not _current_trace_info["trace_id"] or not _current_trace_info["traces_repo"]:
+            return "❌ **No trace selected**\n\nPlease navigate to a trace first by clicking on a test case from the Run Detail screen."
+
+        if not question or question.strip() == "":
+            return "❌ **Please enter a question**\n\nFor example:\n- Why was the tool called twice?\n- Which step took the most time?\n- Why did this test fail?"
+
+        print(f"[MCP] Calling debug_trace MCP tool for trace_id: {_current_trace_info['trace_id']}")
+
+        # Call MCP server's debug_trace tool
+        answer = call_debug_trace_sync(
+            trace_id=_current_trace_info["trace_id"],
+            traces_repo=_current_trace_info["traces_repo"],
+            question=question
+        )
+
+        return answer
+
+    except Exception as e:
+        print(f"[ERROR] ask_about_trace: {e}")
+        import traceback
+        traceback.print_exc()
+        return f"❌ **Error asking about trace**: {str(e)}\n\nPlease check:\n- MCP server is running\n- Network connectivity\n- Trace data is accessible"
+
+
+# Global variable to store current comparison for compare_runs MCP tool
+_current_comparison = {"run_id_1": None, "run_id_2": None}
+
+
+def handle_compare_runs(run_a_id: str, run_b_id: str, leaderboard_df, components):
+    """
+    Wrapper function to handle run comparison and update global state
+
+    Args:
+        run_a_id: ID of first run (composite key: run_id|timestamp)
+        run_b_id: ID of second run (composite key: run_id|timestamp)
+        leaderboard_df: Full leaderboard dataframe
+        components: Dictionary of Gradio components
+
+    Returns:
+        Dictionary of component updates from on_compare_runs
+    """
+    global _current_comparison
+
+    # Parse composite keys (run_id|timestamp) to extract just the run_id
+    run_a_parts = run_a_id.split('|') if run_a_id else []
+    run_b_parts = run_b_id.split('|') if run_b_id else []
+
+    # Extract just the run_id portion for MCP server
+    run_a_id_parsed = run_a_parts[0] if len(run_a_parts) >= 1 else run_a_id
+    run_b_id_parsed = run_b_parts[0] if len(run_b_parts) >= 1 else run_b_id
+
+    # Update global state for MCP compare_runs tool
+    _current_comparison["run_id_1"] = run_a_id_parsed
+    _current_comparison["run_id_2"] = run_b_id_parsed
+    print(f"[MCP] Updated comparison state: {run_a_id_parsed} vs {run_b_id_parsed}")
+
+    # Call the original compare function (with original composite keys)
+    from screens.compare import on_compare_runs
+    return on_compare_runs(run_a_id, run_b_id, leaderboard_df, components)
+
+
+def generate_ai_comparison(comparison_focus: str) -> str:
+    """
+    Call compare_runs MCP tool to generate AI insights about run comparison
+
+    Args:
+        comparison_focus: Focus area - "comprehensive", "cost", "performance", or "eco_friendly"
+
+    Returns:
+        AI-powered comparison analysis from MCP server
+    """
+    global _current_comparison
+
+    try:
+        if not _current_comparison["run_id_1"] or not _current_comparison["run_id_2"]:
+            return "❌ **No runs selected for comparison**\n\nPlease select two runs and click 'Compare Selected Runs' first."
+
+        print(f"[MCP] Calling compare_runs MCP tool: {_current_comparison['run_id_1']} vs {_current_comparison['run_id_2']}")
+
+        # Call MCP server's compare_runs tool
+        insights = call_compare_runs_sync(
+            run_id_1=_current_comparison["run_id_1"],
+            run_id_2=_current_comparison["run_id_2"],
+            leaderboard_repo="kshitijthakkar/smoltrace-leaderboard",
+            comparison_focus=comparison_focus
+        )
+
+        return insights
+
+    except Exception as e:
+        print(f"[ERROR] generate_ai_comparison: {e}")
+        import traceback
+        traceback.print_exc()
+        return f"❌ **Error generating AI comparison**: {str(e)}\n\nPlease check:\n- MCP server is running\n- Network connectivity\n- Leaderboard dataset is accessible"
+
+
+# Global variable to store current run's results dataset for analyze_results MCP tool
+_current_run_results_repo = None
+
+
+def generate_run_ai_insights(focus_area: str, max_rows: int) -> str:
+    """
+    Call analyze_results MCP tool to generate AI insights about run results
+
+    Args:
+        focus_area: Focus area - "overall", "failures", "performance", or "tools"
+        max_rows: Maximum number of test cases to analyze
+
+    Returns:
+        AI-powered results analysis from MCP server
+    """
+    global _current_run_results_repo
+
+    try:
+        if not _current_run_results_repo:
+            return "❌ **No run selected**\n\nPlease navigate to a run detail first by clicking on a run from the Leaderboard screen."
+
+        print(f"[MCP] Calling analyze_results MCP tool for: {_current_run_results_repo}")
+
+        # Call MCP server's analyze_results tool
+        insights = call_analyze_results_sync(
+            results_repo=_current_run_results_repo,
+            focus_area=focus_area,
+            max_rows=max_rows
+        )
+
+        return insights
+
+    except Exception as e:
+        print(f"[ERROR] generate_run_ai_insights: {e}")
+        import traceback
+        traceback.print_exc()
+        return f"❌ **Error generating run insights**: {str(e)}\n\nPlease check:\n- MCP server is running\n- Network connectivity\n- Results dataset is accessible"


 def on_html_table_row_click(row_index_str):
     """Handle row click from HTML table via JavaScript (hidden textbox bridge)"""
-    global current_selected_run, leaderboard_df_cache
+    global current_selected_run, leaderboard_df_cache, _current_run_results_repo

     print(f"[DEBUG] on_html_table_row_click called with: '{row_index_str}'")

@@ -795,6 +945,10 @@ def on_html_table_row_click(row_index_str):
             selected_row_index: gr.update(value="")
         }

+        # Update global state for MCP analyze_results tool
+        _current_run_results_repo = results_dataset
+        print(f"[MCP] Updated results repo for analyze_results: {results_dataset}")
+
         results_df = data_loader.load_results(results_dataset)

         # Generate performance chart
@@ -909,7 +1063,7 @@ def on_html_table_row_click(row_index_str):

 def load_run_detail(run_id):
     """Load run detail data including results dataset"""
-    global current_selected_run, leaderboard_df_cache
+    global current_selected_run, leaderboard_df_cache, _current_run_results_repo

     try:
         # Find run in cache
@@ -922,6 +1076,10 @@ def load_run_detail(run_id):
         if not results_dataset:
             return pd.DataFrame(), f"# Error\n\nNo results dataset found for this run", ""

+        # Update global state for MCP analyze_results tool
+        _current_run_results_repo = results_dataset
+        print(f"[MCP] Updated results repo for analyze_results (load_run_detail): {results_dataset}")
+
         results_df = data_loader.load_results(results_dataset)

         # Generate performance chart
@@ -994,7 +1152,7 @@ def load_run_detail(run_id):
 # Screen 3 (Run Detail) event handlers
 def on_drilldown_select(evt: gr.SelectData, df):
     """Handle row selection from DrillDown table - EXACT COPY from MockTraceMind"""
-    global current_selected_run, current_drilldown_df
+    global current_selected_run, current_drilldown_df, _current_run_results_repo

     try:
         # Get selected run - use currently displayed dataframe (filtered/sorted)
@@ -1030,6 +1188,10 @@ def on_drilldown_select(evt: gr.SelectData, df):
             run_card_html: gr.update()
         }

+        # Update global state for MCP analyze_results tool
+        _current_run_results_repo = results_dataset
+        print(f"[MCP] Updated results repo for analyze_results (on_drilldown_select): {results_dataset}")
+
         results_df = data_loader.load_results(results_dataset)

         # Generate performance chart
@@ -1145,7 +1307,7 @@ def on_drilldown_select(evt: gr.SelectData, df):

 def on_html_leaderboard_select(evt: gr.SelectData):
     """Handle row selection from HTMLPlus leaderboard (By Model tab)"""
-    global current_selected_run, leaderboard_df_cache
+    global current_selected_run, leaderboard_df_cache, _current_run_results_repo

     try:
         # HTMLPlus returns data attributes from the selected row
@@ -1247,6 +1409,10 @@ def on_html_leaderboard_select(evt: gr.SelectData):
             run_gpu_metrics_json: gr.update()
         }

+        # Update global state for MCP analyze_results tool
+        _current_run_results_repo = results_dataset
+        print(f"[MCP] Updated results repo for analyze_results (on_html_leaderboard_select): {results_dataset}")
+
         results_df = data_loader.load_results(results_dataset)

         # Generate performance chart
@@ -1813,6 +1979,37 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
                 with gr.TabItem("📋 Raw Metrics Data"):
                     run_gpu_metrics_json = gr.JSON(label="GPU Metrics Data")

+                with gr.TabItem("🤖 AI Insights"):
+                    gr.Markdown("### AI-Powered Results Analysis")
+                    gr.Markdown("*Get intelligent insights about test results and optimization recommendations using the MCP server*")
+
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            run_analysis_focus = gr.Dropdown(
+                                label="Analysis Focus",
+                                choices=["overall", "failures", "performance", "tools"],
+                                value="overall",
+                                info="Choose what aspect to focus on in the AI analysis"
+                            )
+                            run_max_rows = gr.Slider(
+                                label="Max Test Cases to Analyze",
+                                minimum=10,
+                                maximum=200,
+                                value=100,
+                                step=10,
+                                info="Limit analysis to reduce processing time"
+                            )
+                        with gr.Column(scale=1):
+                            generate_run_ai_insights_btn = gr.Button(
+                                "🤖 Generate AI Insights",
+                                variant="primary",
+                                size="lg"
+                            )
+
+                    run_ai_insights = gr.Markdown(
+                        "*Click 'Generate AI Insights' to get intelligent analysis powered by the MCP server*"
+                    )
+
     # Screen 4: Trace Detail with Sub-tabs
     with gr.Column(visible=False) as trace_detail_screen:
         with gr.Row():
@@ -2161,7 +2358,7 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:

     # Compare button handler
     compare_components['compare_button'].click(
-        fn=lambda run_a, run_b: on_compare_runs(run_a, run_b, leaderboard_df_cache, compare_components),
+        fn=lambda run_a, run_b: handle_compare_runs(run_a, run_b, leaderboard_df_cache, compare_components),
         inputs=[
             compare_components['compare_run_a_dropdown'],
             compare_components['compare_run_b_dropdown']
@@ -2177,6 +2374,20 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
         ]
     )

+    # Wire up AI comparison insights button (MCP compare_runs tool)
+    compare_components['generate_ai_comparison_btn'].click(
+        fn=generate_ai_comparison,
+        inputs=[compare_components['comparison_focus']],
+        outputs=[compare_components['ai_comparison_insights']]
+    )
+
+    # Wire up run AI insights button (MCP analyze_results tool)
+    generate_run_ai_insights_btn.click(
+        fn=generate_run_ai_insights,
+        inputs=[run_analysis_focus, run_max_rows],
+        outputs=[run_ai_insights]
+    )
+
     # Back to leaderboard from compare
     compare_components['back_to_leaderboard_btn'].click(
         fn=navigate_to_leaderboard,
@@ -2236,6 +2447,12 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
         outputs=[run_detail_screen, trace_detail_screen]
     )

+    # Wire up trace AI question button (MCP debug_trace tool)
+    trace_ask_btn.click(
+        fn=ask_about_trace,
+        inputs=[trace_question],
+        outputs=[trace_answer]
+    )

     # HTML table row click handler (JavaScript bridge via hidden textbox)
     selected_row_index.change(
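
A note on the composite run IDs mentioned in the commit message: the compare dropdowns return values of the form run_id|timestamp, and handle_compare_runs forwards the full key to the existing on_compare_runs path while storing only the run_id part for the MCP compare_runs tool. A tiny illustration of the parsing, using a made-up key:

```python
# Hypothetical dropdown value; handle_compare_runs splits on '|' and keeps the first part.
composite_key = "gpt-4o-mini_20250105_1432|2025-01-05T14:32:10"  # made-up example value
run_id = composite_key.split("|")[0] if composite_key else composite_key
print(run_id)  # -> "gpt-4o-mini_20250105_1432"
```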
screens/compare.py CHANGED

@@ -307,6 +307,27 @@ def create_compare_ui():
                     elem_id="comparison-card-html"
                 )

+            with gr.TabItem("🤖 AI Insights"):
+                gr.Markdown("### AI-Powered Comparison Analysis")
+                gr.Markdown("*Get intelligent insights about the differences between these runs using the MCP server*")
+
+                with gr.Row():
+                    components['comparison_focus'] = gr.Dropdown(
+                        label="Analysis Focus",
+                        choices=["comprehensive", "cost", "performance", "eco_friendly"],
+                        value="comprehensive",
+                        info="Choose what aspect to focus on in the AI analysis"
+                    )
+                    components['generate_ai_comparison_btn'] = gr.Button(
+                        "🤖 Generate AI Insights",
+                        variant="primary",
+                        size="lg"
+                    )
+
+                components['ai_comparison_insights'] = gr.Markdown(
+                    "*Click 'Generate AI Insights' to get intelligent analysis powered by the MCP server*"
+                )
+
     components['comparison_output'] = comparison_output

     return compare_screen, components
screens/mcp_helpers.py ADDED

@@ -0,0 +1,245 @@
+"""
+MCP Helper Functions for TraceMind-AI Screens
+Provides simplified interfaces to call MCP server tools from various screens
+"""
+
+import os
+from gradio_client import Client
+from typing import Optional, Dict, Any
+import json
+
+
+# MCP Server URL (from environment or default)
+MCP_SERVER_URL = os.getenv(
+    "MCP_SERVER_URL",
+    "https://mcp-1st-birthday-tracemind-mcp-server.hf.space/"
+)
+
+
+def get_mcp_client() -> Client:
+    """
+    Get Gradio client for MCP server
+
+    Returns:
+        gradio_client.Client instance
+    """
+    return Client(MCP_SERVER_URL)
+
+
+async def call_analyze_leaderboard(
+    leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
+    metric_focus: str = "overall",
+    time_range: str = "last_week",
+    top_n: int = 5
+) -> str:
+    """
+    Call the analyze_leaderboard MCP tool
+
+    Args:
+        leaderboard_repo: HuggingFace dataset repository
+        metric_focus: Focus area - "overall", "accuracy", "cost", "latency", or "co2"
+        time_range: Time range - "last_week", "last_month", or "all_time"
+        top_n: Number of top models to highlight (3-10)
+
+    Returns:
+        Markdown-formatted analysis from Gemini
+    """
+    try:
+        client = get_mcp_client()
+        result = client.predict(
+            repo=leaderboard_repo,
+            metric=metric_focus,
+            time_range=time_range,
+            top_n=top_n,
+            api_name="/run_analyze_leaderboard"
+        )
+        return result
+    except Exception as e:
+        return f"❌ **Error calling analyze_leaderboard**: {str(e)}\n\nPlease check:\n- MCP server is running\n- Network connectivity\n- API parameters are correct"
+
+
+async def call_debug_trace(
+    trace_id: str,
+    traces_repo: str,
+    question: str = "Analyze this trace and explain what happened"
+) -> str:
+    """
+    Call the debug_trace MCP tool
+
+    Args:
+        trace_id: Unique identifier for the trace
+        traces_repo: HuggingFace dataset repository with trace data
+        question: Specific question about the trace
+
+    Returns:
+        Markdown-formatted debug analysis from Gemini
+    """
+    try:
+        client = get_mcp_client()
+        result = client.predict(
+            trace_id_val=trace_id,
+            traces_repo_val=traces_repo,
+            question_val=question,
+            api_name="/run_debug_trace"
+        )
+        return result
+    except Exception as e:
+        return f"❌ **Error calling debug_trace**: {str(e)}\n\nPlease check:\n- Trace ID exists in dataset\n- Traces repository is accessible\n- MCP server is running"
+
+
+async def call_compare_runs(
+    run_id_1: str,
+    run_id_2: str,
+    leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
+    comparison_focus: str = "comprehensive"
+) -> str:
+    """
+    Call the compare_runs MCP tool
+
+    Args:
+        run_id_1: First run ID from leaderboard
+        run_id_2: Second run ID to compare against
+        leaderboard_repo: HuggingFace dataset repository
+        comparison_focus: Focus area - "comprehensive", "cost", "performance", or "eco_friendly"
+
+    Returns:
+        Markdown-formatted comparison analysis from Gemini
+    """
+    try:
+        client = get_mcp_client()
+        result = client.predict(
+            run_id_1=run_id_1,
+            run_id_2=run_id_2,
+            focus=comparison_focus,
+            repo=leaderboard_repo,
+            api_name="/run_compare_runs"
+        )
+        return result
+    except Exception as e:
+        return f"❌ **Error calling compare_runs**: {str(e)}\n\nPlease check:\n- Both run IDs exist in leaderboard\n- MCP server is running\n- Network connectivity"
+
+
+async def call_analyze_results(
+    results_repo: str,
+    focus_area: str = "overall",
+    max_rows: int = 100
+) -> str:
+    """
+    Call the analyze_results MCP tool
+
+    Args:
+        results_repo: HuggingFace dataset repository with results data
+        focus_area: Focus area - "overall", "failures", "performance", or "tools"
+        max_rows: Maximum number of test cases to analyze
+
+    Returns:
+        Markdown-formatted results analysis from Gemini
+    """
+    try:
+        client = get_mcp_client()
+        result = client.predict(
+            repo=results_repo,
+            focus=focus_area,
+            max_rows=max_rows,
+            api_name="/run_analyze_results"
+        )
+        return result
+    except Exception as e:
+        return f"❌ **Error calling analyze_results**: {str(e)}\n\nPlease check:\n- Results repository exists and is accessible\n- MCP server is running\n- Network connectivity"
+
+
+def call_analyze_leaderboard_sync(
+    leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
+    metric_focus: str = "overall",
+    time_range: str = "last_week",
+    top_n: int = 5
+) -> str:
+    """
+    Synchronous version of call_analyze_leaderboard for Gradio event handlers
+
+    Args:
+        leaderboard_repo: HuggingFace dataset repository
+        metric_focus: Focus area - "overall", "accuracy", "cost", "latency", or "co2"
+        time_range: Time range - "last_week", "last_month", or "all_time"
+        top_n: Number of top models to highlight (3-10)
+
+    Returns:
+        Markdown-formatted analysis from Gemini
+    """
+    try:
+        client = get_mcp_client()
+        result = client.predict(
+            repo=leaderboard_repo,
+            metric=metric_focus,
+            time_range=time_range,
+            top_n=top_n,
+            api_name="/run_analyze_leaderboard"
+        )
+        return result
+    except Exception as e:
+        return f"❌ **Error calling analyze_leaderboard**: {str(e)}\n\nPlease check:\n- MCP server is running at {MCP_SERVER_URL}\n- Network connectivity\n- API parameters are correct"
+
+
+def call_debug_trace_sync(
+    trace_id: str,
+    traces_repo: str,
+    question: str = "Analyze this trace and explain what happened"
+) -> str:
+    """
+    Synchronous version of call_debug_trace for Gradio event handlers
+    """
+    try:
+        client = get_mcp_client()
+        result = client.predict(
+            trace_id_val=trace_id,
+            traces_repo_val=traces_repo,
+            question_val=question,
+            api_name="/run_debug_trace"
+        )
+        return result
+    except Exception as e:
+        return f"❌ **Error calling debug_trace**: {str(e)}"
+
+
+def call_compare_runs_sync(
+    run_id_1: str,
+    run_id_2: str,
+    leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
+    comparison_focus: str = "comprehensive"
+) -> str:
+    """
+    Synchronous version of call_compare_runs for Gradio event handlers
+    """
+    try:
+        client = get_mcp_client()
+        result = client.predict(
+            run_id_1=run_id_1,
+            run_id_2=run_id_2,
+            focus=comparison_focus,
+            repo=leaderboard_repo,
+            api_name="/run_compare_runs"
+        )
+        return result
+    except Exception as e:
+        return f"❌ **Error calling compare_runs**: {str(e)}"
+
+
+def call_analyze_results_sync(
+    results_repo: str,
+    focus_area: str = "overall",
+    max_rows: int = 100
+) -> str:
+    """
+    Synchronous version of call_analyze_results for Gradio event handlers
+    """
+    try:
+        client = get_mcp_client()
+        result = client.predict(
+            repo=results_repo,
+            focus=focus_area,
+            max_rows=max_rows,
+            api_name="/run_analyze_results"
+        )
+        return result
+    except Exception as e:
+        return f"❌ **Error calling analyze_results**: {str(e)}"
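
Because the helpers are plain gradio_client wrappers, they can also be exercised outside the Gradio app, which is handy for smoke-testing the MCP endpoints. A small sketch follows; the results repo is a placeholder, and the MCP_SERVER_URL override must happen before the module is imported because the URL is read at import time.

```python
# Smoke-test the sync helpers from a script or REPL (placeholder repo; adjust to real data).
import os

# Optional override; must be set before importing screens.mcp_helpers,
# which reads MCP_SERVER_URL at import time.
os.environ.setdefault("MCP_SERVER_URL", "https://mcp-1st-birthday-tracemind-mcp-server.hf.space/")

from screens.mcp_helpers import call_analyze_leaderboard_sync, call_analyze_results_sync

print(call_analyze_leaderboard_sync(metric_focus="cost", top_n=3))
print(call_analyze_results_sync(results_repo="<user>/<results-dataset>", focus_area="failures", max_rows=50))
```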