Mandark-droid committed on
Commit
84a5f9c
·
1 Parent(s): efe1cbf

Fix: Remove gr.State parameter from MCP tools to resolve ASGI errors

Browse files

- Changed gemini_client parameter from GeminiClient object to optional gemini_api_key string
- MCP tools now initialize GeminiClient internally using provided key or environment variable
- This fixes the ASGI protocol error when Claude Desktop calls the MCP tools
- All 4 affected tools updated: analyze_leaderboard, debug_trace, estimate_cost, compare_runs
- Updated app.py wrapper functions to pass gemini_api_key instead of gemini_client object
- Added .gradio/ to .gitignore

Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +16 -36
  3. mcp_tools.py +20 -12
.gitignore CHANGED
@@ -29,6 +29,7 @@ Thumbs.db
29
  # Gradio
30
  flagged/
31
  gradio_cached_examples/
 
32
 
33
  # Logs
34
  *.log
 
29
  # Gradio
30
  flagged/
31
  gradio_cached_examples/
32
+ .gradio/
33
 
34
  # Logs
35
  *.log
app.py CHANGED
@@ -267,21 +267,16 @@ def create_gradio_ui():
267
  str: Markdown-formatted analysis with top performers, trends, and recommendations
268
  """
269
  try:
270
- # Create GeminiClient with user-provided key or fallback to default
271
- if gemini_key and gemini_key.strip():
272
- client = GeminiClient(api_key=gemini_key)
273
- elif default_gemini_client:
274
- client = default_gemini_client
275
- else:
276
- return "❌ **Error**: No Gemini API key configured. Please set it in the Settings tab."
277
 
278
  result = await analyze_leaderboard(
279
- gemini_client=client,
280
  leaderboard_repo=repo,
281
  metric_focus=metric,
282
  time_range=time_range,
283
  top_n=int(top_n),
284
- hf_token=hf_token if hf_token and hf_token.strip() else None
 
285
  )
286
  return result
287
  except Exception as e:
@@ -341,20 +336,15 @@ def create_gradio_ui():
341
  if not trace_id_val or not traces_repo_val:
342
  return "❌ **Error**: Please provide both Trace ID and Traces Repository"
343
 
344
- # Create GeminiClient with user-provided key or fallback to default
345
- if gemini_key and gemini_key.strip():
346
- client = GeminiClient(api_key=gemini_key)
347
- elif default_gemini_client:
348
- client = default_gemini_client
349
- else:
350
- return "❌ **Error**: No Gemini API key configured. Please set it in the Settings tab."
351
 
352
  result = await debug_trace(
353
- gemini_client=client,
354
  trace_id=trace_id_val,
355
  traces_repo=traces_repo_val,
356
  question=question_val or "Analyze this trace",
357
- hf_token=hf_token if hf_token and hf_token.strip() else None
 
358
  )
359
  return result
360
  except Exception as e:
@@ -422,20 +412,15 @@ def create_gradio_ui():
422
  if not model:
423
  return "❌ **Error**: Please provide a model name"
424
 
425
- # Create GeminiClient with user-provided key or fallback to default
426
- if gemini_key and gemini_key.strip():
427
- client = GeminiClient(api_key=gemini_key)
428
- elif default_gemini_client:
429
- client = default_gemini_client
430
- else:
431
- return "❌ **Error**: No Gemini API key configured. Please set it in the Settings tab."
432
 
433
  result = await estimate_cost(
434
- gemini_client=client,
435
  model=model,
436
  agent_type=agent_type,
437
  num_tests=int(num_tests),
438
- hardware=hardware
 
439
  )
440
  return result
441
  except Exception as e:
@@ -506,21 +491,16 @@ def create_gradio_ui():
506
  str: Markdown-formatted comparative analysis with winners, trade-offs, and recommendations
507
  """
508
  try:
509
- # Create GeminiClient with user-provided key or fallback to default
510
- if gemini_key and gemini_key.strip():
511
- client = GeminiClient(api_key=gemini_key)
512
- elif default_gemini_client:
513
- client = default_gemini_client
514
- else:
515
- return "❌ **Error**: No Gemini API key configured. Please set it in the Settings tab."
516
 
517
  result = await compare_runs(
518
- gemini_client=client,
519
  run_id_1=run_id_1,
520
  run_id_2=run_id_2,
521
  leaderboard_repo=repo,
522
  comparison_focus=focus,
523
- hf_token=hf_token if hf_token and hf_token.strip() else None
 
524
  )
525
  return result
526
  except Exception as e:
 
267
  str: Markdown-formatted analysis with top performers, trends, and recommendations
268
  """
269
  try:
270
+ # Use user-provided key or fall back to environment variable
271
+ api_key = gemini_key if gemini_key and gemini_key.strip() else None
 
 
 
 
 
272
 
273
  result = await analyze_leaderboard(
 
274
  leaderboard_repo=repo,
275
  metric_focus=metric,
276
  time_range=time_range,
277
  top_n=int(top_n),
278
+ hf_token=hf_token if hf_token and hf_token.strip() else None,
279
+ gemini_api_key=api_key
280
  )
281
  return result
282
  except Exception as e:
 
336
  if not trace_id_val or not traces_repo_val:
337
  return "❌ **Error**: Please provide both Trace ID and Traces Repository"
338
 
339
+ # Use user-provided key or fall back to environment variable
340
+ api_key = gemini_key if gemini_key and gemini_key.strip() else None
 
 
 
 
 
341
 
342
  result = await debug_trace(
 
343
  trace_id=trace_id_val,
344
  traces_repo=traces_repo_val,
345
  question=question_val or "Analyze this trace",
346
+ hf_token=hf_token if hf_token and hf_token.strip() else None,
347
+ gemini_api_key=api_key
348
  )
349
  return result
350
  except Exception as e:
 
412
  if not model:
413
  return "❌ **Error**: Please provide a model name"
414
 
415
+ # Use user-provided key or fall back to environment variable
416
+ api_key = gemini_key if gemini_key and gemini_key.strip() else None
 
 
 
 
 
417
 
418
  result = await estimate_cost(
 
419
  model=model,
420
  agent_type=agent_type,
421
  num_tests=int(num_tests),
422
+ hardware=hardware,
423
+ gemini_api_key=api_key
424
  )
425
  return result
426
  except Exception as e:
 
491
  str: Markdown-formatted comparative analysis with winners, trade-offs, and recommendations
492
  """
493
  try:
494
+ # Use user-provided key or fall back to environment variable
495
+ api_key = gemini_key if gemini_key and gemini_key.strip() else None
 
 
 
 
 
496
 
497
  result = await compare_runs(
 
498
  run_id_1=run_id_1,
499
  run_id_2=run_id_2,
500
  leaderboard_repo=repo,
501
  comparison_focus=focus,
502
+ hf_token=hf_token if hf_token and hf_token.strip() else None,
503
+ gemini_api_key=api_key
504
  )
505
  return result
506
  except Exception as e:
mcp_tools.py CHANGED
@@ -23,12 +23,12 @@ from gemini_client import GeminiClient
23
 
24
 
25
  async def analyze_leaderboard(
26
- gemini_client: GeminiClient,
27
  leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
28
  metric_focus: str = "overall",
29
  time_range: str = "last_week",
30
  top_n: int = 5,
31
- hf_token: Optional[str] = None
 
32
  ) -> str:
33
  """
34
  Analyze evaluation leaderboard and generate AI-powered insights.
@@ -38,17 +38,19 @@ async def analyze_leaderboard(
38
  trends, cost/performance trade-offs, and actionable recommendations.
39
 
40
  Args:
41
- gemini_client (GeminiClient): Initialized Gemini client for AI analysis
42
  leaderboard_repo (str): HuggingFace dataset repository containing leaderboard data. Default: "kshitijthakkar/smoltrace-leaderboard"
43
  metric_focus (str): Primary metric to focus analysis on. Options: "overall", "accuracy", "cost", "latency", "co2". Default: "overall"
44
  time_range (str): Time range for analysis. Options: "last_week", "last_month", "all_time". Default: "last_week"
45
  top_n (int): Number of top models to highlight in analysis. Must be between 3 and 10. Default: 5
46
  hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
 
47
 
48
  Returns:
49
  str: Markdown-formatted analysis with top performers, insights, trade-offs, and recommendations
50
  """
51
  try:
 
 
52
  # Load leaderboard data from HuggingFace
53
  print(f"Loading leaderboard from {leaderboard_repo}...")
54
 
@@ -120,11 +122,11 @@ async def analyze_leaderboard(
120
 
121
 
122
  async def debug_trace(
123
- gemini_client: GeminiClient,
124
  trace_id: str,
125
  traces_repo: str,
126
  question: str = "Analyze this trace and explain what happened",
127
- hf_token: Optional[str] = None
 
128
  ) -> str:
129
  """
130
  Debug a specific agent execution trace using OpenTelemetry data.
@@ -134,16 +136,18 @@ async def debug_trace(
134
  identify bottlenecks, and explain agent behavior.
135
 
136
  Args:
137
- gemini_client (GeminiClient): Initialized Gemini client for AI analysis
138
  trace_id (str): Unique identifier for the trace to analyze (e.g., "trace_abc123")
139
  traces_repo (str): HuggingFace dataset repository containing trace data (e.g., "username/agent-traces-model-timestamp")
140
  question (str): Specific question about the trace. Default: "Analyze this trace and explain what happened"
141
  hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
 
142
 
143
  Returns:
144
  str: Markdown-formatted debug analysis with step-by-step breakdown, timing information, and answer to the question
145
  """
146
  try:
 
 
147
  # Load traces dataset
148
  print(f"Loading traces from {traces_repo}...")
149
 
@@ -215,11 +219,11 @@ async def debug_trace(
215
 
216
 
217
  async def estimate_cost(
218
- gemini_client: GeminiClient,
219
  model: str,
220
  agent_type: str,
221
  num_tests: int = 100,
222
- hardware: str = "auto"
 
223
  ) -> str:
224
  """
225
  Estimate the cost, duration, and CO2 emissions of running agent evaluations.
@@ -229,16 +233,18 @@ async def estimate_cost(
229
  to provide cost breakdown and optimization recommendations.
230
 
231
  Args:
232
- gemini_client (GeminiClient): Initialized Gemini client for AI analysis
233
  model (str): Model identifier in litellm format (e.g., "openai/gpt-4", "meta-llama/Llama-3.1-8B")
234
  agent_type (str): Type of agent capabilities to test. Options: "tool", "code", "both"
235
  num_tests (int): Number of test cases to run. Must be between 10 and 1000. Default: 100
236
  hardware (str): Hardware type for HuggingFace Jobs. Options: "auto", "cpu", "gpu_a10", "gpu_h200". Default: "auto"
 
237
 
238
  Returns:
239
  str: Markdown-formatted cost estimate with breakdown of LLM costs, HF Jobs costs, duration, CO2 emissions, and optimization tips
240
  """
241
  try:
 
 
242
  # Determine if API or local model
243
  is_api_model = any(provider in model.lower() for provider in ["openai", "anthropic", "google", "cohere"])
244
 
@@ -338,12 +344,12 @@ async def estimate_cost(
338
 
339
 
340
  async def compare_runs(
341
- gemini_client: GeminiClient,
342
  run_id_1: str,
343
  run_id_2: str,
344
  leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
345
  comparison_focus: str = "comprehensive",
346
- hf_token: Optional[str] = None
 
347
  ) -> str:
348
  """
349
  Compare two evaluation runs and generate AI-powered comparative analysis.
@@ -353,17 +359,19 @@ async def compare_runs(
353
  success rate, cost efficiency, speed, environmental impact, and use case recommendations.
354
 
355
  Args:
356
- gemini_client (GeminiClient): Initialized Gemini client for AI analysis
357
  run_id_1 (str): First run ID to compare
358
  run_id_2 (str): Second run ID to compare
359
  leaderboard_repo (str): HuggingFace dataset repository containing leaderboard data. Default: "kshitijthakkar/smoltrace-leaderboard"
360
  comparison_focus (str): Focus area for comparison. Options: "comprehensive", "cost", "performance", "eco_friendly". Default: "comprehensive"
361
  hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
 
362
 
363
  Returns:
364
  str: Markdown-formatted comparative analysis with winner for each category, trade-offs, and use case recommendations
365
  """
366
  try:
 
 
367
  # Load leaderboard data
368
  # Use user-provided token or fall back to environment variable
369
  token = hf_token if hf_token else os.getenv("HF_TOKEN")
 
23
 
24
 
25
  async def analyze_leaderboard(
 
26
  leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
27
  metric_focus: str = "overall",
28
  time_range: str = "last_week",
29
  top_n: int = 5,
30
+ hf_token: Optional[str] = None,
31
+ gemini_api_key: Optional[str] = None
32
  ) -> str:
33
  """
34
  Analyze evaluation leaderboard and generate AI-powered insights.
 
38
  trends, cost/performance trade-offs, and actionable recommendations.
39
 
40
  Args:
 
41
  leaderboard_repo (str): HuggingFace dataset repository containing leaderboard data. Default: "kshitijthakkar/smoltrace-leaderboard"
42
  metric_focus (str): Primary metric to focus analysis on. Options: "overall", "accuracy", "cost", "latency", "co2". Default: "overall"
43
  time_range (str): Time range for analysis. Options: "last_week", "last_month", "all_time". Default: "last_week"
44
  top_n (int): Number of top models to highlight in analysis. Must be between 3 and 10. Default: 5
45
  hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
46
+ gemini_api_key (Optional[str]): Google Gemini API key. If None, uses GEMINI_API_KEY environment variable.
47
 
48
  Returns:
49
  str: Markdown-formatted analysis with top performers, insights, trade-offs, and recommendations
50
  """
51
  try:
52
+ # Initialize Gemini client with provided key or from environment
53
+ gemini_client = GeminiClient(api_key=gemini_api_key) if gemini_api_key else GeminiClient()
54
  # Load leaderboard data from HuggingFace
55
  print(f"Loading leaderboard from {leaderboard_repo}...")
56
 
 
122
 
123
 
124
  async def debug_trace(
 
125
  trace_id: str,
126
  traces_repo: str,
127
  question: str = "Analyze this trace and explain what happened",
128
+ hf_token: Optional[str] = None,
129
+ gemini_api_key: Optional[str] = None
130
  ) -> str:
131
  """
132
  Debug a specific agent execution trace using OpenTelemetry data.
 
136
  identify bottlenecks, and explain agent behavior.
137
 
138
  Args:
 
139
  trace_id (str): Unique identifier for the trace to analyze (e.g., "trace_abc123")
140
  traces_repo (str): HuggingFace dataset repository containing trace data (e.g., "username/agent-traces-model-timestamp")
141
  question (str): Specific question about the trace. Default: "Analyze this trace and explain what happened"
142
  hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
143
+ gemini_api_key (Optional[str]): Google Gemini API key. If None, uses GEMINI_API_KEY environment variable.
144
 
145
  Returns:
146
  str: Markdown-formatted debug analysis with step-by-step breakdown, timing information, and answer to the question
147
  """
148
  try:
149
+ # Initialize Gemini client with provided key or from environment
150
+ gemini_client = GeminiClient(api_key=gemini_api_key) if gemini_api_key else GeminiClient()
151
  # Load traces dataset
152
  print(f"Loading traces from {traces_repo}...")
153
 
 
219
 
220
 
221
  async def estimate_cost(
 
222
  model: str,
223
  agent_type: str,
224
  num_tests: int = 100,
225
+ hardware: str = "auto",
226
+ gemini_api_key: Optional[str] = None
227
  ) -> str:
228
  """
229
  Estimate the cost, duration, and CO2 emissions of running agent evaluations.
 
233
  to provide cost breakdown and optimization recommendations.
234
 
235
  Args:
 
236
  model (str): Model identifier in litellm format (e.g., "openai/gpt-4", "meta-llama/Llama-3.1-8B")
237
  agent_type (str): Type of agent capabilities to test. Options: "tool", "code", "both"
238
  num_tests (int): Number of test cases to run. Must be between 10 and 1000. Default: 100
239
  hardware (str): Hardware type for HuggingFace Jobs. Options: "auto", "cpu", "gpu_a10", "gpu_h200". Default: "auto"
240
+ gemini_api_key (Optional[str]): Google Gemini API key. If None, uses GEMINI_API_KEY environment variable.
241
 
242
  Returns:
243
  str: Markdown-formatted cost estimate with breakdown of LLM costs, HF Jobs costs, duration, CO2 emissions, and optimization tips
244
  """
245
  try:
246
+ # Initialize Gemini client with provided key or from environment
247
+ gemini_client = GeminiClient(api_key=gemini_api_key) if gemini_api_key else GeminiClient()
248
  # Determine if API or local model
249
  is_api_model = any(provider in model.lower() for provider in ["openai", "anthropic", "google", "cohere"])
250
 
 
344
 
345
 
346
  async def compare_runs(
 
347
  run_id_1: str,
348
  run_id_2: str,
349
  leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
350
  comparison_focus: str = "comprehensive",
351
+ hf_token: Optional[str] = None,
352
+ gemini_api_key: Optional[str] = None
353
  ) -> str:
354
  """
355
  Compare two evaluation runs and generate AI-powered comparative analysis.
 
359
  success rate, cost efficiency, speed, environmental impact, and use case recommendations.
360
 
361
  Args:
 
362
  run_id_1 (str): First run ID to compare
363
  run_id_2 (str): Second run ID to compare
364
  leaderboard_repo (str): HuggingFace dataset repository containing leaderboard data. Default: "kshitijthakkar/smoltrace-leaderboard"
365
  comparison_focus (str): Focus area for comparison. Options: "comprehensive", "cost", "performance", "eco_friendly". Default: "comprehensive"
366
  hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
367
+ gemini_api_key (Optional[str]): Google Gemini API key. If None, uses GEMINI_API_KEY environment variable.
368
 
369
  Returns:
370
  str: Markdown-formatted comparative analysis with winner for each category, trade-offs, and use case recommendations
371
  """
372
  try:
373
+ # Initialize Gemini client with provided key or from environment
374
+ gemini_client = GeminiClient(api_key=gemini_api_key) if gemini_api_key else GeminiClient()
375
  # Load leaderboard data
376
  # Use user-provided token or fall back to environment variable
377
  token = hf_token if hf_token else os.getenv("HF_TOKEN")