Mandark-droid committed on
Commit
fbd2ae8
·
1 Parent(s): 0c0a9f1

fix: Remove all API key parameters from MCP tools - use environment variables only

Browse files

- Removed hf_token and gemini_api_key parameters from all MCP tool functions
- Removed Settings tab from UI (no more API key inputs)
- All SMOLTRACE datasets are public - no HF token required
- Only GEMINI_API_KEY environment variable needed
- Fixes security vulnerability where API keys were exposed in MCP examples

Security Impact: ZERO risk of API key exposure in MCP examples

Files changed (2) hide show
  1. app.py +18 -188
  2. mcp_tools.py +33 -69
app.py CHANGED
@@ -80,151 +80,7 @@ def create_gradio_ui():
80
 
81
  **MCP Endpoint (SSE - Deprecated)**: `https://mcp-1st-birthday-tracemind-mcp-server.hf.space/gradio_api/mcp/sse`
82
  """)
83
-
84
- # Session state for API keys
85
- gemini_key_state = gr.State(value=os.getenv("GEMINI_API_KEY", ""))
86
- hf_token_state = gr.State(value=os.getenv("HF_TOKEN", ""))
87
-
88
  with gr.Tabs():
89
- # Tab 0: Settings (API Keys)
90
- with gr.Tab("⚙️ Settings"):
91
- gr.Markdown("""
92
- ## 🔑 API Key Configuration
93
-
94
- Configure your API keys here. These will override environment variables for this session only.
95
-
96
- **Why configure here?**
97
- - No need to set environment variables
98
- - Test with different API keys easily
99
- - Secure session-only storage (not persisted)
100
-
101
- **Security Note**: API keys are stored in session state only and are not saved permanently.
102
- """)
103
-
104
- with gr.Row():
105
- with gr.Column():
106
- gr.Markdown("### Google Gemini API Key")
107
- gemini_key_input = gr.Textbox(
108
- label="Gemini API Key",
109
- placeholder="Enter your Google Gemini API key",
110
- type="password",
111
- value=os.getenv("GEMINI_API_KEY", ""),
112
- info="Get your key from: https://aistudio.google.com/app/apikey"
113
- )
114
- gemini_status = gr.Markdown("Status: Using environment variable" if os.getenv("GEMINI_API_KEY") else "⚠️ Status: No API key configured")
115
-
116
- with gr.Column():
117
- gr.Markdown("### HuggingFace Token")
118
- hf_token_input = gr.Textbox(
119
- label="HuggingFace Token",
120
- placeholder="Enter your HuggingFace token",
121
- type="password",
122
- value=os.getenv("HF_TOKEN", ""),
123
- info="Get your token from: https://huggingface.co/settings/tokens"
124
- )
125
- hf_status = gr.Markdown("Status: Using environment variable" if os.getenv("HF_TOKEN") else "⚠️ Status: No token configured")
126
-
127
- with gr.Row():
128
- save_keys_button = gr.Button("💾 Save API Keys for This Session", variant="primary", size="lg")
129
- clear_keys_button = gr.Button("🗑️ Clear Session Keys", variant="secondary")
130
-
131
- keys_save_status = gr.Markdown("")
132
-
133
- def save_api_keys(gemini_key, hf_token):
134
- """
135
- Save API keys to session state.
136
-
137
- Args:
138
- gemini_key (str): Google Gemini API key
139
- hf_token (str): HuggingFace token
140
-
141
- Returns:
142
- tuple: Updated state values and status message
143
- """
144
- status_messages = []
145
-
146
- # Validate and save Gemini key
147
- if gemini_key and gemini_key.strip():
148
- try:
149
- # Test the key by creating a client
150
- test_client = GeminiClient(api_key=gemini_key.strip())
151
- gemini_saved = gemini_key.strip()
152
- status_messages.append("✅ Gemini API key validated and saved")
153
- except Exception as e:
154
- gemini_saved = os.getenv("GEMINI_API_KEY", "")
155
- status_messages.append(f"❌ Gemini API key invalid: {str(e)}")
156
- else:
157
- gemini_saved = os.getenv("GEMINI_API_KEY", "")
158
- status_messages.append("ℹ️ Gemini API key cleared (using environment variable if set)")
159
-
160
- # Validate and save HF token
161
- if hf_token and hf_token.strip():
162
- hf_saved = hf_token.strip()
163
- status_messages.append("✅ HuggingFace token saved")
164
- else:
165
- hf_saved = os.getenv("HF_TOKEN", "")
166
- status_messages.append("ℹ️ HuggingFace token cleared (using environment variable if set)")
167
-
168
- status_markdown = "\n\n".join(status_messages)
169
-
170
- return gemini_saved, hf_saved, f"### Save Status\n\n{status_markdown}"
171
-
172
- def clear_api_keys():
173
- """
174
- Clear session API keys and revert to environment variables.
175
-
176
- Returns:
177
- tuple: Cleared state values and status message
178
- """
179
- env_gemini = os.getenv("GEMINI_API_KEY", "")
180
- env_hf = os.getenv("HF_TOKEN", "")
181
-
182
- status = "### Keys Cleared\n\nReverted to environment variables.\n\n"
183
- if env_gemini:
184
- status += "✅ Using GEMINI_API_KEY from environment\n\n"
185
- else:
186
- status += "⚠️ No GEMINI_API_KEY in environment\n\n"
187
-
188
- if env_hf:
189
- status += "✅ Using HF_TOKEN from environment"
190
- else:
191
- status += "⚠️ No HF_TOKEN in environment"
192
-
193
- return env_gemini, env_hf, status
194
-
195
- save_keys_button.click(
196
- fn=save_api_keys,
197
- inputs=[gemini_key_input, hf_token_input],
198
- outputs=[gemini_key_state, hf_token_state, keys_save_status]
199
- )
200
-
201
- clear_keys_button.click(
202
- fn=clear_api_keys,
203
- inputs=[],
204
- outputs=[gemini_key_state, hf_token_state, keys_save_status]
205
- )
206
-
207
- gr.Markdown("""
208
- ---
209
-
210
- ### How It Works
211
-
212
- 1. **Enter your API keys** in the fields above
213
- 2. **Click "Save API Keys"** to validate and store them for this session
214
- 3. **Use any tool** - they will automatically use your configured keys
215
- 4. **Keys are session-only** - they won't be saved when you close the browser
216
-
217
- ### Environment Variables (Alternative)
218
-
219
- You can also set these as environment variables:
220
- ```bash
221
- export GEMINI_API_KEY="your-key-here"
222
- export HF_TOKEN="your-token-here"
223
- ```
224
-
225
- UI-configured keys will always override environment variables.
226
- """)
227
-
228
  # Tab 1: Analyze Leaderboard
229
  with gr.Tab("📊 Analyze Leaderboard"):
230
  gr.Markdown("### Get AI-powered insights from evaluation leaderboard")
@@ -258,7 +114,7 @@ def create_gradio_ui():
258
  with gr.Column():
259
  lb_output = gr.Markdown(label="Analysis Results")
260
 
261
- async def run_analyze_leaderboard(repo, metric, time_range, top_n, gemini_key, hf_token):
262
  """
263
  Analyze agent evaluation leaderboard and generate AI-powered insights.
264
 
@@ -278,16 +134,11 @@ def create_gradio_ui():
278
  str: Markdown-formatted analysis with top performers, trends, and recommendations
279
  """
280
  try:
281
- # Use user-provided key or fall back to environment variable
282
- api_key = gemini_key if gemini_key and gemini_key.strip() else None
283
-
284
  result = await analyze_leaderboard(
285
  leaderboard_repo=repo,
286
  metric_focus=metric,
287
  time_range=time_range,
288
- top_n=int(top_n),
289
- hf_token=hf_token if hf_token and hf_token.strip() else None,
290
- gemini_api_key=api_key
291
  )
292
  return result
293
  except Exception as e:
@@ -295,7 +146,7 @@ def create_gradio_ui():
295
 
296
  lb_button.click(
297
  fn=run_analyze_leaderboard,
298
- inputs=[lb_repo, lb_metric, lb_time, lb_top_n, gemini_key_state, hf_token_state],
299
  outputs=[lb_output]
300
  )
301
 
@@ -325,7 +176,7 @@ def create_gradio_ui():
325
  with gr.Column():
326
  trace_output = gr.Markdown(label="Debug Analysis")
327
 
328
- async def run_debug_trace(trace_id_val, traces_repo_val, question_val, gemini_key, hf_token):
329
  """
330
  Debug a specific agent execution trace using OpenTelemetry data.
331
 
@@ -347,23 +198,17 @@ def create_gradio_ui():
347
  if not trace_id_val or not traces_repo_val:
348
  return "❌ **Error**: Please provide both Trace ID and Traces Repository"
349
 
350
- # Use user-provided key or fall back to environment variable
351
- api_key = gemini_key if gemini_key and gemini_key.strip() else None
352
-
353
  result = await debug_trace(
354
  trace_id=trace_id_val,
355
  traces_repo=traces_repo_val,
356
- question=question_val or "Analyze this trace",
357
- hf_token=hf_token if hf_token and hf_token.strip() else None,
358
- gemini_api_key=api_key
359
- )
360
  return result
361
  except Exception as e:
362
  return f"❌ **Error**: {str(e)}"
363
 
364
  trace_button.click(
365
  fn=run_debug_trace,
366
- inputs=[trace_id, traces_repo, question, gemini_key_state, hf_token_state],
367
  outputs=[trace_output]
368
  )
369
 
@@ -401,7 +246,7 @@ def create_gradio_ui():
401
  with gr.Column():
402
  cost_output = gr.Markdown(label="Cost Estimate")
403
 
404
- async def run_estimate_cost(model, agent_type, num_tests, hardware, gemini_key):
405
  """
406
  Estimate the cost, duration, and CO2 emissions of running agent evaluations.
407
 
@@ -423,15 +268,11 @@ def create_gradio_ui():
423
  if not model:
424
  return "❌ **Error**: Please provide a model name"
425
 
426
- # Use user-provided key or fall back to environment variable
427
- api_key = gemini_key if gemini_key and gemini_key.strip() else None
428
-
429
  result = await estimate_cost(
430
  model=model,
431
  agent_type=agent_type,
432
  num_tests=int(num_tests),
433
- hardware=hardware,
434
- gemini_api_key=api_key
435
  )
436
  return result
437
  except Exception as e:
@@ -439,7 +280,7 @@ def create_gradio_ui():
439
 
440
  cost_button.click(
441
  fn=run_estimate_cost,
442
- inputs=[cost_model, cost_agent_type, cost_num_tests, cost_hardware, gemini_key_state],
443
  outputs=[cost_output]
444
  )
445
 
@@ -482,7 +323,7 @@ def create_gradio_ui():
482
  compare_button = gr.Button("🔍 Compare Runs", variant="primary")
483
  compare_output = gr.Markdown()
484
 
485
- async def run_compare_runs(run_id_1, run_id_2, focus, repo, gemini_key, hf_token):
486
  """
487
  Compare two evaluation runs and generate AI-powered comparative analysis.
488
 
@@ -502,16 +343,11 @@ def create_gradio_ui():
502
  str: Markdown-formatted comparative analysis with winners, trade-offs, and recommendations
503
  """
504
  try:
505
- # Use user-provided key or fall back to environment variable
506
- api_key = gemini_key if gemini_key and gemini_key.strip() else None
507
-
508
  result = await compare_runs(
509
  run_id_1=run_id_1,
510
  run_id_2=run_id_2,
511
  leaderboard_repo=repo,
512
- comparison_focus=focus,
513
- hf_token=hf_token if hf_token and hf_token.strip() else None,
514
- gemini_api_key=api_key
515
  )
516
  return result
517
  except Exception as e:
@@ -519,7 +355,7 @@ def create_gradio_ui():
519
 
520
  compare_button.click(
521
  fn=run_compare_runs,
522
- inputs=[compare_run_id_1, compare_run_id_2, compare_focus, compare_repo, gemini_key_state, hf_token_state],
523
  outputs=[compare_output]
524
  )
525
 
@@ -558,7 +394,7 @@ def create_gradio_ui():
558
  results_button = gr.Button("🔍 Analyze Results", variant="primary")
559
  results_output = gr.Markdown()
560
 
561
- async def run_analyze_results(repo, focus, max_rows, gemini_key, hf_token):
562
  """
563
  Analyze detailed test results and provide optimization recommendations.
564
 
@@ -576,15 +412,10 @@ def create_gradio_ui():
576
  if not repo:
577
  return "❌ **Error**: Please provide a results repository"
578
 
579
- # Use user-provided key or fall back to environment variable
580
- api_key = gemini_key if gemini_key and gemini_key.strip() else None
581
-
582
  result = await analyze_results(
583
  results_repo=repo,
584
  analysis_focus=focus,
585
- max_rows=int(max_rows),
586
- hf_token=hf_token if hf_token and hf_token.strip() else None,
587
- gemini_api_key=api_key
588
  )
589
  return result
590
  except Exception as e:
@@ -592,7 +423,7 @@ def create_gradio_ui():
592
 
593
  results_button.click(
594
  fn=run_analyze_results,
595
- inputs=[results_repo_input, results_focus, results_max_rows, gemini_key_state, hf_token_state],
596
  outputs=[results_output]
597
  )
598
 
@@ -629,7 +460,7 @@ def create_gradio_ui():
629
  dataset_button = gr.Button("📥 Load Dataset", variant="primary")
630
  dataset_output = gr.JSON(label="Dataset JSON Output")
631
 
632
- async def run_get_dataset(repo, max_rows, hf_token):
633
  """
634
  Load SMOLTRACE datasets from HuggingFace and return as JSON.
635
 
@@ -649,8 +480,7 @@ def create_gradio_ui():
649
  import json
650
  result = await get_dataset(
651
  dataset_repo=repo,
652
- max_rows=int(max_rows),
653
- hf_token=hf_token if hf_token and hf_token.strip() else None
654
  )
655
  # Parse JSON string back to dict for JSON component
656
  return json.loads(result)
@@ -659,7 +489,7 @@ def create_gradio_ui():
659
 
660
  dataset_button.click(
661
  fn=run_get_dataset,
662
- inputs=[dataset_repo_input, dataset_max_rows, hf_token_state],
663
  outputs=[dataset_output]
664
  )
665
 
 
80
 
81
  **MCP Endpoint (SSE - Deprecated)**: `https://mcp-1st-birthday-tracemind-mcp-server.hf.space/gradio_api/mcp/sse`
82
  """)
 
 
 
 
 
83
  with gr.Tabs():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  # Tab 1: Analyze Leaderboard
85
  with gr.Tab("📊 Analyze Leaderboard"):
86
  gr.Markdown("### Get AI-powered insights from evaluation leaderboard")
 
114
  with gr.Column():
115
  lb_output = gr.Markdown(label="Analysis Results")
116
 
117
+ async def run_analyze_leaderboard(repo, metric, time_range, top_n):
118
  """
119
  Analyze agent evaluation leaderboard and generate AI-powered insights.
120
 
 
134
  str: Markdown-formatted analysis with top performers, trends, and recommendations
135
  """
136
  try:
 
 
 
137
  result = await analyze_leaderboard(
138
  leaderboard_repo=repo,
139
  metric_focus=metric,
140
  time_range=time_range,
141
+ top_n=int(top_n)
 
 
142
  )
143
  return result
144
  except Exception as e:
 
146
 
147
  lb_button.click(
148
  fn=run_analyze_leaderboard,
149
+ inputs=[lb_repo, lb_metric, lb_time, lb_top_n],
150
  outputs=[lb_output]
151
  )
152
 
 
176
  with gr.Column():
177
  trace_output = gr.Markdown(label="Debug Analysis")
178
 
179
+ async def run_debug_trace(trace_id_val, traces_repo_val, question_val):
180
  """
181
  Debug a specific agent execution trace using OpenTelemetry data.
182
 
 
198
  if not trace_id_val or not traces_repo_val:
199
  return "❌ **Error**: Please provide both Trace ID and Traces Repository"
200
 
 
 
 
201
  result = await debug_trace(
202
  trace_id=trace_id_val,
203
  traces_repo=traces_repo_val,
204
+ question=question_val or "Analyze this trace")
 
 
 
205
  return result
206
  except Exception as e:
207
  return f"❌ **Error**: {str(e)}"
208
 
209
  trace_button.click(
210
  fn=run_debug_trace,
211
+ inputs=[trace_id, traces_repo, question],
212
  outputs=[trace_output]
213
  )
214
 
 
246
  with gr.Column():
247
  cost_output = gr.Markdown(label="Cost Estimate")
248
 
249
+ async def run_estimate_cost(model, agent_type, num_tests, hardware):
250
  """
251
  Estimate the cost, duration, and CO2 emissions of running agent evaluations.
252
 
 
268
  if not model:
269
  return "❌ **Error**: Please provide a model name"
270
 
 
 
 
271
  result = await estimate_cost(
272
  model=model,
273
  agent_type=agent_type,
274
  num_tests=int(num_tests),
275
+ hardware=hardware
 
276
  )
277
  return result
278
  except Exception as e:
 
280
 
281
  cost_button.click(
282
  fn=run_estimate_cost,
283
+ inputs=[cost_model, cost_agent_type, cost_num_tests, cost_hardware],
284
  outputs=[cost_output]
285
  )
286
 
 
323
  compare_button = gr.Button("🔍 Compare Runs", variant="primary")
324
  compare_output = gr.Markdown()
325
 
326
+ async def run_compare_runs(run_id_1, run_id_2, focus, repo):
327
  """
328
  Compare two evaluation runs and generate AI-powered comparative analysis.
329
 
 
343
  str: Markdown-formatted comparative analysis with winners, trade-offs, and recommendations
344
  """
345
  try:
 
 
 
346
  result = await compare_runs(
347
  run_id_1=run_id_1,
348
  run_id_2=run_id_2,
349
  leaderboard_repo=repo,
350
+ comparison_focus=focus
 
 
351
  )
352
  return result
353
  except Exception as e:
 
355
 
356
  compare_button.click(
357
  fn=run_compare_runs,
358
+ inputs=[compare_run_id_1, compare_run_id_2, compare_focus, compare_repo],
359
  outputs=[compare_output]
360
  )
361
 
 
394
  results_button = gr.Button("🔍 Analyze Results", variant="primary")
395
  results_output = gr.Markdown()
396
 
397
+ async def run_analyze_results(repo, focus, max_rows):
398
  """
399
  Analyze detailed test results and provide optimization recommendations.
400
 
 
412
  if not repo:
413
  return "❌ **Error**: Please provide a results repository"
414
 
 
 
 
415
  result = await analyze_results(
416
  results_repo=repo,
417
  analysis_focus=focus,
418
+ max_rows=int(max_rows)
 
 
419
  )
420
  return result
421
  except Exception as e:
 
423
 
424
  results_button.click(
425
  fn=run_analyze_results,
426
+ inputs=[results_repo_input, results_focus, results_max_rows],
427
  outputs=[results_output]
428
  )
429
 
 
460
  dataset_button = gr.Button("📥 Load Dataset", variant="primary")
461
  dataset_output = gr.JSON(label="Dataset JSON Output")
462
 
463
+ async def run_get_dataset(repo, max_rows):
464
  """
465
  Load SMOLTRACE datasets from HuggingFace and return as JSON.
466
 
 
480
  import json
481
  result = await get_dataset(
482
  dataset_repo=repo,
483
+ max_rows=int(max_rows)
 
484
  )
485
  # Parse JSON string back to dict for JSON component
486
  return json.loads(result)
 
489
 
490
  dataset_button.click(
491
  fn=run_get_dataset,
492
+ inputs=[dataset_repo_input, dataset_max_rows],
493
  outputs=[dataset_output]
494
  )
495
 
mcp_tools.py CHANGED
@@ -27,9 +27,7 @@ async def analyze_leaderboard(
27
  leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
28
  metric_focus: str = "overall",
29
  time_range: str = "last_week",
30
- top_n: int = 5,
31
- hf_token: Optional[str] = None,
32
- gemini_api_key: Optional[str] = None
33
  ) -> str:
34
  """
35
  Answer questions about the leaderboard with AI-powered analysis and insights.
@@ -47,26 +45,25 @@ async def analyze_leaderboard(
47
  agent evaluation results, including top performers, trends, cost/performance
48
  trade-offs, and actionable recommendations.
49
 
 
 
 
50
  Args:
51
  leaderboard_repo (str): HuggingFace dataset repository containing leaderboard data. Default: "kshitijthakkar/smoltrace-leaderboard"
52
  metric_focus (str): Primary metric to focus analysis on. Options: "overall", "accuracy", "cost", "latency", "co2". Default: "overall"
53
  time_range (str): Time range for analysis. Options: "last_week", "last_month", "all_time". Default: "last_week"
54
  top_n (int): Number of top models to highlight in analysis. Must be between 3 and 10. Default: 5
55
- hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
56
- gemini_api_key (Optional[str]): Google Gemini API key. If None, uses GEMINI_API_KEY environment variable.
57
 
58
  Returns:
59
  str: Markdown-formatted analysis with top performers, insights, trade-offs, and recommendations
60
  """
61
  try:
62
- # Initialize Gemini client with provided key or from environment
63
- gemini_client = GeminiClient(api_key=gemini_api_key) if gemini_api_key else GeminiClient()
64
- # Load leaderboard data from HuggingFace
65
  print(f"Loading leaderboard from {leaderboard_repo}...")
66
 
67
- # Use user-provided token or fall back to environment variable
68
- token = hf_token if hf_token else os.getenv("HF_TOKEN")
69
- ds = load_dataset(leaderboard_repo, split="train", token=token)
70
  df = pd.DataFrame(ds)
71
 
72
  # Filter by time range
@@ -135,9 +132,7 @@ async def analyze_leaderboard(
135
  async def debug_trace(
136
  trace_id: str,
137
  traces_repo: str,
138
- question: str = "Analyze this trace and explain what happened",
139
- hf_token: Optional[str] = None,
140
- gemini_api_key: Optional[str] = None
141
  ) -> str:
142
  """
143
  Answer questions about agent traces with AI-powered debugging and analysis.
@@ -159,21 +154,16 @@ async def debug_trace(
159
  trace_id (str): Unique identifier for the trace to analyze (e.g., "trace_abc123")
160
  traces_repo (str): HuggingFace dataset repository containing trace data (e.g., "username/agent-traces-model-timestamp")
161
  question (str): Specific question about the trace. Default: "Analyze this trace and explain what happened"
162
- hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
163
- gemini_api_key (Optional[str]): Google Gemini API key. If None, uses GEMINI_API_KEY environment variable.
164
-
165
  Returns:
166
  str: Markdown-formatted debug analysis with step-by-step breakdown, timing information, and answer to the question
167
  """
168
  try:
169
  # Initialize Gemini client with provided key or from environment
170
- gemini_client = GeminiClient(api_key=gemini_api_key) if gemini_api_key else GeminiClient()
171
- # Load traces dataset
172
  print(f"Loading traces from {traces_repo}...")
173
 
174
- # Use user-provided token or fall back to environment variable
175
- token = hf_token if hf_token else os.getenv("HF_TOKEN")
176
- ds = load_dataset(traces_repo, split="train", token=token)
177
  df = pd.DataFrame(ds)
178
 
179
  # Find the specific trace
@@ -243,8 +233,7 @@ async def estimate_cost(
243
  model: str,
244
  agent_type: str,
245
  num_tests: int = 100,
246
- hardware: str = "auto",
247
- gemini_api_key: Optional[str] = None
248
  ) -> str:
249
  """
250
  Answer questions about evaluation costs with AI-powered estimates and recommendations.
@@ -267,14 +256,12 @@ async def estimate_cost(
267
  agent_type (str): Type of agent capabilities to test. Options: "tool", "code", "both"
268
  num_tests (int): Number of test cases to run. Must be between 10 and 1000. Default: 100
269
  hardware (str): Hardware type for HuggingFace Jobs. Options: "auto", "cpu", "gpu_a10", "gpu_h200". Default: "auto"
270
- gemini_api_key (Optional[str]): Google Gemini API key. If None, uses GEMINI_API_KEY environment variable.
271
-
272
  Returns:
273
  str: Markdown-formatted cost estimate with breakdown of LLM costs, HF Jobs costs, duration, CO2 emissions, and optimization tips
274
  """
275
  try:
276
  # Initialize Gemini client with provided key or from environment
277
- gemini_client = GeminiClient(api_key=gemini_api_key) if gemini_api_key else GeminiClient()
278
  # Determine if API or local model
279
  is_api_model = any(provider in model.lower() for provider in ["openai", "anthropic", "google", "cohere"])
280
 
@@ -378,9 +365,7 @@ async def compare_runs(
378
  run_id_1: str,
379
  run_id_2: str,
380
  leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
381
- comparison_focus: str = "comprehensive",
382
- hf_token: Optional[str] = None,
383
- gemini_api_key: Optional[str] = None
384
  ) -> str:
385
  """
386
  Compare two evaluation runs and generate AI-powered comparative analysis.
@@ -394,19 +379,13 @@ async def compare_runs(
394
  run_id_2 (str): Second run ID to compare
395
  leaderboard_repo (str): HuggingFace dataset repository containing leaderboard data. Default: "kshitijthakkar/smoltrace-leaderboard"
396
  comparison_focus (str): Focus area for comparison. Options: "comprehensive", "cost", "performance", "eco_friendly". Default: "comprehensive"
397
- hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
398
- gemini_api_key (Optional[str]): Google Gemini API key. If None, uses GEMINI_API_KEY environment variable.
399
-
400
  Returns:
401
  str: Markdown-formatted comparative analysis with winner for each category, trade-offs, and use case recommendations
402
  """
403
  try:
404
  # Initialize Gemini client with provided key or from environment
405
- gemini_client = GeminiClient(api_key=gemini_api_key) if gemini_api_key else GeminiClient()
406
- # Load leaderboard data
407
- # Use user-provided token or fall back to environment variable
408
- token = hf_token if hf_token else os.getenv("HF_TOKEN")
409
- dataset = load_dataset(leaderboard_repo, split="train", token=token)
410
  df = pd.DataFrame(dataset)
411
 
412
  # Find the two runs
@@ -580,9 +559,7 @@ Provide eco-conscious recommendations for sustainable AI deployment.
580
  async def analyze_results(
581
  results_repo: str,
582
  analysis_focus: str = "comprehensive",
583
- max_rows: int = 100,
584
- hf_token: Optional[str] = None,
585
- gemini_api_key: Optional[str] = None
586
  ) -> str:
587
  """
588
  Analyze detailed test results and provide optimization recommendations.
@@ -601,20 +578,17 @@ async def analyze_results(
601
  results_repo (str): HuggingFace dataset repository containing results (e.g., "username/smoltrace-results-gpt4-20251114")
602
  analysis_focus (str): Focus area. Options: "failures", "performance", "cost", "comprehensive". Default: "comprehensive"
603
  max_rows (int): Maximum test cases to analyze. Default: 100. Range: 10-500
604
- hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
605
- gemini_api_key (Optional[str]): Google Gemini API key. If None, uses GEMINI_API_KEY environment variable.
606
-
607
  Returns:
608
  str: Markdown-formatted analysis with failure patterns, performance insights, cost analysis, and optimization recommendations
609
  """
610
  try:
611
  # Initialize Gemini client
612
- gemini_client = GeminiClient(api_key=gemini_api_key) if gemini_api_key else GeminiClient()
613
 
614
  # Load results dataset
615
  print(f"Loading results from {results_repo}...")
616
- token = hf_token if hf_token else os.getenv("HF_TOKEN")
617
- ds = load_dataset(results_repo, split="train", token=token)
618
  df = pd.DataFrame(ds)
619
 
620
  if df.empty:
@@ -727,8 +701,7 @@ async def analyze_results(
727
  @gr.mcp.tool()
728
  async def get_dataset(
729
  dataset_repo: str,
730
- max_rows: int = 50,
731
- hf_token: Optional[str] = None
732
  ) -> str:
733
  """
734
  Load SMOLTRACE datasets from HuggingFace and return as JSON.
@@ -748,8 +721,6 @@ async def get_dataset(
748
  Args:
749
  dataset_repo (str): HuggingFace dataset repository path with "smoltrace-" prefix (e.g., "kshitijthakkar/smoltrace-leaderboard")
750
  max_rows (int): Maximum number of rows to return. Default: 50. Range: 1-200
751
- hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
752
-
753
  Returns:
754
  str: JSON object with dataset data and metadata
755
  """
@@ -762,10 +733,7 @@ async def get_dataset(
762
  "data": []
763
  }, indent=2, default=str)
764
 
765
- # Load dataset from HuggingFace
766
- # Use user-provided token or fall back to environment variable
767
- token = hf_token if hf_token else os.getenv("HF_TOKEN")
768
- dataset = load_dataset(dataset_repo, split="train", token=token)
769
  df = pd.DataFrame(dataset)
770
 
771
  if df.empty:
@@ -815,7 +783,7 @@ async def get_dataset(
815
  # ============================================================================
816
 
817
  @gr.mcp.resource("leaderboard://{repo}")
818
- def get_leaderboard_data(repo: str = "kshitijthakkar/smoltrace-leaderboard", hf_token: Optional[str] = None) -> str:
819
  """
820
  [RAW DATA ONLY] Get raw leaderboard data in JSON format - NO analysis or insights.
821
 
@@ -829,17 +797,15 @@ def get_leaderboard_data(repo: str = "kshitijthakkar/smoltrace-leaderboard", hf_
829
 
830
  For questions, insights, recommendations, or analysis → use analyze_leaderboard tool instead!
831
 
 
 
832
  Args:
833
  repo (str): HuggingFace dataset repository name. Default: "kshitijthakkar/smoltrace-leaderboard"
834
- hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
835
-
836
  Returns:
837
  str: Raw JSON string containing all evaluation runs without any analysis
838
  """
839
- try:
840
- # Use user-provided token or fall back to environment variable
841
- token = hf_token if hf_token else os.getenv("HF_TOKEN")
842
- ds = load_dataset(repo, split="train", token=token)
843
  df = pd.DataFrame(ds)
844
 
845
  # Convert to JSON with proper formatting
@@ -858,7 +824,7 @@ def get_leaderboard_data(repo: str = "kshitijthakkar/smoltrace-leaderboard", hf_
858
 
859
 
860
  @gr.mcp.resource("trace://{trace_id}/{repo}")
861
- def get_trace_data(trace_id: str, repo: str, hf_token: Optional[str] = None) -> str:
862
  """
863
  [RAW DATA ONLY] Get raw OpenTelemetry trace data in JSON format - NO analysis.
864
 
@@ -872,18 +838,16 @@ def get_trace_data(trace_id: str, repo: str, hf_token: Optional[str] = None) ->
872
 
873
  For debugging, questions, or analysis → use debug_trace tool instead!
874
 
 
 
875
  Args:
876
  trace_id (str): Unique identifier for the trace (e.g., "trace_abc123")
877
  repo (str): HuggingFace dataset repository containing traces (e.g., "username/agent-traces-model")
878
- hf_token (Optional[str]): HuggingFace token for dataset access. If None, uses HF_TOKEN environment variable.
879
-
880
  Returns:
881
  str: Raw JSON string containing OpenTelemetry spans without any analysis
882
  """
883
- try:
884
- # Use user-provided token or fall back to environment variable
885
- token = hf_token if hf_token else os.getenv("HF_TOKEN")
886
- ds = load_dataset(repo, split="train", token=token)
887
  df = pd.DataFrame(ds)
888
 
889
  # Find specific trace
 
27
  leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
28
  metric_focus: str = "overall",
29
  time_range: str = "last_week",
30
+ top_n: int = 5
 
 
31
  ) -> str:
32
  """
33
  Answer questions about the leaderboard with AI-powered analysis and insights.
 
45
  agent evaluation results, including top performers, trends, cost/performance
46
  trade-offs, and actionable recommendations.
47
 
48
+ **Security**: Requires GEMINI_API_KEY environment variable.
49
+ **Note**: All SMOLTRACE datasets are public - no HF token required.
50
+
51
  Args:
52
  leaderboard_repo (str): HuggingFace dataset repository containing leaderboard data. Default: "kshitijthakkar/smoltrace-leaderboard"
53
  metric_focus (str): Primary metric to focus analysis on. Options: "overall", "accuracy", "cost", "latency", "co2". Default: "overall"
54
  time_range (str): Time range for analysis. Options: "last_week", "last_month", "all_time". Default: "last_week"
55
  top_n (int): Number of top models to highlight in analysis. Must be between 3 and 10. Default: 5
 
 
56
 
57
  Returns:
58
  str: Markdown-formatted analysis with top performers, insights, trade-offs, and recommendations
59
  """
60
  try:
61
+ # Initialize Gemini client from environment variable only
62
+ gemini_client = GeminiClient()
63
+ # Load leaderboard data from HuggingFace (public dataset)
64
  print(f"Loading leaderboard from {leaderboard_repo}...")
65
 
66
+ ds = load_dataset(leaderboard_repo, split="train")
 
 
67
  df = pd.DataFrame(ds)
68
 
69
  # Filter by time range
 
132
  async def debug_trace(
133
  trace_id: str,
134
  traces_repo: str,
135
+ question: str = "Analyze this trace and explain what happened"
 
 
136
  ) -> str:
137
  """
138
  Answer questions about agent traces with AI-powered debugging and analysis.
 
154
  trace_id (str): Unique identifier for the trace to analyze (e.g., "trace_abc123")
155
  traces_repo (str): HuggingFace dataset repository containing trace data (e.g., "username/agent-traces-model-timestamp")
156
  question (str): Specific question about the trace. Default: "Analyze this trace and explain what happened"
 
 
 
157
  Returns:
158
  str: Markdown-formatted debug analysis with step-by-step breakdown, timing information, and answer to the question
159
  """
160
  try:
161
  # Initialize Gemini client with provided key or from environment
162
+ gemini_client = GeminiClient()
163
+ # Load traces dataset (public dataset)
164
  print(f"Loading traces from {traces_repo}...")
165
 
166
+ ds = load_dataset(traces_repo, split="train")
 
 
167
  df = pd.DataFrame(ds)
168
 
169
  # Find the specific trace
 
233
  model: str,
234
  agent_type: str,
235
  num_tests: int = 100,
236
+ hardware: str = "auto"
 
237
  ) -> str:
238
  """
239
  Answer questions about evaluation costs with AI-powered estimates and recommendations.
 
256
  agent_type (str): Type of agent capabilities to test. Options: "tool", "code", "both"
257
  num_tests (int): Number of test cases to run. Must be between 10 and 1000. Default: 100
258
  hardware (str): Hardware type for HuggingFace Jobs. Options: "auto", "cpu", "gpu_a10", "gpu_h200". Default: "auto"
 
 
259
  Returns:
260
  str: Markdown-formatted cost estimate with breakdown of LLM costs, HF Jobs costs, duration, CO2 emissions, and optimization tips
261
  """
262
  try:
263
  # Initialize Gemini client with provided key or from environment
264
+ gemini_client = GeminiClient()
265
  # Determine if API or local model
266
  is_api_model = any(provider in model.lower() for provider in ["openai", "anthropic", "google", "cohere"])
267
 
 
365
  run_id_1: str,
366
  run_id_2: str,
367
  leaderboard_repo: str = "kshitijthakkar/smoltrace-leaderboard",
368
+ comparison_focus: str = "comprehensive"
 
 
369
  ) -> str:
370
  """
371
  Compare two evaluation runs and generate AI-powered comparative analysis.
 
379
  run_id_2 (str): Second run ID to compare
380
  leaderboard_repo (str): HuggingFace dataset repository containing leaderboard data. Default: "kshitijthakkar/smoltrace-leaderboard"
381
  comparison_focus (str): Focus area for comparison. Options: "comprehensive", "cost", "performance", "eco_friendly". Default: "comprehensive"
 
 
 
382
  Returns:
383
  str: Markdown-formatted comparative analysis with winner for each category, trade-offs, and use case recommendations
384
  """
385
  try:
386
  # Initialize Gemini client with provided key or from environment
387
+ gemini_client = GeminiClient()
388
+ # Load leaderboard data dataset = load_dataset(leaderboard_repo, split="train")
 
 
 
389
  df = pd.DataFrame(dataset)
390
 
391
  # Find the two runs
 
559
  async def analyze_results(
560
  results_repo: str,
561
  analysis_focus: str = "comprehensive",
562
+ max_rows: int = 100
 
 
563
  ) -> str:
564
  """
565
  Analyze detailed test results and provide optimization recommendations.
 
578
  results_repo (str): HuggingFace dataset repository containing results (e.g., "username/smoltrace-results-gpt4-20251114")
579
  analysis_focus (str): Focus area. Options: "failures", "performance", "cost", "comprehensive". Default: "comprehensive"
580
  max_rows (int): Maximum test cases to analyze. Default: 100. Range: 10-500
 
 
 
581
  Returns:
582
  str: Markdown-formatted analysis with failure patterns, performance insights, cost analysis, and optimization recommendations
583
  """
584
  try:
585
  # Initialize Gemini client
586
+ gemini_client = GeminiClient()
587
 
588
  # Load results dataset
589
  print(f"Loading results from {results_repo}...")
590
+
591
+ ds = load_dataset(results_repo, split="train")
592
  df = pd.DataFrame(ds)
593
 
594
  if df.empty:
 
701
  @gr.mcp.tool()
702
  async def get_dataset(
703
  dataset_repo: str,
704
+ max_rows: int = 50
 
705
  ) -> str:
706
  """
707
  Load SMOLTRACE datasets from HuggingFace and return as JSON.
 
721
  Args:
722
  dataset_repo (str): HuggingFace dataset repository path with "smoltrace-" prefix (e.g., "kshitijthakkar/smoltrace-leaderboard")
723
  max_rows (int): Maximum number of rows to return. Default: 50. Range: 1-200
 
 
724
  Returns:
725
  str: JSON object with dataset data and metadata
726
  """
 
733
  "data": []
734
  }, indent=2, default=str)
735
 
736
+ # Load dataset from HuggingFace dataset = load_dataset(dataset_repo, split="train")
 
 
 
737
  df = pd.DataFrame(dataset)
738
 
739
  if df.empty:
 
783
  # ============================================================================
784
 
785
  @gr.mcp.resource("leaderboard://{repo}")
786
+ def get_leaderboard_data(repo: str = "kshitijthakkar/smoltrace-leaderboard") -> str:
787
  """
788
  [RAW DATA ONLY] Get raw leaderboard data in JSON format - NO analysis or insights.
789
 
 
797
 
798
  For questions, insights, recommendations, or analysis → use analyze_leaderboard tool instead!
799
 
800
+ **Note**: All SMOLTRACE datasets are public - no authentication required.
801
+
802
  Args:
803
  repo (str): HuggingFace dataset repository name. Default: "kshitijthakkar/smoltrace-leaderboard"
 
 
804
  Returns:
805
  str: Raw JSON string containing all evaluation runs without any analysis
806
  """
807
+ try:
808
+ ds = load_dataset(repo, split="train")
 
 
809
  df = pd.DataFrame(ds)
810
 
811
  # Convert to JSON with proper formatting
 
824
 
825
 
826
  @gr.mcp.resource("trace://{trace_id}/{repo}")
827
+ def get_trace_data(trace_id: str, repo: str) -> str:
828
  """
829
  [RAW DATA ONLY] Get raw OpenTelemetry trace data in JSON format - NO analysis.
830
 
 
838
 
839
  For debugging, questions, or analysis → use debug_trace tool instead!
840
 
841
+ **Note**: All SMOLTRACE datasets are public - no authentication required.
842
+
843
  Args:
844
  trace_id (str): Unique identifier for the trace (e.g., "trace_abc123")
845
  repo (str): HuggingFace dataset repository containing traces (e.g., "username/agent-traces-model")
 
 
846
  Returns:
847
  str: Raw JSON string containing OpenTelemetry spans without any analysis
848
  """
849
+ try:
850
+ ds = load_dataset(repo, split="train")
 
 
851
  df = pd.DataFrame(ds)
852
 
853
  # Find specific trace