Spaces:

MCP-1st-Birthday
/

TraceMind

Running

Mandark-droid committed 27 days ago

Commit

60b7b04

1 Parent(s): 920ea09

Fix compare screen to handle duplicate run_ids using composite keys

Updated dropdown value generation to use run_id|timestamp composite
keys to ensure unique identification when comparing runs with
duplicate run_ids but different timestamps.

Changes:
- navigate_to_compare(): Use composite keys in dropdown values
- apply_sidebar_filters(): Use composite keys for compare dropdowns
- on_compare_runs(): Parse composite keys and filter by both run_id and timestamp

Files changed (2) hide show

app.py +5 -3
screens/compare.py +31 -3

app.py CHANGED Viewed

@@ -532,7 +532,8 @@ def apply_sidebar_filters(selected_model, selected_agent_type):
     compare_choices = []
     for _, row in df.iterrows():
         label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
-        value = row.get('run_id', '')
         if value:
             compare_choices.append((label, value))
@@ -1288,11 +1289,12 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
             try:
                 leaderboard_df = data_loader.load_leaderboard()
-                # Create run choices for dropdowns (model name with run_id)
                 run_choices = []
                 for _, row in leaderboard_df.iterrows():
                     label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
-                    value = row.get('run_id', '')
                     if value:
                         run_choices.append((label, value))

     compare_choices = []
     for _, row in df.iterrows():
         label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
+        # Use composite key: run_id|timestamp to ensure uniqueness
+        value = f"{row.get('run_id', '')}|{row.get('timestamp', '')}"
         if value:
             compare_choices.append((label, value))
             try:
                 leaderboard_df = data_loader.load_leaderboard()
+                # Create run choices for dropdowns (model name with composite unique identifier)
                 run_choices = []
                 for _, row in leaderboard_df.iterrows():
                     label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
+                    # Use composite key: run_id|timestamp to ensure uniqueness
+                    value = f"{row.get('run_id', '')}|{row.get('timestamp', '')}"
                     if value:
                         run_choices.append((label, value))

screens/compare.py CHANGED Viewed

@@ -325,9 +325,37 @@ def on_compare_runs(run_a_id: str, run_b_id: str, leaderboard_df, components: Di
                 components['comparison_output']: gr.update(visible=False)
             }
-        # Find the runs in the dataframe
-        run_a = leaderboard_df[leaderboard_df['run_id'] == run_a_id].iloc[0].to_dict()
-        run_b = leaderboard_df[leaderboard_df['run_id'] == run_b_id].iloc[0].to_dict()
         # Create comparison visualizations
         card_a = create_run_comparison_card(run_a, "A")

                 components['comparison_output']: gr.update(visible=False)
             }
+        # Parse composite keys (run_id|timestamp)
+        run_a_parts = run_a_id.split('|')
+        run_b_parts = run_b_id.split('|')
+        if len(run_a_parts) != 2 or len(run_b_parts) != 2:
+            gr.Warning("Invalid run selection")
+            return {
+                components['comparison_output']: gr.update(visible=False)
+            }
+        run_a_id_parsed, run_a_timestamp = run_a_parts
+        run_b_id_parsed, run_b_timestamp = run_b_parts
+        # Find the runs in the dataframe using both run_id and timestamp
+        run_a_match = leaderboard_df[
+            (leaderboard_df['run_id'] == run_a_id_parsed) &
+            (leaderboard_df['timestamp'] == run_a_timestamp)
+        ]
+        run_b_match = leaderboard_df[
+            (leaderboard_df['run_id'] == run_b_id_parsed) &
+            (leaderboard_df['timestamp'] == run_b_timestamp)
+        ]
+        if run_a_match.empty or run_b_match.empty:
+            gr.Warning("Could not find selected runs in leaderboard data")
+            return {
+                components['comparison_output']: gr.update(visible=False)
+            }
+        run_a = run_a_match.iloc[0].to_dict()
+        run_b = run_b_match.iloc[0].to_dict()
         # Create comparison visualizations
         card_a = create_run_comparison_card(run_a, "A")