Mandark-droid committed on
Commit
60b7b04
·
1 Parent(s): 920ea09

Fix compare screen to handle duplicate run_ids using composite keys

Browse files

Updated dropdown value generation to use run_id|timestamp composite
keys to ensure unique identification when comparing runs with
duplicate run_ids but different timestamps.

Changes:
- navigate_to_compare(): Use composite keys in dropdown values
- apply_sidebar_filters(): Use composite keys for compare dropdowns
- on_compare_runs(): Parse composite keys and filter by both run_id and timestamp

Files changed (2) hide show
  1. app.py +5 -3
  2. screens/compare.py +31 -3
app.py CHANGED
@@ -532,7 +532,8 @@ def apply_sidebar_filters(selected_model, selected_agent_type):
532
  compare_choices = []
533
  for _, row in df.iterrows():
534
  label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
535
- value = row.get('run_id', '')
 
536
  if value:
537
  compare_choices.append((label, value))
538
 
@@ -1288,11 +1289,12 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
1288
  try:
1289
  leaderboard_df = data_loader.load_leaderboard()
1290
 
1291
- # Create run choices for dropdowns (model name with run_id)
1292
  run_choices = []
1293
  for _, row in leaderboard_df.iterrows():
1294
  label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
1295
- value = row.get('run_id', '')
 
1296
  if value:
1297
  run_choices.append((label, value))
1298
 
 
532
  compare_choices = []
533
  for _, row in df.iterrows():
534
  label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
535
+ # Use composite key: run_id|timestamp to ensure uniqueness
536
+ value = f"{row.get('run_id', '')}|{row.get('timestamp', '')}"
537
  if value:
538
  compare_choices.append((label, value))
539
 
 
1289
  try:
1290
  leaderboard_df = data_loader.load_leaderboard()
1291
 
1292
+ # Create run choices for dropdowns (model name with composite unique identifier)
1293
  run_choices = []
1294
  for _, row in leaderboard_df.iterrows():
1295
  label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
1296
+ # Use composite key: run_id|timestamp to ensure uniqueness
1297
+ value = f"{row.get('run_id', '')}|{row.get('timestamp', '')}"
1298
  if value:
1299
  run_choices.append((label, value))
1300
 
screens/compare.py CHANGED
@@ -325,9 +325,37 @@ def on_compare_runs(run_a_id: str, run_b_id: str, leaderboard_df, components: Di
325
  components['comparison_output']: gr.update(visible=False)
326
  }
327
 
328
- # Find the runs in the dataframe
329
- run_a = leaderboard_df[leaderboard_df['run_id'] == run_a_id].iloc[0].to_dict()
330
- run_b = leaderboard_df[leaderboard_df['run_id'] == run_b_id].iloc[0].to_dict()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
 
332
  # Create comparison visualizations
333
  card_a = create_run_comparison_card(run_a, "A")
 
325
  components['comparison_output']: gr.update(visible=False)
326
  }
327
 
328
+ # Parse composite keys (run_id|timestamp)
329
+ run_a_parts = run_a_id.split('|')
330
+ run_b_parts = run_b_id.split('|')
331
+
332
+ if len(run_a_parts) != 2 or len(run_b_parts) != 2:
333
+ gr.Warning("Invalid run selection")
334
+ return {
335
+ components['comparison_output']: gr.update(visible=False)
336
+ }
337
+
338
+ run_a_id_parsed, run_a_timestamp = run_a_parts
339
+ run_b_id_parsed, run_b_timestamp = run_b_parts
340
+
341
+ # Find the runs in the dataframe using both run_id and timestamp
342
+ run_a_match = leaderboard_df[
343
+ (leaderboard_df['run_id'] == run_a_id_parsed) &
344
+ (leaderboard_df['timestamp'] == run_a_timestamp)
345
+ ]
346
+ run_b_match = leaderboard_df[
347
+ (leaderboard_df['run_id'] == run_b_id_parsed) &
348
+ (leaderboard_df['timestamp'] == run_b_timestamp)
349
+ ]
350
+
351
+ if run_a_match.empty or run_b_match.empty:
352
+ gr.Warning("Could not find selected runs in leaderboard data")
353
+ return {
354
+ components['comparison_output']: gr.update(visible=False)
355
+ }
356
+
357
+ run_a = run_a_match.iloc[0].to_dict()
358
+ run_b = run_b_match.iloc[0].to_dict()
359
 
360
  # Create comparison visualizations
361
  card_a = create_run_comparison_card(run_a, "A")