Spaces:
Running
Running
Mandark-droid
committed on
Commit
·
60b7b04
1
Parent(s):
920ea09
Fix compare screen to handle duplicate run_ids using composite keys
Browse files
Updated dropdown value generation to use run_id|timestamp composite
keys to ensure unique identification when comparing runs with
duplicate run_ids but different timestamps.
Changes:
- navigate_to_compare(): Use composite keys in dropdown values
- apply_sidebar_filters(): Use composite keys for compare dropdowns
- on_compare_runs(): Parse composite keys and filter by both run_id and timestamp
- app.py +5 -3
- screens/compare.py +31 -3
app.py
CHANGED
|
@@ -532,7 +532,8 @@ def apply_sidebar_filters(selected_model, selected_agent_type):
|
|
| 532 |
compare_choices = []
|
| 533 |
for _, row in df.iterrows():
|
| 534 |
label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
|
| 535 |
-
|
|
|
|
| 536 |
if value:
|
| 537 |
compare_choices.append((label, value))
|
| 538 |
|
|
@@ -1288,11 +1289,12 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
|
|
| 1288 |
try:
|
| 1289 |
leaderboard_df = data_loader.load_leaderboard()
|
| 1290 |
|
| 1291 |
-
# Create run choices for dropdowns (model name with
|
| 1292 |
run_choices = []
|
| 1293 |
for _, row in leaderboard_df.iterrows():
|
| 1294 |
label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
|
| 1295 |
-
|
|
|
|
| 1296 |
if value:
|
| 1297 |
run_choices.append((label, value))
|
| 1298 |
|
|
|
|
| 532 |
compare_choices = []
|
| 533 |
for _, row in df.iterrows():
|
| 534 |
label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
|
| 535 |
+
# Use composite key: run_id|timestamp to ensure uniqueness
|
| 536 |
+
value = f"{row.get('run_id', '')}|{row.get('timestamp', '')}"
|
| 537 |
if value:
|
| 538 |
compare_choices.append((label, value))
|
| 539 |
|
|
|
|
| 1289 |
try:
|
| 1290 |
leaderboard_df = data_loader.load_leaderboard()
|
| 1291 |
|
| 1292 |
+
# Create run choices for dropdowns (model name with composite unique identifier)
|
| 1293 |
run_choices = []
|
| 1294 |
for _, row in leaderboard_df.iterrows():
|
| 1295 |
label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
|
| 1296 |
+
# Use composite key: run_id|timestamp to ensure uniqueness
|
| 1297 |
+
value = f"{row.get('run_id', '')}|{row.get('timestamp', '')}"
|
| 1298 |
if value:
|
| 1299 |
run_choices.append((label, value))
|
| 1300 |
|
screens/compare.py
CHANGED
|
@@ -325,9 +325,37 @@ def on_compare_runs(run_a_id: str, run_b_id: str, leaderboard_df, components: Di
|
|
| 325 |
components['comparison_output']: gr.update(visible=False)
|
| 326 |
}
|
| 327 |
|
| 328 |
-
#
|
| 329 |
-
|
| 330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
|
| 332 |
# Create comparison visualizations
|
| 333 |
card_a = create_run_comparison_card(run_a, "A")
|
|
|
|
| 325 |
components['comparison_output']: gr.update(visible=False)
|
| 326 |
}
|
| 327 |
|
| 328 |
+
# Parse composite keys (run_id|timestamp)
|
| 329 |
+
run_a_parts = run_a_id.split('|')
|
| 330 |
+
run_b_parts = run_b_id.split('|')
|
| 331 |
+
|
| 332 |
+
if len(run_a_parts) != 2 or len(run_b_parts) != 2:
|
| 333 |
+
gr.Warning("Invalid run selection")
|
| 334 |
+
return {
|
| 335 |
+
components['comparison_output']: gr.update(visible=False)
|
| 336 |
+
}
|
| 337 |
+
|
| 338 |
+
run_a_id_parsed, run_a_timestamp = run_a_parts
|
| 339 |
+
run_b_id_parsed, run_b_timestamp = run_b_parts
|
| 340 |
+
|
| 341 |
+
# Find the runs in the dataframe using both run_id and timestamp
|
| 342 |
+
run_a_match = leaderboard_df[
|
| 343 |
+
(leaderboard_df['run_id'] == run_a_id_parsed) &
|
| 344 |
+
(leaderboard_df['timestamp'] == run_a_timestamp)
|
| 345 |
+
]
|
| 346 |
+
run_b_match = leaderboard_df[
|
| 347 |
+
(leaderboard_df['run_id'] == run_b_id_parsed) &
|
| 348 |
+
(leaderboard_df['timestamp'] == run_b_timestamp)
|
| 349 |
+
]
|
| 350 |
+
|
| 351 |
+
if run_a_match.empty or run_b_match.empty:
|
| 352 |
+
gr.Warning("Could not find selected runs in leaderboard data")
|
| 353 |
+
return {
|
| 354 |
+
components['comparison_output']: gr.update(visible=False)
|
| 355 |
+
}
|
| 356 |
+
|
| 357 |
+
run_a = run_a_match.iloc[0].to_dict()
|
| 358 |
+
run_b = run_b_match.iloc[0].to_dict()
|
| 359 |
|
| 360 |
# Create comparison visualizations
|
| 361 |
card_a = create_run_comparison_card(run_a, "A")
|