Mandark-droid committed on
Commit
920ea09
·
1 Parent(s): 5c51b47

Fix filter functionality and align with MockTraceMind structure

Browse files

- Rename sidebar_model_filter to model_filter for consistency
- Add agent_type_filter to Leaderboard tab with proper info text
- Update DrillDown tab filters with sort controls and info panels
- Add apply_leaderboard_filters function for HTML leaderboard
- Add apply_drilldown_filters function for data table
- Add apply_sidebar_filters to sync sidebar filters across all tabs
- Wire sidebar model_filter to update leaderboard, drilldown, trends, and compare
- Wire sidebar agent_type_filter to apply globally
- Add info text to all filter components for better UX
- Fix provider filter choices population
- Remove duplicate filter handler code

Files changed (1) hide show
  1. app.py +193 -75
app.py CHANGED
@@ -420,8 +420,9 @@ def load_leaderboard():
420
 
421
  # Get filter choices
422
  models = ["All Models"] + sorted(df['model'].unique().tolist())
 
423
 
424
- return html, gr.update(choices=models), gr.update(choices=models)
425
 
426
 
427
  def refresh_leaderboard():
@@ -439,25 +440,111 @@ def refresh_leaderboard():
439
  return html, gr.update(choices=models), gr.update(choices=models)
440
 
441
 
442
- def apply_filters(model, provider, sort_by_col):
443
- """Apply filters and sorting to leaderboard"""
444
- global leaderboard_df_cache
445
 
446
  df = leaderboard_df_cache.copy() if leaderboard_df_cache is not None else data_loader.load_leaderboard()
447
 
448
- # Apply filters
449
- if model != "All Models":
450
- df = df[df['model'] == model]
 
 
 
 
 
 
 
451
  if provider != "All":
452
  df = df[df['provider'] == provider]
453
 
454
  # Sort
455
- df = df.sort_values(by=sort_by_col, ascending=False)
 
456
 
457
- html = generate_leaderboard_html(df, sort_by_col)
458
  return html
459
 
460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  def load_drilldown(agent_type, provider):
462
  """Load drilldown data with filters"""
463
  global current_drilldown_df
@@ -938,20 +1025,20 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
938
  gr.Markdown("---")
939
 
940
  # Filters section
941
- gr.Markdown("### 🔍 Global Filters")
942
-
943
- sidebar_model_filter = gr.Dropdown(
944
  choices=["All Models"],
945
  value="All Models",
946
  label="Model",
947
- info="Filter evaluations by AI model"
948
  )
949
-
950
  sidebar_agent_type_filter = gr.Radio(
951
  choices=["All", "tool", "code", "both"],
952
  value="All",
953
  label="Agent Type",
954
- info="Tool: Function calling | Code: Code execution | Both: Hybrid"
955
  )
956
 
957
  # Main content area
@@ -963,47 +1050,83 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
963
  gr.Markdown("## 🏆 Agent Evaluation Leaderboard")
964
  with gr.Tabs():
965
  with gr.TabItem("🏆 Leaderboard"):
966
- # Filters
 
 
967
  with gr.Row():
968
- model_filter = gr.Dropdown(
969
- choices=["All Models"],
970
- value="All Models",
971
- label="Filter by Model"
972
- )
973
- provider_filter = gr.Dropdown(
974
- choices=["All", "litellm", "transformers"],
975
- value="All",
976
- label="Provider"
977
- )
978
- sort_by = gr.Dropdown(
979
- choices=["success_rate", "total_cost_usd", "avg_duration_ms"],
980
- value="success_rate",
981
- label="Sort By"
982
- )
983
-
984
- apply_filters_btn = gr.Button("🔍 Apply Filters")
985
-
986
- # HTML table
987
- leaderboard_by_model = gr.HTML()
 
 
 
 
 
 
 
 
 
 
 
 
988
 
989
  with gr.TabItem("📋 DrillDown"):
 
 
 
990
  with gr.Row():
991
- drilldown_agent_type = gr.Radio(
992
- choices=["All", "tool", "code", "both"],
993
- value="All",
994
- label="Agent Type"
995
- )
996
- drilldown_provider = gr.Dropdown(
997
- choices=["All", "litellm", "transformers"],
998
- value="All",
999
- label="Provider"
1000
- )
1001
-
1002
- apply_drilldown_btn = gr.Button("🔍 Apply")
1003
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1004
  leaderboard_table = gr.Dataframe(
1005
- headers=["Run ID", "Model", "Agent Type", "Provider", "Success Rate", "Tests", "Duration", "Cost"],
1006
- interactive=False
 
1007
  )
1008
 
1009
  with gr.TabItem("📈 Trends"):
@@ -1212,7 +1335,7 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
1212
 
1213
  app.load(
1214
  fn=load_leaderboard,
1215
- outputs=[leaderboard_by_model, model_filter, sidebar_model_filter]
1216
  )
1217
 
1218
  app.load(
@@ -1223,49 +1346,44 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
1223
  # Load drilldown data on page load
1224
  app.load(
1225
  fn=load_drilldown,
1226
- inputs=[drilldown_agent_type, drilldown_provider],
1227
  outputs=[leaderboard_table]
1228
  )
1229
 
1230
  # Refresh button handler
1231
  refresh_leaderboard_btn.click(
1232
  fn=refresh_leaderboard,
1233
- outputs=[leaderboard_by_model, model_filter, sidebar_model_filter]
1234
  )
1235
 
 
1236
  apply_filters_btn.click(
1237
- fn=apply_filters,
1238
- inputs=[model_filter, provider_filter, sort_by],
1239
  outputs=[leaderboard_by_model]
1240
  )
1241
 
1242
- apply_drilldown_btn.click(
1243
- fn=load_drilldown,
1244
- inputs=[drilldown_agent_type, drilldown_provider],
 
1245
  outputs=[leaderboard_table]
1246
  )
1247
 
1248
- # Sidebar filter handlers
1249
- def apply_sidebar_model_filter(model, sort_by_col):
1250
- """Apply sidebar model filter to leaderboard"""
1251
- return apply_filters(model, "All", sort_by_col), gr.update(value=model)
1252
-
1253
- sidebar_model_filter.change(
1254
- fn=apply_sidebar_model_filter,
1255
- inputs=[sidebar_model_filter, sort_by],
1256
- outputs=[leaderboard_by_model, model_filter]
1257
  )
1258
 
1259
- def apply_sidebar_agent_type_filter(agent_type):
1260
- """Apply sidebar agent type filter to drilldown"""
1261
- return load_drilldown(agent_type, "All"), gr.update(value=agent_type)
1262
-
1263
  sidebar_agent_type_filter.change(
1264
- fn=apply_sidebar_agent_type_filter,
1265
- inputs=[sidebar_agent_type_filter],
1266
- outputs=[leaderboard_table, drilldown_agent_type]
1267
  )
1268
 
 
1269
  viz_type.change(
1270
  fn=update_analytics,
1271
  inputs=[viz_type],
 
420
 
421
  # Get filter choices
422
  models = ["All Models"] + sorted(df['model'].unique().tolist())
423
+ providers = ["All"] + sorted(df['provider'].unique().tolist())
424
 
425
+ return html, gr.update(choices=models), gr.update(choices=models), gr.update(choices=providers)
426
 
427
 
428
  def refresh_leaderboard():
 
440
  return html, gr.update(choices=models), gr.update(choices=models)
441
 
442
 
443
+ def apply_leaderboard_filters(agent_type, provider, sort_by_col, sort_order):
444
+ """Apply filters and sorting to styled HTML leaderboard"""
445
+ global leaderboard_df_cache, model_filter
446
 
447
  df = leaderboard_df_cache.copy() if leaderboard_df_cache is not None else data_loader.load_leaderboard()
448
 
449
+ # Apply model filter from sidebar
450
+ selected_model = model_filter.value if hasattr(model_filter, 'value') else "All Models"
451
+ if selected_model != "All Models":
452
+ df = df[df['model'] == selected_model]
453
+
454
+ # Apply agent type filter
455
+ if agent_type != "All":
456
+ df = df[df['agent_type'] == agent_type]
457
+
458
+ # Apply provider filter
459
  if provider != "All":
460
  df = df[df['provider'] == provider]
461
 
462
  # Sort
463
+ ascending = (sort_order == "Ascending")
464
+ df = df.sort_values(by=sort_by_col, ascending=ascending)
465
 
466
+ html = generate_leaderboard_html(df, sort_by_col, ascending)
467
  return html
468
 
469
 
470
+ def apply_drilldown_filters(agent_type, provider, sort_by_col, sort_order):
471
+ """Apply filters and sorting to drilldown table"""
472
+ global leaderboard_df_cache
473
+
474
+ df = leaderboard_df_cache.copy() if leaderboard_df_cache is not None else data_loader.load_leaderboard()
475
+
476
+ # Apply model filter from sidebar
477
+ selected_model = model_filter.value if hasattr(model_filter, 'value') else "All Models"
478
+ if selected_model != "All Models":
479
+ df = df[df['model'] == selected_model]
480
+
481
+ # Apply agent type filter
482
+ if agent_type != "All":
483
+ df = df[df['agent_type'] == agent_type]
484
+
485
+ # Apply provider filter
486
+ if provider != "All":
487
+ df = df[df['provider'] == provider]
488
+
489
+ # Sort
490
+ ascending = (sort_order == "Ascending")
491
+ df = df.sort_values(by=sort_by_col, ascending=ascending).reset_index(drop=True)
492
+
493
+ # Prepare simplified dataframe for display
494
+ display_df = df[[
495
+ 'run_id', 'model', 'agent_type', 'provider', 'success_rate',
496
+ 'total_tests', 'avg_duration_ms', 'total_cost_usd', 'submitted_by'
497
+ ]].copy()
498
+ display_df.columns = ['Run ID', 'Model', 'Agent Type', 'Provider', 'Success Rate', 'Tests', 'Duration (ms)', 'Cost (USD)', 'Submitted By']
499
+
500
+ return gr.update(value=display_df)
501
+
502
+
503
+ def apply_sidebar_filters(selected_model, selected_agent_type):
504
+ """Apply sidebar filters to both leaderboard tabs"""
505
+ global leaderboard_df_cache
506
+
507
+ df = leaderboard_df_cache.copy() if leaderboard_df_cache is not None else data_loader.load_leaderboard()
508
+
509
+ # Apply model filter
510
+ if selected_model != "All Models":
511
+ df = df[df['model'] == selected_model]
512
+
513
+ # Apply agent type filter
514
+ if selected_agent_type != "All":
515
+ df = df[df['agent_type'] == selected_agent_type]
516
+
517
+ # For HTML leaderboard
518
+ sorted_df = df.sort_values(by='success_rate', ascending=False).reset_index(drop=True)
519
+ html = generate_leaderboard_html(sorted_df, 'success_rate', False)
520
+
521
+ # For drilldown table
522
+ display_df = df[[
523
+ 'run_id', 'model', 'agent_type', 'provider', 'success_rate',
524
+ 'total_tests', 'avg_duration_ms', 'total_cost_usd', 'submitted_by'
525
+ ]].copy()
526
+ display_df.columns = ['Run ID', 'Model', 'Agent Type', 'Provider', 'Success Rate', 'Tests', 'Duration (ms)', 'Cost (USD)', 'Submitted By']
527
+
528
+ # Update trends
529
+ trends_fig = create_trends_plot(df)
530
+
531
+ # Update compare dropdowns
532
+ compare_choices = []
533
+ for _, row in df.iterrows():
534
+ label = f"{row.get('model', 'Unknown')} - {row.get('timestamp', 'N/A')}"
535
+ value = row.get('run_id', '')
536
+ if value:
537
+ compare_choices.append((label, value))
538
+
539
+ return {
540
+ leaderboard_by_model: gr.update(value=html),
541
+ leaderboard_table: gr.update(value=display_df),
542
+ trends_plot: gr.update(value=trends_fig),
543
+ compare_components['compare_run_a_dropdown']: gr.update(choices=compare_choices),
544
+ compare_components['compare_run_b_dropdown']: gr.update(choices=compare_choices)
545
+ }
546
+
547
+
548
  def load_drilldown(agent_type, provider):
549
  """Load drilldown data with filters"""
550
  global current_drilldown_df
 
1025
  gr.Markdown("---")
1026
 
1027
  # Filters section
1028
+ gr.Markdown("### 🔍 Filters")
1029
+
1030
+ model_filter = gr.Dropdown(
1031
  choices=["All Models"],
1032
  value="All Models",
1033
  label="Model",
1034
+ info="Filter evaluations by AI model. Select 'All Models' to see all runs."
1035
  )
1036
+
1037
  sidebar_agent_type_filter = gr.Radio(
1038
  choices=["All", "tool", "code", "both"],
1039
  value="All",
1040
  label="Agent Type",
1041
+ info="Tool: Function calling agents | Code: Code execution | Both: Hybrid agents"
1042
  )
1043
 
1044
  # Main content area
 
1050
  gr.Markdown("## 🏆 Agent Evaluation Leaderboard")
1051
  with gr.Tabs():
1052
  with gr.TabItem("🏆 Leaderboard"):
1053
+ gr.Markdown("*Styled leaderboard with inline filters*")
1054
+
1055
+ # Inline filters for styled leaderboard
1056
  with gr.Row():
1057
+ with gr.Column(scale=1):
1058
+ agent_type_filter = gr.Radio(
1059
+ choices=["All", "tool", "code", "both"],
1060
+ value="All",
1061
+ label="Agent Type",
1062
+ info="Filter by agent type"
1063
+ )
1064
+ with gr.Column(scale=1):
1065
+ provider_filter = gr.Dropdown(
1066
+ choices=["All"],
1067
+ value="All",
1068
+ label="Provider",
1069
+ info="Filter by provider"
1070
+ )
1071
+ with gr.Column(scale=1):
1072
+ sort_by_dropdown = gr.Dropdown(
1073
+ choices=["success_rate", "total_cost_usd", "avg_duration_ms", "total_tokens"],
1074
+ value="success_rate",
1075
+ label="Sort By"
1076
+ )
1077
+ with gr.Column(scale=1):
1078
+ sort_order = gr.Radio(
1079
+ choices=["Descending", "Ascending"],
1080
+ value="Descending",
1081
+ label="Sort Order"
1082
+ )
1083
+
1084
+ with gr.Row():
1085
+ apply_filters_btn = gr.Button("🔍 Apply Filters", variant="primary", size="sm")
1086
+
1087
+ # Styled HTML leaderboard
1088
+ leaderboard_by_model = gr.HTML(label="Styled Leaderboard")
1089
 
1090
  with gr.TabItem("📋 DrillDown"):
1091
+ gr.Markdown("*Click any row to view detailed run information*")
1092
+
1093
+ # Inline filters for drilldown table
1094
  with gr.Row():
1095
+ with gr.Column(scale=1):
1096
+ drilldown_agent_type_filter = gr.Radio(
1097
+ choices=["All", "tool", "code", "both"],
1098
+ value="All",
1099
+ label="Agent Type",
1100
+ info="Filter by agent type"
1101
+ )
1102
+ with gr.Column(scale=1):
1103
+ drilldown_provider_filter = gr.Dropdown(
1104
+ choices=["All"],
1105
+ value="All",
1106
+ label="Provider",
1107
+ info="Filter by provider"
1108
+ )
1109
+ with gr.Column(scale=1):
1110
+ drilldown_sort_by_dropdown = gr.Dropdown(
1111
+ choices=["success_rate", "total_cost_usd", "avg_duration_ms", "total_tokens"],
1112
+ value="success_rate",
1113
+ label="Sort By"
1114
+ )
1115
+ with gr.Column(scale=1):
1116
+ drilldown_sort_order = gr.Radio(
1117
+ choices=["Descending", "Ascending"],
1118
+ value="Descending",
1119
+ label="Sort Order"
1120
+ )
1121
+
1122
+ with gr.Row():
1123
+ apply_drilldown_filters_btn = gr.Button("🔍 Apply Filters", variant="primary", size="sm")
1124
+
1125
+ # Simple table controlled by inline filters
1126
  leaderboard_table = gr.Dataframe(
1127
+ headers=["Run ID", "Model", "Agent Type", "Provider", "Success Rate", "Tests", "Duration (ms)", "Cost (USD)", "Submitted By"],
1128
+ interactive=False,
1129
+ wrap=True
1130
  )
1131
 
1132
  with gr.TabItem("📈 Trends"):
 
1335
 
1336
  app.load(
1337
  fn=load_leaderboard,
1338
+ outputs=[leaderboard_by_model, model_filter, model_filter, provider_filter]
1339
  )
1340
 
1341
  app.load(
 
1346
  # Load drilldown data on page load
1347
  app.load(
1348
  fn=load_drilldown,
1349
+ inputs=[drilldown_agent_type_filter, drilldown_provider_filter],
1350
  outputs=[leaderboard_table]
1351
  )
1352
 
1353
  # Refresh button handler
1354
  refresh_leaderboard_btn.click(
1355
  fn=refresh_leaderboard,
1356
+ outputs=[leaderboard_by_model, model_filter, model_filter]
1357
  )
1358
 
1359
+ # Leaderboard tab inline filters
1360
  apply_filters_btn.click(
1361
+ fn=apply_leaderboard_filters,
1362
+ inputs=[agent_type_filter, provider_filter, sort_by_dropdown, sort_order],
1363
  outputs=[leaderboard_by_model]
1364
  )
1365
 
1366
+ # DrillDown tab inline filters
1367
+ apply_drilldown_filters_btn.click(
1368
+ fn=apply_drilldown_filters,
1369
+ inputs=[drilldown_agent_type_filter, drilldown_provider_filter, drilldown_sort_by_dropdown, drilldown_sort_order],
1370
  outputs=[leaderboard_table]
1371
  )
1372
 
1373
+ # Sidebar filters (apply to all tabs)
1374
+ model_filter.change(
1375
+ fn=apply_sidebar_filters,
1376
+ inputs=[model_filter, sidebar_agent_type_filter],
1377
+ outputs=[leaderboard_by_model, leaderboard_table, trends_plot, compare_components['compare_run_a_dropdown'], compare_components['compare_run_b_dropdown']]
 
 
 
 
1378
  )
1379
 
 
 
 
 
1380
  sidebar_agent_type_filter.change(
1381
+ fn=apply_sidebar_filters,
1382
+ inputs=[model_filter, sidebar_agent_type_filter],
1383
+ outputs=[leaderboard_by_model, leaderboard_table, trends_plot, compare_components['compare_run_a_dropdown'], compare_components['compare_run_b_dropdown']]
1384
  )
1385
 
1386
+
1387
  viz_type.change(
1388
  fn=update_analytics,
1389
  inputs=[viz_type],