Clémentine committed on
Commit
6e44082
·
1 Parent(s): 31c57c2

vc running 4 runs

Browse files
Files changed (2) hide show
  1. app.py +12 -12
  2. globals.py +2 -1
app.py CHANGED
@@ -4,7 +4,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
4
  import threading
5
  import globals
6
  from utils.io import initialize_models_providers_file, save_results, load_results, load_models_providers, get_results_table, load_models_providers_str, get_summary_stats
7
- from utils.jobs import run_single_job, launch_jobs, update_job_statuses, relaunch_failed_jobs
8
  from typing import List, Optional
9
 
10
 
@@ -65,24 +65,24 @@ def create_app() -> gr.Blocks:
65
  show_copy_button=True,
66
  show_fullscreen_button=True,
67
  wrap=True,
68
- static_columns=list(range(9)),
69
- datatype=["str", "str", "str", "str", "str", "str", "str", "str", "html", "str"],
70
  elem_id="results_table"
71
  )
72
 
73
 
74
  # Event handlers
75
  def launch_single_and_update(model: str, provider: str):
76
- """Launch a single job and return updated table and stats."""
77
  if not model or not provider:
78
  return "❌ Please provide both model and provider", get_results_table(), get_summary_stats()
79
 
80
- job_id = run_single_job(model, provider, globals.TASKS)
81
- if job_id == -1:
82
- return "❌ Failed to launch job (may already be running)", get_results_table(), get_summary_stats()
83
 
84
  save_results()
85
- return f"✅ Launched job for {model} on {provider} (ID: {job_id})", get_results_table(), get_summary_stats()
86
 
87
  launch_single_btn.click(
88
  fn=launch_single_and_update,
@@ -116,17 +116,17 @@ def create_app() -> gr.Blocks:
116
  print(f"[Relaunch] Cell selected - Row: {evt.index[0]}, Col: {evt.index[1]}, Value: {evt.value}")
117
 
118
  # If we selected a "rerun" cell, we relaunch a job
119
- if evt.index[1] == 9:
120
  # Get the full row data from the dataframe
121
  df = get_results_table()
122
  row_data = df.data.iloc[evt.index[0]]
123
 
124
  model = row_data['Model']
125
  provider = row_data['Provider']
126
- print(f"[Relaunch] Relaunching job - Model: {model}, Provider: {provider}")
127
 
128
- run_single_job(model, provider, globals.TASKS)
129
- # Save after individual relaunch
130
  save_results()
131
 
132
  # Then update the table and stats
 
4
  import threading
5
  import globals
6
  from utils.io import initialize_models_providers_file, save_results, load_results, load_models_providers, get_results_table, load_models_providers_str, get_summary_stats
7
+ from utils.jobs import run_single_job, run_multiple_jobs, launch_jobs, update_job_statuses, relaunch_failed_jobs
8
  from typing import List, Optional
9
 
10
 
 
65
  show_copy_button=True,
66
  show_fullscreen_button=True,
67
  wrap=True,
68
+ static_columns=list(range(11)),
69
+ datatype=["str", "str", "str", "str", "str", "str", "str", "str", "str", "str", "html", "str"],
70
  elem_id="results_table"
71
  )
72
 
73
 
74
  # Event handlers
75
  def launch_single_and_update(model: str, provider: str):
76
+ """Launch multiple jobs for a model-provider combination and return updated table and stats."""
77
  if not model or not provider:
78
  return "❌ Please provide both model and provider", get_results_table(), get_summary_stats()
79
 
80
+ job_ids = run_multiple_jobs(model, provider, globals.TASKS)
81
+ if not job_ids:
82
+ return "❌ Failed to launch jobs (may already be running)", get_results_table(), get_summary_stats()
83
 
84
  save_results()
85
+ return f"✅ Launched {len(job_ids)} jobs for {model} on {provider}", get_results_table(), get_summary_stats()
86
 
87
  launch_single_btn.click(
88
  fn=launch_single_and_update,
 
116
  print(f"[Relaunch] Cell selected - Row: {evt.index[0]}, Col: {evt.index[1]}, Value: {evt.value}")
117
 
118
  # If we selected a "rerun" cell, we relaunch a job
119
+ if evt.index[1] == 11:
120
  # Get the full row data from the dataframe
121
  df = get_results_table()
122
  row_data = df.data.iloc[evt.index[0]]
123
 
124
  model = row_data['Model']
125
  provider = row_data['Provider']
126
+ print(f"[Relaunch] Relaunching {globals.NUM_RUNS_PER_JOB} jobs - Model: {model}, Provider: {provider}")
127
 
128
+ run_multiple_jobs(model, provider, globals.TASKS)
129
+ # Save after relaunch
130
  save_results()
131
 
132
  # Then update the table and stats
globals.py CHANGED
@@ -4,7 +4,7 @@ import threading
4
  from typing import Dict, Any, Optional
5
 
6
  # Type definition for job result entries
7
- JobResult = Dict[str, Any] # {model, provider, last_run, status, current_score, previous_score, job_id, start_time, duration, completed_at}
8
 
9
  # Global variables to track jobs
10
  job_results: Dict[str, JobResult] = {} # {model_provider_key: JobResult}
@@ -12,6 +12,7 @@ results_lock: threading.Lock = threading.Lock()
12
 
13
  # Configuration
14
  NUM_MODELS_RUN: int = 100
 
15
  RESULTS_DATASET_NAME: str = "IPTesting/inference-provider-test-results"
16
  LOCAL_CONFIG_FILE: str = "/home/user/app/model_providers.txt"
17
  TASKS: str = "extended|ifeval|0,lighteval|gsm_plus|0,lighteval|gpqa:diamond|0"
 
4
  from typing import Dict, Any, Optional
5
 
6
  # Type definition for job result entries
7
+ JobResult = Dict[str, Any] # {model, provider, last_run, status, current_score, previous_score, job_id, start_time, duration, completed_at, runs: [{job_id, score, status, start_time, duration, completed_at}]}
8
 
9
  # Global variables to track jobs
10
  job_results: Dict[str, JobResult] = {} # {model_provider_key: JobResult}
 
12
 
13
  # Configuration
14
  NUM_MODELS_RUN: int = 100
15
+ NUM_RUNS_PER_JOB: int = 4 # Number of times to run each job for variance reduction
16
  RESULTS_DATASET_NAME: str = "IPTesting/inference-provider-test-results"
17
  LOCAL_CONFIG_FILE: str = "/home/user/app/model_providers.txt"
18
  TASKS: str = "extended|ifeval|0,lighteval|gsm_plus|0,lighteval|gpqa:diamond|0"