Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,7 +18,7 @@ def make_default_md(arena_df, elo_results):
|
|
| 18 |
|
| 19 |
leaderboard_md = f"""
|
| 20 |
# NeurIPS LLM Merging Competition Leaderboard
|
| 21 |
-
[Website]() | [
|
| 22 |
|
| 23 |
"""
|
| 24 |
return leaderboard_md
|
|
@@ -223,7 +223,6 @@ def recompute_final_ranking(arena_df):
|
|
| 223 |
|
| 224 |
def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
|
| 225 |
arena_df = arena_df.sort_values(by=["final_ranking", "rating"], ascending=[True, False])
|
| 226 |
-
arena_df = arena_df[arena_df["num_battles"] > 2000]
|
| 227 |
arena_df["final_ranking"] = recompute_final_ranking(arena_df)
|
| 228 |
arena_df = arena_df.sort_values(by=["final_ranking"], ascending=True)
|
| 229 |
|
|
@@ -234,7 +233,6 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
|
|
| 234 |
arena_subset_df = arena_subset_df[arena_subset_df.index.isin(arena_df.index)]
|
| 235 |
arena_subset_df = arena_subset_df.sort_values(by=["rating"], ascending=False)
|
| 236 |
# arena_subset_df = arena_subset_df.sort_values(by=["final_ranking"], ascending=True)
|
| 237 |
-
# arena_subset_df = arena_subset_df[arena_subset_df["num_battles"] > 500]
|
| 238 |
arena_subset_df["final_ranking"] = recompute_final_ranking(arena_subset_df)
|
| 239 |
# keep only the models in the subset in arena_df and recompute final_ranking
|
| 240 |
arena_df = arena_df[arena_df.index.isin(arena_subset_df.index)]
|
|
@@ -248,10 +246,6 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
|
|
| 248 |
arena_df = arena_subset_df.join(arena_df["final_ranking"], rsuffix="_global", how="inner")
|
| 249 |
arena_df["ranking_difference"] = arena_df["final_ranking_global"] - arena_df["final_ranking"]
|
| 250 |
|
| 251 |
-
# no tie version
|
| 252 |
-
# arena_df = arena_subset_df.join(arena_df["final_ranking_no_tie"], rsuffix="_global", how="inner")
|
| 253 |
-
# arena_df["ranking_difference"] = arena_df["final_ranking_no_tie_global"] - arena_df["final_ranking_no_tie"]
|
| 254 |
-
|
| 255 |
arena_df = arena_df.sort_values(by=["final_ranking", "rating"], ascending=[True, False])
|
| 256 |
arena_df["final_ranking"] = arena_df.apply(lambda x: create_ranking_str(x["final_ranking"], x["ranking_difference"]), axis=1)
|
| 257 |
|
|
@@ -272,15 +266,6 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
|
|
| 272 |
row.append(model_name)
|
| 273 |
# elo rating
|
| 274 |
row.append(round(arena_df.iloc[i]["rating"]))
|
| 275 |
-
upper_diff = round(
|
| 276 |
-
arena_df.iloc[i]["rating_q975"] - arena_df.iloc[i]["rating"]
|
| 277 |
-
)
|
| 278 |
-
lower_diff = round(
|
| 279 |
-
arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"]
|
| 280 |
-
)
|
| 281 |
-
row.append(f"+{upper_diff}/-{lower_diff}")
|
| 282 |
-
# num battles
|
| 283 |
-
row.append(round(arena_df.iloc[i]["num_battles"]))
|
| 284 |
# Organization
|
| 285 |
row.append(
|
| 286 |
model_table_df[model_table_df["key"] == model_key]["Organization"].values[0]
|
|
@@ -289,11 +274,6 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
|
|
| 289 |
row.append(
|
| 290 |
model_table_df[model_table_df["key"] == model_key]["License"].values[0]
|
| 291 |
)
|
| 292 |
-
cutoff_date = model_table_df[model_table_df["key"] == model_key]["Knowledge cutoff date"].values[0]
|
| 293 |
-
if cutoff_date == "-":
|
| 294 |
-
row.append("Unknown")
|
| 295 |
-
else:
|
| 296 |
-
row.append(cutoff_date)
|
| 297 |
values.append(row)
|
| 298 |
except Exception as e:
|
| 299 |
print(f"{model_key} - {e}")
|
|
@@ -301,23 +281,9 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
|
|
| 301 |
|
| 302 |
key_to_category_name = {
|
| 303 |
"full": "Overall",
|
| 304 |
-
"coding": "Coding",
|
| 305 |
-
"long_user": "Longer Query",
|
| 306 |
-
"english": "English",
|
| 307 |
-
"chinese": "Chinese",
|
| 308 |
-
"french": "French",
|
| 309 |
-
"no_tie": "Exclude Ties",
|
| 310 |
-
"no_short": "Exclude Short",
|
| 311 |
}
|
| 312 |
cat_name_to_explanation = {
|
| 313 |
"Overall": "Overall Questions",
|
| 314 |
-
"Coding": "Coding: whether conversation contains code snippets",
|
| 315 |
-
"Longer Query": "Longer Query (>= 500 tokens)",
|
| 316 |
-
"English": "English Prompts",
|
| 317 |
-
"Chinese": "Chinese Prompts",
|
| 318 |
-
"French": "French Prompts",
|
| 319 |
-
"Exclude Ties": "Exclude Ties and Bothbad",
|
| 320 |
-
"Exclude Short": "User Query >= 5 tokens",
|
| 321 |
}
|
| 322 |
|
| 323 |
def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False):
|
|
@@ -364,7 +330,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
| 364 |
"⭐ Arena Elo",
|
| 365 |
"Organization",
|
| 366 |
"License",
|
| 367 |
-
"Knowledge Cutoff",
|
| 368 |
],
|
| 369 |
datatype=[
|
| 370 |
"number",
|
|
@@ -372,7 +337,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
| 372 |
"number",
|
| 373 |
"str",
|
| 374 |
"str",
|
| 375 |
-
"str",
|
| 376 |
],
|
| 377 |
value=arena_table_vals,
|
| 378 |
elem_id="arena_leaderboard_dataframe",
|
|
@@ -419,7 +383,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
| 419 |
pass
|
| 420 |
|
| 421 |
def update_leaderboard_df(arena_table_vals):
|
| 422 |
-
elo_datarame = pd.DataFrame(arena_table_vals, columns=[ "Rank", "🤖 Model", "⭐ Arena Elo", "Organization", "License"
|
| 423 |
|
| 424 |
# goal: color the rows based on the rank with styler
|
| 425 |
def highlight_max(s):
|
|
@@ -446,7 +410,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
| 446 |
"⭐ Arena Elo",
|
| 447 |
"Organization",
|
| 448 |
"License",
|
| 449 |
-
"Knowledge Cutoff",
|
| 450 |
],
|
| 451 |
datatype=[
|
| 452 |
"number",
|
|
@@ -454,7 +417,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
| 454 |
"number",
|
| 455 |
"str",
|
| 456 |
"str",
|
| 457 |
-
"str",
|
| 458 |
],
|
| 459 |
value=arena_values,
|
| 460 |
elem_id="arena_leaderboard_dataframe",
|
|
@@ -470,7 +432,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
| 470 |
"⭐ Arena Elo",
|
| 471 |
"Organization",
|
| 472 |
"License",
|
| 473 |
-
"Knowledge Cutoff",
|
| 474 |
],
|
| 475 |
datatype=[
|
| 476 |
"number",
|
|
@@ -478,7 +439,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
| 478 |
"number",
|
| 479 |
"str",
|
| 480 |
"str",
|
| 481 |
-
"str",
|
| 482 |
],
|
| 483 |
value=arena_values,
|
| 484 |
elem_id="arena_leaderboard_dataframe",
|
|
|
|
| 18 |
|
| 19 |
leaderboard_md = f"""
|
| 20 |
# NeurIPS LLM Merging Competition Leaderboard
|
| 21 |
+
[Website](https://llm-merging.github.io/index) | [Starter Kit (Github)]() | [Discord](https://discord.com/invite/dPBHEVnV) |
|
| 22 |
|
| 23 |
"""
|
| 24 |
return leaderboard_md
|
|
|
|
| 223 |
|
| 224 |
def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
|
| 225 |
arena_df = arena_df.sort_values(by=["final_ranking", "rating"], ascending=[True, False])
|
|
|
|
| 226 |
arena_df["final_ranking"] = recompute_final_ranking(arena_df)
|
| 227 |
arena_df = arena_df.sort_values(by=["final_ranking"], ascending=True)
|
| 228 |
|
|
|
|
| 233 |
arena_subset_df = arena_subset_df[arena_subset_df.index.isin(arena_df.index)]
|
| 234 |
arena_subset_df = arena_subset_df.sort_values(by=["rating"], ascending=False)
|
| 235 |
# arena_subset_df = arena_subset_df.sort_values(by=["final_ranking"], ascending=True)
|
|
|
|
| 236 |
arena_subset_df["final_ranking"] = recompute_final_ranking(arena_subset_df)
|
| 237 |
# keep only the models in the subset in arena_df and recompute final_ranking
|
| 238 |
arena_df = arena_df[arena_df.index.isin(arena_subset_df.index)]
|
|
|
|
| 246 |
arena_df = arena_subset_df.join(arena_df["final_ranking"], rsuffix="_global", how="inner")
|
| 247 |
arena_df["ranking_difference"] = arena_df["final_ranking_global"] - arena_df["final_ranking"]
|
| 248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
arena_df = arena_df.sort_values(by=["final_ranking", "rating"], ascending=[True, False])
|
| 250 |
arena_df["final_ranking"] = arena_df.apply(lambda x: create_ranking_str(x["final_ranking"], x["ranking_difference"]), axis=1)
|
| 251 |
|
|
|
|
| 266 |
row.append(model_name)
|
| 267 |
# elo rating
|
| 268 |
row.append(round(arena_df.iloc[i]["rating"]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
# Organization
|
| 270 |
row.append(
|
| 271 |
model_table_df[model_table_df["key"] == model_key]["Organization"].values[0]
|
|
|
|
| 274 |
row.append(
|
| 275 |
model_table_df[model_table_df["key"] == model_key]["License"].values[0]
|
| 276 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
values.append(row)
|
| 278 |
except Exception as e:
|
| 279 |
print(f"{model_key} - {e}")
|
|
|
|
| 281 |
|
| 282 |
key_to_category_name = {
|
| 283 |
"full": "Overall",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
}
|
| 285 |
cat_name_to_explanation = {
|
| 286 |
"Overall": "Overall Questions",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
}
|
| 288 |
|
| 289 |
def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False):
|
|
|
|
| 330 |
"⭐ Arena Elo",
|
| 331 |
"Organization",
|
| 332 |
"License",
|
|
|
|
| 333 |
],
|
| 334 |
datatype=[
|
| 335 |
"number",
|
|
|
|
| 337 |
"number",
|
| 338 |
"str",
|
| 339 |
"str",
|
|
|
|
| 340 |
],
|
| 341 |
value=arena_table_vals,
|
| 342 |
elem_id="arena_leaderboard_dataframe",
|
|
|
|
| 383 |
pass
|
| 384 |
|
| 385 |
def update_leaderboard_df(arena_table_vals):
|
| 386 |
+
elo_datarame = pd.DataFrame(arena_table_vals, columns=[ "Rank", "🤖 Model", "⭐ Arena Elo", "Organization", "License"])
|
| 387 |
|
| 388 |
# goal: color the rows based on the rank with styler
|
| 389 |
def highlight_max(s):
|
|
|
|
| 410 |
"⭐ Arena Elo",
|
| 411 |
"Organization",
|
| 412 |
"License",
|
|
|
|
| 413 |
],
|
| 414 |
datatype=[
|
| 415 |
"number",
|
|
|
|
| 417 |
"number",
|
| 418 |
"str",
|
| 419 |
"str",
|
|
|
|
| 420 |
],
|
| 421 |
value=arena_values,
|
| 422 |
elem_id="arena_leaderboard_dataframe",
|
|
|
|
| 432 |
"⭐ Arena Elo",
|
| 433 |
"Organization",
|
| 434 |
"License",
|
|
|
|
| 435 |
],
|
| 436 |
datatype=[
|
| 437 |
"number",
|
|
|
|
| 439 |
"number",
|
| 440 |
"str",
|
| 441 |
"str",
|
|
|
|
| 442 |
],
|
| 443 |
value=arena_values,
|
| 444 |
elem_id="arena_leaderboard_dataframe",
|