amd-leaderboard / app.py
Tonic's picture
Solves 500 error for some users
c66819e verified
raw
history blame
6.37 kB
import gradio as gr
import pandas as pd
from datetime import datetime
import os
from datasets import load_dataset, Dataset
# Configuration
# Hugging Face Hub dataset id used both for loading and for pushing results.
DATASET_ID = "siro1/amd-hackathon"
HF_TOKEN = os.environ.get("HF_TOKEN") # Optional: for write access
# Module-level cache of the leaderboard table. It starts as None and is
# assigned by load_dataframe() and update_data().
dataframe: pd.DataFrame = None
def load_dataframe(dataset=None) -> pd.DataFrame:
    """Build the leaderboard table from the Hub dataset and cache it.

    Args:
        dataset: An already-loaded dataset to convert. When omitted, the
            "train" split of DATASET_ID is downloaded afresh.

    Returns:
        The leaderboard DataFrame with display column names, ordered by
        throughput from highest to lowest. The same frame is also stored in
        the module-level ``dataframe``.
    """
    global dataframe

    if dataset is None:
        # Bypass the local cache so the newest submissions are fetched.
        dataset = load_dataset(
            DATASET_ID, split="train", download_mode="force_redownload"
        )
    print(f"Loaded dataset: {len(dataset)}")

    # Display column name -> raw field name in the stored dataset.
    display_to_raw = {
        "Team": "team",
        "Timestamp": "timestamp",
        "TTFT (ms)": "ttft",
        "TPOT (ms)": "tpot",
        "ITL (ms)": "itl",
        "E2E Latency (ms)": "e2e",
        "Throughput (tokens/s)": "throughput",
        "Bits per Byte": "bits_per_byte",
        "Byte Perplexity": "byte_perplexity",
        "Word Perplexity": "word_perplexity",
    }

    def to_display_row(item):
        # Re-key one raw record with the human-readable column names.
        return {label: item[field] for label, field in display_to_raw.items()}

    renamed = dataset.map(
        to_display_row,
        batch_size=64,
        remove_columns=dataset.column_names,
    )
    dataframe = renamed.to_pandas().sort_values(
        "Throughput (tokens/s)", ascending=False
    )
    return dataframe
def update_data(
    team_name,
    ttft,
    tpot,
    itl,
    e2e,
    throughput,
    bits_per_byte,
    byte_perplexity,
    word_perplexity,
):
    """Append a new submission to the in-memory leaderboard table.

    Only the module-level ``dataframe`` cache is updated here; nothing is
    written to the Hugging Face Hub by this function.

    Args:
        team_name: Name of the submitting team.
        ttft, tpot, itl, e2e, throughput, bits_per_byte, byte_perplexity,
            word_perplexity: Metric values; each is converted with ``float``.

    Returns:
        The updated DataFrame, sorted by throughput in descending order.

    Raises:
        TypeError, ValueError: If a metric cannot be converted to ``float``.
    """
    global dataframe

    # Start from an empty table when nothing has been loaded yet instead of
    # failing with AttributeError on None.
    if dataframe is None:
        existing_data = []
    else:
        existing_data = dataframe.to_dict(orient="records")
    print(f"Current data length: {len(existing_data)}")

    new_entry = {
        "Team": team_name,
        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "TTFT (ms)": float(ttft),
        "TPOT (ms)": float(tpot),
        "ITL (ms)": float(itl),
        "E2E Latency (ms)": float(e2e),
        "Throughput (tokens/s)": float(throughput),
        "Bits per Byte": float(bits_per_byte),
        "Byte Perplexity": float(byte_perplexity),
        "Word Perplexity": float(word_perplexity),
    }
    existing_data.append(new_entry)

    df = pd.DataFrame(existing_data)
    df = df.sort_values("Throughput (tokens/s)", ascending=False)
    dataframe = df
    print(f"New data length: {len(dataframe)}")
    return df
def api_submit_results(
    team_name: str,
    ttft: float,
    tpot: float,
    itl: float,
    e2e: float,
    throughput: float,
    bits_per_byte: float,
    byte_perplexity: float,
    word_perplexity: float,
) -> list:
    """Handle one benchmark submission coming from the UI or the API.

    Returns:
        A two-element list ``[status, table]``. On success ``status`` is
        ``"Success"`` and ``table`` is the updated leaderboard DataFrame. On
        any error ``status`` is ``"Failed: <message>"`` and ``table`` is
        ``None``; the exception is reported instead of being raised.
    """
    try:
        # Update the in-memory leaderboard with the new submission
        new_data = update_data(
            team_name=team_name,
            ttft=ttft,
            tpot=tpot,
            itl=itl,
            e2e=e2e,
            throughput=throughput,
            bits_per_byte=bits_per_byte,
            byte_perplexity=byte_perplexity,
            word_perplexity=word_perplexity,
        )
        # The upload is skipped here (push_to_hub=False).
        refresh_leaderboard(push_to_hub=False)
    except Exception as e:
        # Top-level boundary: log the error and report it to the caller
        # instead of letting the request fail.
        print(f"Submission failed: {e}")
        return ["Failed: " + str(e), None]
    return ["Success", new_data]
def refresh_leaderboard(push_to_hub: bool = True):
    """Upload the in-memory leaderboard to the Hugging Face Hub dataset.

    The cached table uses display column names; each row is re-keyed with the
    raw field names before the dataset is pushed to DATASET_ID.

    Args:
        push_to_hub: When False the call returns immediately. The converted
            dataset is only ever used for the upload, so there is no reason
            to build it otherwise.

    Returns:
        None.
    """
    if not push_to_hub:
        return
    if dataframe is None:
        # Nothing has been loaded yet, so there is nothing to upload.
        print("Leaderboard not loaded yet; nothing to push")
        return

    # Raw field name in the stored dataset -> display column name.
    raw_to_display = {
        "team": "Team",
        "timestamp": "Timestamp",
        "ttft": "TTFT (ms)",
        "tpot": "TPOT (ms)",
        "itl": "ITL (ms)",
        "e2e": "E2E Latency (ms)",
        "throughput": "Throughput (tokens/s)",
        "bits_per_byte": "Bits per Byte",
        "byte_perplexity": "Byte Perplexity",
        "word_perplexity": "Word Perplexity",
    }

    dataset = Dataset.from_pandas(dataframe)
    dataset = dataset.map(
        lambda item: {
            field: item[label] for field, label in raw_to_display.items()
        },
        # Drop every source column, keeping only the re-keyed fields.
        remove_columns=dataset.column_names,
    )
    dataset.push_to_hub(DATASET_ID, token=HF_TOKEN)
def get_leaderboard():
    """Return the cached leaderboard table after logging its row count."""
    current = dataframe
    print(f"Getting leaderboard: {len(current)}")
    return current
# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks app for the leaderboard.

    The app shows the leaderboard table, exposes a hidden submission form
    whose click handler is published as the ``submit_results`` API endpoint,
    and registers two 15-second timers: one calls refresh_leaderboard() and
    the other reloads the table from the in-memory cache.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(title="AMD vLLM Benchmark Leaderboard") as demo:
        gr.Markdown("# AMD vLLM Benchmark Leaderboard")
        gr.Markdown(
            "Track and compare performance and accuracy metrics for vLLM benchmarks"
        )

        with gr.Tab("Leaderboard"):
            # Initial load
            leaderboard_table = gr.DataFrame(
                value=load_dataframe(),
                label="Benchmark Results",
                interactive=False,
            )

        # Invisible form: its components define the inputs and outputs of
        # the submit_results endpoint.
        with gr.Column(visible=False):
            team_input = gr.Textbox()
            ttft_input = gr.Number()
            tpot_input = gr.Number()
            itl_input = gr.Number()
            e2e_input = gr.Number()
            throughput_input = gr.Number()
            bits_input = gr.Number()
            byte_perp_input = gr.Number()
            word_perp_input = gr.Number()
            submit_output = gr.Textbox()
            submit_btn = gr.Button("Submit")
            submit_btn.click(
                fn=api_submit_results,
                inputs=[
                    team_input,
                    ttft_input,
                    tpot_input,
                    itl_input,
                    e2e_input,
                    throughput_input,
                    bits_input,
                    byte_perp_input,
                    word_perp_input,
                ],
                outputs=[submit_output, leaderboard_table],
                api_name="submit_results",
                concurrency_limit=10,
                show_progress="full",
            )

        refresh_btn = gr.Button("Refresh Leaderboard")
        # refresh_leaderboard has no outputs, so on its own the button could
        # never change the table; follow it with a reload of the cached data.
        refresh_btn.click(
            fn=refresh_leaderboard,
        ).then(
            fn=get_leaderboard,
            outputs=leaderboard_table,
        )

        # Call refresh_leaderboard every 15 seconds
        timer = gr.Timer(15)  # 15 seconds interval
        timer.tick(
            fn=refresh_leaderboard,
        )

        # Reload the displayed table from the cache every 15 seconds
        data_timer = gr.Timer(15)
        data_timer.tick(
            fn=get_leaderboard,
            outputs=leaderboard_table,
        )
    return demo
# Create and launch the app
if __name__ == "__main__":
    demo = create_interface()
    # Enable the request queue, capped at 100 entries.
    demo.queue(max_size=100)
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "ssr_mode": False,
    }
    demo.launch(**launch_options)