# amd-leaderboard / src/utils.py
# siro1's picture
# Initial commit
# 48bb3eb
# Template showing the layout of one vLLM benchmark result.
# The outer key is a run identifier; every value below is a placeholder
# (strings describe the expected format, numbers are example/zero values).
RESULT_SCHEMA = {
    "run_id": {
        "model": "model_name",
        "timestamp": "YYYY-MM-DD HH:MM:SS",
        # Benchmark settings for the run.
        "config": {
            "input_length": 128,
            "output_length": 128,
            "concurrent_requests": 16,
        },
        # Median latency figures plus overall token throughput.
        "performance": {
            "median_ttft_ms": 0.0,  # Time To First Token
            "median_tpot_ms": 0.0,  # Time Per Output Token
            "median_itl_ms": 0.0,  # Inter-Token Latency
            "median_e2el_ms": 0.0,  # End-to-End Latency
            "total_token_throughput": 0.0,
        },
        # Quality metric reported alongside the performance numbers.
        "accuracy": {
            "wikitext_perplexity": 0.0,
        },
    },
}
# Mock data for testing
def _mock_run(*, timestamp, ttft, tpot, itl, e2el, throughput, perplexity):
    """Return one mock run record.

    The model name and benchmark config are the same for every mock run;
    the keyword arguments supply the timestamp and the measured values.
    A fresh set of nested dicts is built on each call, so entries never
    share mutable state.
    """
    return {
        "model": "amd/Mixtral-8x7B-Instruct-v0.1-FP8-KV",
        "timestamp": timestamp,
        "config": {
            "input_length": 128,
            "output_length": 128,
            "concurrent_requests": 16,
        },
        "performance": {
            "median_ttft_ms": ttft,
            "median_tpot_ms": tpot,
            "median_itl_ms": itl,
            "median_e2el_ms": e2el,
            "total_token_throughput": throughput,
        },
        "accuracy": {
            "wikitext_perplexity": perplexity,
        },
    }


# Two sample runs of the same model and config, keyed by run identifier.
MOCK_RESULTS = {
    "run_2024_12_01_baseline": _mock_run(
        timestamp="2024-12-01 10:30:00",
        ttft=45.23,
        tpot=12.56,
        itl=11.89,
        e2el=1589.45,
        throughput=2048.67,
        perplexity=7.89,
    ),
    "run_2024_12_02_optimized": _mock_run(
        timestamp="2024-12-02 14:15:00",
        ttft=42.11,
        tpot=11.23,
        itl=10.95,
        e2el=1456.78,
        throughput=2234.89,
        perplexity=7.91,
    ),
}