Spaces:
Running
Running
import json
import os
import re

import pandas as pd

from utils import create_hyperlinked_names
def sum_lst(lst):
    """Fold the items of a non-empty list together with ``+=``.

    A list of lists yields one concatenated list; a list of numbers yields
    their sum. The input and its items are never modified.

    Args:
        lst: Non-empty list whose items are lists or numbers.

    Returns:
        A new list (for list items) or a number (for numeric items).

    Raises:
        AssertionError: If ``lst`` is not a non-empty list, or if an item
            after the first is not a list, int or float.
    """
    # Compute the size defensively so the failure message itself cannot
    # raise TypeError for inputs that have no length (e.g. an int).
    size = len(lst) if hasattr(lst, "__len__") else "n/a"
    assert isinstance(lst, list) and lst, f"Input should be a non-empty list, got {type(lst)}, size {size}"
    first = lst[0]
    # ``+=`` extends a list in place. Seed the total with a copy so the
    # caller's first element is not silently grown into the full result.
    total = list(first) if isinstance(first, list) else first
    for item in lst[1:]:
        assert isinstance(item, (list, int, float)), f"Expected types are list and numbers, got {type(item)}"
        total += item
    return total
# Directory that load_data() scans for per-model score reports.
SCORE_BASE_DIR = "Scores"
# Keys of a report's "metadata" section (the ones generate_model_row reads).
META_DATA = ["model_name", "model_size", "url"]

# Benchmark registry: modality -> sub-task -> dataset names.
DATASETS = {
    "image": {
        "I-CLS": ['VOC2007', 'N24News', 'SUN397', 'ObjectNet', 'Country211', 'Place365', 'ImageNet-1K', 'HatefulMemes', 'ImageNet-A', 'ImageNet-R'],
        "I-QA": ['OK-VQA', 'A-OKVQA', 'DocVQA', 'InfographicsVQA', 'ChartQA', 'Visual7W-Pointing', 'ScienceQA', 'GQA', 'TextVQA', 'VizWiz'],
        "I-RET": ['VisDial', 'CIRR', 'VisualNews_t2i', 'VisualNews_i2t', 'MSCOCO_t2i', 'MSCOCO_i2t', 'NIGHTS', 'WebQA', 'FashionIQ', 'Wiki-SS-NQ', 'OVEN', 'EDIS'],
        "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W'],
    },
    "visdoc": {
        "VisDoc": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry', 'VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA', 'ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc'],
    },
    "video": {
        "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'],
        "V-QA": ['Video-MME', 'MVBench', 'NExTQA', 'EgoSchema'],
        "V-RET": ['MSR-VTT', 'MSVD', 'DiDeMo', 'VATEX', 'YouCook2'],
        "V-MRET": ['QVHighlight', 'Charades-STA', 'MomentSeeker', 'ActivityNetQA'],
    },
}

# Flattened views of the registry. They are built as fresh lists so that they
# never alias (or extend in place) the per-sub-task lists stored in DATASETS;
# calculate_score relies on those lists keeping their original length.
ALL_DATASETS_SPLITS = {
    modality: [name for names in sub_tasks.values() for name in names]
    for modality, sub_tasks in DATASETS.items()
}
ALL_DATASETS = [name for names in ALL_DATASETS_SPLITS.values() for name in names]
MODALITIES = list(DATASETS)
# Dataset name -> metric key to read from a per-dataset score dict.
# '__default__' holds the fallback metric ('hit@1') for datasets not listed.
SPECIAL_METRICS = {
    '__default__': 'hit@1',
}

# Leading (non-score) columns followed by one column per aggregated task.
BASE_COLS = ['Rank', 'Models', 'Model Size(B)']
TASKS = ["Overall", "Image-Overall", "I-CLS", "I-QA", "I-RET", "I-VG", "VisDoc", "Video-Overall", "V-CLS", "V-QA", "V-RET", "V-MRET"]
COLUMN_NAMES = BASE_COLS + TASKS

# One display datatype per entry of COLUMN_NAMES, in the same order: the three
# base columns first, then 'number' for every task column. The list must stay
# the same length as COLUMN_NAMES; a stray fourth base type would shift every
# task column's type by one.
# NOTE(review): presumably consumed as the per-column datatype list of the UI
# table - confirm against the caller.
DATA_TITLE_TYPE = ['number', 'markdown', 'str'] + \
    ['number'] * len(TASKS)
# Static text blocks; the introduction and the citation are currently empty.
TABLE_INTRODUCTION = ""

LEADERBOARD_INFO = "\n## Dataset Summary\n"

CITATION_BUTTON_TEXT = r""
def load_single_json(file_path):
    """Read one JSON file and return the decoded object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` (a dict for score reports).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding,
    # so non-ASCII model names load the same way on every machine.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
def load_data(base_dir=SCORE_BASE_DIR):
    """Load every ``*-scores_report.json`` file found directly in ``base_dir``.

    Args:
        base_dir: Directory to scan (not recursive).

    Returns:
        A list with one decoded report per matching file, ordered by file
        name.

    Raises:
        OSError: If ``base_dir`` cannot be listed or a report cannot be read.
    """
    # os.listdir returns entries in arbitrary order; sort so the load order
    # (and therefore the order of tied rows downstream) is reproducible.
    report_names = sorted(
        name for name in os.listdir(base_dir)
        if name.endswith('-scores_report.json')
    )
    return [load_single_json(os.path.join(base_dir, name)) for name in report_names]
def calculate_score(raw_scores=None):
    """Average raw per-dataset scores per sub-task, per modality and overall.

    Algorithm summary:
        1. For every dataset registered in ``DATASETS``, read
           ``raw_scores[modality][dataset]``. A missing entry, ``None`` or the
           ``"FILE_N/A"`` placeholder counts as 0.0. A dict entry contributes
           the value stored under the dataset's metric (its
           ``SPECIAL_METRICS`` override, otherwise the default metric).
        2. Sub-task average = sum of its dataset scores / number of datasets
           registered for the sub-task (missing datasets still count in the
           denominator).
        3. ``"<Modality>-Overall"`` = sum of all dataset scores of the
           modality / number of datasets of the modality.
        4. ``"Overall"`` = sum of all dataset scores / total number of
           datasets, i.e. every dataset has equal weight.
        Sums are accumulated unrounded; each reported average is rounded to
        two decimals.

    Args:
        raw_scores: Mapping ``{modality: {dataset: score}}`` where a score is
            a number, a ``{metric: number}`` dict, or ``"FILE_N/A"``.
            ``None`` is treated as an empty mapping.

    Returns:
        dict mapping each sub-task name, each ``"<Modality>-Overall"`` and
        ``"Overall"`` to its rounded average.
    """
    if raw_scores is None:
        raw_scores = {}
    # Single source of truth for the fallback metric name.
    default_metric = SPECIAL_METRICS.get('__default__', 'hit@1')

    def get_avg(sum_score, leng):
        avg = sum_score / leng if leng > 0 else 0.0
        return round(avg, 2)  # Round to 2 decimal places

    def get_dataset_score(modality, dataset):
        # ``or {}`` also covers a modality explicitly stored as null.
        score = (raw_scores.get(modality) or {}).get(dataset, 0.0)
        if isinstance(score, dict):
            score = score.get(SPECIAL_METRICS.get(dataset, default_metric), 0.0)
        if score is None or score == "FILE_N/A":
            return 0.0
        return score

    avg_scores = {}
    modality_totals = {}  # modality -> (sum of scores, number of datasets)
    for modality, sub_tasks in DATASETS.items():  # Ex.: ('image', {'I-CLS': [...], 'I-QA': [...]})
        modality_sum, modality_count = 0.0, 0
        for sub_task, datasets in sub_tasks.items():  # Ex.: ('I-CLS', ['VOC2007', 'N24News', ...])
            sub_task_sum = sum((get_dataset_score(modality, d) for d in datasets), 0.0)
            avg_scores[sub_task] = get_avg(sub_task_sum, len(datasets))
            modality_sum += sub_task_sum
            modality_count += len(datasets)
        modality_totals[modality] = (modality_sum, modality_count)

    total_sum, total_count = 0.0, 0
    for modality, (modality_sum, modality_count) in modality_totals.items():
        avg_scores[f"{modality.capitalize()}-Overall"] = get_avg(modality_sum, modality_count)
        total_sum += modality_sum
        total_count += modality_count
    avg_scores['Overall'] = get_avg(total_sum, total_count)
    return avg_scores
def generate_model_row(data):
    """Build one leaderboard row from a decoded score report.

    Args:
        data: Report with a ``'metadata'`` mapping and a ``'metrics'`` entry
            that is handed to ``calculate_score``.

    Returns:
        dict with ``'Models'``, ``'Model Size(B)'`` and ``'URL'`` (``None``
        when the metadata key is absent) plus every key returned by
        ``calculate_score``.

    Raises:
        KeyError: If ``data`` has no ``'metadata'`` or no ``'metrics'`` key.
    """
    info = data['metadata']
    return {
        'Models': info.get('model_name'),
        'Model Size(B)': info.get('model_size'),
        'URL': info.get('url'),
        **calculate_score(data['metrics']),
    }
def get_df():
    """Build the ranked leaderboard DataFrame from all score reports.

    Rows are sorted by ``'Overall'`` in descending order, numbered from 1 in
    a ``'Rank'`` column, and then passed through
    ``create_hyperlinked_names``.

    Returns:
        The DataFrame returned by ``create_hyperlinked_names``, or an empty
        DataFrame with the ``COLUMN_NAMES`` columns when no report was
        loaded.
    """
    rows = [generate_model_row(data) for data in load_data()]
    if not rows:
        # pd.DataFrame([]) has no 'Overall' column, so sorting would raise
        # KeyError; return an empty table with the displayed columns instead.
        return pd.DataFrame(columns=COLUMN_NAMES)
    df = pd.DataFrame(rows)
    # Stable sort: models with equal 'Overall' keep their load order.
    df = df.sort_values(by='Overall', ascending=False, kind='mergesort').reset_index(drop=True)
    df['Rank'] = range(1, len(df) + 1)
    return create_hyperlinked_names(df)
def refresh_data():
    """Rebuild the leaderboard and return only the ``COLUMN_NAMES`` columns."""
    return get_df()[COLUMN_NAMES]
def search_and_filter_models(df, query, min_size, max_size):
    """Filter the leaderboard by model-name query and model-size range.

    Args:
        df: Leaderboard frame containing at least the ``COLUMN_NAMES``
            columns. It is not modified.
        query: Case-insensitive pattern matched against ``'Models'``; a falsy
            value disables name filtering. It is used as a regular
            expression when it compiles, and as literal text otherwise.
        min_size: Inclusive lower bound for ``'Model Size(B)'``.
        max_size: Inclusive upper bound for ``'Model Size(B)'``.

    Returns:
        A new DataFrame with the matching rows, restricted to
        ``COLUMN_NAMES``.
    """
    # Size assumed for models whose size is 'unknown' or not numeric.
    unknown_size = 1000.0

    def size_in_range(size):
        try:
            value = unknown_size if size == 'unknown' else float(size)
        except (TypeError, ValueError):
            # None, pd.NA or free-form text: treat like 'unknown' rather
            # than aborting the whole filter.
            value = unknown_size
        return bool(min_size <= value <= max_size)

    filtered_df = df.copy()
    if query:
        names = filtered_df['Models']
        try:
            name_mask = names.str.contains(query, case=False, na=False)
        except re.error:
            # The query is free text from the user; input such as "(" is not
            # a valid pattern, so fall back to a plain substring match.
            name_mask = names.str.contains(query, case=False, na=False, regex=False)
        filtered_df = filtered_df[name_mask]

    # Force a boolean dtype: on an empty selection the mapped result is not
    # boolean and would be interpreted as a list of column labels.
    size_mask = filtered_df['Model Size(B)'].map(size_in_range).astype(bool)
    return filtered_df.loc[size_mask, COLUMN_NAMES]