rename to cross_validation
Browse files
- constants.py +4 -2
- validation.py +6 -4
constants.py
CHANGED
|
@@ -40,11 +40,13 @@ CV_COLUMN = "hierarchical_cluster_IgG_isotype_stratified_fold"
|
|
| 40 |
# Example files
|
| 41 |
EXAMPLE_FILE_DICT = {
|
| 42 |
"GDPa1": "data/example-predictions.csv",
|
| 43 |
-
"
|
| 44 |
}
|
| 45 |
ANTIBODY_NAMES_DICT = {
|
| 46 |
"GDPa1": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1"])["antibody_name"].tolist(),
|
| 47 |
-
"
|
|
|
|
|
|
|
| 48 |
}
|
| 49 |
|
| 50 |
# Huggingface API
|
|
|
|
| 40 |
# Example files
|
| 41 |
EXAMPLE_FILE_DICT = {
|
| 42 |
"GDPa1": "data/example-predictions.csv",
|
| 43 |
+
"GDPa1_cross_validation": "data/example-predictions-cv.csv",
|
| 44 |
}
|
| 45 |
ANTIBODY_NAMES_DICT = {
|
| 46 |
"GDPa1": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1"])["antibody_name"].tolist(),
|
| 47 |
+
"GDPa1_cross_validation": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1_cross_validation"])[
|
| 48 |
+
"antibody_name"
|
| 49 |
+
].tolist(),
|
| 50 |
}
|
| 51 |
|
| 52 |
# Huggingface API
|
validation.py
CHANGED
|
@@ -47,7 +47,9 @@ def validate_csv_can_be_read(file_content: str) -> pd.DataFrame:
|
|
| 47 |
raise gr.Error(f"❌ Unexpected error reading CSV file: {str(e)}")
|
| 48 |
|
| 49 |
|
| 50 |
-
def validate_cv_submission(
|
|
|
|
|
|
|
| 51 |
"""Validate cross-validation submission"""
|
| 52 |
# Must have CV_COLUMN for CV submissions
|
| 53 |
if CV_COLUMN not in df.columns:
|
|
@@ -102,7 +104,7 @@ def validate_dataframe(df: pd.DataFrame, submission_type: str = "GDPa1") -> None
|
|
| 102 |
df: pd.DataFrame
|
| 103 |
The DataFrame to validate.
|
| 104 |
submission_type: str
|
| 105 |
-
Type of submission: "GDPa1" or "…" [pre-rename cross-validation type name truncated in extraction]
|
| 106 |
|
| 107 |
Raises
|
| 108 |
------
|
|
@@ -160,7 +162,7 @@ def validate_dataframe(df: pd.DataFrame, submission_type: str = "GDPa1") -> None
|
|
| 160 |
f"❌ Missing predictions for {len(missing_antibodies)} antibodies: {', '.join(missing_antibodies)}"
|
| 161 |
)
|
| 162 |
# Submission-type specific validation
|
| 163 |
-
if submission_type.endswith("…"): [pre-rename suffix truncated in extraction]
|
| 164 |
validate_cv_submission(df, submission_type)
|
| 165 |
else: # full_dataset
|
| 166 |
validate_full_dataset_submission(df)
|
|
@@ -175,7 +177,7 @@ def validate_csv_file(file_content: str, submission_type: str = "GDPa1") -> None
|
|
| 175 |
file_content: str
|
| 176 |
The content of the uploaded CSV file.
|
| 177 |
submission_type: str
|
| 178 |
-
Type of submission: "…" [pre-rename submission type names truncated in extraction]
|
| 179 |
|
| 180 |
Raises
|
| 181 |
------
|
|
|
|
| 47 |
raise gr.Error(f"❌ Unexpected error reading CSV file: {str(e)}")
|
| 48 |
|
| 49 |
|
| 50 |
+
def validate_cv_submission(
|
| 51 |
+
df: pd.DataFrame, submission_type: str = "GDPa1_cross_validation"
|
| 52 |
+
) -> None:
|
| 53 |
"""Validate cross-validation submission"""
|
| 54 |
# Must have CV_COLUMN for CV submissions
|
| 55 |
if CV_COLUMN not in df.columns:
|
|
|
|
| 104 |
df: pd.DataFrame
|
| 105 |
The DataFrame to validate.
|
| 106 |
submission_type: str
|
| 107 |
+
Type of submission: "GDPa1" or "GDPa1_cross_validation"
|
| 108 |
|
| 109 |
Raises
|
| 110 |
------
|
|
|
|
| 162 |
f"❌ Missing predictions for {len(missing_antibodies)} antibodies: {', '.join(missing_antibodies)}"
|
| 163 |
)
|
| 164 |
# Submission-type specific validation
|
| 165 |
+
if submission_type.endswith("_cross_validation"):
|
| 166 |
validate_cv_submission(df, submission_type)
|
| 167 |
else: # full_dataset
|
| 168 |
validate_full_dataset_submission(df)
|
|
|
|
| 177 |
file_content: str
|
| 178 |
The content of the uploaded CSV file.
|
| 179 |
submission_type: str
|
| 180 |
+
Type of submission: "GDPa1" or "GDPa1_cross_validation"
|
| 181 |
|
| 182 |
Raises
|
| 183 |
------
|