Spaces:
Sleeping
Sleeping
Changed column header
Browse files
- app.py +3 -3
- src/utils.py +48 -20
app.py
CHANGED
|
@@ -54,7 +54,7 @@ def update_dashboard(graph_years, graph_model_filter):
|
|
| 54 |
graph_years = cfg.get("years")
|
| 55 |
|
| 56 |
# keep some necessary metadata columns in the specified order
|
| 57 |
-
metadata_cols = ["Model", "Overall Average", "
|
| 58 |
cols = metadata_cols.copy()
|
| 59 |
|
| 60 |
yearly_df = df.copy()
|
|
@@ -63,7 +63,7 @@ def update_dashboard(graph_years, graph_model_filter):
|
|
| 63 |
|
| 64 |
# TODO if >1 year - aggregate the values to be per year, not per month
|
| 65 |
if len(table_years) > 1:
|
| 66 |
-
lb_cols = ["Model", "Overall Average", "
|
| 67 |
yearly_df = yearly_df[lb_cols]
|
| 68 |
|
| 69 |
# Expand years into their YYYY_MM columns (for table)
|
|
@@ -101,7 +101,7 @@ def update_dashboard(graph_years, graph_model_filter):
|
|
| 101 |
# Build tidy dataframe for gr.LinePlot with columns x, y, Model
|
| 102 |
records = []
|
| 103 |
# Exclude all metadata columns and yearly aggregates from x_labels - only keep monthly columns
|
| 104 |
-
excluded_cols = {"Model", "Overall Average", "Parameters", "1st Detected cutoff", "2nd Detected cutoff", "Provider", "Provider cutoff", "Release date", "
|
| 105 |
x_labels = [c for c in graph_cols if c not in excluded_cols and c not in graph_years] # only months for the plot
|
| 106 |
for _, row in graph_df.iterrows():
|
| 107 |
for col in x_labels:
|
|
|
|
| 54 |
graph_years = cfg.get("years")
|
| 55 |
|
| 56 |
# keep some necessary metadata columns in the specified order
|
| 57 |
+
metadata_cols = ["Model", "Overall Average", "1st Detected cutoff", "2nd Detected cutoff", "Provider cutoff", "Provider", "Release date", "Self-declared cutoff", "trend_changepoints", "Parameters", "Evaluation period"]
|
| 58 |
cols = metadata_cols.copy()
|
| 59 |
|
| 60 |
yearly_df = df.copy()
|
|
|
|
| 63 |
|
| 64 |
# TODO if >1 year - aggregate the values to be per year, not per month
|
| 65 |
if len(table_years) > 1:
|
| 66 |
+
lb_cols = ["Model", "Overall Average", "1st Detected cutoff", "2nd Detected cutoff", "Provider cutoff", "Provider", "Release date", "Self-declared cutoff", "Parameters", "Evaluation period"] + [y for y in cfg.get("aggregated_cols_year") if y in table_years]
|
| 67 |
yearly_df = yearly_df[lb_cols]
|
| 68 |
|
| 69 |
# Expand years into their YYYY_MM columns (for table)
|
|
|
|
| 101 |
# Build tidy dataframe for gr.LinePlot with columns x, y, Model
|
| 102 |
records = []
|
| 103 |
# Exclude all metadata columns and yearly aggregates from x_labels - only keep monthly columns
|
| 104 |
+
excluded_cols = {"Model", "Overall Average", "Parameters", "1st Detected cutoff", "2nd Detected cutoff", "Provider", "Provider cutoff", "Release date", "Self-declared cutoff", "trend_changepoints", "Evaluation period"}
|
| 105 |
x_labels = [c for c in graph_cols if c not in excluded_cols and c not in graph_years] # only months for the plot
|
| 106 |
for _, row in graph_df.iterrows():
|
| 107 |
for col in x_labels:
|
src/utils.py
CHANGED
|
@@ -169,34 +169,50 @@ def build_year_column_mapping(years, months):
|
|
| 169 |
|
| 170 |
def validate_equal_measurements(data):
|
| 171 |
"""
|
| 172 |
-
Validate
|
| 173 |
|
| 174 |
Args:
|
| 175 |
data: Dictionary with model names as keys
|
| 176 |
|
| 177 |
Returns:
|
| 178 |
-
tuple: (is_valid,
|
|
|
|
|
|
|
|
|
|
| 179 |
"""
|
| 180 |
measurement_counts = {}
|
| 181 |
for model_name, model_data in data.items():
|
| 182 |
dates = model_data.get('dates', [])
|
| 183 |
measurement_counts[model_name] = len(dates)
|
| 184 |
|
| 185 |
-
|
|
|
|
| 186 |
|
| 187 |
-
|
| 188 |
-
|
|
|
|
| 189 |
|
| 190 |
-
if
|
| 191 |
-
|
| 192 |
-
return True,
|
| 193 |
|
| 194 |
-
# Models have different counts - create
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
| 196 |
for model, count in sorted(measurement_counts.items(), key=lambda x: x[1]):
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
-
return
|
| 200 |
|
| 201 |
|
| 202 |
def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=None):
|
|
@@ -212,12 +228,10 @@ def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=
|
|
| 212 |
Returns:
|
| 213 |
List of row dictionaries ready for DataFrame creation
|
| 214 |
"""
|
| 215 |
-
# Validate
|
| 216 |
-
is_valid,
|
| 217 |
-
if not is_valid:
|
| 218 |
-
raise ValueError(f"Data validation failed: {message}")
|
| 219 |
|
| 220 |
-
print(
|
| 221 |
|
| 222 |
rows = []
|
| 223 |
|
|
@@ -258,7 +272,7 @@ def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=
|
|
| 258 |
row["Parameters"] = metadata.get("Parameters", "")
|
| 259 |
row["Provider cutoff"] = metadata.get("Provider cutoff", "")
|
| 260 |
row["Release date"] = metadata.get("Release date", "")
|
| 261 |
-
row["
|
| 262 |
else:
|
| 263 |
# Set empty values if metadata not available
|
| 264 |
# Fall back to extracting provider from model name if no metadata
|
|
@@ -266,19 +280,31 @@ def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=
|
|
| 266 |
row["Parameters"] = ""
|
| 267 |
row["Provider cutoff"] = ""
|
| 268 |
row["Release date"] = ""
|
| 269 |
-
row["
|
| 270 |
|
| 271 |
# Aggregate faithfulness data to monthly averages
|
| 272 |
dates = model_data.get("dates", [])
|
| 273 |
faithfulness = model_data.get("faithfulness", [])
|
| 274 |
monthly_averages = aggregate_weekly_to_monthly(dates, faithfulness)
|
| 275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
# Add monthly columns (e.g., "2021_01", "2021_02", ...)
|
| 277 |
for month_key, avg_value in monthly_averages.items():
|
| 278 |
row[month_key] = avg_value
|
| 279 |
|
| 280 |
# Calculate yearly averages
|
| 281 |
-
all_years_values = [] # Collect all values for overall average
|
| 282 |
for year in years:
|
| 283 |
year_values = []
|
| 284 |
for month in months:
|
|
@@ -293,6 +319,8 @@ def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=
|
|
| 293 |
all_years_values.extend(year_values)
|
| 294 |
|
| 295 |
# Calculate overall average across all years
|
|
|
|
|
|
|
| 296 |
row["Overall Average"] = round(mean(all_years_values), 2) if all_years_values else None
|
| 297 |
|
| 298 |
rows.append(row)
|
|
|
|
| 169 |
|
| 170 |
def validate_equal_measurements(data):
    """
    Count measurements per model and describe any discrepancies between models.

    Despite the name, differing counts are not treated as an error: the
    function always reports success and only describes the differences in
    the returned message.

    Args:
        data: Dictionary with model names as keys. Each value is a mapping
            whose optional 'dates' entry holds that model's measurements;
            a missing or None entry is counted as zero measurements.

    Returns:
        tuple: (is_valid, measurement_counts_dict, message)
            - is_valid: Always True (different counts are allowed)
            - measurement_counts_dict: Dict mapping model_name -> count
              (an empty dict when data has no models)
            - message: Info/warning message about the counts
    """
    measurement_counts = {}
    for model_name, model_data in data.items():
        dates = model_data.get('dates')
        # An explicit None check (rather than `or []`) keeps array-like
        # containers with ambiguous truthiness working.
        measurement_counts[model_name] = 0 if dates is None else len(dates)

    if not measurement_counts:
        return True, {}, "No models found in data"

    max_count = max(measurement_counts.values())
    min_count = min(measurement_counts.values())

    if max_count == min_count:
        # All models have same count
        return True, measurement_counts, f"All models have {max_count} measurements"

    # Counts differ, so at least one model is below max_count and the
    # "fewer" list below is guaranteed to be non-empty.
    # sorted() is stable: models with equal counts keep their input order.
    models_with_fewer = [
        f" {model}: {count} samples (missing {max_count - count})"
        for model, count in sorted(measurement_counts.items(), key=lambda item: item[1])
        if count < max_count
    ]
    models_at_max = [
        f" {model}\n"
        for model, count in measurement_counts.items()
        if count == max_count
    ]

    warning_msg = "".join([
        f"⚠️ Models have different measurement counts (range: {min_count}-{max_count}):\n",
        "\n".join(models_with_fewer),
        f"\n\nModels with maximum samples ({max_count}):\n",
        *models_at_max,
    ])

    return True, measurement_counts, warning_msg
|
| 216 |
|
| 217 |
|
| 218 |
def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=None):
|
|
|
|
| 228 |
Returns:
|
| 229 |
List of row dictionaries ready for DataFrame creation
|
| 230 |
"""
|
| 231 |
+
# Validate measurements and get counts per model
|
| 232 |
+
is_valid, measurement_counts, message = validate_equal_measurements(data)
|
|
|
|
|
|
|
| 233 |
|
| 234 |
+
print(message)
|
| 235 |
|
| 236 |
rows = []
|
| 237 |
|
|
|
|
| 272 |
row["Parameters"] = metadata.get("Parameters", "")
|
| 273 |
row["Provider cutoff"] = metadata.get("Provider cutoff", "")
|
| 274 |
row["Release date"] = metadata.get("Release date", "")
|
| 275 |
+
row["Self-declared cutoff"] = metadata.get("Model cutoff", "")
|
| 276 |
else:
|
| 277 |
# Set empty values if metadata not available
|
| 278 |
# Fall back to extracting provider from model name if no metadata
|
|
|
|
| 280 |
row["Parameters"] = ""
|
| 281 |
row["Provider cutoff"] = ""
|
| 282 |
row["Release date"] = ""
|
| 283 |
+
row["Self-declared cutoff"] = ""
|
| 284 |
|
| 285 |
# Aggregate faithfulness data to monthly averages
|
| 286 |
dates = model_data.get("dates", [])
|
| 287 |
faithfulness = model_data.get("faithfulness", [])
|
| 288 |
monthly_averages = aggregate_weekly_to_monthly(dates, faithfulness)
|
| 289 |
|
| 290 |
+
# Calculate evaluation period (min and max dates)
|
| 291 |
+
if dates:
|
| 292 |
+
try:
|
| 293 |
+
date_objects = [datetime.strptime(d, '%Y-%m-%d') for d in dates]
|
| 294 |
+
min_date = min(date_objects).strftime('%Y-%m-%d')
|
| 295 |
+
max_date = max(date_objects).strftime('%Y-%m-%d')
|
| 296 |
+
row["Evaluation period"] = f"{min_date} - {max_date}"
|
| 297 |
+
except Exception:
|
| 298 |
+
row["Evaluation period"] = ""
|
| 299 |
+
else:
|
| 300 |
+
row["Evaluation period"] = ""
|
| 301 |
+
|
| 302 |
# Add monthly columns (e.g., "2021_01", "2021_02", ...)
|
| 303 |
for month_key, avg_value in monthly_averages.items():
|
| 304 |
row[month_key] = avg_value
|
| 305 |
|
| 306 |
# Calculate yearly averages
|
| 307 |
+
all_years_values = [] # Collect all monthly values for overall average (specific to this model)
|
| 308 |
for year in years:
|
| 309 |
year_values = []
|
| 310 |
for month in months:
|
|
|
|
| 319 |
all_years_values.extend(year_values)
|
| 320 |
|
| 321 |
# Calculate overall average across all years
|
| 322 |
+
# Note: This is calculated from the model's actual sample count
|
| 323 |
+
# Models with fewer samples will have their average based only on their available data
|
| 324 |
row["Overall Average"] = round(mean(all_years_values), 2) if all_years_values else None
|
| 325 |
|
| 326 |
rows.append(row)
|