fzarnecki committed on
Commit
654cff7
·
1 Parent(s): 207bddc

Changed column header

Browse files
Files changed (2) hide show
  1. app.py +3 -3
  2. src/utils.py +48 -20
app.py CHANGED
@@ -54,7 +54,7 @@ def update_dashboard(graph_years, graph_model_filter):
54
  graph_years = cfg.get("years")
55
 
56
  # keep some necessary metadata columns in the specified order
57
- metadata_cols = ["Model", "Overall Average", "Provider cutoff", "1st Detected cutoff", "2nd Detected cutoff", "Provider", "Release date", "Model cutoff", "trend_changepoints", "Parameters"]
58
  cols = metadata_cols.copy()
59
 
60
  yearly_df = df.copy()
@@ -63,7 +63,7 @@ def update_dashboard(graph_years, graph_model_filter):
63
 
64
  # TODO if >1 year - aggregate the values to be per year, not per month
65
  if len(table_years) > 1:
66
- lb_cols = ["Model", "Overall Average", "Provider cutoff", "1st Detected cutoff", "2nd Detected cutoff", "Provider", "Release date", "Model cutoff", "Parameters"] + [y for y in cfg.get("aggregated_cols_year") if y in table_years]
67
  yearly_df = yearly_df[lb_cols]
68
 
69
  # Expand years into their YYYY_MM columns (for table)
@@ -101,7 +101,7 @@ def update_dashboard(graph_years, graph_model_filter):
101
  # Build tidy dataframe for gr.LinePlot with columns x, y, Model
102
  records = []
103
  # Exclude all metadata columns and yearly aggregates from x_labels - only keep monthly columns
104
- excluded_cols = {"Model", "Overall Average", "Parameters", "1st Detected cutoff", "2nd Detected cutoff", "Provider", "Provider cutoff", "Release date", "Model cutoff", "trend_changepoints"}
105
  x_labels = [c for c in graph_cols if c not in excluded_cols and c not in graph_years] # only months for the plot
106
  for _, row in graph_df.iterrows():
107
  for col in x_labels:
 
54
  graph_years = cfg.get("years")
55
 
56
  # keep some necessary metadata columns in the specified order
57
+ metadata_cols = ["Model", "Overall Average", "1st Detected cutoff", "2nd Detected cutoff", "Provider cutoff", "Provider", "Release date", "Self-declared cutoff", "trend_changepoints", "Parameters", "Evaluation period"]
58
  cols = metadata_cols.copy()
59
 
60
  yearly_df = df.copy()
 
63
 
64
  # TODO if >1 year - aggregate the values to be per year, not per month
65
  if len(table_years) > 1:
66
+ lb_cols = ["Model", "Overall Average", "1st Detected cutoff", "2nd Detected cutoff", "Provider cutoff", "Provider", "Release date", "Self-declared cutoff", "Parameters", "Evaluation period"] + [y for y in cfg.get("aggregated_cols_year") if y in table_years]
67
  yearly_df = yearly_df[lb_cols]
68
 
69
  # Expand years into their YYYY_MM columns (for table)
 
101
  # Build tidy dataframe for gr.LinePlot with columns x, y, Model
102
  records = []
103
  # Exclude all metadata columns and yearly aggregates from x_labels - only keep monthly columns
104
+ excluded_cols = {"Model", "Overall Average", "Parameters", "1st Detected cutoff", "2nd Detected cutoff", "Provider", "Provider cutoff", "Release date", "Self-declared cutoff", "trend_changepoints", "Evaluation period"}
105
  x_labels = [c for c in graph_cols if c not in excluded_cols and c not in graph_years] # only months for the plot
106
  for _, row in graph_df.iterrows():
107
  for col in x_labels:
src/utils.py CHANGED
@@ -169,34 +169,50 @@ def build_year_column_mapping(years, months):
169
 
170
  def validate_equal_measurements(data):
171
  """
172
- Validate that all models have the same number of measurements.
173
 
174
  Args:
175
  data: Dictionary with model names as keys
176
 
177
  Returns:
178
- tuple: (is_valid, measurement_count, error_message)
 
 
 
179
  """
180
  measurement_counts = {}
181
  for model_name, model_data in data.items():
182
  dates = model_data.get('dates', [])
183
  measurement_counts[model_name] = len(dates)
184
 
185
- unique_counts = set(measurement_counts.values())
 
186
 
187
- if len(unique_counts) == 0:
188
- return False, 0, "No models found in data"
 
189
 
190
- if len(unique_counts) == 1:
191
- count = list(unique_counts)[0]
192
- return True, count, f"All models have {count} measurements"
193
 
194
- # Models have different counts - create error message
195
- error_msg = "Models have different measurement counts:\n"
 
 
 
196
  for model, count in sorted(measurement_counts.items(), key=lambda x: x[1]):
197
- error_msg += f" {model}: {count}\n"
 
 
 
 
 
 
 
 
198
 
199
- return False, None, error_msg
200
 
201
 
202
  def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=None):
@@ -212,12 +228,10 @@ def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=
212
  Returns:
213
  List of row dictionaries ready for DataFrame creation
214
  """
215
- # Validate equal measurements
216
- is_valid, count, message = validate_equal_measurements(data)
217
- if not is_valid:
218
- raise ValueError(f"Data validation failed: {message}")
219
 
220
- print(f"✓ Validation passed: {message}")
221
 
222
  rows = []
223
 
@@ -258,7 +272,7 @@ def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=
258
  row["Parameters"] = metadata.get("Parameters", "")
259
  row["Provider cutoff"] = metadata.get("Provider cutoff", "")
260
  row["Release date"] = metadata.get("Release date", "")
261
- row["Model cutoff"] = metadata.get("Model cutoff", "")
262
  else:
263
  # Set empty values if metadata not available
264
  # Fall back to extracting provider from model name if no metadata
@@ -266,19 +280,31 @@ def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=
266
  row["Parameters"] = ""
267
  row["Provider cutoff"] = ""
268
  row["Release date"] = ""
269
- row["Model cutoff"] = ""
270
 
271
  # Aggregate faithfulness data to monthly averages
272
  dates = model_data.get("dates", [])
273
  faithfulness = model_data.get("faithfulness", [])
274
  monthly_averages = aggregate_weekly_to_monthly(dates, faithfulness)
275
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  # Add monthly columns (e.g., "2021_01", "2021_02", ...)
277
  for month_key, avg_value in monthly_averages.items():
278
  row[month_key] = avg_value
279
 
280
  # Calculate yearly averages
281
- all_years_values = [] # Collect all values for overall average
282
  for year in years:
283
  year_values = []
284
  for month in months:
@@ -293,6 +319,8 @@ def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=
293
  all_years_values.extend(year_values)
294
 
295
  # Calculate overall average across all years
 
 
296
  row["Overall Average"] = round(mean(all_years_values), 2) if all_years_values else None
297
 
298
  rows.append(row)
 
169
 
def validate_equal_measurements(data):
    """
    Count measurements per model and report discrepancies between models.

    Differing counts are allowed; they only produce a warning message, so
    the first element of the result is always True.

    Args:
        data: Dictionary with model names as keys. Each value is a mapping
            whose 'dates' entry holds one item per measurement. A missing
            or None 'dates' entry counts as zero measurements.

    Returns:
        tuple: (is_valid, measurement_counts_dict, message)
            - is_valid: Always True (different counts are allowed)
            - measurement_counts_dict: Dict mapping model_name -> count
            - message: Info/warning message about the counts
    """
    measurement_counts = {}
    for model_name, model_data in data.items():
        dates = model_data.get('dates')
        # An explicit None is treated like a missing key instead of
        # crashing in len().
        measurement_counts[model_name] = 0 if dates is None else len(dates)

    if not measurement_counts:
        return True, {}, "No models found in data"

    max_count = max(measurement_counts.values())
    min_count = min(measurement_counts.values())

    if max_count == min_count:
        # All models have the same count
        return True, measurement_counts, f"All models have {max_count} measurements"

    # Models below the maximum, listed in ascending order of sample count.
    models_with_fewer = [
        f" {model}: {count} samples (missing {max_count - count})"
        for model, count in sorted(measurement_counts.items(), key=lambda x: x[1])
        if count < max_count
    ]
    # Models at the maximum, in the input's iteration order.
    models_with_max = [
        f" {model}\n"
        for model, count in measurement_counts.items()
        if count == max_count
    ]

    warning_msg = (
        f"⚠️ Models have different measurement counts (range: {min_count}-{max_count}):\n"
        + "\n".join(models_with_fewer)
        + f"\n\nModels with maximum samples ({max_count}):\n"
        + "".join(models_with_max)
    )

    return True, measurement_counts, warning_msg
216
 
217
 
218
  def transform_leaderboard_data_to_dataframe(data, years, months, model_metadata=None):
 
228
  Returns:
229
  List of row dictionaries ready for DataFrame creation
230
  """
231
+ # Validate measurements and get counts per model
232
+ is_valid, measurement_counts, message = validate_equal_measurements(data)
 
 
233
 
234
+ print(message)
235
 
236
  rows = []
237
 
 
272
  row["Parameters"] = metadata.get("Parameters", "")
273
  row["Provider cutoff"] = metadata.get("Provider cutoff", "")
274
  row["Release date"] = metadata.get("Release date", "")
275
+ row["Self-declared cutoff"] = metadata.get("Model cutoff", "")
276
  else:
277
  # Set empty values if metadata not available
278
  # Fall back to extracting provider from model name if no metadata
 
280
  row["Parameters"] = ""
281
  row["Provider cutoff"] = ""
282
  row["Release date"] = ""
283
+ row["Self-declared cutoff"] = ""
284
 
285
  # Aggregate faithfulness data to monthly averages
286
  dates = model_data.get("dates", [])
287
  faithfulness = model_data.get("faithfulness", [])
288
  monthly_averages = aggregate_weekly_to_monthly(dates, faithfulness)
289
 
290
+ # Calculate evaluation period (min and max dates)
291
+ if dates:
292
+ try:
293
+ date_objects = [datetime.strptime(d, '%Y-%m-%d') for d in dates]
294
+ min_date = min(date_objects).strftime('%Y-%m-%d')
295
+ max_date = max(date_objects).strftime('%Y-%m-%d')
296
+ row["Evaluation period"] = f"{min_date} - {max_date}"
297
+ except Exception:
298
+ row["Evaluation period"] = ""
299
+ else:
300
+ row["Evaluation period"] = ""
301
+
302
  # Add monthly columns (e.g., "2021_01", "2021_02", ...)
303
  for month_key, avg_value in monthly_averages.items():
304
  row[month_key] = avg_value
305
 
306
  # Calculate yearly averages
307
+ all_years_values = [] # Collect all monthly values for overall average (specific to this model)
308
  for year in years:
309
  year_values = []
310
  for month in months:
 
319
  all_years_values.extend(year_values)
320
 
321
  # Calculate overall average across all years
322
+ # Note: This is calculated from the model's actual sample count
323
+ # Models with fewer samples will have their average based only on their available data
324
  row["Overall Average"] = round(mean(all_years_values), 2) if all_years_values else None
325
 
326
  rows.append(row)