Lakoc: Leaderboard split into 4 categories, updates of the logic and GT added, simplified CER for Mandarin (7fc9a28)
import json
from pathlib import Path
from typing import Dict, List

import meeteval.io
import pandas as pd

from txt_norm import get_text_norm
from utils import calc_wer, aggregate_wer_metrics
class LeaderboardServer:
    """Manages ASR model submissions and leaderboard generation."""

    def __init__(self,
                 reference_base_path: str = "references",
                 tasks_metadata_path: str = "tasks_metadata.json",
                 local_leaderboard_path: str = "submissions"):
        """Initialize the leaderboard server.

        Args:
            reference_base_path: Base path for reference files
            tasks_metadata_path: Path to tasks metadata JSON file
            local_leaderboard_path: Directory for storing submissions
        """
        self.reference_base_path = Path(reference_base_path).resolve()
        self.tasks_metadata_path = Path(tasks_metadata_path).resolve()
        self.local_leaderboard = Path(local_leaderboard_path).resolve()

        # Load tasks metadata
        self.tasks_metadata = self._load_tasks_metadata()

        # Initialize storage
        self.local_leaderboard.mkdir(exist_ok=True)
        self.text_normalizer = get_text_norm("whisper_nsf")
    def _load_tasks_metadata(self) -> Dict:
        """Load tasks metadata from JSON file."""
        try:
            with open(self.tasks_metadata_path) as f:
                return json.load(f)["tasks"]
        except (FileNotFoundError, KeyError, json.JSONDecodeError) as e:
            raise ValueError(f"Failed to load tasks metadata: {e}")
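    # The loader above only requires a top-level "tasks" key; everything inside it is
    # opaque to this class. A minimal, purely illustrative tasks_metadata.json might
    # look like this (all field names other than "tasks" are assumptions):
    #
    # {
    #   "tasks": {
    #     "single_channel": {"name": "Single-channel ASR"},
    #     "multi_channel": {"name": "Multi-channel ASR"}
    #   }
    # }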
    def _get_results_file_path(self, task: str) -> Path:
        """Get the path to the results file for a specific task."""
        return self.local_leaderboard / f"{task}_results.json"

    def _create_submission_id(self, metadata: Dict[str, str]) -> str:
        """Create a unique submission ID from metadata."""
        return f"{metadata['submitted_by']}_{metadata['model_id']}"

    def _normalize_text_if_needed(self, segment: Dict, normalize: bool) -> Dict:
        """Apply text normalization to a segment if requested."""
        if normalize:
            return {**segment, "words": self.text_normalizer(segment["words"])}
        return segment
    def _evaluate_dataset(self,
                          hyp_seglst,
                          ref_seglst,
                          normalize: bool = False) -> Dict:
        """Evaluate WER for a single dataset."""
        # Apply normalization if requested
        if normalize:
            ref_seglst = ref_seglst.map(lambda seg: self._normalize_text_if_needed(seg, True))
            hyp_seglst = hyp_seglst.map(lambda seg: self._normalize_text_if_needed(seg, True))

        # Calculate WER metrics
        per_session_wers = calc_wer(
            tcp_hyp_seglst=hyp_seglst,
            ref_seglst=ref_seglst,
            collar=5,
            metrics_list=["tcp_wer"]
        )
        return aggregate_wer_metrics(per_session_wers, ["tcp_wer"])
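    # get_leaderboard() below reads metrics.get("tcp_wer") from the aggregate returned
    # here, so the result is assumed to contain at least a fractional "tcp_wer" value,
    # e.g. {"tcp_wer": 0.231} (illustrative number only).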
    def _load_existing_results(self, task: str) -> Dict:
        """Load existing results for a task, or return empty dict."""
        results_path = self._get_results_file_path(task)
        if results_path.exists():
            with open(results_path) as f:
                return json.load(f)
        return {}

    def _save_results(self, task: str, results: Dict) -> None:
        """Save results to the task results file."""
        results_path = self._get_results_file_path(task)
        with open(results_path, "w") as f:
            json.dump(results, f, indent=2)

    def _save_hypothesis_file(self,
                              task: str,
                              submission_id: str,
                              source_file: str) -> None:
        """Save the hypothesis file for future reference."""
        hyp_filename = f"{task}_{submission_id}_hyp.json"
        hyp_filepath = self.local_leaderboard / hyp_filename
        with open(hyp_filepath, "w") as out_f, open(source_file, "r") as in_f:
            out_f.write(in_f.read())
    def prepare_model_for_submission(self,
                                     file: str,
                                     metadata: Dict[str, str],
                                     task: str,
                                     datasets: List[str],
                                     normalize: bool = False) -> None:
        """Prepare and evaluate a model submission.

        Args:
            file: Path to the hypothesis file
            metadata: Submission metadata containing 'submitted_by' and 'model_id'
            task: Task name
            datasets: List of dataset names to evaluate on
            normalize: Whether to apply text normalization
        """
        submission_id = self._create_submission_id(metadata)

        # Load hypothesis segments
        hyp_seglst = meeteval.io.load(file)

        # Evaluate on each dataset
        results = {}
        for dataset in datasets:
            ref_path = self.reference_base_path / task / f"{dataset}.json"
            if not ref_path.exists():
                raise FileNotFoundError(f"Reference file not found: {ref_path}")

            ref_seglst = meeteval.io.load(ref_path)
            sessions = ref_seglst.unique('session_id')

            # Filter hypotheses to match reference sessions
            local_hyps = hyp_seglst.filter(lambda seg: seg['session_id'] in sessions)

            if "alimeeting" in dataset or "aishell4" in dataset:
                # Mandarin datasets: convert Simplified to Traditional characters and
                # split the text into space-separated characters, so the word-level
                # metric effectively becomes a character error rate.
                import opencc
                converter = opencc.OpenCC('s2t.json')
                local_hyps = local_hyps.map(
                    lambda seg: {**seg, "words": " ".join(list(converter.convert(seg["words"])))})
                ref_seglst = ref_seglst.map(
                    lambda seg: {**seg, "words": " ".join(list(converter.convert(seg["words"])))})

            # Evaluate this dataset
            results[dataset] = self._evaluate_dataset(local_hyps, ref_seglst, normalize)

        # Update results file
        all_results = self._load_existing_results(task)
        all_results[submission_id] = {
            "model_link": metadata["model_link"],
            "model_id": metadata["model_id"],
            "submitted_by": metadata["submitted_by"],
            "results": results
        }
        self._save_results(task, all_results)
        self._save_hypothesis_file(task, submission_id, file)
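    # prepare_model_for_submission() leaves a {task}_results.json file shaped like the
    # following (keys mirror the dict built above; values are illustrative only):
    #
    # {
    #   "jane_doe_my-asr-model": {
    #     "model_link": "https://example.com/model",
    #     "model_id": "my-asr-model",
    #     "submitted_by": "jane_doe",
    #     "results": {"dataset_a": {"tcp_wer": 0.231}}
    #   }
    # }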
    @staticmethod
    def make_clickable_model(model_name, link):
        """Render the model name as an HTML link for the leaderboard table."""
        return (f'<a target="_blank" href="{link}" style="color: var(--link-text-color); '
                f'text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>')
    def get_leaderboard(self, task: str) -> pd.DataFrame:
        """Generate leaderboard DataFrame for a specific task.

        Args:
            task: Task name

        Returns:
            DataFrame containing leaderboard results
        """
        results_path = self._get_results_file_path(task)
        if not results_path.exists():
            return pd.DataFrame(columns=["No submissions yet"])

        with open(results_path) as f:
            results = json.load(f)

        if not results:
            return pd.DataFrame(columns=["No submissions yet"])

        # Build rows for DataFrame
        rows = []
        for content in results.values():
            row = {
                "Model ID": self.make_clickable_model(content["model_id"], content["model_link"]),
                "Submitted by": content["submitted_by"]
            }
            # Add dataset results
            for dataset, metrics in content["results"].items():
                row[dataset] = metrics.get("tcp_wer")
            rows.append(row)

        df = pd.DataFrame(rows)
        if df.empty:
            return df

        # Convert WER to percentage and format
        numeric_columns = df.select_dtypes(include=['number']).columns
        df[numeric_columns] *= 100.0
        df = df.round(2)
        df = df.fillna("-")
        return df
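# Minimal usage sketch. All paths, metadata values, and the task/dataset names below
# are hypothetical; the surrounding Space app is what actually drives this class.
if __name__ == "__main__":
    server = LeaderboardServer(
        reference_base_path="references",
        tasks_metadata_path="tasks_metadata.json",
        local_leaderboard_path="submissions",
    )
    server.prepare_model_for_submission(
        file="example_hyp.json",                        # hypothesis SegLST file (hypothetical)
        metadata={
            "submitted_by": "jane_doe",                 # hypothetical submitter
            "model_id": "my-asr-model",                 # hypothetical model name
            "model_link": "https://example.com/model",  # hypothetical link
        },
        task="single_channel",                          # hypothetical task name
        datasets=["dataset_a"],                         # hypothetical dataset name
        normalize=True,
    )
    print(server.get_leaderboard("single_channel"))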