"""HF Wrapped 2025 — a Gradio app that summarizes a user's 2025 Hugging Face activity.

Fetches models/datasets/spaces created in 2025 (API-side sort + early stop),
derives a "signature vibe" (top task, top library), and asks Kimi K2 (via
HF Inference Providers) for a playful nickname + roast.
"""

import base64
import html as html_lib
import json
import os
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple

import gradio as gr
import requests
from huggingface_hub import HfApi, InferenceClient


def _year_from_iso(value: Any) -> Optional[int]:
    """Return the year of an ISO-8601 timestamp string, or None.

    Accepts e.g. ``2025-12-12T18:40:13.000Z`` (the trailing ``Z`` is mapped to
    ``+00:00`` because ``fromisoformat`` pre-3.11 rejects it).
    """
    if not value or not isinstance(value, str):
        return None
    try:
        return datetime.fromisoformat(value.replace("Z", "+00:00")).year
    except ValueError:
        return None


def _created_year(obj: Any) -> Optional[int]:
    """Return the creation year of a hub object, or None if unavailable.

    ``created_at`` may be a ``datetime`` (hub objects) or an ISO string
    (some hub/client versions); anything else yields None instead of raising.
    """
    dt = getattr(obj, "created_at", None)
    if isinstance(dt, datetime):
        return dt.year
    if isinstance(dt, str):
        return _year_from_iso(dt)
    return None


# In-memory cache of data URIs so each asset file is read at most once.
_ASSET_CACHE: Dict[str, str] = {}

# Minimal extension -> MIME map for the asset types this app ships.
_MIME_BY_EXT: Dict[str, str] = {
    ".gif": "image/gif",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".webp": "image/webp",
}


def _asset_data_uri(filename: str) -> str:
    """
    Returns a data URI (base64) for a local asset in this repo.
    Cached in-memory to avoid re-reading files every render.
    Returns "" if the file is missing so the HTML still renders.
    """
    if filename in _ASSET_CACHE:
        return _ASSET_CACHE[filename]
    path = Path(__file__).resolve().parent / filename
    try:
        b64 = base64.b64encode(path.read_bytes()).decode("ascii")
        mime = _MIME_BY_EXT.get(path.suffix.lower(), "image/png")
        uri = f"data:{mime};base64,{b64}"
        _ASSET_CACHE[filename] = uri
        return uri
    except OSError:
        # If missing/unreadable, return empty string to avoid breaking HTML.
        return ""


def _http_get_json(url: str, *, token: Optional[str] = None,
                   params: Optional[Dict[str, Any]] = None) -> Any:
    """GET ``url`` and return the decoded JSON body (raises on HTTP errors)."""
    headers: Dict[str, str] = {}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    r = requests.get(url, headers=headers, params=params, timeout=25)
    r.raise_for_status()
    return r.json()


def fetch_likes_left_2025(username: str, token: Optional[str] = None) -> int:
    """
    Count likes the user left in 2025 via /api/users/{username}/likes.
    Endpoint returns a list with `createdAt` descending, so we can stop
    as soon as we see a pre-2025 entry. Returns 0 on any failure.
    """
    url = f"https://huggingface.co/api/users/{username}/likes"
    try:
        data = _http_get_json(url, token=token)
    except Exception:
        return 0
    if not isinstance(data, list):
        return 0
    total = 0
    for item in data:
        if not isinstance(item, dict):
            continue
        yr = _year_from_iso(item.get("createdAt"))
        if yr is None:
            continue
        if yr < 2025:
            break  # list is createdAt-descending; nothing newer follows
        if yr == 2025:
            total += 1
    return total


def _repo_id(obj: Any) -> str:
    """Best-effort repo id from a hub object or raw API dict ('N/A' fallback)."""
    if isinstance(obj, dict):
        return obj.get("id") or obj.get("modelId") or obj.get("repoId") or "N/A"
    return (
        getattr(obj, "id", None)
        or getattr(obj, "modelId", None)
        or getattr(obj, "repoId", None)
        or getattr(obj, "repo_id", None)
        or "N/A"
    )


def _repo_likes(obj: Any) -> int:
    """Like count of a hub object or raw API dict (0 when absent)."""
    if isinstance(obj, dict):
        return int(obj.get("likes", 0) or 0)
    return int(getattr(obj, "likes", 0) or 0)


def _repo_tags(obj: Any) -> List[str]:
    """String tags of a hub object or raw API dict (non-strings dropped)."""
    if isinstance(obj, dict):
        tags = obj.get("tags") or []
    else:
        tags = getattr(obj, "tags", None) or []
    return [t for t in tags if isinstance(t, str)]


def _repo_pipeline_tag(obj: Any) -> Optional[str]:
    """``pipeline_tag`` of a hub object or raw API dict, if any."""
    if isinstance(obj, dict):
        return obj.get("pipeline_tag")
    return getattr(obj, "pipeline_tag", None)


def _repo_library_name(obj: Any) -> Optional[str]:
    """Library name of a model, trying both snake_case and camelCase fields."""
    for attr in ("library_name", "libraryName"):
        val = getattr(obj, attr, None)
        if isinstance(val, str) and val.strip():
            return val.strip()
    return None


def _collect_2025_sorted_desc(items: Iterable[Any]) -> List[Any]:
    """
    We rely on API-side sorting (createdAt desc) + early-stop once we hit < 2025.
    This avoids pulling a user's entire history.
    """
    out: List[Any] = []
    for item in items:
        yr = _created_year(item)
        if yr is None:
            continue
        if yr < 2025:
            break
        if yr == 2025:
            out.append(item)
    return out


def fetch_user_data_2025(username: str, token: Optional[str] = None) -> Dict[str, List[Any]]:
    """Fetch user's models/datasets/spaces created in 2025 (API-side sort + paginated early-stop)."""
    api = HfApi(token=token)
    data: Dict[str, List[Any]] = {"models": [], "datasets": [], "spaces": []}
    try:
        data["models"] = _collect_2025_sorted_desc(
            api.list_models(author=username, full=True, sort="createdAt", direction=-1)
        )
    except Exception:
        data["models"] = []
    try:
        data["datasets"] = _collect_2025_sorted_desc(
            api.list_datasets(author=username, full=True, sort="createdAt", direction=-1)
        )
    except Exception:
        data["datasets"] = []
    # list_spaces full=True isn't supported in some versions; fall back if needed
    try:
        data["spaces"] = _collect_2025_sorted_desc(
            api.list_spaces(author=username, full=True, sort="createdAt", direction=-1)
        )
    except Exception:
        try:
            data["spaces"] = _collect_2025_sorted_desc(
                api.list_spaces(author=username, sort="createdAt", direction=-1)
            )
        except Exception:
            data["spaces"] = []
    return data


def _normalize_task_tag(tag: str) -> Optional[str]:
    """Strip known task-tag prefixes and lowercase; None for empty results."""
    t = (tag or "").strip()
    if not t:
        return None
    for prefix in ("task_categories:", "task_ids:", "pipeline_tag:"):
        if t.startswith(prefix):
            t = t[len(prefix):].strip()
    t = t.strip().lower()
    return t or None


def _suggested_nickname_for_task(task: Optional[str]) -> Optional[str]:
    """Map a pipeline task to a canned funny nickname, or None if unknown."""
    if not task:
        return None
    mapping = {
        "text-generation": "LLM Whisperer 🗣️",
        "image-text-to-text": "VLM Nerd 🤓",
        "text-to-speech": "Full‑time Yapper 🗣️",
        "automatic-speech-recognition": "Subtitle Goblin 🎧",
        "text-to-image": "Diffusion Gremlin 🎨",
        "image-classification": "Pixel Judge 👁️",
        "token-classification": "NERd Lord 🤓",
        "text-classification": "Opinion Machine 🧠",
        "translation": "Language Juggler 🗺️",
        "summarization": "TL;DR Dealer ✍️",
        "image-to-text": "Caption Connoisseur 🖼️",
        "zero-shot-classification": "Label Wizard 🪄",
    }
    return mapping.get(task.strip().lower())


def infer_task_and_modality(models: List[Any], datasets: List[Any],
                            spaces: List[Any]) -> Tuple[Optional[str], Counter]:
    """
    Returns: (most_common_task, task_counter)
    - Task is primarily inferred from model `pipeline_tag`, then from
      task-ish tags on all artifacts.
    """
    model_tasks: List[str] = []
    for m in models:
        pt = _repo_pipeline_tag(m)
        if pt:
            model_tasks.append(pt.strip().lower())
    tag_tasks: List[str] = []
    for obj in (models + datasets + spaces):
        for tag in _repo_tags(obj):
            nt = _normalize_task_tag(tag)
            if nt:
                tag_tasks.append(nt)
    counts = Counter(model_tasks if model_tasks else tag_tasks)
    top_task = counts.most_common(1)[0][0] if counts else None
    return top_task, counts


def infer_most_common_library(models: List[Any]) -> Optional[str]:
    """Most frequent library among the user's models, or None if unknown."""
    libs = [ln for ln in (_repo_library_name(m) for m in models) if ln]
    if not libs:
        return None
    return Counter(libs).most_common(1)[0][0]


def _k2_model_candidates() -> List[str]:
    """
    Kimi K2 repo IDs can vary; allow override via env and try a small list.
    """
    env_model = (os.getenv("KIMI_K2_MODEL") or "moonshotai/Kimi-K2-Instruct").strip()
    candidates = [env_model]
    # de-dupe while preserving order
    seen = set()
    out = []
    for c in candidates:
        if c and c not in seen:
            out.append(c)
            seen.add(c)
    return out


def _esc(value: Any) -> str:
    """HTML-escape ``value`` (including quotes); empty string for None."""
    if value is None:
        return ""
    return html_lib.escape(str(value), quote=True)


def _profile_username(profile: Any) -> Optional[str]:
    """Extract a username from the many OAuth-profile shapes Gradio emits."""
    if profile is None:
        return None
    for key in ("username", "preferred_username", "name", "user", "handle"):
        val = getattr(profile, key, None)
        if isinstance(val, str) and val.strip():
            return val.strip().lstrip("@")
    data = getattr(profile, "data", None)
    if isinstance(data, dict):
        for key in ("username", "preferred_username", "name"):
            val = data.get(key)
            if isinstance(val, str) and val.strip():
                return val.strip().lstrip("@")
        for container in ("profile", "user"):
            blob = data.get(container)
            if isinstance(blob, dict):
                val = (blob.get("username") or blob.get("preferred_username")
                       or blob.get("name"))
                if isinstance(val, str) and val.strip():
                    return val.strip().lstrip("@")
    if isinstance(profile, dict):
        val = (profile.get("username") or profile.get("preferred_username")
               or profile.get("name"))
        if isinstance(val, str) and val.strip():
            return val.strip().lstrip("@")
    return None


def _profile_token(profile: Any) -> Optional[str]:
    """
    Gradio's OAuth payload varies by version. We try common attribute names
    and `.data` shapes.
    """
    if profile is None:
        return None
    for key in ("token", "access_token", "hf_token", "oauth_token", "oauth_access_token"):
        val = getattr(profile, key, None)
        if isinstance(val, str) and val.strip():
            return val.strip()
    data = getattr(profile, "data", None)
    if isinstance(data, dict):
        for key in ("token", "access_token", "hf_token", "oauth_token", "oauth_access_token"):
            val = data.get(key)
            if isinstance(val, str) and val.strip():
                return val.strip()
        # Common nested objects
        oauth_info = data.get("oauth_info") or data.get("oauth") or data.get("oauthInfo") or {}
        if isinstance(oauth_info, dict):
            val = oauth_info.get("access_token") or oauth_info.get("token")
            if isinstance(val, str) and val.strip():
                return val.strip()
    if isinstance(profile, dict):
        val = profile.get("token") or profile.get("access_token")
        if isinstance(val, str) and val.strip():
            return val.strip()
    return None


def _strip_code_fences(text: str) -> str:
    """Remove a surrounding markdown code fence (``` / ```json) if present."""
    t = text.strip()
    if t.startswith("```"):
        t = t.split("\n", 1)[1] if "\n" in t else ""
        if t.rstrip().endswith("```"):
            t = t.rstrip()[:-3]
    return t.strip()


def generate_roast_and_nickname_with_k2(
    *,
    username: str,
    total_artifacts_2025: int,
    models_2025: int,
    datasets_2025: int,
    spaces_2025: int,
    top_task: Optional[str],
) -> Tuple[Optional[str], Optional[str]]:
    """
    Calls Kimi K2 via Hugging Face Inference Providers (via huggingface_hub
    InferenceClient). Returns (nickname, roast). If the call fails or the
    model's reply isn't valid JSON, returns (None, None) instead of raising.
    """
    token = (os.getenv("HF_TOKEN") or "").strip()
    if not token:
        return None, None
    vibe = top_task or "mysterious vibes"
    above_below = "above" if total_artifacts_2025 > 20 else "below"
    suggested = _suggested_nickname_for_task(top_task)
    system = (
        "You are a witty, playful roast-comedian. Keep it fun, not cruel. "
        "No slurs, no hate, no harassment. Avoid profanity. Keep it short."
    )
    user = f"""
Create TWO things about this Hugging Face user, based on their 2025 activity stats.

User: @{username}
Artifacts created in 2025: {total_artifacts_2025} (models={models_2025}, datasets={datasets_2025}, spaces={spaces_2025}) which is {above_below} 20.
Top task (pipeline_tag): {top_task or "unknown"}

Nickname guidance (examples you SHOULD follow when applicable):
- text-generation -> LLM Whisperer 🗣️
- image-text-to-text -> VLM Nerd 🤓
- text-to-speech -> Full‑time Yapper 🗣️
If top task is known and you have a strong matching idea, pick a nickname like the examples.
{f'If unsure, you may use this suggested nickname: {suggested}' if suggested else ''}
Roast should reference the task and whether they are above/below 20 artifacts.
Most common vibe: {vibe}

Return ONLY valid JSON with exactly these keys:
{{
  "nickname": "...",  // short, funny, can include 1 emoji
  "roast": "..."      // 1-2 sentences max, playful, no bullying
}}
""".strip()

    # Try each candidate repo id (env-overridable) until one answers with
    # parseable JSON; never let a provider/parse failure crash the report.
    for model_id in _k2_model_candidates():
        try:
            client = InferenceClient(model=model_id, token=token)
            resp = client.chat.completions.create(
                model=model_id,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": user},
                ],
                max_tokens=180,
                temperature=0.8,
            )
            content = (resp.choices[0].message.content or "").strip()
            payload = json.loads(_strip_code_fences(content))
        except Exception:
            continue
        nickname = payload.get("nickname") if isinstance(payload, dict) else None
        roast = payload.get("roast") if isinstance(payload, dict) else None
        nickname_out = nickname.strip() if isinstance(nickname, str) else None
        roast_out = roast.strip() if isinstance(roast, str) else None
        return nickname_out, roast_out
    return None, None


def generate_wrapped_report(profile: gr.OAuthProfile) -> str:
    """Generate the HF Wrapped 2025 report as an HTML string."""
    username = _profile_username(profile) or "unknown"
    token = _profile_token(profile)

    # Fetch 2025 data (API-side sort + early stop)
    user_data_2025 = fetch_user_data_2025(username, token)
    models_2025 = user_data_2025["models"]
    datasets_2025 = user_data_2025["datasets"]
    spaces_2025 = user_data_2025["spaces"]

    most_liked_model = max(models_2025, key=_repo_likes) if models_2025 else None
    most_liked_dataset = max(datasets_2025, key=_repo_likes) if datasets_2025 else None
    most_liked_space = max(spaces_2025, key=_repo_likes) if spaces_2025 else None
    total_likes = sum(_repo_likes(x) for x in (models_2025 + datasets_2025 + spaces_2025))

    top_task, _task_counts = infer_task_and_modality(models_2025, datasets_2025, spaces_2025)
    top_library = infer_most_common_library(models_2025)
    total_artifacts_2025 = len(models_2025) + len(datasets_2025) + len(spaces_2025)

    nickname, roast = generate_roast_and_nickname_with_k2(
        username=username,
        total_artifacts_2025=total_artifacts_2025,
        models_2025=len(models_2025),
        datasets_2025=len(datasets_2025),
        spaces_2025=len(spaces_2025),
        top_task=top_task,
    )

    # New 2025 engagement stats
    likes_left_2025 = fetch_likes_left_2025(username, token)

    # Inline icons (local assets)
    like_icon = _asset_data_uri("like_logo.png")
    likes_received_icon = _asset_data_uri("likes_received.png")
    model_icon = _asset_data_uri("model_logo.png")
    dataset_icon = _asset_data_uri("dataset_logo.png")
    spaces_icon = _asset_data_uri("spaces_logo.png")
    vibe_icon = _asset_data_uri("vibe_logo.gif")

    # Create HTML report (hf-hero class keeps text readable in dark mode; see CSS)
    html = f"""
<div class="hf-hero" style="max-width:780px;margin:0 auto;padding:24px;border-radius:16px;background:#FFFDF5;">
  <h1 style="text-align:center;">Your 2025 Hugging Face Wrapped</h1>
  <h2 style="text-align:center;">@{_esc(username)}</h2>

  <div style="margin:20px 0;padding:16px;border-radius:12px;background:#FFF4D6;">
    <h3><img src="{vibe_icon}" alt="Vibe" style="height:28px;vertical-align:middle;"/> Your Signature Vibe</h3>
    {f'<p>You are a <strong>{_esc(nickname)}</strong></p>' if nickname else ''}
    {f'<p>You nailed this task: <strong>{_esc(top_task)}</strong></p>' if top_task else ''}
    <p>You shipped <strong>{total_artifacts_2025}</strong> artifacts this year!</p>
    {f'<p>You loved <strong>{_esc(top_library)}</strong> library the most 💛</p>' if top_library else ''}
    {f'<p><em>{_esc(roast)}</em></p>' if roast else '<p><em>Couldn’t generate a roast (missing token or Kimi K2 not reachable).</em></p>'}
  </div>

  <div style="display:flex;flex-wrap:wrap;gap:12px;justify-content:center;">
    <div style="text-align:center;padding:12px;">
      <img src="{model_icon}" alt="Models" style="height:32px;"/>
      <div style="font-size:1.6em;font-weight:bold;">{len(models_2025)}</div>
      <div>Models</div>
    </div>
    <div style="text-align:center;padding:12px;">
      <img src="{dataset_icon}" alt="Datasets" style="height:32px;"/>
      <div style="font-size:1.6em;font-weight:bold;">{len(datasets_2025)}</div>
      <div>Datasets</div>
    </div>
    <div style="text-align:center;padding:12px;">
      <img src="{spaces_icon}" alt="Spaces" style="height:32px;"/>
      <div style="font-size:1.6em;font-weight:bold;">{len(spaces_2025)}</div>
      <div>Spaces</div>
    </div>
    <div style="text-align:center;padding:12px;">
      <img src="{like_icon}" alt="Likes given" style="height:32px;"/>
      <div style="font-size:1.6em;font-weight:bold;">{likes_left_2025}</div>
      <div>Likes Given</div>
    </div>
    <div style="text-align:center;padding:12px;">
      <img src="{likes_received_icon}" alt="Likes received" style="height:32px;"/>
      <div style="font-size:1.6em;font-weight:bold;">{total_likes}</div>
      <div>Likes Received</div>
    </div>
  </div>

  <div style="margin-top:16px;">
    <h3>Most Liked Model</h3>
    {f'<p>🤖 {_esc(_repo_id(most_liked_model))}</p>' if most_liked_model else '<p>No models yet</p>'}
    <h3>Most Liked Dataset</h3>
    {f'<p>📊 {_esc(_repo_id(most_liked_dataset))}</p>' if most_liked_dataset else '<p>No datasets yet</p>'}
    <h3>Most Liked Space</h3>
    {f'<p>🚀 {_esc(_repo_id(most_liked_space))}</p>' if most_liked_space else '<p>No spaces yet</p>'}
  </div>
</div>
"""
    return html


def show_login_message():
    """Show message for non-logged-in users"""
    return """
<div class="hf-hero" style="text-align:center;padding:32px;">
  <h2>🎉 Welcome to HF Wrapped! 🎉</h2>
  <p>Please log in with your Hugging Face account to see your personalized report!</p>
  <p>Click the "Sign in with Hugging Face" button above 👆</p>
</div>
"""


# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), css="""
.gradio-container { background: linear-gradient(135deg, #FFF4D6 0%, #FFE6B8 50%, #FFF9E6 100%); }
/* Force readable hero text even when HF host page is in dark mode */
.hf-hero, .hf-hero * { color: #111 !important; }
""") as demo:
    gr.HTML("""
<div style="text-align:center;">
  <h1>🎉 HF Wrapped 2025 🎉</h1>
  <p>Discover your Hugging Face journey this year!</p>
</div>
""")
    with gr.Row():
        with gr.Column():
            login_button = gr.LoginButton()
    output = gr.HTML(value=show_login_message())

    def _render(profile_obj: Optional[gr.OAuthProfile] = None):
        # In Gradio versions that support OAuth, `profile_obj` is injected after login.
        return generate_wrapped_report(profile_obj) if profile_obj is not None else show_login_message()

    # On load show the login message (and in some Gradio versions, this also
    # receives the injected profile)
    demo.load(fn=_render, inputs=None, outputs=output)

    # After login completes, clicking the login button will trigger a rerender.
    # Older Gradio treats LoginButton as a button (click event), not a value
    # component (change event).
    if hasattr(login_button, "click"):
        login_button.click(fn=_render, inputs=None, outputs=output)


if __name__ == "__main__":
    demo.launch()