COS30082

Sleeping

App Files Files Community

FrAnKu34t23 committed 26 days ago

Commit

0f0c715

verified ·

1 Parent(s): b94cd58

Update app.py

Browse files

Files changed (1) hide show

app.py +180 -77

app.py CHANGED Viewed

@@ -1,92 +1,210 @@
 import gradio as gr
-# --- 1. Import Existing Baselines ---
-# Wrapped in try-except so the app doesn't crash if files are temporarily missing
 try:
     from baseline.baseline_convnext import predict_convnext
 except ImportError:
-    def predict_convnext(image): return {"Error": "ConvNeXt module missing"}
 try:
     from baseline.baseline_infer import predict_baseline
 except ImportError:
-    def predict_baseline(image): return {"Error": "Baseline module missing"}
-# --- 2. Import NEW SPA Approach ---
-# This imports the function from: new_approach/spa_ensemble.py
 try:
     from new_approach.spa_ensemble import predict_spa
 except ImportError:
-    def predict_spa(image): return {"Error": "SPA module missing. Check 'new_approach' folder."}
-# --- Placeholder models (for future extensions) ---
-def predict_placeholder_2(image):
-    if image is None:
-        return "Please upload an image."
-    return "Model 4 is not available yet. Please check back later."
-# --- Main Prediction Logic ---
 def predict(model_choice, image):
-    if image is None: return None
-    if model_choice == "Herbarium Species Classifier":
-        # Friend's ConvNeXt mix-stream CNN baseline
-        return predict_convnext(image)
     elif model_choice == "Baseline (DINOv2 + LogReg)":
-        # Original baseline
-        return predict_baseline(image)
     elif model_choice == "SPA Ensemble (New Approach)":
-        # YOUR NEW CODE: DINOv2 + BioCLIP + Handcrafted + SPA
-        return predict_spa(image)
     elif model_choice == "Future Model 2 (Placeholder)":
-        return predict_placeholder_2(image)
     else:
-        return "Invalid model selected."
 # --- Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Soft(), css="style.css") as demo:
     with gr.Column(elem_id="app-wrapper"):
-        # Header
         gr.Markdown(
             """
             <div id="app-header">
               <h1>🌿 Plant Species Classification</h1>
-              <h3>AML Group Project – PsychicFireSong</h3>
-            </div>
-            """,
-            elem_id="app-header",
-        )
-        # Badges row
-        gr.Markdown(
-            """
-            <div id="badge-row">
-              <span class="badge">Herbarium + Field images</span>
-              <span class="badge">ConvNeXtV2</span>
-              <span class="badge">SPA Ensemble</span>
             </div>
-            """,
-            elem_id="badge-row",
         )
-        # Main card
         with gr.Row(elem_id="main-card"):
-            # Left side: model + image
-            with gr.Column(scale=1, elem_id="left-panel"):
                 model_selector = gr.Dropdown(
                     label="Select model",
                     choices=[
-                        "Herbarium Species Classifier",
                         "Baseline (DINOv2 + LogReg)",
                         "SPA Ensemble (New Approach)",
                         "Future Model 2 (Placeholder)",
                     ],
-                    value="SPA Ensemble (New Approach)", # Default to your new model
                 )
                 gr.Markdown(
@@ -96,43 +214,28 @@ with gr.Blocks(theme=gr.themes.Soft(), css="style.css") as demo:
                       <b>Baseline</b> – Simple DINOv2 + LogReg.<br>
                       <b>SPA Ensemble</b> – <i>(New)</i> DINOv2 + BioCLIP-2 + Handcrafted features.
                     </div>
-                    """,
-                    elem_id="model-help",
-                )
-                image_input = gr.Image(
-                    type="pil",
-                    label="Upload plant image",
                 )
                 submit_button = gr.Button("Classify 🌱", variant="primary")
-            # Right side: predictions
-            with gr.Column(scale=1, elem_id="right-panel"):
-                output_label = gr.Label(
-                    label="Top 5 predictions",
-                    num_top_classes=5,
                 )
         submit_button.click(
             fn=predict,
             inputs=[model_selector, image_input],
-            outputs=output_label,
-        )
-        # Optional examples
-        gr.Examples(
-            examples=[],
-            inputs=image_input,
-            outputs=output_label,
-            fn=lambda img: predict("SPA Ensemble (New Approach)", img),
-            cache_examples=False,
         )
-        gr.Markdown(
-            "Built for the AML course – compare CNN vs. DINOv2 feature-extractor baselines.",
-            elem_id="footer",
-        )
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+import os
+import re
+import pickle
+import torch
+from torchvision import transforms
+from huggingface_hub import list_repo_files, hf_hub_download
+# --- CONFIGURATION ---
+# 1. Dataset config: the Hugging Face dataset that hosts the herbarium images.
+# DATASET_URL_BASE is the prefix to which "<class_id>/<filename>" is appended
+# when building the URL of the reference image shown in the UI.
+DATASET_ID = "FrAnKu34t23/Herbarium_Field"
+DATASET_URL_BASE = f"https://huggingface.co/datasets/{DATASET_ID}/resolve/main/train/herbarium/"
+# 2. Model repo config: the Hugging Face model repo and file name from which
+# load_resources() downloads the pickled visual search index.
+MODEL_REPO_ID = "FrAnKu34t23/ensemble_models_plant"
+INDEX_FILENAME = "herbarium_index.pkl"
+# Global state, filled in by load_resources() / build_fallback_map().
+# Fallback lookup: class_id -> first image file name found for that class.
+REFERENCE_IMAGE_MAP = {}
+# Unpickled search index; it is looked up by class_id and each entry is read
+# via its "vector" and "filename" keys. Stays None if initialisation failed.
+VECTOR_INDEX = None
+# DINOv2 model (dinov2_vits14 from torch.hub) used to embed the query image.
+FEATURE_EXTRACTOR = None
+# torchvision preprocessing pipeline applied to the query image.
+TRANSFORM = None
+# --- SETUP: Load Resources ---
+def load_resources():
+    global VECTOR_INDEX, FEATURE_EXTRACTOR, TRANSFORM, REFERENCE_IMAGE_MAP
+    print("🚀 App starting... Initializing resources.")
+    # 1. Download and Load Visual Search Index from Model Hub
+    try:
+        print(f"⬇️ Downloading {INDEX_FILENAME} from {MODEL_REPO_ID}...")
+        # This downloads the file to a local cache and returns the path
+        index_path = hf_hub_download(
+            repo_id=MODEL_REPO_ID,
+            filename=INDEX_FILENAME,
+            repo_type="model"
+        )
+        print(f"✅ Downloaded to {index_path}. Loading pickle...")
+        with open(index_path, "rb") as f:
+            VECTOR_INDEX = pickle.load(f)
+        # Load DINOv2 (Retrieval Engine)
+        print("⬇️ Loading DINOv2 (Retrieval Engine)...")
+        FEATURE_EXTRACTOR = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
+        FEATURE_EXTRACTOR.eval()
+        TRANSFORM = transforms.Compose([
+            transforms.Resize((224, 224)),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ])
+        print("🚀 Smart Search Ready!")
+    except Exception as e:
+        print(f"⚠️ Smart Search initialization failed: {e}")
+        print("ℹ️ Ensure 'herbarium_index.pkl' is uploaded to 'FrAnKu34t23/ensemble_models_plant'")
+        VECTOR_INDEX = None
+    # 2. Build Fallback Map
+    # We do this as a backup in case the specific class isn't in the index
+    build_fallback_map()
+def build_fallback_map():
+    global REFERENCE_IMAGE_MAP
+    try:
+        print(f"🔄 Scanning dataset {DATASET_ID} for fallback map...")
+        # Note: If dataset is private, add token=os.environ.get("HF_TOKEN") inside list_repo_files
+        all_files = list_repo_files(repo_id=DATASET_ID, repo_type="dataset")
+        # Look for images in: train/herbarium/{class_id}/{filename}
+        image_files = [f for f in all_files if f.startswith("train/herbarium/") and f.lower().endswith(('.jpg', '.png'))]
+        for file_path in image_files:
+            parts = file_path.split("/")
+            # Expected parts: ['train', 'herbarium', 'CLASS_ID', 'FILENAME']
+            if len(parts) >= 4:
+                class_id = parts[2]
+                filename = parts[3]
+                if class_id not in REFERENCE_IMAGE_MAP:
+                    REFERENCE_IMAGE_MAP[class_id] = filename
+        print(f"✅ Fallback map built for {len(REFERENCE_IMAGE_MAP)} classes.")
+    except Exception as e:
+        print(f"⚠️ Error scanning dataset: {e}")
+# Load resources once at import time: this downloads the search index, loads
+# DINOv2 and scans the dataset before the Gradio UI is defined further down.
+load_resources()
+# --- Logic: Visual Similarity Search ---
+def find_most_similar_herbarium_sheet(class_prediction, input_pil_image):
+    """
+    Finds the image in the predicted class folder that looks most similar to the input.
+    """
+    if not class_prediction: return None
+    # Extract Class ID
+    match = re.search(r'\((\d+)\)', class_prediction)
+    if not match: return None
+    class_id = match.group(1)
+    # Strategy A: Visual Similarity (Vectors)
+    if VECTOR_INDEX and FEATURE_EXTRACTOR and input_pil_image and class_id in VECTOR_INDEX:
+        try:
+            # Create embedding for input image
+            img_tensor = TRANSFORM(input_pil_image).unsqueeze(0)
+            with torch.no_grad():
+                input_vec = FEATURE_EXTRACTOR(img_tensor)
+                input_vec = torch.nn.functional.normalize(input_vec, p=2, dim=1)
+            # Compare against pre-calculated vectors in the index
+            candidates = VECTOR_INDEX[class_id]
+            best_score = -1.0
+            best_filename = None
+            for item in candidates:
+                # Cosine similarity
+                score = torch.mm(input_vec, item["vector"].T).item()
+                if score > best_score:
+                    best_score = score
+                    best_filename = item["filename"]
+            if best_filename:
+                return f"{DATASET_URL_BASE}{class_id}/{best_filename}"
+        except Exception as e:
+            print(f"⚠️ Search failed: {e}")
+    # Strategy B: First Available Image (Fallback)
+    filename = REFERENCE_IMAGE_MAP.get(class_id)
+    if filename:
+        return f"{DATASET_URL_BASE}{class_id}/{filename}"
+    return None
+# --- Import User Models ---
+# Safely import your existing model files
 try:
     from baseline.baseline_convnext import predict_convnext
 except ImportError:
+    def predict_convnext(image): return {"Error: ConvNeXt missing": 0.0}
 try:
     from baseline.baseline_infer import predict_baseline
 except ImportError:
+    def predict_baseline(image): return {"Error: Baseline missing": 0.0}
 try:
     from new_approach.spa_ensemble import predict_spa
 except ImportError:
+    def predict_spa(image): return {"Error: SPA missing": 0.0}
+def predict_placeholder_2(image): return {"Model 4 Not Available": 0.0}
+# --- Main App Logic ---
 def predict(model_choice, image):
+    if image is None: return None, None
+    # STEP 1: CLASSIFICATION
+    predictions = {}
+    if model_choice == "Herbarium Species Classifier (ConvNeXT)":
+        predictions = predict_convnext(image)
     elif model_choice == "Baseline (DINOv2 + LogReg)":
+        predictions = predict_baseline(image)
     elif model_choice == "SPA Ensemble (New Approach)":
+        predictions = predict_spa(image)
     elif model_choice == "Future Model 2 (Placeholder)":
+        predictions = predict_placeholder_2(image)
     else:
+        predictions = {"Invalid model": 0.0}
+    # STEP 2: RETRIEVAL
+    reference_image_url = None
+    if isinstance(predictions, dict) and predictions:
+        try:
+            top_class = max(predictions, key=predictions.get)
+            if "Error" not in top_class and "Please" not in top_class:
+                reference_image_url = find_most_similar_herbarium_sheet(top_class, image)
+        except Exception as e:
+            print(f"Error in retrieval: {e}")
+    return predictions, reference_image_url
 # --- Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Soft(), css="style.css") as demo:
     with gr.Column(elem_id="app-wrapper"):
         gr.Markdown(
             """
             <div id="app-header">
               <h1>🌿 Plant Species Classification</h1>
+              <h3>AML Group Project – Group 8</h3>
             </div>
+            """, elem_id="app-header"
         )
         with gr.Row(elem_id="main-card"):
+            with gr.Column(scale=1):
                 model_selector = gr.Dropdown(
                     label="Select model",
                     choices=[
+                        "Herbarium Species Classifier (ConvNeXT)",
                         "Baseline (DINOv2 + LogReg)",
                         "SPA Ensemble (New Approach)",
                         "Future Model 2 (Placeholder)",
                     ],
+                    value="SPA Ensemble (New Approach)",
                 )
                 gr.Markdown(
                       <b>Baseline</b> – Simple DINOv2 + LogReg.<br>
                       <b>SPA Ensemble</b> – <i>(New)</i> DINOv2 + BioCLIP-2 + Handcrafted features.
                     </div>
+                    """, elem_id="model-help"
                 )
+                image_input = gr.Image(type="pil", label="Upload plant image")
                 submit_button = gr.Button("Classify 🌱", variant="primary")
+            with gr.Column(scale=1):
+                output_label = gr.Label(label="Top 5 predictions", num_top_classes=5)
+                herbarium_output = gr.Image(
+                    label="Matched Herbarium Specimen (Visual Reference)",
+                    show_label=True,
+                    interactive=False,
+                    height=300
                 )
         submit_button.click(
             fn=predict,
             inputs=[model_selector, image_input],
+            outputs=[output_label, herbarium_output],
         )
+        gr.Markdown("Built for the AML course – Group 8", elem_id="footer")
+# Start the Gradio app only when this file is executed as a script.
 if __name__ == "__main__":
     demo.launch()