Spaces:

mployd-engineering
/

alpha

Sleeping

App Files Files Community

mployd-engineering committed 8 days ago

Commit

eee7aae

verified ·

1 Parent(s): a24dd2a

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -12

app.py CHANGED Viewed

@@ -7,13 +7,13 @@ import os
 from fastapi import FastAPI
 from pydantic import BaseModel
 import uvicorn
-import openai # Import the OpenAI library
 # --- Configuration ---
 BASE_MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
 LORA_MODEL_ID = "LlamaFactoryAI/Llama-3.1-8B-Instruct-cv-job-description-matching"
 HF_TOKEN = os.environ.get("HF_TOKEN")
-# NEW: Check for OpenAI API key
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 # --- FastAPI App ---
@@ -75,6 +75,45 @@ def load_model():
         print("OPENAI_API_KEY not found. Skipping human-readable summary generation.")
 def get_human_readable_summary(json_data: dict) -> str:
     """Uses OpenAI to convert structured JSON data into human-readable text."""
     if not openai_client:
@@ -84,10 +123,8 @@ def get_human_readable_summary(json_data: dict) -> str:
     prompt = f"""
     You are an expert technical writer. Take the following structured JSON data
     representing a CV-Job Description match analysis and convert it into a smooth,
-    professional, human-readable summary. Do not mention score, focus on the followings:
-        - key findings,  in 'matching_analysis'
-        - analyse fit with strenght and weakness
-        - recommendation for overal fit and what they should do
     steps clearly at the end. Do not output any JSON or code formatting.
     JSON Data:
@@ -121,18 +158,23 @@ def match_cv_jd(cv_text: str, job_description: str) -> dict:
     # Ensure model is loaded (important for environments where Gradio might reload)
     if model is None or tokenizer is None:
         load_model()
     # System prompt guides the model's behavior and output format (JSON structure)
     system_prompt = """You are a world-class CV and Job Description matching AI. Output a structured JSON with fields:- matching_analysis- description- score (0-100)- recommendation (2 concrete steps)Output MUST be valid JSON and contain ONLY the JSON object, nothing else."""
-    # User prompt contains the input data
-    user_prompt = f"""CV:
 ---
-{cv_text}
 ---
-Job Description:
 ---
-{job_description}
 ---"""
     messages = [
@@ -172,7 +214,7 @@ Job Description:
         parsed = json.loads(json_str)
-        # --- NEW: Post-process with OpenAI to add human_readable summary ---
         if OPENAI_API_KEY and openai_client:
             human_readable_text = get_human_readable_summary(parsed)
             # Add the summary to the JSON output

 from fastapi import FastAPI
 from pydantic import BaseModel
 import uvicorn
+import openai
 # --- Configuration ---
 BASE_MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
 LORA_MODEL_ID = "LlamaFactoryAI/Llama-3.1-8B-Instruct-cv-job-description-matching"
 HF_TOKEN = os.environ.get("HF_TOKEN")
+# Check for OpenAI API key
 OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 # --- FastAPI App ---
         print("OPENAI_API_KEY not found. Skipping human-readable summary generation.")
+# --- NEW: Function to summarize input text (CV or JD) ---
+def get_summary(text: str, role: str) -> str:  # role picks the prompt: 'CV', 'JD', or anything else for a generic summary
+    """Uses OpenAI to create a concise summary of the CV or Job Description."""
+    if not openai_client:  # openai_client is defined outside this function (not visible in this diff)
+        # Fallback: no client is available, so hand the input text back unchanged
+        return text
+    if role == 'CV':
+        prompt_instruction = "Extract the key professional skills, technologies, job roles, and quantifiable achievements. Exclude personal contact information, filler text, or overly verbose descriptions. Keep the summary under 300 words."
+    elif role == 'JD':
+        prompt_instruction = "Extract the core required skills, experience levels, technological stack, and main responsibilities for this role. Exclude recruiting boilerplate or company mission statements. Keep the summary under 200 words."
+    else:  # any other role value gets a generic instruction
+        prompt_instruction = "Summarize the key contents."
+    prompt = f"""
+    {prompt_instruction}
+    Original Text:
+    ---
+    {text}
+    ---
+    Concise Summary:
+    """  # the leading whitespace and '---' delimiters above are part of the prompt string that gets sent
+    try:
+        completion = openai_client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=[
+                {"role": "user", "content": prompt}
+            ],
+            temperature=0.1, # Low temperature: this step extracts facts rather than writing freely
+        )
+        return completion.choices[0].message.content.strip()  # NOTE(review): if content were None, .strip() would raise and the except below returns the original text
+    except Exception as e:  # deliberate best-effort: any failure is printed and the unsummarized text is used instead
+        print(f"OpenAI summarization for {role} failed: {e}. Using original text.")
+        return text
 def get_human_readable_summary(json_data: dict) -> str:
     """Uses OpenAI to convert structured JSON data into human-readable text."""
     if not openai_client:
     prompt = f"""
     You are an expert technical writer. Take the following structured JSON data
     representing a CV-Job Description match analysis and convert it into a smooth,
+    professional, human-readable summary. Focus on the score, key findings in
+    'matching_analysis' and 'description', and format the 'recommendation'
     steps clearly at the end. Do not output any JSON or code formatting.
     JSON Data:
     # Ensure model is loaded (important for environments where Gradio might reload)
     if model is None or tokenizer is None:
         load_model()
+    # --- NEW: Summarization Step ---
+    print("Summarizing CV and Job Description...")
+    summarized_cv = get_summary(cv_text, 'CV')
+    summarized_jd = get_summary(job_description, 'JD')
     # System prompt guides the model's behavior and output format (JSON structure)
     system_prompt = """You are a world-class CV and Job Description matching AI. Output a structured JSON with fields:- matching_analysis- description- score (0-100)- recommendation (2 concrete steps)Output MUST be valid JSON and contain ONLY the JSON object, nothing else."""
+    # User prompt now uses the summarized text
+    user_prompt = f"""CV (Summarized):
 ---
+{summarized_cv}
 ---
+Job Description (Summarized):
 ---
+{summarized_jd}
 ---"""
     messages = [
         parsed = json.loads(json_str)
+        # Post-process with OpenAI to add human_readable summary
         if OPENAI_API_KEY and openai_client:
             human_readable_text = get_human_readable_summary(parsed)
             # Add the summary to the JSON output