import gradio as gr
import torch
import json
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import os
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
import openai
# --- Configuration ---
BASE_MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
LORA_MODEL_ID = "LlamaFactoryAI/Llama-3.1-8B-Instruct-cv-job-description-matching"
# HF_TOKEN is needed because the Llama 3.1 base model is gated on the Hub
HF_TOKEN = os.environ.get("HF_TOKEN")
# Optional: an OpenAI API key enables input summarization and human-readable output
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
# --- FastAPI App ---
app = FastAPI()

# --- Pydantic Model for API ---
class MatchRequest(BaseModel):
    cv_text: str
    job_description: str

# --- Model and Tokenizer ---
model = None
tokenizer = None
openai_client = None
def load_model():
    global model, tokenizer, openai_client
    if model is not None:
        return
    print("Loading base model...")
    # Load base model
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        token=HF_TOKEN,
    )
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    print("Loading LoRA adapter...")
    peft_config = PeftConfig.from_pretrained(
        LORA_MODEL_ID,
        task_type="CAUSAL_LM",
        token=HF_TOKEN,
    )
    model_with_lora = PeftModel.from_pretrained(
        base_model,
        LORA_MODEL_ID,
        config=peft_config,
        token=HF_TOKEN,
    )
    # Merge the LoRA adapter into the base model for a single, faster inference model
    model = model_with_lora.merge_and_unload()
    model.eval()
    print("Model fully loaded!")
    # Initialize the OpenAI client if a key is available
    if OPENAI_API_KEY:
        openai_client = openai.OpenAI(api_key=OPENAI_API_KEY)
        print("OpenAI client initialized.")
    else:
        print("OPENAI_API_KEY not found. Skipping human-readable summary generation.")
# --- Summarize input text (CV or JD) ---
def get_summary(text: str, role: str) -> str:
    """Uses OpenAI to create a concise summary of the CV or Job Description."""
    if not openai_client:
        # Fallback: return the original text if the API is not available
        return text
    if role == 'CV':
        prompt_instruction = "Extract the key professional skills, technologies, job roles, and quantifiable achievements. Exclude personal contact information, filler text, or overly verbose descriptions. Keep the summary under 300 words."
    elif role == 'JD':
        prompt_instruction = "Extract the core required skills, experience levels, technological stack, and main responsibilities for this role. Exclude recruiting boilerplate or company mission statements. Keep the summary under 200 words."
    else:
        prompt_instruction = "Summarize the key contents."
    prompt = f"""{prompt_instruction}

Original Text:
---
{text}
---
Concise Summary:
"""
    try:
        completion = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,  # Very low temperature for faithful fact extraction
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        print(f"OpenAI summarization for {role} failed: {e}. Using original text.")
        return text
def get_human_readable_summary(json_data: dict) -> str:
    """Uses OpenAI to convert structured JSON data into human-readable text."""
    if not openai_client:
        return "OpenAI API not available. Set OPENAI_API_KEY to enable summarization."
    # Prompt that drives the conversion to human-readable text
    prompt = f"""You are an expert technical writer. Take the following structured JSON data
representing a CV-Job Description match analysis and convert it into a smooth,
professional, human-readable summary. Focus on the score, key findings in
'matching_analysis' and 'description', and format the 'recommendation'
steps clearly at the end. Do not output any JSON or code formatting.

JSON Data:
---
{json.dumps(json_data, indent=2)}
---
Human-Readable Summary:
"""
    try:
        completion = openai_client.chat.completions.create(
            model="gpt-4o-mini",  # A fast and capable model for this task
            messages=[
                {"role": "user", "content": prompt}
            ],
            temperature=0.2,  # Low temperature for reliable summarization
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        print(f"OpenAI API call failed: {e}")
        return f"OpenAI summarization failed due to an API error: {e}"
# --- Core Inference ---
def match_cv_jd(cv_text: str, job_description: str) -> dict:
    """
    Performs the CV-JD matching using the merged Llama 3.1 model and post-processes
    the result using OpenAI if available.
    """
    # Ensure the model is loaded (important for environments where Gradio might reload)
    if model is None or tokenizer is None:
        load_model()

    # --- Summarization step: condense both inputs before matching ---
    print("Summarizing CV and Job Description...")
    summarized_cv = get_summary(cv_text, 'CV')
    summarized_jd = get_summary(job_description, 'JD')

    # System prompt guides the model's behavior and output format (JSON structure)
    system_prompt = """You are a world-class CV and Job Description matching AI. Output a structured JSON with fields:
- matching_analysis
- description
- score (0-100)
- recommendation (2 concrete steps)
Output MUST be valid JSON and contain ONLY the JSON object, nothing else."""

    # The user prompt uses the summarized text
    user_prompt = f"""CV (Summarized):
---
{summarized_cv}
---
Job Description (Summarized):
---
{summarized_jd}
---"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Prepare inputs for the model
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Generate the response
    outputs = model.generate(
        inputs,
        max_new_tokens=1024,
        temperature=0.01,  # Keep temperature low for structured/deterministic output
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens
    response_text = tokenizer.decode(
        outputs[0][inputs.shape[-1]:],
        skip_special_tokens=True,
    ).strip()

    # Attempt to parse the JSON output
    try:
        # Robustly find the start and end of the JSON object in the response
        start = response_text.find("{")
        end = response_text.rfind("}")
        if start == -1 or end == -1:
            raise ValueError("No JSON object found in model output.")
        parsed = json.loads(response_text[start:end + 1])
        # Post-process with OpenAI to add a human-readable summary
        if openai_client:
            parsed["human_readable"] = get_human_readable_summary(parsed)
        return parsed
    except Exception as e:
        # Fallback for poorly formed output
        print(f"JSON parsing failed: {e}")
        return {"raw": response_text, "error": "Failed to parse JSON output from model."}
# --- FastAPI Endpoints ---
# The route paths chosen here ("/api/match", "/health") are assumptions; pick
# paths that do not collide with the routes Gradio registers when mounted at "/".
@app.post("/api/match")
async def api_predict(request: MatchRequest):
    """Direct REST API endpoint for CV-JD matching."""
    return match_cv_jd(request.cv_text, request.job_description)

@app.get("/health")
async def health_check():
    return {"status": "ok", "model_loaded": model is not None}
# --- Example Data ---
EXAMPLE_CV = """**John Doe**
Email: [email protected]

**Summary**
Experienced software engineer with 5 years in Python and backend development, specializing in building high-throughput microservices using FastAPI and Docker.

**Experience**
* Senior Software Engineer at TechCorp (2020-Present): Led migration of monolithic app to microservices, reducing latency by 40%.
"""

EXAMPLE_JD = """**Job Title: Senior Backend Engineer**

Responsibilities: Develop scalable, high-performance backend services using Python, ideally with experience in the FastAPI framework. Must have 4+ years of professional experience and familiarity with containerization (Docker/Kubernetes)."""
# --- Gradio Interface ---
with gr.Blocks(title="CV & Job Matcher") as demo:
    gr.Markdown("# 🤖 CV & Job Description Matcher")
    gr.Markdown("Enter a CV and a Job Description to get an automated match score and analysis using a fine-tuned Llama 3.1 model.")
    with gr.Row(variant="panel", equal_height=True):
        cv_input = gr.Textbox(
            label="1. Candidate CV/Resume (Text)",
            lines=15,
            value=EXAMPLE_CV,
            interactive=True,
            container=False,
        )
        jd_input = gr.Textbox(
            label="2. Job Description (JD) Text",
            lines=15,
            value=EXAMPLE_JD,
            interactive=True,
            container=False,
        )
    analyze_btn = gr.Button("🔍 Analyze Match", variant="primary", scale=0)
    # The output component for the structured JSON response
    output_display = gr.JSON(label="3. Match Output (Structured JSON Response)", scale=1)

    # Internal UI click & external API access (combined for compatibility)
    analyze_btn.click(
        fn=match_cv_jd,
        inputs=[cv_input, jd_input],
        outputs=output_display,
        api_name="predict",
    )
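    # api_name="predict" also exposes this function to Gradio API clients.
    # Sketch of a programmatic call (assumes the gradio_client package and a
    # reachable URL; the localhost URL below is a placeholder):
    #   from gradio_client import Client
    #   client = Client("http://localhost:7860/")
    #   result = client.predict(EXAMPLE_CV, EXAMPLE_JD, api_name="/predict")
    #   print(result)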
# --- Mount Gradio on FastAPI ---
app = gr.mount_gradio_app(app, demo, path="/")
# --- Launch ---
if __name__ == "__main__":
    load_model()
    # Determine the primary port from the environment, defaulting to 7860
    primary_port = int(os.environ.get("PORT", 7860))
    # Use the other standard Gradio port as a fallback for robustness, in case
    # the primary port is already bound (e.g., by a lingering process)
    fallback_port = 7860 if primary_port != 7860 else 7861
    # Attempt to run on the primary port
    try:
        print(f"Attempting to run Uvicorn on primary port {primary_port}")
        uvicorn.run(app, host="0.0.0.0", port=primary_port)
    except OSError as e:
        # errno 98 is "Address already in use" on Linux
        if getattr(e, 'errno', None) == 98:
            print(f"ERROR: Primary port {primary_port} is already in use. Trying fallback port {fallback_port}...")
            # Try running on the fallback port
            try:
                uvicorn.run(app, host="0.0.0.0", port=fallback_port)
            except OSError as fallback_e:
                # If the fallback also fails, terminate.
                print(f"FATAL ERROR: Fallback port {fallback_port} also failed to bind. Process terminating.")
                raise fallback_e
        else:
            # Re-raise other unexpected errors
            raise