import gradio as gr
import torch
import json
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import os
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn
import openai
import socket
import time  # Used by the port-retry mechanism at startup

# --- Configuration ---
BASE_MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
LORA_MODEL_ID = "LlamaFactoryAI/Llama-3.1-8B-Instruct-cv-job-description-matching"
HF_TOKEN = os.environ.get("HF_TOKEN")

# Check for OpenAI API key
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# --- FastAPI App ---
app = FastAPI()

# --- Pydantic Model for API ---
class MatchRequest(BaseModel):
    cv_text: str
    job_description: str

# --- Model and Tokenizer ---
model = None
tokenizer = None
openai_client = None
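# The globals above are populated once by load_model() and then shared by both
# the Gradio click handler and the REST endpoint, so the 8B model is loaded a
# single time per process.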

def load_model():
    global model, tokenizer, openai_client
    if model is not None:
        return

    print("Loading base model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        token=HF_TOKEN,
    )
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print("Loading LoRA adapter...")
    peft_config = PeftConfig.from_pretrained(
        LORA_MODEL_ID,
        task_type="CAUSAL_LM",
        token=HF_TOKEN,
    )
    model_with_lora = PeftModel.from_pretrained(
        base_model,
        LORA_MODEL_ID,
        config=peft_config,
        token=HF_TOKEN,
    )
    # Merge the LoRA adapter into the base model for a single, faster inference model
    model = model_with_lora.merge_and_unload()
    model.eval()
    print("Model fully loaded!")

    # Initialize the OpenAI client if a key is available
    if OPENAI_API_KEY:
        openai_client = openai.OpenAI(api_key=OPENAI_API_KEY)
        print("OpenAI client initialized.")
    else:
        print("OPENAI_API_KEY not found. Skipping human-readable summary generation.")

# --- Function to summarize input text (CV or JD) ---
def get_summary(text: str, role: str) -> str:
    """Uses OpenAI to create a concise summary of the CV or Job Description."""
    if not openai_client:
        # Fallback: return the original text if the API is not available
        return text

    if role == "CV":
        prompt_instruction = (
            "Extract the key professional skills, technologies, job roles, and "
            "quantifiable achievements. Exclude personal contact information, "
            "filler text, or overly verbose descriptions. Keep the summary under "
            "300 words."
        )
    elif role == "JD":
        prompt_instruction = (
            "Extract the core required skills, experience levels, technology "
            "stack, and main responsibilities for this role. Exclude recruiting "
            "boilerplate or company mission statements. Keep the summary under "
            "200 words."
        )
    else:
        prompt_instruction = "Summarize the key contents."

    prompt = f"""
{prompt_instruction}

Original Text:
---
{text}
---

Concise Summary:
"""
    try:
        completion = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,  # Very low temperature for factual extraction
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        print(f"OpenAI summarization for {role} failed: {e}. Using original text.")
        return text
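# Illustrative usage (variable names are hypothetical; requires OPENAI_API_KEY,
# otherwise the input text is returned unchanged):
#   condensed_cv = get_summary(raw_cv_text, "CV")  # <= 300-word, skills-focused summary
#   condensed_jd = get_summary(raw_jd_text, "JD")  # <= 200-word, requirements-focused summary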

def get_human_readable_summary(json_data: dict) -> str:
    """Uses OpenAI to convert structured JSON data into human-readable text."""
    if not openai_client:
        return "OpenAI API not available. Set OPENAI_API_KEY to enable summarization."

    # Prompt that converts the structured match data into recruiter-style text
    prompt = f"""
Take the following structured JSON data and analyse the Job Description and the candidate's profile.
Your task is to produce a concise, employee-facing match summary using the structure below.
Do not exceed the level of detail shown.
Do not add commentary, risks, gaps, or extra sections.
Keep the tone direct, confident, and written like an expert recruiter.

Structure to follow exactly:

"[Candidate Name] is a [good/great/perfect] match for the [Role Title]."

Company:
Write one short sentence explaining whether the candidate's current or recent companies increase the likelihood of relevance to the hiring company.
Example style: "Her current and recent companies operate in SaaS environments, which increases relevance to our business."

Skills & Experience:
List 6-10 short keywords or tags that reflect the most relevant skills and experience for the role. No sentences. No fluff.

Summary:
Write a single sentence (maximum 300 characters) explaining why the candidate is a strong match for the role.
The summary must be:
- direct,
- confidence-building,
- based on clear overlaps between the JD and the candidate,
- NOT overly detailed.

Rules:
- Keep everything concise.
- Avoid technical explanations, long descriptions, or extra insights.
- Do not include risks, gaps, scores, or any other sections.
- Output only the four required parts above.
- Do not output any JSON or code formatting.

JSON Data:
---
{json.dumps(json_data, indent=2)}
---

Human-Readable Summary:
"""
    try:
        completion = openai_client.chat.completions.create(
            model="gpt-4o-mini",  # A fast and capable model for this task
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,  # Low temperature for reliable summarization
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        print(f"OpenAI API call failed: {e}")
        return f"OpenAI summarization failed due to an API error: {e}"

# --- Core Inference ---
def match_cv_jd(cv_text: str, job_description: str) -> dict:
    """
    Performs the CV-JD matching using the merged Llama 3.1 model and post-processes
    the result using OpenAI if available.
    """
    # Ensure the model is loaded (important for environments where Gradio might reload)
    if model is None or tokenizer is None:
        load_model()

    # --- Summarization step ---
    print("Summarizing CV and Job Description...")
    summarized_cv = get_summary(cv_text, "CV")
    summarized_jd = get_summary(job_description, "JD")

    # System prompt guides the model's behavior and output format (JSON structure)
    system_prompt = """You are a world-class CV and Job Description matching AI. Output a structured JSON with fields:
- matching_analysis
- description
- score (0-100)
- recommendation (2 concrete steps)
Output MUST be valid JSON and contain ONLY the JSON object, nothing else."""

    # The user prompt uses the summarized text
    user_prompt = f"""CV (Summarized):
---
{summarized_cv}
---

Job Description (Summarized):
---
{summarized_jd}
---"""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Prepare inputs for the model
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    # Generate the response
    outputs = model.generate(
        inputs,
        max_new_tokens=1024,
        temperature=0.01,  # Keep temperature low for structured/deterministic output
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens (everything after the prompt)
    response_text = tokenizer.decode(
        outputs[0][inputs.shape[-1]:],
        skip_special_tokens=True,
    ).strip()
    # Attempt to parse the JSON output
    try:
        # Robustly find the start and end of the JSON object in the response
        start = response_text.find("{")
        end = response_text.rfind("}")
        json_str = response_text[start:end + 1]
        parsed = json.loads(json_str)

        # Post-process with OpenAI to add a human-readable summary
        if OPENAI_API_KEY and openai_client:
            human_readable_text = get_human_readable_summary(parsed)
            parsed["human_readable"] = human_readable_text
        return parsed
    except Exception as e:
        # Fallback for poorly formed output
        print(f"JSON parsing failed: {e}")
        return {"raw": response_text, "error": "Failed to parse JSON output from model."}

# --- FastAPI Endpoints ---
# NOTE: the original snippet defined these handlers without registering them on
# the app; the route paths below are assumptions, adjust as needed.
@app.post("/api/match")
async def api_predict(request: MatchRequest):
    """Direct REST API endpoint for CV-JD matching."""
    return match_cv_jd(request.cv_text, request.job_description)

@app.get("/health")
async def health_check():
    return {"status": "ok", "model_loaded": model is not None}

# --- Example Data ---
EXAMPLE_CV = """**John Doe**
Email: [email protected]

**Summary**
Experienced software engineer with 5 years in Python and backend development, specializing in building high-throughput microservices using FastAPI and Docker.

**Experience**
* Senior Software Engineer at TechCorp (2020-Present): Led migration of monolithic app to microservices, reducing latency by 40%.
"""

EXAMPLE_JD = """**Job Title: Senior Backend Engineer**

Responsibilities: Develop scalable, high-performance backend services using Python, ideally with experience in the FastAPI framework. Must have 4+ years of professional experience and familiarity with containerization (Docker/Kubernetes)."""

# --- Gradio Interface ---
with gr.Blocks(title="CV & Job Matcher") as demo:
    gr.Markdown("# 🤖 CV & Job Description Matcher")
    gr.Markdown("Enter a CV and a Job Description to get an automated match score and analysis using a fine-tuned Llama 3.1 model.")

    with gr.Row(variant="panel", equal_height=True):
        cv_input = gr.Textbox(
            label="1. Candidate CV/Resume (Text)",
            lines=15,
            value=EXAMPLE_CV,
            interactive=True,
            container=False,
        )
        jd_input = gr.Textbox(
            label="2. Job Description (JD) Text",
            lines=15,
            value=EXAMPLE_JD,
            interactive=True,
            container=False,
        )

    analyze_btn = gr.Button("🔍 Analyze Match", variant="primary", scale=0)

    # The output component for the structured JSON response
    output_display = gr.JSON(label="3. Match Output (Structured JSON Response)", scale=1)

    # Internal UI click & external API access (combined for compatibility)
    analyze_btn.click(
        fn=match_cv_jd,
        inputs=[cv_input, jd_input],
        outputs=output_display,
        api_name="predict",
    )

# --- Mount Gradio on FastAPI ---
app = gr.mount_gradio_app(app, demo, path="/")
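# With the demo mounted at "/", one Uvicorn process serves both the Gradio UI and
# the FastAPI routes registered above, so demo.launch() is intentionally not used.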

# --- Port Availability Check ---
def is_port_available(port):
    """Checks if a given port is currently free to bind."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        try:
            s.bind(("0.0.0.0", port))
            return True
        except socket.error as e:
            # Errno 98 is "Address already in use" on Linux
            if e.errno == 98:
                return False
            raise
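# Note: this is a point-in-time probe; another process can still claim the port
# between this check and Uvicorn's own bind, in which case uvicorn.run() raises.
# errno 98 is Linux-specific; errno.EADDRINUSE would be the portable constant.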

# --- Launch ---
if __name__ == "__main__":
    load_model()

    # 1. Determine the primary port and build a sequence of ports to try,
    #    prioritizing the environment variable, then common Gradio ports.
    primary_port = int(os.environ.get("PORT", 7860))
    ports_to_try = [primary_port]
    if 7860 not in ports_to_try:
        ports_to_try.append(7860)
    if 7861 not in ports_to_try:
        ports_to_try.append(7861)

    # Simple retry mechanism
    max_retries = 3
    server_started = False

    # Loop through the candidate ports
    for port in ports_to_try:
        # Retry binding on the current port
        for attempt in range(max_retries):
            print(f"Attempting to bind/run Uvicorn on port {port} (check {attempt + 1}/{max_retries})")
            if is_port_available(port):
                # Port is free: start Uvicorn. uvicorn.run() blocks, so if it
                # succeeds the script effectively stops here.
                print(f"Port {port} is available. Starting server...")
                uvicorn.run(app, host="0.0.0.0", port=port, loop="asyncio")
                # Only reached if uvicorn.run returns (e.g., graceful shutdown);
                # treat that as a successful start for the loop logic.
                server_started = True
                break  # Break out of the inner retry loop
            else:
                print(f"WARNING: Port {port} is already in use. Retrying in 1 second...")
                time.sleep(1)  # Wait a moment before retrying the same port
                if attempt == max_retries - 1:
                    print(f"All {max_retries} checks failed for port {port}. Trying next port in sequence.")
                    break  # Move on to the next port in ports_to_try

        if server_started:
            # Server started (and later shut down); stop trying further ports
            break

    if not server_started:
        print("FATAL ERROR: Failed to bind to any available port after multiple retries. Process terminating.")
        exit(1)  # Exit with a failure code if no port could be bound