mployd-engineering committed on
Commit
eee7aae
·
verified ·
1 Parent(s): a24dd2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -12
app.py CHANGED
@@ -7,13 +7,13 @@ import os
7
  from fastapi import FastAPI
8
  from pydantic import BaseModel
9
  import uvicorn
10
- import openai # Import the OpenAI library
11
 
12
  # --- Configuration ---
13
  BASE_MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
14
  LORA_MODEL_ID = "LlamaFactoryAI/Llama-3.1-8B-Instruct-cv-job-description-matching"
15
  HF_TOKEN = os.environ.get("HF_TOKEN")
16
- # NEW: Check for OpenAI API key
17
  OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
18
 
19
  # --- FastAPI App ---
@@ -75,6 +75,45 @@ def load_model():
75
  print("OPENAI_API_KEY not found. Skipping human-readable summary generation.")
76
 
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def get_human_readable_summary(json_data: dict) -> str:
79
  """Uses OpenAI to convert structured JSON data into human-readable text."""
80
  if not openai_client:
@@ -84,10 +123,8 @@ def get_human_readable_summary(json_data: dict) -> str:
84
  prompt = f"""
85
  You are an expert technical writer. Take the following structured JSON data
86
  representing a CV-Job Description match analysis and convert it into a smooth,
87
- professional, human-readable summary. Do not mention score, focus on the followings:
88
- - key findings, in 'matching_analysis'
89
- - analyse fit with strenght and weakness
90
- - recommendation for overal fit and what they should do
91
  steps clearly at the end. Do not output any JSON or code formatting.
92
 
93
  JSON Data:
@@ -121,18 +158,23 @@ def match_cv_jd(cv_text: str, job_description: str) -> dict:
121
  # Ensure model is loaded (important for environments where Gradio might reload)
122
  if model is None or tokenizer is None:
123
  load_model()
 
 
 
 
 
124
 
125
  # System prompt guides the model's behavior and output format (JSON structure)
126
  system_prompt = """You are a world-class CV and Job Description matching AI. Output a structured JSON with fields:- matching_analysis- description- score (0-100)- recommendation (2 concrete steps)Output MUST be valid JSON and contain ONLY the JSON object, nothing else."""
127
 
128
- # User prompt contains the input data
129
- user_prompt = f"""CV:
130
  ---
131
- {cv_text}
132
  ---
133
- Job Description:
134
  ---
135
- {job_description}
136
  ---"""
137
 
138
  messages = [
@@ -172,7 +214,7 @@ Job Description:
172
 
173
  parsed = json.loads(json_str)
174
 
175
- # --- NEW: Post-process with OpenAI to add human_readable summary ---
176
  if OPENAI_API_KEY and openai_client:
177
  human_readable_text = get_human_readable_summary(parsed)
178
  # Add the summary to the JSON output
 
7
  from fastapi import FastAPI
8
  from pydantic import BaseModel
9
  import uvicorn
10
+ import openai
11
 
12
  # --- Configuration ---
13
  BASE_MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
14
  LORA_MODEL_ID = "LlamaFactoryAI/Llama-3.1-8B-Instruct-cv-job-description-matching"
15
  HF_TOKEN = os.environ.get("HF_TOKEN")
16
+ # Check for OpenAI API key
17
  OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
18
 
19
  # --- FastAPI App ---
 
75
  print("OPENAI_API_KEY not found. Skipping human-readable summary generation.")
76
 
77
 
78
+ # --- NEW: Function to summarize input text (CV or JD) ---
79
# Extraction instructions keyed by document role; any other role falls back
# to the generic instruction used in get_summary().
_SUMMARY_INSTRUCTIONS = {
    "CV": (
        "Extract the key professional skills, technologies, job roles, and "
        "quantifiable achievements. Exclude personal contact information, "
        "filler text, or overly verbose descriptions. Keep the summary under "
        "300 words."
    ),
    "JD": (
        "Extract the core required skills, experience levels, technological "
        "stack, and main responsibilities for this role. Exclude recruiting "
        "boilerplate or company mission statements. Keep the summary under "
        "200 words."
    ),
}


def get_summary(text: str, role: str) -> str:
    """Return a concise OpenAI-generated summary of a CV or job description.

    This is a best-effort step: whenever a summary cannot be produced, the
    original ``text`` is returned unchanged so the caller can continue with
    the full document.

    Args:
        text: The raw CV or job-description text.
        role: ``'CV'`` or ``'JD'`` selects a role-specific extraction
            instruction; any other value uses a generic instruction.

    Returns:
        The stripped summary text, or ``text`` itself when the input is
        blank, no OpenAI client is configured, the API call fails, or the
        model returns no content.
    """
    # Nothing to summarize: skip the API round-trip entirely.
    if not text or not text.strip():
        return text

    # Fallback: return original text if API is not available.
    if not openai_client:
        return text

    instruction = _SUMMARY_INSTRUCTIONS.get(role, "Summarize the key contents.")
    prompt = (
        f"\n{instruction}\n\n"
        "Original Text:\n---\n"
        f"{text}\n---\n\n"
        "Concise Summary:\n"
    )

    try:
        completion = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "user", "content": prompt},
            ],
            temperature=0.1,  # Very low temp for fact extraction
        )
        content = completion.choices[0].message.content
    except Exception as e:
        # Deliberately broad: summarization is optional, so any client or
        # network failure degrades to the unsummarized text instead of
        # failing the whole request.
        print(f"OpenAI summarization for {role} failed: {e}. Using original text.")
        return text

    # The message content may be None or blank; an empty summary would leave
    # the downstream prompt with nothing to work on, so keep the original.
    summary = (content or "").strip()
    if not summary:
        print(f"OpenAI summarization for {role} returned no content. Using original text.")
        return text
    return summary
115
+
116
+
117
  def get_human_readable_summary(json_data: dict) -> str:
118
  """Uses OpenAI to convert structured JSON data into human-readable text."""
119
  if not openai_client:
 
123
  prompt = f"""
124
  You are an expert technical writer. Take the following structured JSON data
125
  representing a CV-Job Description match analysis and convert it into a smooth,
126
+ professional, human-readable summary. Focus on the score, key findings in
127
+ 'matching_analysis' and 'description', and format the 'recommendation'
 
 
128
  steps clearly at the end. Do not output any JSON or code formatting.
129
 
130
  JSON Data:
 
158
  # Ensure model is loaded (important for environments where Gradio might reload)
159
  if model is None or tokenizer is None:
160
  load_model()
161
+
162
+ # --- NEW: Summarization Step ---
163
+ print("Summarizing CV and Job Description...")
164
+ summarized_cv = get_summary(cv_text, 'CV')
165
+ summarized_jd = get_summary(job_description, 'JD')
166
 
167
  # System prompt guides the model's behavior and output format (JSON structure)
168
  system_prompt = """You are a world-class CV and Job Description matching AI. Output a structured JSON with fields:- matching_analysis- description- score (0-100)- recommendation (2 concrete steps)Output MUST be valid JSON and contain ONLY the JSON object, nothing else."""
169
 
170
+ # User prompt now uses the summarized text
171
+ user_prompt = f"""CV (Summarized):
172
  ---
173
+ {summarized_cv}
174
  ---
175
+ Job Description (Summarized):
176
  ---
177
+ {summarized_jd}
178
  ---"""
179
 
180
  messages = [
 
214
 
215
  parsed = json.loads(json_str)
216
 
217
+ # Post-process with OpenAI to add human_readable summary
218
  if OPENAI_API_KEY and openai_client:
219
  human_readable_text = get_human_readable_summary(parsed)
220
  # Add the summary to the JSON output