Update main.py
main.py CHANGED
@@ -58,10 +58,11 @@ from typing import List
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from sentence_transformers import SentenceTransformer
 from pypdf import PdfReader
 import torch, os
 
 app = FastAPI(title="Harshal AI Backend", version="1.0.0")
 
+# CORS (Next.js frontend)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -69,163 +70,183 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-#
-# 1
-#
-MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(MODEL)
-llm = AutoModelForCausalLM.from_pretrained(MODEL, dtype=torch.float32)
-llm.eval()
-
-# -------------------------------------------------------
-# 2. SMART RAG (Semantic Sections)
-# -------------------------------------------------------
-embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-
-SECTIONS = {}  # { section_name: text }
-
-    """
-    parts = re.split(r"(PROFESSIONAL EXPERIENCE|PROJECTS|SKILLS|EDUCATION|CERTIFICATION)", text)
-    cleaned = {}
-        p = p.strip()
-        if p in ["PROFESSIONAL EXPERIENCE", "PROJECTS", "SKILLS", "EDUCATION", "CERTIFICATION"]:
-            current = p
-            cleaned[current] = ""
-        elif current:
-            cleaned[current] += p + "\n"
-
-def
-    text = ""
-    for p in reader.pages:
-        text += p.extract_text() or ""
-
-for k, v in SECTIONS.items():
-    SECTION_EMBS[k] = embedder.encode([v], convert_to_tensor=True)
-
-    """
-    q = embedder.encode([query], convert_to_tensor=True)
-    best = None
-    best_score = -1
-        if sim > best_score:
-            best_score = sim
-            best = name
-    return best
-
-# SPECIAL RULES
-def special_logic(query):
-    q = query.lower()
-        return "SKILLS"
-    return None
+# ============================================================
+# 1) LOAD MAIN MODEL (Phi-3 Mini — good balance of quality/speed)
+# ============================================================
+
+MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+llm = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    dtype=torch.float32,  # instead of torch_dtype
+)
+llm.eval()
 
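Reviewer note: dtype= is the newer spelling of torch_dtype= in recent transformers releases, which is what the inline comment points at, and float32 is the safe default on a CPU Space. If this ever moves to GPU hardware, a half-precision load would look roughly like the sketch below; this is a hypothetical variant under that assumption, not part of this commit.

    # Hypothetical GPU variant (assumes a CUDA Space and the accelerate package):
    llm = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        dtype=torch.float16,   # halves memory; fine for inference
        device_map="auto",     # requires accelerate
    )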
+# ============================================================
+# 2) LOAD EMBEDDINGS + BUILD RAG FROM resume.pdf
+# ============================================================
+
+EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+embedder = SentenceTransformer(EMBED_MODEL)
+
+RESUME_FILE = "resume.pdf"
+resume_rag = None
+
+def chunk_text(text, max_chars=450, overlap=80):
+    """Simple overlapping chunks."""
+    text = " ".join(text.split())
+    chunks, start = [], 0
+
+    while start < len(text):
+        end = start + max_chars
+        chunks.append(text[start:end])
+        start = end - overlap
+
+    return chunks
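chunk_text walks the flattened text in fixed windows, advancing max_chars - overlap = 370 characters per step, so consecutive chunks share an 80-character seam. A quick illustrative check with made-up input, not part of main.py:

    sample = "".join(str(i % 10) for i in range(1000))
    chunks = chunk_text(sample)                # windows start at 0, 370, 740
    assert chunks[0][-80:] == chunks[1][:80]   # neighbours overlap by 80 chars
    print([len(c) for c in chunks])            # [450, 450, 260]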
+def build_rag():
+    """Reads resume.pdf → chunks → embeddings."""
+    global resume_rag
+
+    if not os.path.exists(RESUME_FILE):
+        print("⚠ resume.pdf NOT FOUND — RAG disabled.")
+        return
+
+    reader = PdfReader(RESUME_FILE)
+    full_text = ""
+
+    for page in reader.pages:
+        full_text += page.extract_text() or ""
+
+    chunks = chunk_text(full_text)
+
+    embeddings = embedder.encode(
+        chunks, convert_to_tensor=True, normalize_embeddings=True
+    )
+
+    resume_rag = {"chunks": chunks, "embs": embeddings}
+    print("✅ Resume RAG built with", len(chunks), "chunks")
+
+build_rag()
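build_rag() runs at import time, so the Space pays the PDF-parsing and embedding cost once per boot. If cold-start time ever matters, the same call could be deferred to FastAPI's lifespan hook; a minimal sketch, assuming no other startup work is needed:

    from contextlib import asynccontextmanager

    @asynccontextmanager
    async def lifespan(app):
        build_rag()   # build the resume index when the server starts
        yield         # nothing to tear down on shutdown

    # app = FastAPI(title="Harshal AI Backend", version="1.0.0", lifespan=lifespan)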
+def retrieve_rag(query, top_k=3):
+    """Find most relevant resume chunks."""
+    if resume_rag is None:
+        return ""
+
+    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
+    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
+    top = torch.topk(sims, k=min(top_k, len(sims)))
+
+    return "\n\n".join(resume_rag["chunks"][i] for i in top.indices)
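Because both the query and the chunk embeddings are encoded with normalize_embeddings=True, the cosine_similarity call reduces to a plain dot product against the chunk matrix. The scoring step could equivalently be written as below (same ranking up to floating point):

    sims = resume_rag["embs"] @ q                    # (num_chunks,) similarity scores
    top = torch.topk(sims, k=min(top_k, len(sims)))  # indices of the best chunks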
-# 3. CHAT API
-# -------------------------------------------------------
+# ============================================================
+# 3) REQUEST MODELS
+# ============================================================
 
-class Msg(BaseModel):
+class Message(BaseModel):
     role: str
     content: str
 
 class ChatReq(BaseModel):
-    messages: List[
+    messages: List[Message]
+
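With these models, the /chat route below expects a JSON body of the ChatReq shape. An illustrative construction (model_dump() is the Pydantic v2 spelling; v1 uses .dict()):

    req = ChatReq(messages=[Message(role="user", content="Where do you work right now?")])
    print(req.model_dump())   # {'messages': [{'role': 'user', 'content': 'Where do you work right now?'}]}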
+# ============================================================
+# 4) SMART MODE CHAT ROUTE (Resume + Fallback)
+# ============================================================
 
 @app.post("/chat")
 def chat(req: ChatReq):
     user_msg = req.messages[-1].content.strip()
 
+    # RAG: take from resume
+    rag_context = retrieve_rag(user_msg)
+
+    # Detect if we found resume info
+    has_relevant_rag = rag_context and len(rag_context) > 40
+
+    # SMART RULE:
+    # If resume has relevant info → answer ONLY from resume
+    # If resume has nothing → fallback using general persona
+    if has_relevant_rag:
+        fallback_note = ""
+        resume_instruction = """
+Use ONLY the details provided in the resume context.
+Do NOT invent any new companies, dates, or roles.
+Stay strictly consistent with:
+- Current company: Börm Bruckmeier Infotech India Pvt. Ltd.
+- Past experience: SarvaTech Consultants (Internship)
+- Personal projects listed in resume.
+"""
+    else:
+        fallback_note = """
+The resume does NOT mention this.
+Provide a short helpful answer based on Harshal’s background, tone, and skills —
+BUT do NOT invent companies, dates, or job titles not present in the resume.
+"""
+        resume_instruction = ""
+
+    # System persona
+    system_prompt = f"""
+You are Harshal Sonawane — a real human software engineer from Pune, India.
+
+Tone:
+- Natural, confident, friendly.
+- 2–4 sentences max.
+- Never say you are an AI.
+
+Your resume (RAG context):
+{rag_context}
+
+Instructions:
+{resume_instruction}
+
+Fallback rule:
+{fallback_note}
     """.strip()
 
+    # Chat template
+    msgs = [
+        {"role": "system", "content": system_prompt},
         {"role": "user", "content": user_msg},
     ]
 
-        max_new_tokens=150,
-        temperature=0.55,
-        top_p=0.9,
-        do_sample=True,
-        pad_token_id=tokenizer.eos_token_id
+    input_ids = tokenizer.apply_chat_template(
+        msgs,
+        return_tensors="pt",
+        add_generation_prompt=True,
     )
 
+    with torch.no_grad():
+        output_ids = llm.generate(
+            input_ids,
+            max_new_tokens=160,
+            temperature=0.55,
+            top_p=0.9,
+            repetition_penalty=1.06,
+            do_sample=True,
+        )
+
+    gen = output_ids[0][input_ids.shape[-1]:]
     reply = tokenizer.decode(gen, skip_special_tokens=True).strip()
 
-    if resume_text.strip() == "":
-        reply = "This is not mentioned in my resume, but based on my experience, " + reply
     return {"reply": reply}
 
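A quick local smoke test for the route, using FastAPI's TestClient (illustrative; TestClient needs the httpx package installed, and the post will actually run generation):

    from fastapi.testclient import TestClient

    client = TestClient(app)
    resp = client.post("/chat", json={
        "messages": [{"role": "user", "content": "What are you working on these days?"}]
    })
    print(resp.json()["reply"])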
+# ============================================================
+# 5) HEALTH CHECK
+# ============================================================
+
 @app.get("/")
 def health():
-    return {
+    return {
+        "status": "Harshal AI backend running (SMART MODE)",
+        "model": MODEL_NAME
+    }
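Once the Space is up, the health route doubles as a cheap liveness probe; for example (hypothetical Space URL):

    import requests
    print(requests.get("https://your-space.hf.space/").json())
    # {'status': 'Harshal AI backend running (SMART MODE)', 'model': 'microsoft/Phi-3-mini-4k-instruct'}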