Harsh123007 committed
Commit e1af920 · verified · 1 Parent(s): 81b1612

Update main.py

Files changed (1)
  1. main.py +132 -111
main.py CHANGED
@@ -58,10 +58,11 @@ from typing import List
  from transformers import AutoTokenizer, AutoModelForCausalLM
  from sentence_transformers import SentenceTransformer
  from pypdf import PdfReader
- import torch, os, re

  app = FastAPI(title="Harshal AI Backend", version="1.0.0")

  app.add_middleware(
      CORSMiddleware,
      allow_origins=["*"],
@@ -69,163 +70,183 @@ app.add_middleware(
      allow_headers=["*"],
  )

- # -------------------------------------------------------
- # 1. MODEL (CPU SAFE)
- # -------------------------------------------------------
- MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
- tokenizer = AutoTokenizer.from_pretrained(MODEL)
- llm = AutoModelForCausalLM.from_pretrained(MODEL, dtype=torch.float32)
- llm.eval()
-
- # -------------------------------------------------------
- # 2. SMART RAG (Semantic Sections)
- # -------------------------------------------------------
- embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-
- RESUME = "resume.pdf"
- SECTIONS = {}  # { section_name: text }
-
- def extract_sections(text):
-     """
-     Extracts logical resume sections using headings.
-     """
-     parts = re.split(r"(PROFESSIONAL EXPERIENCE|PROJECTS|SKILLS|EDUCATION|CERTIFICATION)", text)
-     cleaned = {}
-
-     current = None
-     for p in parts:
-         p = p.strip()
-         if p in ["PROFESSIONAL EXPERIENCE", "PROJECTS", "SKILLS", "EDUCATION", "CERTIFICATION"]:
-             current = p
-             cleaned[current] = ""
-         elif current:
-             cleaned[current] += p + "\n"
-
-     return cleaned
-
- def build_rag_sections():
-     global SECTIONS
-
-     if not os.path.exists(RESUME):
-         print("❌ resume.pdf not found")
-         return
-
-     reader = PdfReader(RESUME)
-     text = ""
-
-     for p in reader.pages:
-         text += p.extract_text() or ""
-
-     # Extract using bold headings
-     SECTIONS = extract_sections(text)
-     print("📌 Extracted Sections:", SECTIONS.keys())
-
- build_rag_sections()
-
- # Embed section titles
- SECTION_EMBS = {}
- for k, v in SECTIONS.items():
-     SECTION_EMBS[k] = embedder.encode([v], convert_to_tensor=True)
-
- def retrieve_section(query):
-     """
-     Best matching resume section.
-     """
-     q = embedder.encode([query], convert_to_tensor=True)
-     best = None
-     best_score = -1
-
-     for name, emb in SECTION_EMBS.items():
-         sim = torch.nn.functional.cosine_similarity(q, emb)[0].item()
-         if sim > best_score:
-             best_score = sim
-             best = name
-
-     return best
-
- # SPECIAL RULES
- def special_logic(query):
-     q = query.lower()
-
-     if any(w in q for w in ["current", "working now", "present job", "company"]):
-         return "PROFESSIONAL EXPERIENCE"
-
-     if any(w in q for w in ["project", "projects", "personal project"]):
-         return "PROJECTS"
-
-     if any(w in q for w in ["skill", "skills"]):
-         return "SKILLS"
-
-     return None
-
- # -------------------------------------------------------
- # 3. CHAT API
- # -------------------------------------------------------
- class Msg(BaseModel):
      role: str
      content: str

  class ChatReq(BaseModel):
-     messages: List[Msg]

  @app.post("/chat")
  def chat(req: ChatReq):
-
      user_msg = req.messages[-1].content.strip()

-     # Special rule override
-     sec = special_logic(user_msg)
-     if sec is None:
-         sec = retrieve_section(user_msg)
-
-     resume_text = SECTIONS.get(sec, "")
-
-     persona = f"""
- You are Harshal Sonawane, a real human software engineer from Pune.
-
- Rules:
- - Answer like a friendly human (2–4 sentences).
- - ONLY use information from the resume section below.
- - If the resume does NOT mention the answer:
-   say: "This is not mentioned in my resume, but based on my experience..."
-   and reply realistically WITHOUT inventing fake jobs.
-
- Resume relevant section ({sec}):
- {resume_text}
  """.strip()

-     messages = [
-         {"role": "system", "content": persona},
          {"role": "user", "content": user_msg},
      ]

-     ids = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
-
-     out = llm.generate(
-         ids,
-         max_new_tokens=150,
-         temperature=0.55,
-         top_p=0.9,
-         do_sample=True,
-         pad_token_id=tokenizer.eos_token_id
      )

-     gen = out[0][ids.shape[-1]:]
      reply = tokenizer.decode(gen, skip_special_tokens=True).strip()

-     if resume_text.strip() == "":
-         reply = "This is not mentioned in my resume, but based on my experience, " + reply
-
      return {"reply": reply}

  @app.get("/")
  def health():
-     return {"status": "Harshal AI running with smart RAG 🎯"}
  from transformers import AutoTokenizer, AutoModelForCausalLM
  from sentence_transformers import SentenceTransformer
  from pypdf import PdfReader
+ import torch, os

  app = FastAPI(title="Harshal AI Backend", version="1.0.0")

+ # CORS (Next.js frontend)
  app.add_middleware(
      CORSMiddleware,
      allow_origins=["*"],
      allow_headers=["*"],
  )

+ # ============================================================
+ # 1) LOAD MAIN MODEL (Phi-3 Mini — good balance of quality/speed)
+ # ============================================================
+ MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
+
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ llm = AutoModelForCausalLM.from_pretrained(
+     MODEL_NAME,
+     dtype=torch.float32,  # `dtype` instead of the deprecated `torch_dtype`
+ )
+ llm.eval()

+ # ============================================================
+ # 2) LOAD EMBEDDINGS + BUILD RAG FROM resume.pdf
+ # ============================================================
+ EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+ embedder = SentenceTransformer(EMBED_MODEL)
+
+ RESUME_FILE = "resume.pdf"
+ resume_rag = None
+
+ def chunk_text(text, max_chars=450, overlap=80):
+     """Simple overlapping chunks."""
+     text = " ".join(text.split())
+     chunks, start = [], 0
+
+     while start < len(text):
+         end = start + max_chars
+         chunks.append(text[start:end])
+         start = end - overlap
+
+     return chunks
+
+ def build_rag():
+     """Reads resume.pdf → chunks → embeddings."""
+     global resume_rag
+
+     if not os.path.exists(RESUME_FILE):
+         print("⚠ resume.pdf NOT FOUND — RAG disabled.")
+         return
+
+     reader = PdfReader(RESUME_FILE)
+     full_text = ""
+
+     for page in reader.pages:
+         full_text += page.extract_text() or ""
+
+     chunks = chunk_text(full_text)
+
+     embeddings = embedder.encode(
+         chunks, convert_to_tensor=True, normalize_embeddings=True
+     )
+
+     resume_rag = {"chunks": chunks, "embs": embeddings}
+     print("✅ Resume RAG built with", len(chunks), "chunks")
+
+ build_rag()
+
+ def retrieve_rag(query, top_k=3):
+     """Find the most relevant resume chunks."""
+     if resume_rag is None:
+         return ""
+
+     q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
+     sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
+     top = torch.topk(sims, k=min(top_k, len(sims)))
+
+     return "\n\n".join(resume_rag["chunks"][i] for i in top.indices)
+
+ # ============================================================
+ # 3) REQUEST MODELS
+ # ============================================================
+ class Message(BaseModel):
      role: str
      content: str

  class ChatReq(BaseModel):
+     messages: List[Message]
+
+ # ============================================================
+ # 4) SMART MODE CHAT ROUTE (Resume + Fallback)
+ # ============================================================
  @app.post("/chat")
  def chat(req: ChatReq):
      user_msg = req.messages[-1].content.strip()

+     # RAG: pull matching context from the resume
+     rag_context = retrieve_rag(user_msg)
+
+     # Detect whether we found resume info
+     has_relevant_rag = rag_context and len(rag_context) > 40
+
+     # SMART RULE:
+     # If the resume has relevant info → answer ONLY from the resume.
+     # If the resume has nothing → fall back to the general persona.
+     if has_relevant_rag:
+         fallback_note = ""
+         resume_instruction = """
+ Use ONLY the details provided in the resume context.
+ Do NOT invent any new companies, dates, or roles.
+ Stay strictly consistent with:
+ - Current company: Börm Bruckmeier Infotech India Pvt. Ltd.
+ - Past experience: SarvaTech Consultants (Internship)
+ - Personal projects listed in the resume.
+ """
+     else:
+         fallback_note = """
+ The resume does NOT mention this.
+ Provide a short helpful answer based on Harshal’s background, tone, and skills —
+ BUT do NOT invent companies, dates, or job titles not present in the resume.
+ """
+         resume_instruction = ""
+
+     # System persona
+     system_prompt = f"""
+ You are Harshal Sonawane — a real human software engineer from Pune, India.
+
+ Tone:
+ - Natural, confident, friendly.
+ - 2–4 sentences max.
+ - Never say you are an AI.
+
+ Your resume (RAG context):
+ {rag_context}
+
+ Instructions:
+ {resume_instruction}
+
+ Fallback rule:
+ {fallback_note}
  """.strip()

+     # Chat template
+     msgs = [
+         {"role": "system", "content": system_prompt},
          {"role": "user", "content": user_msg},
      ]

+     input_ids = tokenizer.apply_chat_template(
+         msgs,
+         return_tensors="pt",
+         add_generation_prompt=True,
      )

+     with torch.no_grad():
+         output_ids = llm.generate(
+             input_ids,
+             max_new_tokens=160,
+             temperature=0.55,
+             top_p=0.9,
+             repetition_penalty=1.06,
+             do_sample=True,
+         )
+
+     gen = output_ids[0][input_ids.shape[-1]:]
      reply = tokenizer.decode(gen, skip_special_tokens=True).strip()

      return {"reply": reply}

+ # ============================================================
+ # 5) HEALTH CHECK
+ # ============================================================
  @app.get("/")
  def health():
+     return {
+         "status": "Harshal AI backend running (SMART MODE)",
+         "model": MODEL_NAME
+     }
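
For reference, the new chunk_text walks the flattened text with a stride of max_chars - overlap (450 - 80 = 370 characters by default), so consecutive chunks share an 80-character tail. A minimal standalone sketch of the same windowing; the sample text below is made up, only the function body mirrors the diff:

# Standalone copy of the overlapping-window chunking used by build_rag().
def chunk_text(text, max_chars=450, overlap=80):
    text = " ".join(text.split())      # collapse all whitespace runs
    chunks, start = [], 0
    while start < len(text):
        end = start + max_chars
        chunks.append(text[start:end])
        start = end - overlap          # step back by the overlap → 370-char stride
    return chunks

sample = "word " * 200                 # ~1000 characters of dummy text
parts = chunk_text(sample)
print([len(p) for p in parts])         # e.g. [450, 450, 259]; windows overlap by 80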
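
Because build_rag() encodes with normalize_embeddings=True, the cosine similarity inside retrieve_rag() reduces to a plain dot product over unit vectors. A quick self-contained check of that equivalence; random tensors stand in for real embeddings, so no model download is needed (384 is the all-MiniLM-L6-v2 dimension):

import torch
import torch.nn.functional as F

torch.manual_seed(0)
q = F.normalize(torch.randn(384), dim=0)         # fake query embedding (unit length)
embs = F.normalize(torch.randn(5, 384), dim=1)   # five fake chunk embeddings

cos = F.cosine_similarity(q.unsqueeze(0), embs)  # what retrieve_rag() computes
dot = embs @ q                                   # equivalent for normalized vectors
assert torch.allclose(cos, dot, atol=1e-6)

top = torch.topk(cos, k=min(3, len(cos)))        # same top-k selection as the route
print(top.indices.tolist())                      # indices of the best-matching chunks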
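
Finally, a hypothetical smoke test for the updated route. It assumes the app is served locally (e.g. uvicorn main:app --port 8000) and that resume.pdf sits next to main.py; only the /chat path and the messages schema come from the diff, the host and port are illustrative:

# Hypothetical client for the /chat endpoint (pip install requests).
import requests

BASE_URL = "http://localhost:8000"  # assumed local dev address, not from the diff

resp = requests.post(
    f"{BASE_URL}/chat",
    json={"messages": [{"role": "user", "content": "Where do you work right now?"}]},
)
resp.raise_for_status()
print(resp.json()["reply"])  # short persona-style answer

print(requests.get(f"{BASE_URL}/").json())  # health check: status + model name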