Harsh123007 committed
Commit a34510d · verified · 1 Parent(s): 1a2efd8

Update main.py

Files changed (1): main.py +66 -73
main.py CHANGED
@@ -60,15 +60,12 @@ from sentence_transformers import SentenceTransformer
 from pypdf import PdfReader
 import torch, os
 
-# ======================================
-# FastAPI Base
-# ======================================
 app = FastAPI(
     title="Harshal AI Backend",
     version="1.0.0",
-    description="Human-like assistant bound to Harshal's real resume facts."
 )
 
+# Allow requests from anywhere (Next.js frontend)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -76,77 +73,72 @@ app.add_middleware(
     allow_headers=["*"],
 )
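Review note: allow_origins=["*"] leaves CORS wide open. A minimal sketch of a tighter setup once the frontend URL is fixed (the origin below is hypothetical, not from this repo):

# Hypothetical: restrict CORS to the one known frontend origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://harshal-portfolio.example.com"],  # placeholder URL
    allow_methods=["*"],
    allow_headers=["*"],
)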
 
-# ======================================
-# MODEL (Qwen2.5 1.5B)
-# ======================================
+# =======================================================
+# 1) LOAD Qwen 1.5B (NO device_map, NO accelerate needed)
+# =======================================================
+
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+# ❗ Load normally then move to CPU
 llm = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    dtype=torch.float32,
-    device_map="cpu"
+    torch_dtype=torch.float32,
 )
+llm = llm.to("cpu")
 llm.eval()
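Review note: the commit swaps device_map="cpu" (which requires accelerate) for a plain .to("cpu") after loading. A one-line sanity check, assuming llm is loaded as above:

# All parameters should report cpu / torch.float32 after llm.to("cpu").
p = next(llm.parameters())
print(p.device, p.dtype)  # expected: cpu torch.float32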
 
-# ======================================
-# EMBEDDING MODEL
-# ======================================
+# =======================================================
+# 2) RAG (Resume Embeddings)
+# =======================================================
+
 embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-resume_data = None
 RESUME_FILE = "resume.pdf"
+RAG = None
 
-# ======================================
-# UTIL: Chunk Resume
-# ======================================
-def chunk_text(text, size=450, overlap=80):
+def chunk(text, size=450, overlap=80):
     text = " ".join(text.split())
-    out, start = [], 0
-
+    chunks, start = [], 0
     while start < len(text):
         end = start + size
-        out.append(text[start:end])
+        chunks.append(text[start:end])
         start = end - overlap
-
-    return out
+    return chunks
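Review note: chunk() slides a 450-character window with an 80-character overlap, so consecutive chunks start 370 characters apart. A self-contained check of that arithmetic (the sample string is made up):

# 1000 chars, size=450, overlap=80: chunks start at 0, 370, 740.
sample = "".join(str(i % 10) for i in range(1000))
parts = chunk(sample)
print([len(p) for p in parts])          # [450, 450, 260]
assert parts[0][-80:] == parts[1][:80]  # neighbours share an 80-char tail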
 
 
-# ======================================
-# BUILD RAG
-# ======================================
-def load_resume():
-    global resume_data
+def build_rag():
+    global RAG
+
     if not os.path.exists(RESUME_FILE):
-        print("❌ resume.pdf not found")
+        print("⚠ resume.pdf NOT FOUND — RAG DISABLED")
         return
 
     reader = PdfReader(RESUME_FILE)
     text = ""
-    for pg in reader.pages:
-        text += pg.extract_text() or ""
+    for p in reader.pages:
+        text += p.extract_text() or ""
 
-    chunks = chunk_text(text)
-    embs = embedder.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
-    resume_data = {"chunks": chunks, "embs": embs}
-
-    print(f"✅ Resume RAG loaded with {len(chunks)} chunks")
+    c = chunk(text)
+    e = embedder.encode(c, normalize_embeddings=True, convert_to_tensor=True)
 
+    RAG = {"chunks": c, "embs": e}
+    print(f"✅ RAG ready with {len(c)} chunks")
 
-def get_resume_context(query, top_k=3):
-    if resume_data is None:
+def search_rag(q):
+    if RAG is None:
         return ""
 
-    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
-    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_data["embs"])
-    top = torch.topk(sims, k=top_k)
-
-    return "\n\n".join(resume_data["chunks"][i] for i in top.indices)
+    qv = embedder.encode([q], normalize_embeddings=True, convert_to_tensor=True)[0]
+    sims = torch.nn.functional.cosine_similarity(qv.unsqueeze(0), RAG["embs"])
+    top = torch.topk(sims, 3).indices
+    return "\n\n".join(RAG["chunks"][i] for i in top)
 
-load_resume()
+build_rag()
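Review note: hypothetical usage of the renamed retrieval pair, assuming resume.pdf exists so build_rag() has populated RAG:

# Returns the 3 most similar resume chunks, or "" when RAG is disabled.
ctx = search_rag("Which backend frameworks has Harshal used?")
print(ctx[:300] if ctx else "RAG disabled: resume.pdf missing")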
 
 
 
-# ======================================
-# SCHEMAS
-# ======================================
+# =======================================================
+# 3) SCHEMAS
+# =======================================================
+
 class Msg(BaseModel):
     role: str
     content: str
@@ -154,59 +146,60 @@ class Msg(BaseModel):
 class ChatReq(BaseModel):
     messages: List[Msg]
 
-# ======================================
-# CHAT
-# ======================================
+# =======================================================
+# 4) CHAT ENDPOINT
+# =======================================================
+
 @app.post("/chat")
 def chat(req: ChatReq):
 
     user_msg = req.messages[-1].content.strip()
-    resume_context = get_resume_context(user_msg)
+    rag = search_rag(user_msg)
 
-    # STRICT anti-hallucination persona
     persona = f"""
 You are Harshal Sonawane, a real human software engineer from Pune.
 
-VERY IMPORTANT RULES:
-- NEVER say you are an AI, model, assistant, chatbot.
-- ONLY answer using information available in the resume context.
-- If the resume does NOT contain the answer → say: "I haven't mentioned this in my resume."
-- Do NOT invent employment history, projects, companies, or skills.
-- Use a natural, short, human tone (2–4 sentences).
-- Stay consistent with resume facts.
-
-Resume facts (DO NOT IGNORE):
-{resume_context}
+STRICT RULES:
+- Never say you are an AI or model.
+- ONLY answer using resume facts below.
+- If the resume does not contain the answer → say:
+"I haven't mentioned this in my resume."
+- Do not guess or invent.
+- Speak naturally like a human.
+- Keep replies short (2–4 sentences).
+
+Resume facts:
+{rag}
 """.strip()
 
-    messages = [
+    msgs = [
         {"role": "system", "content": persona},
-        {"role": "user", "content": user_msg}
+        {"role": "user", "content": user_msg},
     ]
 
     ids = tokenizer.apply_chat_template(
-        messages,
+        msgs,
         return_tensors="pt",
         add_generation_prompt=True
-    ).to(llm.device)
+    ).to("cpu")
 
     out = llm.generate(
         ids,
-        max_new_tokens=160,
+        max_new_tokens=150,
         temperature=0.45,
         top_p=0.9,
         repetition_penalty=1.1,
-        do_sample=True,
-        pad_token_id=tokenizer.eos_token_id
+        pad_token_id=tokenizer.eos_token_id,
+        do_sample=True
     )
 
-    reply = tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True).strip()
-
-    return {"reply": reply}
+    resp = tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True).strip()
+    return {"reply": resp}
 
-# ======================================
-# HEALTH
-# ======================================
+# =======================================================
+# 5) HEALTH CHECK
+# =======================================================
 
 @app.get("/")
 def health():
-    return {"status": "Harshal AI backend running with Qwen 1.5B + strict RAG 🎯"}
+    return {"status": "Harshal AI backend running with Qwen 1.5B + RAG 🎯"}
 