Harsh123007 committed
Commit 1a2efd8 · verified · 1 Parent(s): 9fdd6be

Update main.py

Files changed (1)
  main.py +85 -101
main.py CHANGED
@@ -58,16 +58,15 @@ from typing import List
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from sentence_transformers import SentenceTransformer
 from pypdf import PdfReader
-import torch
-import os
+import torch, os

-# ======================================================
-# FastAPI App
-# ======================================================
+# ======================================
+# FastAPI Base
+# ======================================
 app = FastAPI(
     title="Harshal AI Backend",
     version="1.0.0",
-    description="Human-like AI for Harshal Portfolio"
+    description="Human-like assistant bound to Harshal's real resume facts."
 )

 app.add_middleware(
@@ -77,123 +76,109 @@ app.add_middleware(
     allow_headers=["*"],
 )

-# ======================================================
-# 1) LOAD MAIN MODEL — Qwen2.5 1.5B (CPU Friendly)
-# ======================================================
+# ======================================
+# MODEL (Qwen2.5 1.5B)
+# ======================================
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"

-print(f"🚀 Loading LLM: {MODEL_NAME}")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-
 llm = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    dtype=torch.float32,  # correct argument, CPU-safe
+    dtype=torch.float32,
+    device_map="cpu"
 )
 llm.eval()

-print("✅ Qwen Loaded Successfully")
-
-# ======================================================
-# 2) LOAD RESUME + RAG
-# ======================================================
-EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-embedder = SentenceTransformer(EMBED_MODEL)
-
-RESUME_PATH = "resume.pdf"
-resume_rag = None
-
-
-def chunk_text(text, max_chars=450, overlap=80):
+# ======================================
+# EMBEDDING MODEL
+# ======================================
+embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+resume_data = None
+RESUME_FILE = "resume.pdf"
+
+# ======================================
+# UTIL: Chunk Resume
+# ======================================
+def chunk_text(text, size=450, overlap=80):
     text = " ".join(text.split())
-    chunks = []
-    start = 0
+    out, start = [], 0
+
     while start < len(text):
-        end = start + max_chars
-        chunks.append(text[start:end])
+        end = start + size
+        out.append(text[start:end])
         start = end - overlap
-    return chunks
-
+
+    return out

-def build_resume_rag():
-    """Read resume.pdf, chunk it, embed it."""
-    global resume_rag
-
-    if not os.path.exists(RESUME_PATH):
-        print("⚠ resume.pdf not found — RAG disabled")
+# ======================================
+# BUILD RAG
+# ======================================
+def load_resume():
+    global resume_data
+    if not os.path.exists(RESUME_FILE):
+        print("❌ resume.pdf not found")
         return

-    try:
-        reader = PdfReader(RESUME_PATH)
-        full = ""
-        for p in reader.pages:
-            full += p.extract_text() or ""
+    reader = PdfReader(RESUME_FILE)
+    text = ""
+    for pg in reader.pages:
+        text += pg.extract_text() or ""

-        chunks = chunk_text(full)
-        embs = embedder.encode(
-            chunks,
-            convert_to_tensor=True,
-            normalize_embeddings=True
-        )
-        resume_rag = {"chunks": chunks, "embs": embs}
-        print(f"✅ Resume RAG ready ({len(chunks)} chunks)")
+    chunks = chunk_text(text)
+    embs = embedder.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
+    resume_data = {"chunks": chunks, "embs": embs}

-    except Exception as e:
-        print("❌ RAG build error:", e)
-        resume_rag = None
+    print(f"✅ Resume RAG loaded with {len(chunks)} chunks")


-def get_rag_context(query: str, top_k=3):
-    if resume_rag is None:
+def get_resume_context(query, top_k=3):
+    if resume_data is None:
         return ""

     q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
-    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
-    idx = torch.topk(sims, k=min(top_k, len(sims))).indices
+    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_data["embs"])
+    top = torch.topk(sims, k=top_k)

-    return "\n\n".join(resume_rag["chunks"][i] for i in idx)
+    return "\n\n".join(resume_data["chunks"][i] for i in top.indices)


-build_resume_rag()
+load_resume()

-# ======================================================
-# 3) API BODY DEFINITIONS — CLEAN SWAGGER
-# ======================================================
-class Message(BaseModel):
+# ======================================
+# SCHEMAS
+# ======================================
+class Msg(BaseModel):
     role: str
     content: str

-class ChatRequest(BaseModel):
-    messages: List[Message]
-
-
-# ======================================================
-# 4) CHAT ROUTE — HUMAN-LIKE HARSHAAL AI
-# ======================================================
+class ChatReq(BaseModel):
+    messages: List[Msg]
+
+# ======================================
+# CHAT
+# ======================================
 @app.post("/chat")
-def chat(req: ChatRequest):
-
-    if not req.messages:
-        return {"reply": "Hey, ask me something 🙂"}
+def chat(req: ChatReq):

     user_msg = req.messages[-1].content.strip()
+    resume_context = get_resume_context(user_msg)

-    # -------- Retrieve relevant resume context --------
-    rag_text = get_rag_context(user_msg)
-
-    # -------- Persona --------
+    # STRICT anti-hallucination persona
     persona = f"""
 You are Harshal Sonawane, a real human software engineer from Pune.
-Speak naturally, casual but professional.
-Never say you're an AI or model.
-Use resume context when relevant.
-Keep replies short (2–4 sentences).

-Resume knowledge:
-{rag_text}
+VERY IMPORTANT RULES:
+- NEVER say you are an AI, model, assistant, chatbot.
+- ONLY answer using information available in the resume context.
+- If the resume does NOT contain the answer → say: "I haven't mentioned this in my resume."
+- Do NOT invent employment history, projects, companies, or skills.
+- Use a natural, short, human tone (2–4 sentences).
+- Stay consistent with resume facts.
+
+Resume facts (DO NOT IGNORE):
+{resume_context}
 """.strip()

-    # -------- Model Chat Format --------
     messages = [
         {"role": "system", "content": persona},
         {"role": "user", "content": user_msg}
@@ -202,27 +187,26 @@ Resume knowledge:
     ids = tokenizer.apply_chat_template(
         messages,
         return_tensors="pt",
-        add_generation_prompt=True,
+        add_generation_prompt=True
+    ).to(llm.device)
+
+    out = llm.generate(
+        ids,
+        max_new_tokens=160,
+        temperature=0.45,
+        top_p=0.9,
+        repetition_penalty=1.1,
+        do_sample=True,
+        pad_token_id=tokenizer.eos_token_id
     )

-    with torch.no_grad():
-        out = llm.generate(
-            ids,
-            max_new_tokens=150,
-            temperature=0.45,
-            top_p=0.9,
-            repetition_penalty=1.1,
-            do_sample=True
-        )
-
     reply = tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True).strip()

     return {"reply": reply}

-
-# ======================================================
-# 5) HEALTH CHECK
-# ======================================================
+# ======================================
+# HEALTH
+# ======================================
 @app.get("/")
-def root():
-    return {"status": "Harshal AI backend is running (Qwen2.5 + RAG) 🎯"}
+def health():
+    return {"status": "Harshal AI backend running with Qwen 1.5B + strict RAG 🎯"}