Harsh123007 committed on
Commit ca245fc · verified · 1 Parent(s): b986cc9

Update main.py

Files changed (1):
  1. main.py +86 -118
main.py CHANGED
@@ -50,6 +50,7 @@
 # routes=app.routes
 # )
 
+
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
@@ -60,182 +61,149 @@ from pypdf import PdfReader
 import torch
 import os
 
-# ============================================================
-# FastAPI APP
-# ============================================================
-
-app = FastAPI(
-    title="Harshal AI Backend",
-    version="1.0.0",
-    description="Human-like AI Assistant for Harshal's Portfolio"
-)
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-# ============================================================
-# 1) LOAD MAIN CHAT MODEL
-# ============================================================
-
-LLM_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
-
-print(f"🚀 Loading LLM model: {LLM_MODEL_NAME}")
-tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
-llm_model = AutoModelForCausalLM.from_pretrained(
-    LLM_MODEL_NAME,
-    torch_dtype=torch.float32,
-    device_map="cpu"
-)
-llm_model.eval()
-print("✅ LLM Loaded Successfully")
-
-# ============================================================
-# 2) RAG: LOAD EMBEDDINGS FROM RESUME
-# ============================================================
-
-EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
-RESUME_PATH = "resume.pdf"
-
-embedder = SentenceTransformer(EMBED_MODEL_NAME)
-resume_vectors = None
-
-
-def chunk_text(text, max_chars=450, overlap=80):
-    text = " ".join(text.replace("\n", " ").split())
-    chunks, start = [], 0
-    while start < len(text):
-        end = start + max_chars
-        chunks.append(text[start:end])
-        start = end - overlap
-    return chunks
-
-
-def build_resume_rag():
-    global resume_vectors
-    if not os.path.exists(RESUME_PATH):
-        print("⚠️ resume.pdf missing — RAG disabled.")
-        return
-
-    try:
-        reader = PdfReader(RESUME_PATH)
-        text = "".join((p.extract_text() or "") for p in reader.pages)
-        chunks = chunk_text(text)
-
-        embeddings = embedder.encode(
-            chunks, convert_to_tensor=True, normalize_embeddings=True
-        )
-
-        resume_vectors = {"chunks": chunks, "embeddings": embeddings}
-        print("✅ Resume RAG ready.")
-    except Exception as e:
-        print(f"❌ RAG Error: {e}")
-        resume_vectors = None
-
-
-def embed_query(query: str):
-    return embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
-
-
-def retrieve_relevant_resume_text(query: str, top_k=3):
-    if resume_vectors is None:
-        return ""
-    try:
-        q = embed_query(query)
-        sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_vectors["embeddings"])
-        top = torch.topk(sims, k=min(top_k, len(sims))).indices
-        return "\n\n".join(resume_vectors["chunks"][i] for i in top)
-    except:
-        return ""
-
-
-build_resume_rag()
-
-# ============================================================
-# 3) CLEAN REQUEST SCHEMA
-# ============================================================
-
-class Message(BaseModel):
-    role: str
-    content: str
-
-
-class ChatRequest(BaseModel):
-    messages: List[Message]
-
-
-# ============================================================
-# 4) CHAT ENDPOINT
-# ============================================================
-
-@app.post("/chat")
-def chat(req: ChatRequest):
-    try:
-        if not req.messages:
-            return {"reply": "I didn't get any question yet 🙂"}
-
-        user_msg = req.messages[-1].content.strip()
-        if not user_msg:
-            return {"reply": "Could you repeat that? I didn't catch it."}
-
-        resume_context = retrieve_relevant_resume_text(user_msg)
-
-        system_prompt = f"""
-You are Harshal Sonawane, a real human software engineer from Pune.
-Speak casually, friendly, confident.
-Never say you are an AI.
-Keep replies short (2–4 sentences).
-Use resume context when helpful.
-If unknown, answer honestly.
-
-Resume info:
-{resume_context}
-""".strip()
-
-        chat_messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_msg},
-        ]
-
-        input_ids = tokenizer.apply_chat_template(
-            chat_messages,
-            return_tensors="pt",
-            add_generation_prompt=True,
-        ).to(llm_model.device)
-
-        with torch.no_grad():
-            output_ids = llm_model.generate(
-                input_ids,
-                max_new_tokens=170,
-                temperature=0.55,
-                top_p=0.9,
-                repetition_penalty=1.08,
-                do_sample=True,
-                pad_token_id=tokenizer.eos_token_id
-            )
-
-        generated = output_ids[0][input_ids.shape[-1]:]
-        reply = tokenizer.decode(generated, skip_special_tokens=True).strip()
-
-        if not reply:
-            reply = "Something went wrong. Try again?"
-
-        return {"reply": reply}
-
-    except Exception as e:
-        print(f"❌ Chat Error: {e}")
-        return {"reply": "Oops, I hit a snag. Try again shortly."}
-
-
-# ============================================================
-# 5) ROOT HEALTH CHECK
-# ============================================================
-
-@app.get("/")
-def home():
-    return {"message": "Harshal AI backend running with Phi-3 Mini + RAG 🎯"}
 
+app = FastAPI(
+    title="Harshal AI Backend",
+    version="1.0.0"
+)
+
+# CORS so Next.js can call backend
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# ======================================
+# 1) LOAD MAIN MODEL (Qwen2.5 1.5B)
+# ======================================
+MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+llm = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    torch_dtype=torch.float32,
+    device_map="cpu"
+)
+llm.eval()
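
A quick smoke test of the freshly loaded CPU model, e.g. in a REPL (not part of the commit; the prompt string is made up, and decoding is greedy so the check is deterministic):

msgs = [{"role": "user", "content": "Say hi in one sentence."}]
ids = tokenizer.apply_chat_template(msgs, return_tensors="pt", add_generation_prompt=True)
out = llm.generate(ids, max_new_tokens=20, do_sample=False)
print(tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True))
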
+# ======================================
+# 2) LOAD EMBEDDING MODEL + RAG
+# ======================================
+EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+embedder = SentenceTransformer(EMBED_MODEL)
+
+RESUME = "resume.pdf"
+resume_rag = None
+
+
+def chunk_text(text, max_chars=450, overlap=80):
+    text = " ".join(text.split())
+    chunks, start = [], 0
+
+    while start < len(text):
+        end = start + max_chars
+        chunks.append(text[start:end])
+        start = end - overlap
+
+    return chunks
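
For intuition about the sliding window above: each 450-character chunk overlaps the previous one by 80 characters, so a 1,000-character string yields three chunks. A sketch, not part of the commit:

sample = "x" * 1000
chunks = chunk_text(sample)
print([len(c) for c in chunks])   # [450, 450, 260] (windows start at 0, 370, 740)
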
 
+
+
+def build_rag():
+    global resume_rag
+
+    if not os.path.exists(RESUME):
+        print("⚠ resume.pdf NOT FOUND — RAG disabled")
+        return
+
+    reader = PdfReader(RESUME)
+    text = ""
+
+    for p in reader.pages:
+        text += p.extract_text() or ""
+
+    chunks = chunk_text(text)
+
+    embs = embedder.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
+
+    resume_rag = {"chunks": chunks, "embs": embs}
+    print("✅ RAG Ready with", len(chunks), "chunks")
+
+
+def get_rag_context(query):
+    if resume_rag is None:
+        return ""
+
+    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
+    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
+    top = torch.topk(sims, k=3)
+
+    return "\n\n".join(resume_rag["chunks"][i] for i in top.indices)
+
+
+build_rag()
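
Two notes on get_rag_context: since normalize_embeddings=True returns unit-length vectors, the cosine similarity reduces to a plain dot product, and torch.topk(sims, k=3) would raise if the resume yields fewer than three chunks. An equivalent, slightly defensive sketch (the query string is made up; this is not the committed code):

q = embedder.encode(["what does Harshal build?"], convert_to_tensor=True, normalize_embeddings=True)[0]
sims = resume_rag["embs"] @ q                      # dot product equals cosine for unit vectors
top = torch.topk(sims, k=min(3, sims.shape[0]))    # clamp k to the number of chunks
print("\n\n".join(resume_rag["chunks"][i] for i in top.indices))
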
+
+# ======================================
+# 3) SWAGGER DATA MODELS
+# ======================================
+class Msg(BaseModel):
+    role: str
+    content: str
+
+class ChatReq(BaseModel):
+    messages: List[Msg]
+
+
+# ======================================
+# 4) CHAT ROUTE (Human-like Harshal)
+# ======================================
+@app.post("/chat")
+def chat(req: ChatReq):
+
+    user_msg = req.messages[-1].content.strip()
+
+    rag = get_rag_context(user_msg)
+
+    persona = f"""
+You are Harshal Sonawane, a real human software engineer from Pune.
+
+Rules:
+- Do NOT say you are an AI or model.
+- Answer in a human, natural tone.
+- Keep replies short (2–4 sentences).
+- Use resume facts when relevant.
+- If unknown, answer honestly.
+
+Resume context:
+{rag}
+""".strip()
+
+    messages = [
+        {"role": "system", "content": persona},
+        {"role": "user", "content": user_msg}
+    ]
+
+    ids = tokenizer.apply_chat_template(
+        messages,
+        return_tensors="pt",
+        add_generation_prompt=True
+    ).to(llm.device)
+
+    out = llm.generate(
+        ids,
+        max_new_tokens=150,
+        temperature=0.5,
+        top_p=0.9,
+        repetition_penalty=1.05,
+        do_sample=True
+    )
+
+    gen = out[0][ids.shape[-1]:]
+    reply = tokenizer.decode(gen, skip_special_tokens=True).strip()
+
+    return {"reply": reply}
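
Exercising the route once the server is up, a sketch assuming the requests package and a server on localhost:8000 (host, port, and the question are assumptions; the payload shape matches ChatReq):

import requests

resp = requests.post(
    "http://localhost:8000/chat",
    json={"messages": [{"role": "user", "content": "What stack do you work with?"}]},
)
print(resp.json()["reply"])
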
+
+
+@app.get("/")
+def health():
+    return {"status": "Harshal AI backend is running with Qwen 1.5B + RAG 🎯"}
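
The commit itself has no __main__ block; a typical local entrypoint would look like the sketch below, assuming uvicorn is installed (port 7860 is the Hugging Face Spaces convention; adjust as needed):

import uvicorn

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=7860)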