Harsh123007 committed
Commit 9fdd6be · verified · 1 parent: de38dd1

Update main.py

Files changed (1): main.py +84 -65
main.py CHANGED
@@ -61,12 +61,15 @@ from pypdf import PdfReader
 import torch
 import os
 
+# ======================================================
+# FastAPI App
+# ======================================================
 app = FastAPI(
     title="Harshal AI Backend",
-    version="1.0.0"
+    version="1.0.0",
+    description="Human-like AI for Harshal Portfolio"
 )
 
-# CORS so Next.js can call backend
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
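Note on the hunk above: allow_origins=["*"] stays permissive, which matches the deleted comment's intent (letting the Next.js frontend call the backend during development). For production, a pinned origin list is the usual hardening; a minimal sketch, assuming hypothetical frontend URLs that are not part of this repo:

# Sketch only: pinned CORS origins; both URLs below are placeholder assumptions.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000",          # assumed Next.js dev server
        "https://portfolio.example.com",  # hypothetical production domain
    ],
    allow_methods=["*"],
    allow_headers=["*"],
)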
@@ -74,36 +77,38 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# ======================================
-# 1) LOAD MAIN MODEL (Qwen2.5 1.5B)
-# ======================================
+# ======================================================
+# 1) LOAD MAIN MODEL — Qwen2.5 1.5B (CPU Friendly)
+# ======================================================
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
 
+print(f"🚀 Loading LLM: {MODEL_NAME}")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
 llm = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch.float32,
-    device_map="cpu"
+    dtype=torch.float32,  # newer transformers name for torch_dtype; float32 is CPU-safe
 )
 llm.eval()
 
-# ======================================
-# 2) LOAD EMBEDDING MODEL + RAG
-# ======================================
+print("✅ Qwen Loaded Successfully")
+
+# ======================================================
+# 2) LOAD RESUME + RAG
+# ======================================================
 EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 embedder = SentenceTransformer(EMBED_MODEL)
 
-RESUME = "resume.pdf"
+RESUME_PATH = "resume.pdf"
 resume_rag = None
 
 
 def chunk_text(text, max_chars=450, overlap=80):
     text = " ".join(text.split())
-    chunks, start = [], 0
-
+    chunks = []
+    start = 0
     while start < len(text):
         end = start + max_chars
         chunks.append(text[start:end])
         start = end - overlap
-
     return chunks
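The chunker keeps a sliding window: consecutive chunks share overlap characters, so a fact cut at a chunk boundary still appears intact in the following chunk, and the loop only terminates because the stride max_chars - overlap is positive. A quick self-contained check of the default stride (450/80), using a throwaway input string:

# Quick stride check; mirrors chunk_text(text, max_chars=450, overlap=80).
text = "a" * 1000
chunks, start = [], 0
while start < len(text):
    chunks.append(text[start:start + 450])
    start += 450 - 80  # stride = max_chars - overlap = 370
print([len(c) for c in chunks])  # -> [450, 450, 260]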
@@ -110,74 +115,85 @@ def chunk_text(text, max_chars=450, overlap=80):
 
 
-def build_rag():
+def build_resume_rag():
+    """Read resume.pdf, chunk it, embed it."""
     global resume_rag
 
-    if not os.path.exists(RESUME):
-        print("⚠ resume.pdf NOT FOUND — RAG disabled")
+    if not os.path.exists(RESUME_PATH):
+        print("⚠ resume.pdf not found — RAG disabled")
         return
 
-    reader = PdfReader(RESUME)
-    text = ""
-
-    for p in reader.pages:
-        text += p.extract_text() or ""
+    try:
+        reader = PdfReader(RESUME_PATH)
+        full = ""
+        for p in reader.pages:
+            full += p.extract_text() or ""
 
-    chunks = chunk_text(text)
-
-    embs = embedder.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
-
-    resume_rag = {"chunks": chunks, "embs": embs}
-    print("✅ RAG Ready with", len(chunks), "chunks")
+        chunks = chunk_text(full)
+        embs = embedder.encode(
+            chunks,
+            convert_to_tensor=True,
+            normalize_embeddings=True
+        )
+        resume_rag = {"chunks": chunks, "embs": embs}
+        print(f"✅ Resume RAG ready ({len(chunks)} chunks)")
 
-
-def get_rag_context(query):
+    except Exception as e:
+        print("❌ RAG build error:", e)
+        resume_rag = None
+
+
+def get_rag_context(query: str, top_k=3):
     if resume_rag is None:
         return ""
 
     q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
     sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
-    top = torch.topk(sims, k=3)
+    idx = torch.topk(sims, k=min(top_k, len(sims))).indices
 
-    return "\n\n".join(resume_rag["chunks"][i] for i in top.indices)
+    return "\n\n".join(resume_rag["chunks"][i] for i in idx)
 
 
-build_rag()
+build_resume_rag()
 
-# ======================================
-# 3) SWAGGER DATA MODELS
-# ======================================
-class Msg(BaseModel):
+# ======================================================
+# 3) API BODY DEFINITIONS — CLEAN SWAGGER
+# ======================================================
+class Message(BaseModel):
     role: str
     content: str
 
-class ChatReq(BaseModel):
-    messages: List[Msg]
+
+class ChatRequest(BaseModel):
+    messages: List[Message]
 
 
-# ======================================
-# 4) CHAT ROUTE (Human-like Harshal)
-# ======================================
+# ======================================================
+# 4) CHAT ROUTE — HUMAN-LIKE HARSHAL AI
+# ======================================================
 @app.post("/chat")
-def chat(req: ChatReq):
+def chat(req: ChatRequest):
+
+    if not req.messages:
+        return {"reply": "Hey, ask me something 🙂"}
 
     user_msg = req.messages[-1].content.strip()
 
-    rag = get_rag_context(user_msg)
+    # -------- Retrieve relevant resume context --------
+    rag_text = get_rag_context(user_msg)
 
+    # -------- Persona --------
     persona = f"""
 You are Harshal Sonawane, a real human software engineer from Pune.
+Speak naturally, casual but professional.
+Never say you're an AI or model.
+Use resume context when relevant.
+Keep replies short (2–4 sentences).
 
-Rules:
-- Do NOT say you are an AI or model.
-- Answer in a human, natural tone.
-- Keep replies short (2–4 sentences).
-- Use resume facts when relevant.
-- If unknown, answer honestly.
-
-Resume context:
-{rag}
+Resume knowledge:
+{rag_text}
 """.strip()
 
+    # -------- Model Chat Format --------
     messages = [
         {"role": "system", "content": persona},
        {"role": "user", "content": user_msg}
@@ -186,24 +202,27 @@ Resume context:
     ids = tokenizer.apply_chat_template(
         messages,
         return_tensors="pt",
-        add_generation_prompt=True
-    ).to(llm.device)
-
-    out = llm.generate(
-        ids,
-        max_new_tokens=150,
-        temperature=0.5,
-        top_p=0.9,
-        repetition_penalty=1.05,
-        do_sample=True
+        add_generation_prompt=True,
     )
 
-    gen = out[0][ids.shape[-1]:]
-    reply = tokenizer.decode(gen, skip_special_tokens=True).strip()
+    with torch.no_grad():
+        out = llm.generate(
+            ids,
+            max_new_tokens=150,
+            temperature=0.45,
+            top_p=0.9,
+            repetition_penalty=1.1,
+            do_sample=True
+        )
+
+    reply = tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True).strip()
 
     return {"reply": reply}
 
 
+# ======================================================
+# 5) HEALTH CHECK
+# ======================================================
 @app.get("/")
-def health():
-    return {"status": "Harshal AI backend is running with Qwen 1.5B + RAG 🎯"}
+def root():
+    return {"status": "Harshal AI backend is running (Qwen2.5 + RAG) 🎯"}