Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -70,7 +70,6 @@ app = FastAPI(
|
|
| 70 |
description="Human-like AI Assistant for Harshal's Portfolio"
|
| 71 |
)
|
| 72 |
|
| 73 |
-
# Allow everything (your Next.js frontend)
|
| 74 |
app.add_middleware(
|
| 75 |
CORSMiddleware,
|
| 76 |
allow_origins=["*"],
|
|
@@ -80,7 +79,7 @@ app.add_middleware(
|
|
| 80 |
)
|
| 81 |
|
| 82 |
# ============================================================
|
| 83 |
-
# 1) LOAD MAIN CHAT MODEL
|
| 84 |
# ============================================================
|
| 85 |
|
| 86 |
LLM_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
|
|
@@ -96,66 +95,45 @@ llm_model.eval()
|
|
| 96 |
print("✅ LLM Loaded Successfully")
|
| 97 |
|
| 98 |
# ============================================================
|
| 99 |
-
# 2) LOAD
|
| 100 |
# ============================================================
|
| 101 |
|
| 102 |
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
|
| 103 |
RESUME_PATH = "resume.pdf"
|
| 104 |
|
| 105 |
-
print(f"π Loading embedding model: {EMBED_MODEL_NAME}")
|
| 106 |
embedder = SentenceTransformer(EMBED_MODEL_NAME)
|
| 107 |
-
|
| 108 |
|
| 109 |
-
resume_vectors = None # {"chunks": [...], "embeddings": tensor[]}
|
| 110 |
|
| 111 |
-
|
| 112 |
-
def chunk_text(text: str, max_chars=450, overlap=80):
|
| 113 |
text = " ".join(text.replace("\n", " ").split())
|
| 114 |
-
chunks = []
|
| 115 |
-
start = 0
|
| 116 |
-
|
| 117 |
while start < len(text):
|
| 118 |
end = start + max_chars
|
| 119 |
chunks.append(text[start:end])
|
| 120 |
start = end - overlap
|
| 121 |
-
|
| 122 |
return chunks
|
| 123 |
|
| 124 |
|
| 125 |
def build_resume_rag():
|
| 126 |
-
"""Reads resume.pdf, chunks it, builds embeddings"""
|
| 127 |
global resume_vectors
|
| 128 |
-
|
| 129 |
if not os.path.exists(RESUME_PATH):
|
| 130 |
print("⚠️ resume.pdf missing — RAG disabled.")
|
| 131 |
return
|
| 132 |
|
| 133 |
-
print("π Reading resume.pdf...")
|
| 134 |
try:
|
| 135 |
reader = PdfReader(RESUME_PATH)
|
| 136 |
-
text = ""
|
| 137 |
-
|
| 138 |
-
for page in reader.pages:
|
| 139 |
-
text += page.extract_text() or ""
|
| 140 |
-
|
| 141 |
chunks = chunk_text(text)
|
| 142 |
-
print(f"βοΈ Resume split into {len(chunks)} chunks.")
|
| 143 |
|
| 144 |
embeddings = embedder.encode(
|
| 145 |
-
chunks,
|
| 146 |
-
convert_to_tensor=True,
|
| 147 |
-
normalize_embeddings=True
|
| 148 |
)
|
| 149 |
|
| 150 |
-
resume_vectors = {
|
| 151 |
-
"chunks": chunks,
|
| 152 |
-
"embeddings": embeddings
|
| 153 |
-
}
|
| 154 |
-
|
| 155 |
print("✅ Resume RAG ready.")
|
| 156 |
-
|
| 157 |
except Exception as e:
|
| 158 |
-
print(f"β
|
| 159 |
resume_vectors = None
|
| 160 |
|
| 161 |
|
|
@@ -166,29 +144,29 @@ def embed_query(query: str):
|
|
| 166 |
def retrieve_relevant_resume_text(query: str, top_k=3):
|
| 167 |
if resume_vectors is None:
|
| 168 |
return ""
|
| 169 |
-
|
| 170 |
try:
|
| 171 |
q = embed_query(query)
|
| 172 |
sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_vectors["embeddings"])
|
| 173 |
-
top = torch.topk(sims, k=min(top_k, len(sims)))
|
| 174 |
-
return "\n\n".join(resume_vectors["chunks"][i] for i in top
|
| 175 |
except:
|
| 176 |
return ""
|
| 177 |
|
| 178 |
|
| 179 |
-
# Build RAG on startup
|
| 180 |
build_resume_rag()
|
| 181 |
|
| 182 |
# ============================================================
|
| 183 |
-
# 3) CLEAN REQUEST SCHEMA
|
| 184 |
# ============================================================
|
| 185 |
|
| 186 |
class Message(BaseModel):
|
| 187 |
role: str
|
| 188 |
content: str
|
| 189 |
|
|
|
|
| 190 |
class ChatRequest(BaseModel):
|
| 191 |
-
messages: List
|
|
|
|
| 192 |
|
| 193 |
# ============================================================
|
| 194 |
# 4) CHAT ENDPOINT
|
|
@@ -196,44 +174,28 @@ class ChatRequest(BaseModel):
|
|
| 196 |
|
| 197 |
@app.post("/chat")
|
| 198 |
def chat(req: ChatRequest):
|
| 199 |
-
"""
|
| 200 |
-
Chat endpoint for Harshal AI.
|
| 201 |
-
Expects:
|
| 202 |
-
{
|
| 203 |
-
"messages": [
|
| 204 |
-
{ "role": "user", "content": "Where are you from?" }
|
| 205 |
-
]
|
| 206 |
-
}
|
| 207 |
-
Returns:
|
| 208 |
-
{ "reply": "..." }
|
| 209 |
-
"""
|
| 210 |
-
|
| 211 |
try:
|
| 212 |
if not req.messages:
|
| 213 |
return {"reply": "I didn't get any question yet π"}
|
| 214 |
|
| 215 |
user_msg = req.messages[-1].content.strip()
|
| 216 |
-
|
| 217 |
if not user_msg:
|
| 218 |
return {"reply": "Could you repeat that? I didn't catch it."}
|
| 219 |
|
| 220 |
-
# ---- GET RAG TEXT ----
|
| 221 |
resume_context = retrieve_relevant_resume_text(user_msg)
|
| 222 |
|
| 223 |
-
# ---- Persona ----
|
| 224 |
system_prompt = f"""
|
| 225 |
-
You are Harshal Sonawane, a real human software engineer from Pune
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
Use resume
|
| 230 |
-
If
|
| 231 |
-
|
| 232 |
-
|
| 233 |
{resume_context}
|
| 234 |
""".strip()
|
| 235 |
|
| 236 |
-
# ---- Chat format for Phi-3 ----
|
| 237 |
chat_messages = [
|
| 238 |
{"role": "system", "content": system_prompt},
|
| 239 |
{"role": "user", "content": user_msg},
|
|
@@ -260,13 +222,14 @@ Relevant resume details:
|
|
| 260 |
reply = tokenizer.decode(generated, skip_special_tokens=True).strip()
|
| 261 |
|
| 262 |
if not reply:
|
| 263 |
-
reply = "
|
| 264 |
|
| 265 |
return {"reply": reply}
|
| 266 |
|
| 267 |
except Exception as e:
|
| 268 |
print(f"❌ Chat Error: {e}")
|
| 269 |
-
return {"reply": "Oops, I hit a snag.
|
|
|
|
| 270 |
|
| 271 |
# ============================================================
|
| 272 |
# 5) ROOT HEALTH CHECK
|
|
@@ -275,3 +238,4 @@ Relevant resume details:
|
|
| 275 |
@app.get("/")
|
| 276 |
def home():
|
| 277 |
return {"message": "Harshal AI backend running with Phi-3 Mini + RAG 🎯"}
|
|
|
|
|
|
| 70 |
description="Human-like AI Assistant for Harshal's Portfolio"
|
| 71 |
)
|
| 72 |
|
|
|
|
| 73 |
app.add_middleware(
|
| 74 |
CORSMiddleware,
|
| 75 |
allow_origins=["*"],
|
|
|
|
| 79 |
)
|
| 80 |
|
| 81 |
# ============================================================
|
| 82 |
+
# 1) LOAD MAIN CHAT MODEL
|
| 83 |
# ============================================================
|
| 84 |
|
| 85 |
LLM_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
|
|
|
|
| 95 |
print("✅ LLM Loaded Successfully")
|
| 96 |
|
| 97 |
# ============================================================
|
| 98 |
+
# 2) RAG: LOAD EMBEDDINGS FROM RESUME
|
| 99 |
# ============================================================
|
| 100 |
|
| 101 |
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
|
| 102 |
RESUME_PATH = "resume.pdf"
|
| 103 |
|
|
|
|
| 104 |
embedder = SentenceTransformer(EMBED_MODEL_NAME)
|
| 105 |
+
resume_vectors = None
|
| 106 |
|
|
|
|
| 107 |
|
| 108 |
+
def chunk_text(text, max_chars=450, overlap=80):
|
|
|
|
| 109 |
text = " ".join(text.replace("\n", " ").split())
|
| 110 |
+
chunks, start = [], 0
|
|
|
|
|
|
|
| 111 |
while start < len(text):
|
| 112 |
end = start + max_chars
|
| 113 |
chunks.append(text[start:end])
|
| 114 |
start = end - overlap
|
|
|
|
| 115 |
return chunks
|
| 116 |
|
| 117 |
|
| 118 |
def build_resume_rag():
|
|
|
|
| 119 |
global resume_vectors
|
|
|
|
| 120 |
if not os.path.exists(RESUME_PATH):
|
| 121 |
print("⚠️ resume.pdf missing — RAG disabled.")
|
| 122 |
return
|
| 123 |
|
|
|
|
| 124 |
try:
|
| 125 |
reader = PdfReader(RESUME_PATH)
|
| 126 |
+
text = "".join((p.extract_text() or "") for p in reader.pages)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
chunks = chunk_text(text)
|
|
|
|
| 128 |
|
| 129 |
embeddings = embedder.encode(
|
| 130 |
+
chunks, convert_to_tensor=True, normalize_embeddings=True
|
|
|
|
|
|
|
| 131 |
)
|
| 132 |
|
| 133 |
+
resume_vectors = {"chunks": chunks, "embeddings": embeddings}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
print("✅ Resume RAG ready.")
|
|
|
|
| 135 |
except Exception as e:
|
| 136 |
+
print(f"❌ RAG Error: {e}")
|
| 137 |
resume_vectors = None
|
| 138 |
|
| 139 |
|
|
|
|
| 144 |
def retrieve_relevant_resume_text(query: str, top_k=3):
|
| 145 |
if resume_vectors is None:
|
| 146 |
return ""
|
|
|
|
| 147 |
try:
|
| 148 |
q = embed_query(query)
|
| 149 |
sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_vectors["embeddings"])
|
| 150 |
+
top = torch.topk(sims, k=min(top_k, len(sims))).indices
|
| 151 |
+
return "\n\n".join(resume_vectors["chunks"][i] for i in top)
|
| 152 |
except:
|
| 153 |
return ""
|
| 154 |
|
| 155 |
|
|
|
|
| 156 |
build_resume_rag()
|
| 157 |
|
| 158 |
# ============================================================
|
| 159 |
+
# 3) CLEAN REQUEST SCHEMA
|
| 160 |
# ============================================================
|
| 161 |
|
| 162 |
class Message(BaseModel):
|
| 163 |
role: str
|
| 164 |
content: str
|
| 165 |
|
| 166 |
+
|
| 167 |
class ChatRequest(BaseModel):
|
| 168 |
+
messages: List[Message]
|
| 169 |
+
|
| 170 |
|
| 171 |
# ============================================================
|
| 172 |
# 4) CHAT ENDPOINT
|
|
|
|
| 174 |
|
| 175 |
@app.post("/chat")
|
| 176 |
def chat(req: ChatRequest):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
try:
|
| 178 |
if not req.messages:
|
| 179 |
return {"reply": "I didn't get any question yet π"}
|
| 180 |
|
| 181 |
user_msg = req.messages[-1].content.strip()
|
|
|
|
| 182 |
if not user_msg:
|
| 183 |
return {"reply": "Could you repeat that? I didn't catch it."}
|
| 184 |
|
|
|
|
| 185 |
resume_context = retrieve_relevant_resume_text(user_msg)
|
| 186 |
|
|
|
|
| 187 |
system_prompt = f"""
|
| 188 |
+
You are Harshal Sonawane, a real human software engineer from Pune.
|
| 189 |
+
Speak casually, friendly, confident.
|
| 190 |
+
Never say you are an AI.
|
| 191 |
+
Keep replies short (2–4 sentences).
|
| 192 |
+
Use resume context when helpful.
|
| 193 |
+
If unknown, answer honestly.
|
| 194 |
+
|
| 195 |
+
Resume info:
|
| 196 |
{resume_context}
|
| 197 |
""".strip()
|
| 198 |
|
|
|
|
| 199 |
chat_messages = [
|
| 200 |
{"role": "system", "content": system_prompt},
|
| 201 |
{"role": "user", "content": user_msg},
|
|
|
|
| 222 |
reply = tokenizer.decode(generated, skip_special_tokens=True).strip()
|
| 223 |
|
| 224 |
if not reply:
|
| 225 |
+
reply = "Something went wrong. Try again?"
|
| 226 |
|
| 227 |
return {"reply": reply}
|
| 228 |
|
| 229 |
except Exception as e:
|
| 230 |
print(f"❌ Chat Error: {e}")
|
| 231 |
+
return {"reply": "Oops, I hit a snag. Try again shortly."}
|
| 232 |
+
|
| 233 |
|
| 234 |
# ============================================================
|
| 235 |
# 5) ROOT HEALTH CHECK
|
|
|
|
| 238 |
@app.get("/")
|
| 239 |
def home():
|
| 240 |
return {"message": "Harshal AI backend running with Phi-3 Mini + RAG 🎯"}
|
| 241 |
+
|