Harsh123007 committed
Commit a34510d · verified · 1 Parent(s): 1a2efd8

Update main.py

Files changed (1): main.py +66 -73
main.py CHANGED
@@ -60,15 +60,12 @@ from sentence_transformers import SentenceTransformer
 from pypdf import PdfReader
 import torch, os
 
-# ======================================
-# FastAPI Base
-# ======================================
 app = FastAPI(
     title="Harshal AI Backend",
     version="1.0.0",
-    description="Human-like assistant bound to Harshal's real resume facts."
 )
 
+# Allow requests from anywhere (Next.js frontend)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -76,77 +73,72 @@ app.add_middleware(
     allow_headers=["*"],
 )
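Review note: allow_origins=["*"] leaves CORS wide open. A minimal sketch of a tighter setup once the frontend URL is fixed (the origin below is hypothetical, not from this repo):

# Hypothetical: restrict CORS to the one known frontend origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://harshal-portfolio.example.com"],  # placeholder URL
    allow_methods=["*"],
    allow_headers=["*"],
)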
 
-# ======================================
-# MODEL (Qwen2.5 1.5B)
-# ======================================
+# =======================================================
+# 1) LOAD Qwen 1.5B (NO device_map, NO accelerate needed)
+# =======================================================
+
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+# ❗ Load normally then move to CPU
 llm = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    dtype=torch.float32,
-    device_map="cpu"
+    torch_dtype=torch.float32,
 )
+llm = llm.to("cpu")
 llm.eval()
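Review note: the commit swaps device_map="cpu" (which requires accelerate) for a plain .to("cpu") after loading. A one-line sanity check, assuming llm is loaded as above:

# All parameters should report cpu / torch.float32 after llm.to("cpu").
p = next(llm.parameters())
print(p.device, p.dtype)  # expected: cpu torch.float32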
 
-# ======================================
-# EMBEDDING MODEL
-# ======================================
+# =======================================================
+# 2) RAG (Resume Embeddings)
+# =======================================================
+
 embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-resume_data = None
 RESUME_FILE = "resume.pdf"
+RAG = None
 
-# ======================================
-# UTIL: Chunk Resume
-# ======================================
-def chunk_text(text, size=450, overlap=80):
+def chunk(text, size=450, overlap=80):
     text = " ".join(text.split())
-    out, start = [], 0
-
+    chunks, start = [], 0
     while start < len(text):
         end = start + size
-        out.append(text[start:end])
+        chunks.append(text[start:end])
         start = end - overlap
-
-    return out
+    return chunks
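Review note: chunk() slides a 450-character window with an 80-character overlap, so consecutive chunks start 370 characters apart. A self-contained check of that arithmetic (the sample string is made up):

# 1000 chars, size=450, overlap=80: chunks start at 0, 370, 740.
sample = "".join(str(i % 10) for i in range(1000))
parts = chunk(sample)
print([len(p) for p in parts])          # [450, 450, 260]
assert parts[0][-80:] == parts[1][:80]  # neighbours share an 80-char tail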
 
 
-# ======================================
-# BUILD RAG
-# ======================================
-def load_resume():
-    global resume_data
+def build_rag():
+    global RAG
+
     if not os.path.exists(RESUME_FILE):
-        print("❌ resume.pdf not found")
+        print("⚠ resume.pdf NOT FOUND — RAG DISABLED")
         return
 
     reader = PdfReader(RESUME_FILE)
     text = ""
-    for pg in reader.pages:
-        text += pg.extract_text() or ""
+    for p in reader.pages:
+        text += p.extract_text() or ""
 
-    chunks = chunk_text(text)
-    embs = embedder.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
-    resume_data = {"chunks": chunks, "embs": embs}
-
-    print(f"✅ Resume RAG loaded with {len(chunks)} chunks")
+    c = chunk(text)
+    e = embedder.encode(c, normalize_embeddings=True, convert_to_tensor=True)
 
+    RAG = {"chunks": c, "embs": e}
+    print(f"✅ RAG ready with {len(c)} chunks")
 
-def get_resume_context(query, top_k=3):
-    if resume_data is None:
+def search_rag(q):
+    if RAG is None:
         return ""
 
-    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
-    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_data["embs"])
-    top = torch.topk(sims, k=top_k)
-
-    return "\n\n".join(resume_data["chunks"][i] for i in top.indices)
+    qv = embedder.encode([q], normalize_embeddings=True, convert_to_tensor=True)[0]
+    sims = torch.nn.functional.cosine_similarity(qv.unsqueeze(0), RAG["embs"])
+    top = torch.topk(sims, 3).indices
+    return "\n\n".join(RAG["chunks"][i] for i in top)
 
-load_resume()
+build_rag()
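Review note: hypothetical usage of the renamed retrieval pair, assuming resume.pdf exists so build_rag() has populated RAG:

# Returns the 3 most similar resume chunks, or "" when RAG is disabled.
ctx = search_rag("Which backend frameworks has Harshal used?")
print(ctx[:300] if ctx else "RAG disabled: resume.pdf missing")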
 
 
 
-# ======================================
-# SCHEMAS
-# ======================================
+# =======================================================
+# 3) SCHEMAS
+# =======================================================
+
 class Msg(BaseModel):
     role: str
     content: str
@@ -154,59 +146,60 @@ class Msg(BaseModel):
 class ChatReq(BaseModel):
     messages: List[Msg]
 
-# ======================================
-# CHAT
-# ======================================
+# =======================================================
+# 4) CHAT ENDPOINT
+# =======================================================
+
 @app.post("/chat")
 def chat(req: ChatReq):
 
     user_msg = req.messages[-1].content.strip()
-    resume_context = get_resume_context(user_msg)
+    rag = search_rag(user_msg)
 
-    # STRICT anti-hallucination persona
     persona = f"""
 You are Harshal Sonawane, a real human software engineer from Pune.
 
-VERY IMPORTANT RULES:
-- NEVER say you are an AI, model, assistant, chatbot.
-- ONLY answer using information available in the resume context.
-- If the resume does NOT contain the answer → say: "I haven't mentioned this in my resume."
-- Do NOT invent employment history, projects, companies, or skills.
-- Use a natural, short, human tone (2–4 sentences).
-- Stay consistent with resume facts.
-
-Resume facts (DO NOT IGNORE):
-{resume_context}
+STRICT RULES:
+- Never say you are an AI or model.
+- ONLY answer using resume facts below.
+- If the resume does not contain the answer → say:
+"I haven't mentioned this in my resume."
+- Do not guess or invent.
+- Speak naturally like a human.
+- Keep replies short (2–4 sentences).
+
+Resume facts:
+{rag}
 """.strip()
 
-    messages = [
+    msgs = [
         {"role": "system", "content": persona},
-        {"role": "user", "content": user_msg}
+        {"role": "user", "content": user_msg},
     ]
 
     ids = tokenizer.apply_chat_template(
-        messages,
+        msgs,
         return_tensors="pt",
         add_generation_prompt=True
-    ).to(llm.device)
+    ).to("cpu")
 
     out = llm.generate(
         ids,
-        max_new_tokens=160,
+        max_new_tokens=150,
         temperature=0.45,
         top_p=0.9,
         repetition_penalty=1.1,
-        do_sample=True,
-        pad_token_id=tokenizer.eos_token_id
+        pad_token_id=tokenizer.eos_token_id,
+        do_sample=True
     )
 
-    reply = tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True).strip()
-
-    return {"reply": reply}
+    resp = tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True).strip()
+    return {"reply": resp}
 
-# ======================================
-# HEALTH
-# ======================================
+# =======================================================
+# 5) HEALTH CHECK
+# =======================================================
 
 @app.get("/")
 def health():
-    return {"status": "Harshal AI backend running with Qwen 1.5B + strict RAG 🎯"}
+    return {"status": "Harshal AI backend running with Qwen 1.5B + RAG 🎯"}
 