Spaces:

Harsh123007
/

harshal-portfolio-ai

Sleeping

App Files Files Community

Harsh123007 committed 16 days ago

Commit

9fdd6be

verified ·

1 Parent(s): de38dd1

Update main.py

Browse files

Files changed (1) hide show

main.py +84 -65

main.py CHANGED Viewed

@@ -61,12 +61,15 @@ from pypdf import PdfReader
 import torch
 import os
 app = FastAPI(
     title="Harshal AI Backend",
-    version="1.0.0"
 )
-# CORS so Next.js can call backend
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -74,110 +77,123 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# ======================================
-# 1) LOAD MAIN MODEL (Qwen2.5 1.5B)
-# ======================================
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 llm = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch.float32,
-    device_map="cpu"
 )
 llm.eval()
-# ======================================
-# 2) LOAD EMBEDDING MODEL + RAG
-# ======================================
 EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 embedder = SentenceTransformer(EMBED_MODEL)
-RESUME = "resume.pdf"
 resume_rag = None
 def chunk_text(text, max_chars=450, overlap=80):
     text = " ".join(text.split())
-    chunks, start = [], 0
     while start < len(text):
         end = start + max_chars
         chunks.append(text[start:end])
         start = end - overlap
     return chunks
-def build_rag():
     global resume_rag
-    if not os.path.exists(RESUME):
-        print("⚠ resume.pdf NOT FOUND — RAG disabled")
         return
-    reader = PdfReader(RESUME)
-    text = ""
-    for p in reader.pages:
-        text += p.extract_text() or ""
-    chunks = chunk_text(text)
-    embs = embedder.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
-    resume_rag = {"chunks": chunks, "embs": embs}
-    print("✅ RAG Ready with", len(chunks), "chunks")
-def get_rag_context(query):
     if resume_rag is None:
         return ""
     q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
     sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
-    top = torch.topk(sims, k=3)
-    return "\n\n".join(resume_rag["chunks"][i] for i in top.indices)
-build_rag()
-# ======================================
-# 3) SWAGGER DATA MODELS
-# ======================================
-class Msg(BaseModel):
     role: str
     content: str
-class ChatReq(BaseModel):
-    messages: List[Msg]
-# ======================================
-# 4) CHAT ROUTE (Human-like Harshal)
-# ======================================
 @app.post("/chat")
-def chat(req: ChatReq):
     user_msg = req.messages[-1].content.strip()
-    rag = get_rag_context(user_msg)
     persona = f"""
 You are Harshal Sonawane, a real human software engineer from Pune.
-Rules:
-- Do NOT say you are an AI or model.
-- Answer in a human, natural tone.
-- Keep replies short (2–4 sentences).
-- Use resume facts when relevant.
-- If unknown, answer honestly.
-Resume context:
-{rag}
 """.strip()
     messages = [
         {"role": "system", "content": persona},
         {"role": "user", "content": user_msg}
@@ -186,24 +202,27 @@ Resume context:
     ids = tokenizer.apply_chat_template(
         messages,
         return_tensors="pt",
-        add_generation_prompt=True
-    ).to(llm.device)
-    out = llm.generate(
-        ids,
-        max_new_tokens=150,
-        temperature=0.5,
-        top_p=0.9,
-        repetition_penalty=1.05,
-        do_sample=True
     )
-    gen = out[0][ids.shape[-1]:]
-    reply = tokenizer.decode(gen, skip_special_tokens=True).strip()
     return {"reply": reply}
 @app.get("/")
-def health():
-    return {"status": "Harshal AI backend is running with Qwen 1.5B + RAG 🎯"}

 import torch
 import os
+# ======================================================
+# FastAPI App
+# ======================================================
 app = FastAPI(
     title="Harshal AI Backend",
+    version="1.0.0",
+    description="Human-like AI for Harshal Portfolio"
 )
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# ======================================================
+# 1) LOAD MAIN MODEL — Qwen2.5 1.5B (CPU Friendly)
+# ======================================================
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
+print(f"🚀 Loading LLM: {MODEL_NAME}")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 llm = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
+    dtype=torch.float32,      # correct argument, CPU-safe
 )
 llm.eval()
+print("✅ Qwen Loaded Successfully")
+# ======================================================
+# 2) LOAD RESUME + RAG
+# ======================================================
 EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 embedder = SentenceTransformer(EMBED_MODEL)
+RESUME_PATH = "resume.pdf"
 resume_rag = None
 def chunk_text(text, max_chars=450, overlap=80):
     text = " ".join(text.split())
+    chunks = []
+    start = 0
     while start < len(text):
         end = start + max_chars
         chunks.append(text[start:end])
         start = end - overlap
     return chunks
def build_resume_rag():
    """Read the resume PDF, chunk its text and cache the chunk embeddings.

    On success the module-level ``resume_rag`` is set to a dict holding
    ``"chunks"`` (the list of text chunks) and ``"embs"`` (the value
    returned by ``embedder.encode`` for those chunks). When the PDF yields
    no text, or reading/embedding raises, ``resume_rag`` is set to ``None``.
    When the file does not exist the function returns without touching
    ``resume_rag``.
    """
    global resume_rag
    if not os.path.exists(RESUME_PATH):
        print("⚠ resume.pdf not found — RAG disabled")
        return
    try:
        reader = PdfReader(RESUME_PATH)
        # Join pages with a newline so the last word of one page is not
        # fused with the first word of the next; chunk_text collapses all
        # whitespace afterwards, so the separator never reaches a chunk.
        full = "\n".join(p.extract_text() or "" for p in reader.pages)
        chunks = chunk_text(full)
        if not chunks:
            # Image-only or empty PDFs produce no text; do not hand an
            # empty batch to the encoder.
            print("⚠ resume.pdf has no extractable text — RAG disabled")
            resume_rag = None
            return
        embs = embedder.encode(
            chunks,
            convert_to_tensor=True,
            normalize_embeddings=True,
        )
        resume_rag = {"chunks": chunks, "embs": embs}
        print(f"✅ Resume RAG ready ({len(chunks)} chunks)")
    except Exception as e:
        # Deliberately broad: this runs at import time and a bad resume
        # file must not stop the API from starting; RAG is just disabled.
        print("❌ RAG build error:", e)
        resume_rag = None
def get_rag_context(query: str, top_k=3):
    """Return the resume chunks most similar to *query*.

    The query is embedded with ``embedder``, compared against the cached
    chunk embeddings by cosine similarity, and the best ``top_k`` chunks
    (fewer if the index is smaller) are joined with blank lines.
    Returns an empty string when no resume index is available.
    """
    rag = resume_rag
    if rag is None:
        return ""

    query_vec = embedder.encode(
        [query],
        convert_to_tensor=True,
        normalize_embeddings=True,
    )[0]
    scores = torch.nn.functional.cosine_similarity(
        query_vec.unsqueeze(0), rag["embs"]
    )
    # Never ask topk for more entries than there are chunks.
    k = min(top_k, len(scores))
    best = torch.topk(scores, k=k).indices
    selected = [rag["chunks"][i] for i in best]
    return "\n\n".join(selected)
+build_resume_rag()
+# ======================================================
+# 3) API BODY DEFINITIONS — CLEAN SWAGGER
+# ======================================================
class Message(BaseModel):
    """One chat message in a request body."""

    # Speaker of the message; the model itself places no restriction on the value.
    role: str
    # Text of the message.
    content: str
class ChatRequest(BaseModel):
    """Request body for the POST /chat endpoint."""

    # Conversation messages supplied by the client.
    messages: List[Message]
+# ======================================================
+# 4) CHAT ROUTE — HUMAN-LIKE HARSHAL AI
+# ======================================================
 @app.post("/chat")
+def chat(req: ChatRequest):
+    if not req.messages:
+        return {"reply": "Hey, ask me something 🙂"}
     user_msg = req.messages[-1].content.strip()
+    # -------- Retrieve relevant resume context --------
+    rag_text = get_rag_context(user_msg)
+    # -------- Persona --------
     persona = f"""
 You are Harshal Sonawane, a real human software engineer from Pune.
+Speak naturally, casual but professional.
+Never say you're an AI or model.
+Use resume context when relevant.
+Keep replies short (2–4 sentences).
+Resume knowledge:
+{rag_text}
 """.strip()
+    # -------- Model Chat Format --------
     messages = [
         {"role": "system", "content": persona},
         {"role": "user", "content": user_msg}
     ids = tokenizer.apply_chat_template(
         messages,
         return_tensors="pt",
+        add_generation_prompt=True,
     )
+    with torch.no_grad():
+        out = llm.generate(
+            ids,
+            max_new_tokens=150,
+            temperature=0.45,
+            top_p=0.9,
+            repetition_penalty=1.1,
+            do_sample=True
+        )
+    reply = tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True).strip()
     return {"reply": reply}
+# ======================================================
+# 5) HEALTH CHECK
+# ======================================================
 @app.get("/")
+def root():
+    return {"status": "Harshal AI backend is running (Qwen2.5 + RAG) 🎯"}