Harsh123007 committed on
Commit ca245fc · verified · 1 Parent(s): b986cc9

Update main.py

Files changed (1):
  1. main.py +86 -118
main.py CHANGED
@@ -50,6 +50,7 @@
 # routes=app.routes
 # )
 
+
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
@@ -60,182 +61,149 @@ from pypdf import PdfReader
 import torch
 import os
 
-# ============================================================
-# FastAPI APP
-# ============================================================
-
-app = FastAPI(
-    title="Harshal AI Backend",
-    version="1.0.0",
-    description="Human-like AI Assistant for Harshal's Portfolio"
-)
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-# ============================================================
-# 1) LOAD MAIN CHAT MODEL
-# ============================================================
-
-LLM_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
-
-print(f"🚀 Loading LLM model: {LLM_MODEL_NAME}")
-tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
-llm_model = AutoModelForCausalLM.from_pretrained(
-    LLM_MODEL_NAME,
-    torch_dtype=torch.float32,
-    device_map="cpu"
-)
-llm_model.eval()
-print("✅ LLM Loaded Successfully")
-
-# ============================================================
-# 2) RAG: LOAD EMBEDDINGS FROM RESUME
-# ============================================================
-
-EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
-RESUME_PATH = "resume.pdf"
-
-embedder = SentenceTransformer(EMBED_MODEL_NAME)
-resume_vectors = None
-
-
-def chunk_text(text, max_chars=450, overlap=80):
-    text = " ".join(text.replace("\n", " ").split())
-    chunks, start = [], 0
-    while start < len(text):
-        end = start + max_chars
-        chunks.append(text[start:end])
-        start = end - overlap
-    return chunks
-
-
-def build_resume_rag():
-    global resume_vectors
-    if not os.path.exists(RESUME_PATH):
-        print("⚠️ resume.pdf missing — RAG disabled.")
-        return
-
-    try:
-        reader = PdfReader(RESUME_PATH)
-        text = "".join((p.extract_text() or "") for p in reader.pages)
-        chunks = chunk_text(text)
-
-        embeddings = embedder.encode(
-            chunks, convert_to_tensor=True, normalize_embeddings=True
-        )
-
-        resume_vectors = {"chunks": chunks, "embeddings": embeddings}
-        print("✅ Resume RAG ready.")
-    except Exception as e:
-        print(f"❌ RAG Error: {e}")
-        resume_vectors = None
-
-
-def embed_query(query: str):
-    return embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
-
-
-def retrieve_relevant_resume_text(query: str, top_k=3):
-    if resume_vectors is None:
-        return ""
-    try:
-        q = embed_query(query)
-        sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_vectors["embeddings"])
-        top = torch.topk(sims, k=min(top_k, len(sims))).indices
-        return "\n\n".join(resume_vectors["chunks"][i] for i in top)
-    except:
-        return ""
-
-
-build_resume_rag()
-
-# ============================================================
-# 3) CLEAN REQUEST SCHEMA
-# ============================================================
-
-class Message(BaseModel):
-    role: str
-    content: str
-
-
-class ChatRequest(BaseModel):
-    messages: List[Message]
-
-
-# ============================================================
-# 4) CHAT ENDPOINT
-# ============================================================
-
-@app.post("/chat")
-def chat(req: ChatRequest):
-    try:
-        if not req.messages:
-            return {"reply": "I didn't get any question yet 🙂"}
-
-        user_msg = req.messages[-1].content.strip()
-        if not user_msg:
-            return {"reply": "Could you repeat that? I didn't catch it."}
-
-        resume_context = retrieve_relevant_resume_text(user_msg)
-
-        system_prompt = f"""
-You are Harshal Sonawane, a real human software engineer from Pune.
-Speak casually, friendly, confident.
-Never say you are an AI.
-Keep replies short (2–4 sentences).
-Use resume context when helpful.
-If unknown, answer honestly.
-
-Resume info:
-{resume_context}
-""".strip()
-
-        chat_messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_msg},
-        ]
-
-        input_ids = tokenizer.apply_chat_template(
-            chat_messages,
-            return_tensors="pt",
-            add_generation_prompt=True,
-        ).to(llm_model.device)
-
-        with torch.no_grad():
-            output_ids = llm_model.generate(
-                input_ids,
-                max_new_tokens=170,
-                temperature=0.55,
-                top_p=0.9,
-                repetition_penalty=1.08,
-                do_sample=True,
-                pad_token_id=tokenizer.eos_token_id
-            )
-
-        generated = output_ids[0][input_ids.shape[-1]:]
-        reply = tokenizer.decode(generated, skip_special_tokens=True).strip()
-
-        if not reply:
-            reply = "Something went wrong. Try again?"
-
-        return {"reply": reply}
-
-    except Exception as e:
-        print(f"❌ Chat Error: {e}")
-        return {"reply": "Oops, I hit a snag. Try again shortly."}
-
-
-# ============================================================
-# 5) ROOT HEALTH CHECK
-# ============================================================
-
-@app.get("/")
-def home():
-    return {"message": "Harshal AI backend running with Phi-3 Mini + RAG 🎯"}
 
+app = FastAPI(
+    title="Harshal AI Backend",
+    version="1.0.0"
+)
+
+# CORS so Next.js can call backend
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# ======================================
+# 1) LOAD MAIN MODEL (Qwen2.5 1.5B)
+# ======================================
+MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+llm = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    torch_dtype=torch.float32,
+    device_map="cpu"
+)
+llm.eval()
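
A quick smoke test of the freshly loaded CPU model, e.g. in a REPL (not part of the commit; the prompt string is made up, and decoding is greedy so the check is deterministic):

msgs = [{"role": "user", "content": "Say hi in one sentence."}]
ids = tokenizer.apply_chat_template(msgs, return_tensors="pt", add_generation_prompt=True)
out = llm.generate(ids, max_new_tokens=20, do_sample=False)
print(tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True))
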
+# ======================================
+# 2) LOAD EMBEDDING MODEL + RAG
+# ======================================
+EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+embedder = SentenceTransformer(EMBED_MODEL)
+
+RESUME = "resume.pdf"
+resume_rag = None
+
+
+def chunk_text(text, max_chars=450, overlap=80):
+    text = " ".join(text.split())
+    chunks, start = [], 0
+
+    while start < len(text):
+        end = start + max_chars
+        chunks.append(text[start:end])
+        start = end - overlap
+
+    return chunks
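
For intuition about the sliding window above: each 450-character chunk overlaps the previous one by 80 characters, so a 1,000-character string yields three chunks. A sketch, not part of the commit:

sample = "x" * 1000
chunks = chunk_text(sample)
print([len(c) for c in chunks])   # [450, 450, 260] (windows start at 0, 370, 740)
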
 
+
+
+def build_rag():
+    global resume_rag
+
+    if not os.path.exists(RESUME):
+        print("⚠ resume.pdf NOT FOUND — RAG disabled")
+        return
+
+    reader = PdfReader(RESUME)
+    text = ""
+
+    for p in reader.pages:
+        text += p.extract_text() or ""
+
+    chunks = chunk_text(text)
+
+    embs = embedder.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
+
+    resume_rag = {"chunks": chunks, "embs": embs}
+    print("✅ RAG Ready with", len(chunks), "chunks")
+
+
+def get_rag_context(query):
+    if resume_rag is None:
+        return ""
+
+    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
+    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
+    top = torch.topk(sims, k=3)
+
+    return "\n\n".join(resume_rag["chunks"][i] for i in top.indices)
+
+
+build_rag()
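
Two notes on get_rag_context: since normalize_embeddings=True returns unit-length vectors, the cosine similarity reduces to a plain dot product, and torch.topk(sims, k=3) would raise if the resume yields fewer than three chunks. An equivalent, slightly defensive sketch (the query string is made up; this is not the committed code):

q = embedder.encode(["what does Harshal build?"], convert_to_tensor=True, normalize_embeddings=True)[0]
sims = resume_rag["embs"] @ q                      # dot product equals cosine for unit vectors
top = torch.topk(sims, k=min(3, sims.shape[0]))    # clamp k to the number of chunks
print("\n\n".join(resume_rag["chunks"][i] for i in top.indices))
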
+
+# ======================================
+# 3) SWAGGER DATA MODELS
+# ======================================
+class Msg(BaseModel):
+    role: str
+    content: str
+
+class ChatReq(BaseModel):
+    messages: List[Msg]
+
+
+# ======================================
+# 4) CHAT ROUTE (Human-like Harshal)
+# ======================================
+@app.post("/chat")
+def chat(req: ChatReq):
+
+    user_msg = req.messages[-1].content.strip()
+
+    rag = get_rag_context(user_msg)
+
+    persona = f"""
+You are Harshal Sonawane, a real human software engineer from Pune.
+
+Rules:
+- Do NOT say you are an AI or model.
+- Answer in a human, natural tone.
+- Keep replies short (2–4 sentences).
+- Use resume facts when relevant.
+- If unknown, answer honestly.
+
+Resume context:
+{rag}
+""".strip()
+
+    messages = [
+        {"role": "system", "content": persona},
+        {"role": "user", "content": user_msg}
+    ]
+
+    ids = tokenizer.apply_chat_template(
+        messages,
+        return_tensors="pt",
+        add_generation_prompt=True
+    ).to(llm.device)
+
+    out = llm.generate(
+        ids,
+        max_new_tokens=150,
+        temperature=0.5,
+        top_p=0.9,
+        repetition_penalty=1.05,
+        do_sample=True
+    )
+
+    gen = out[0][ids.shape[-1]:]
+    reply = tokenizer.decode(gen, skip_special_tokens=True).strip()
+
+    return {"reply": reply}
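
Exercising the route once the server is up, a sketch assuming the requests package and a server on localhost:8000 (host, port, and the question are assumptions; the payload shape matches ChatReq):

import requests

resp = requests.post(
    "http://localhost:8000/chat",
    json={"messages": [{"role": "user", "content": "What stack do you work with?"}]},
)
print(resp.json()["reply"])
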
+
+
+@app.get("/")
+def health():
+    return {"status": "Harshal AI backend is running with Qwen 1.5B + RAG 🎯"}
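
The commit itself has no __main__ block; a typical local entrypoint would look like the sketch below, assuming uvicorn is installed (port 7860 is the Hugging Face Spaces convention; adjust as needed):

import uvicorn

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=7860)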