# from fastapi import FastAPI
# from pydantic import BaseModel
# from fastapi.openapi.utils import get_openapi
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch

# app = FastAPI(
#     title="Harshal AI Backend",
#     version="1.0.0",
# )

# MODEL_NAME = "Qwen/Qwen2.5-0.5B"
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_NAME,
#     torch_dtype=torch.float32,
#     device_map="cpu",
# )

# class ChatMessage(BaseModel):
#     messages: list

# @app.get("/")
# def home():
#     return {"message": "Harshal AI backend running with Qwen 0.5B!"}

# @app.post("/chat")
# def chat(body: ChatMessage):
#     user_msg = body.messages[-1]["content"]
#     prompt = f"User: {user_msg}\nAssistant:"
#     inputs = tokenizer(prompt, return_tensors="pt")
#     outputs = model.generate(
#         **inputs,
#         max_new_tokens=120,
#         pad_token_id=tokenizer.eos_token_id,
#         temperature=0.4,
#     )
#     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     reply = text.split("Assistant:")[-1].strip()
#     return {"reply": reply}

# @app.get("/openapi.json")
# def openapi_json():
#     return get_openapi(
#         title="Harshal AI Backend",
#         version="1.0.0",
#         routes=app.routes
#     )
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
import torch
import os

app = FastAPI(
    title="Harshal AI Backend",
    version="1.0.0"
)
# CORS so Next.js can call backend
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
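# Note: allow_origins=["*"] accepts requests from any origin. A tighter sketch,
# assuming a hypothetical production frontend URL, would look like:
#
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["https://harshal-portfolio.example.com"],
#     allow_methods=["GET", "POST"],
#     allow_headers=["*"],
# )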
# ======================================
# 1) LOAD MAIN MODEL (Qwen2.5 1.5B)
# ======================================
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
llm = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    device_map="cpu"
)
llm.eval()
# ======================================
# 2) LOAD EMBEDDING MODEL + RAG
# ======================================
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBED_MODEL)

RESUME = "resume.pdf"
resume_rag = None
def chunk_text(text, max_chars=450, overlap=80):
    # Collapse whitespace, then split the text into fixed-size character
    # windows that overlap so content is not cut off between chunks.
    text = " ".join(text.split())
    chunks, start = [], 0
    while start < len(text):
        end = start + max_chars
        chunks.append(text[start:end])
        start = end - overlap
    return chunks
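# Example (sketch): with max_chars=10 and overlap=3,
# chunk_text("abcdefghijklmnop") returns ["abcdefghij", "hijklmnop", "op"].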
def build_rag():
    # Read the resume PDF once at startup, chunk it, and embed the chunks
    # so /chat can retrieve relevant context per question.
    global resume_rag
    if not os.path.exists(RESUME):
        print("resume.pdf NOT FOUND - RAG disabled")
        return
    reader = PdfReader(RESUME)
    text = ""
    for p in reader.pages:
        text += p.extract_text() or ""
    chunks = chunk_text(text)
    embs = embedder.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
    resume_rag = {"chunks": chunks, "embs": embs}
    print("RAG ready with", len(chunks), "chunks")
def get_rag_context(query):
    # Embed the query and return the most similar resume chunks.
    if resume_rag is None:
        return ""
    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
    top = torch.topk(sims, k=min(3, len(resume_rag["chunks"])))
    return "\n\n".join(resume_rag["chunks"][i] for i in top.indices)

build_rag()
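# Usage sketch (assuming resume.pdf exists so build_rag() succeeded):
#   get_rag_context("Which projects has Harshal shipped?")
# returns up to the three most similar resume chunks joined by blank lines,
# or "" when RAG is disabled.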
# ======================================
# 3) SWAGGER DATA MODELS
# ======================================
class Msg(BaseModel):
    role: str
    content: str

class ChatReq(BaseModel):
    messages: List[Msg]
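# Example request body matching ChatReq (sketch):
# {"messages": [{"role": "user", "content": "What tech stack do you use?"}]}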
# ======================================
# 4) CHAT ROUTE (Human-like Harshal)
# ======================================
@app.post("/chat")
def chat(req: ChatReq):
    user_msg = req.messages[-1].content.strip()
    rag = get_rag_context(user_msg)

    persona = f"""
You are Harshal Sonawane, a real human software engineer from Pune.

Rules:
- Do NOT say you are an AI or model.
- Answer in a human, natural tone.
- Keep replies short (2-4 sentences).
- Use resume facts when relevant.
- If unknown, answer honestly.

Resume context:
{rag}
""".strip()

    messages = [
        {"role": "system", "content": persona},
        {"role": "user", "content": user_msg}
    ]

    ids = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True
    ).to(llm.device)

    out = llm.generate(
        ids,
        max_new_tokens=150,
        temperature=0.5,
        top_p=0.9,
        repetition_penalty=1.05,
        do_sample=True
    )

    # Decode only the newly generated tokens (everything after the prompt).
    gen = out[0][ids.shape[-1]:]
    reply = tokenizer.decode(gen, skip_special_tokens=True).strip()
    return {"reply": reply}
@app.get("/")
def health():
    return {"status": "Harshal AI backend is running with Qwen 1.5B + RAG"}