# ======================================
# EARLIER VERSION (Qwen 0.5B, no RAG), kept commented out for reference
# ======================================
# from fastapi import FastAPI
# from pydantic import BaseModel
# from fastapi.openapi.utils import get_openapi
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch
#
# app = FastAPI(
#     title="Harshal AI Backend",
#     version="1.0.0",
# )
#
# MODEL_NAME = "Qwen/Qwen2.5-0.5B"
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_NAME,
#     torch_dtype=torch.float32,
#     device_map="cpu",
# )
#
# class ChatMessage(BaseModel):
#     messages: list
#
# @app.get("/")
# def home():
#     return {"message": "Harshal AI backend running with Qwen 0.5B!"}
#
# @app.post("/chat")
# def chat(body: ChatMessage):
#     user_msg = body.messages[-1]["content"]
#     prompt = f"User: {user_msg}\nAssistant:"
#     inputs = tokenizer(prompt, return_tensors="pt")
#     outputs = model.generate(
#         **inputs,
#         max_new_tokens=120,
#         pad_token_id=tokenizer.eos_token_id,
#         temperature=0.4,
#     )
#     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     reply = text.split("Assistant:")[-1].strip()
#     return {"reply": reply}
#
# @app.get("/openapi.json")
# def openapi_json():
#     return get_openapi(
#         title="Harshal AI Backend",
#         version="1.0.0",
#         routes=app.routes,
#     )
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
import torch
import os
app = FastAPI(
    title="Harshal AI Backend",
    version="1.0.0",
)
# CORS so the Next.js frontend can call this backend
# (allow_origins=["*"] is deliberately permissive: any origin may call it)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# ======================================
# 1) LOAD MAIN MODEL (Qwen2.5 1.5B)
# ======================================
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
llm = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,  # full precision: safest choice on CPU-only hardware
    device_map="cpu",
)
llm.eval()  # inference mode: disables dropout
# ======================================
# 2) LOAD EMBEDDING MODEL + RAG
# ======================================
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBED_MODEL)
RESUME = "resume.pdf"
resume_rag = None
def chunk_text(text, max_chars=450, overlap=80):
    # Collapse whitespace, then slice into fixed-size windows that overlap,
    # so a fact split across a window boundary still lands intact in one chunk.
    text = " ".join(text.split())
    chunks, start = [], 0
    while start < len(text):
        end = start + max_chars
        chunks.append(text[start:end])
        start = end - overlap
    return chunks
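
# Illustrative arithmetic (hypothetical length): a 1,000-character resume with
# max_chars=450 and overlap=80 produces the windows [0:450], [370:820], and
# [740:1000], with each adjacent pair sharing 80 characters.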
def build_rag():
    global resume_rag
    if not os.path.exists(RESUME):
        print("⚠ resume.pdf NOT FOUND; RAG disabled")
        return
    reader = PdfReader(RESUME)
    text = ""
    for p in reader.pages:
        text += p.extract_text() or ""  # extract_text() can return None
    chunks = chunk_text(text)
    # Normalized embeddings make cosine similarity a plain dot product.
    embs = embedder.encode(chunks, convert_to_tensor=True, normalize_embeddings=True)
    resume_rag = {"chunks": chunks, "embs": embs}
    print("✅ RAG ready with", len(chunks), "chunks")
def get_rag_context(query):
    if resume_rag is None:
        return ""
    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
    # Cap k in case the resume yields fewer than 3 chunks.
    top = torch.topk(sims, k=min(3, len(resume_rag["chunks"])))
    return "\n\n".join(resume_rag["chunks"][i] for i in top.indices)
build_rag()
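
# Illustrative call (hypothetical query), assuming resume.pdf was found and indexed:
#   get_rag_context("Where is Harshal based?")
# returns up to three resume chunks ranked by cosine similarity, joined by blank lines.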
# ======================================
# 3) SWAGGER DATA MODELS
# ======================================
class Msg(BaseModel):
    role: str
    content: str

class ChatReq(BaseModel):
    messages: List[Msg]
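
# A valid /chat request body therefore looks like:
#   {"messages": [{"role": "user", "content": "Hi, what do you work on?"}]}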
# ======================================
# 4) CHAT ROUTE (Human-like Harshal)
# ======================================
@app.post("/chat")
def chat(req: ChatReq):
    user_msg = req.messages[-1].content.strip()
    rag = get_rag_context(user_msg)

    persona = f"""
You are Harshal Sonawane, a real human software engineer from Pune.
Rules:
- Do NOT say you are an AI or model.
- Answer in a human, natural tone.
- Keep replies short (2–4 sentences).
- Use resume facts when relevant.
- If unknown, answer honestly.

Resume context:
{rag}
""".strip()

    messages = [
        {"role": "system", "content": persona},
        {"role": "user", "content": user_msg},
    ]

    ids = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True,
    ).to(llm.device)

    out = llm.generate(
        ids,
        max_new_tokens=150,
        temperature=0.5,
        top_p=0.9,
        repetition_penalty=1.05,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
    )

    # Decode only the newly generated tokens, not the echoed prompt.
    gen = out[0][ids.shape[-1]:]
    reply = tokenizer.decode(gen, skip_special_tokens=True).strip()
    return {"reply": reply}
@app.get("/")
def health():
    return {"status": "Harshal AI backend is running with Qwen 1.5B + RAG 🎯"}