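# ============================================================
# Earlier iteration (Qwen2.5-0.5B, no RAG), kept commented out
# below for reference
# ============================================================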
# from fastapi import FastAPI
# from pydantic import BaseModel
# from fastapi.openapi.utils import get_openapi
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch

# app = FastAPI(
#     title="Harshal AI Backend",
#     version="1.0.0",
# )

# MODEL_NAME = "Qwen/Qwen2.5-0.5B"

# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_NAME,
#     torch_dtype=torch.float32,
#     device_map="cpu",
# )

# class ChatMessage(BaseModel):
#     messages: list

# @app.get("/")
# def home():
#     return {"message": "Harshal AI backend running with Qwen 0.5B!"}

# @app.post("/chat")
# def chat(body: ChatMessage):
#     user_msg = body.messages[-1]["content"]
#     prompt = f"User: {user_msg}\nAssistant:"

#     inputs = tokenizer(prompt, return_tensors="pt")
#     outputs = model.generate(
#         **inputs,
#         max_new_tokens=120,
#         pad_token_id=tokenizer.eos_token_id,
#         temperature=0.4,
#     )

#     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     reply = text.split("Assistant:")[-1].strip()
#     return {"reply": reply}

# @app.get("/openapi.json")
# def openapi_json():
#     return get_openapi(
#         title="Harshal AI Backend",
#         version="1.0.0",
#         routes=app.routes
#     )


from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
import os
import torch

app = FastAPI(title="Harshal AI Backend", version="1.0.0")

# CORS (Next.js frontend)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],   # permissive for local dev; restrict to the frontend origin in production
    allow_methods=["*"],
    allow_headers=["*"],
)

# ============================================================
# 1) LOAD MAIN MODEL (Phi-3 Mini — good balance of quality/speed)
# ============================================================

MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
llm = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    dtype=torch.float32,  # `dtype` replaces the deprecated `torch_dtype` argument in recent transformers releases
)
llm.eval()
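
# NOTE (assumption): float32 on CPU is predictable but slow for a ~3.8B-parameter
# model like Phi-3 Mini; on a machine with a GPU you could instead pass
# device_map="auto" and dtype=torch.float16 to from_pretrained.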

# ============================================================
# 2) LOAD EMBEDDINGS + BUILD RAG FROM resume.pdf
# ============================================================

EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBED_MODEL)

RESUME_FILE = "resume.pdf"
resume_rag = None


def chunk_text(text, max_chars=450, overlap=80):
    """Split text into overlapping character chunks so context spanning a boundary is not lost."""
    assert overlap < max_chars, "overlap must be < max_chars, or the loop never advances"
    text = " ".join(text.split())
    chunks, start = [], 0

    while start < len(text):
        end = start + max_chars
        chunks.append(text[start:end])
        start = end - overlap

    return chunks
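
# Illustrative check of the defaults (max_chars=450, overlap=80): a
# 1,000-character string splits into three chunks, each consecutive pair
# sharing 80 characters:
#
#   >>> [len(c) for c in chunk_text("x" * 1000)]
#   [450, 450, 260]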


def build_rag():
    """Reads resume.pdf → chunks → embeddings."""
    global resume_rag

    if not os.path.exists(RESUME_FILE):
        print(f"⚠ {RESUME_FILE} NOT FOUND; RAG disabled.")
        return

    reader = PdfReader(RESUME_FILE)
    full_text = ""

    for page in reader.pages:
        full_text += page.extract_text() or ""

    chunks = chunk_text(full_text)

    embeddings = embedder.encode(
        chunks, convert_to_tensor=True, normalize_embeddings=True
    )

    resume_rag = {"chunks": chunks, "embs": embeddings}
    print("✅ Resume RAG built with", len(chunks), "chunks")


build_rag()


def retrieve_rag(query, top_k=3):
    """Find most relevant resume chunks."""
    if resume_rag is None:
        return ""

    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
    top = torch.topk(sims, k=min(top_k, len(sims)))

    return "\n\n".join(resume_rag["chunks"][int(i)] for i in top.indices)
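
# Example (illustrative): retrieve_rag("Where does Harshal work?") returns up
# to three of the most similar resume chunks joined by blank lines, or "" when
# resume.pdf was not found at startup.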


# ============================================================
# 3) REQUEST MODELS
# ============================================================

class Message(BaseModel):
    role: str
    content: str

class ChatReq(BaseModel):
    messages: List[Message]


# ============================================================
# 4) SMART MODE CHAT ROUTE (Resume + Fallback)
# ============================================================

@app.post("/chat")
def chat(req: ChatReq):
    user_msg = req.messages[-1].content.strip()

    # RAG: take from resume
    rag_context = retrieve_rag(user_msg)

    # Detect if we found resume info
    has_relevant_rag = rag_context and len(rag_context) > 40

    # SMART RULE:
    #   If resume has relevant info → answer ONLY from resume
    #   If resume has nothing → fallback using general persona
    if has_relevant_rag:
        fallback_note = ""
        resume_instruction = """
Use ONLY the details provided in the resume context.
Do NOT invent any new companies, dates, or roles.
Stay strictly consistent with:
- Current company: Börm Bruckmeier Infotech India Pvt. Ltd.
- Past experience: SarvaTech Consultants (Internship)
- Personal projects listed in resume.
"""
    else:
        fallback_note = """
The resume does NOT mention this. 
Provide a short helpful answer based on Harshal’s background, tone, and skills — 
BUT do NOT invent companies, dates, or job titles not present in the resume.
"""
        resume_instruction = ""

    # System persona
    system_prompt = f"""
You are Harshal Sonawane — a real human software engineer from Pune, India.

Tone:
- Natural, confident, friendly.
- 2–8 sentences max.
- Never say you are an AI.

Your resume (RAG context):
{rag_context}

Instructions:
{resume_instruction}

Fallback rule:
{fallback_note}
""".strip()

    # Chat template
    msgs = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_msg},
    ]

    input_ids = tokenizer.apply_chat_template(
        msgs,
        return_tensors="pt",
        add_generation_prompt=True,
    )

    with torch.no_grad():
        output_ids = llm.generate(
            input_ids,
            max_new_tokens=160,
            temperature=0.55,
            top_p=0.9,
            repetition_penalty=1.06,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
        )

    gen = output_ids[0][input_ids.shape[-1]:]
    reply = tokenizer.decode(gen, skip_special_tokens=True).strip()

    return {"reply": reply}


# ============================================================
# 5) HEALTH CHECK
# ============================================================

@app.get("/")
def health():
    return {
        "status": "Harshal AI backend running (SMART MODE)",
        "model": MODEL_NAME
    }
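

# ============================================================
# RUNNING LOCALLY (illustrative; assumes this file is main.py)
# ============================================================
#
#   uvicorn main:app --reload --port 8000
#
# Example request against /chat (payload shape matches ChatReq):
#
#   curl -X POST http://localhost:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"messages":[{"role":"user","content":"Where do you work?"}]}'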