# Earlier single-model version of this backend (Qwen2.5-0.5B, no RAG), kept commented out for reference:
#
# from fastapi import FastAPI
# from pydantic import BaseModel
# from fastapi.openapi.utils import get_openapi
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch
#
# app = FastAPI(
#     title="Harshal AI Backend",
#     version="1.0.0",
# )
#
# MODEL_NAME = "Qwen/Qwen2.5-0.5B"
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_NAME,
#     torch_dtype=torch.float32,
#     device_map="cpu",
# )
#
# class ChatMessage(BaseModel):
#     messages: list
#
# @app.get("/")
# def home():
#     return {"message": "Harshal AI backend running with Qwen 0.5B!"}
#
# @app.post("/chat")
# def chat(body: ChatMessage):
#     user_msg = body.messages[-1]["content"]
#     prompt = f"User: {user_msg}\nAssistant:"
#     inputs = tokenizer(prompt, return_tensors="pt")
#     outputs = model.generate(
#         **inputs,
#         max_new_tokens=120,
#         pad_token_id=tokenizer.eos_token_id,
#         temperature=0.4,
#     )
#     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     reply = text.split("Assistant:")[-1].strip()
#     return {"reply": reply}
#
# @app.get("/openapi.json")
# def openapi_json():
#     return get_openapi(
#         title="Harshal AI Backend",
#         version="1.0.0",
#         routes=app.routes
#     )

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
import os
import torch

app = FastAPI(title="Harshal AI Backend", version="1.0.0")

# CORS (Next.js frontend)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# ============================================================
# 1) LOAD MAIN MODEL (Phi-3 Mini — good balance of quality/speed)
# ============================================================
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
llm = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    dtype=torch.float32,  # instead of torch_dtype
)
llm.eval()

# ============================================================
# 2) LOAD EMBEDDINGS + BUILD RAG FROM resume.pdf
# ============================================================
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBED_MODEL)

RESUME_FILE = "resume.pdf"
resume_rag = None


def chunk_text(text, max_chars=450, overlap=80):
    """Simple overlapping chunks."""
    text = " ".join(text.split())
    chunks, start = [], 0
    while start < len(text):
        end = start + max_chars
        chunks.append(text[start:end])
        start = end - overlap
    return chunks


def build_rag():
    """Reads resume.pdf → chunks → embeddings."""
    global resume_rag
    if not os.path.exists(RESUME_FILE):
        print("⚠ resume.pdf NOT FOUND — RAG disabled.")
        return

    reader = PdfReader(RESUME_FILE)
    full_text = ""
    for page in reader.pages:
        full_text += page.extract_text() or ""

    chunks = chunk_text(full_text)
    embeddings = embedder.encode(
        chunks, convert_to_tensor=True, normalize_embeddings=True
    )
    resume_rag = {"chunks": chunks, "embs": embeddings}
    print("✅ Resume RAG built with", len(chunks), "chunks")


build_rag()


def retrieve_rag(query, top_k=3):
    """Find most relevant resume chunks."""
    if resume_rag is None:
        return ""
    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
    top = torch.topk(sims, k=min(top_k, len(sims)))
    return "\n\n".join(resume_rag["chunks"][i] for i in top.indices)
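
# Quick sanity check for retrieval (a minimal sketch, not part of the API; the
# query string below is only an illustrative example):
#
#   >>> retrieve_rag("What is Harshal's current company?")
#   # returns up to `top_k` resume chunks joined by blank lines,
#   # or "" when resume.pdf is missing and RAG is disabled
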
# ============================================================
# 3) REQUEST MODELS
# ============================================================
class Message(BaseModel):
    role: str
    content: str


class ChatReq(BaseModel):
    messages: List[Message]


# ============================================================
# 4) SMART MODE CHAT ROUTE (Resume + Fallback)
# ============================================================
@app.post("/chat")
def chat(req: ChatReq):
    user_msg = req.messages[-1].content.strip()

    # RAG: take from resume
    rag_context = retrieve_rag(user_msg)

    # Detect if we found resume info
    has_relevant_rag = rag_context and len(rag_context) > 40

    # SMART RULE:
    # If resume has relevant info → answer ONLY from resume
    # If resume has nothing → fallback using general persona
    if has_relevant_rag:
        fallback_note = ""
        resume_instruction = """
Use ONLY the details provided in the resume context.
Do NOT invent any new companies, dates, or roles.
Stay strictly consistent with:
- Current company: Börm Bruckmeier Infotech India Pvt. Ltd.
- Past experience: SarvaTech Consultants (Internship)
- Personal projects listed in resume.
"""
    else:
        fallback_note = """
The resume does NOT mention this.
Provide a short helpful answer based on Harshal’s background, tone, and skills —
BUT do NOT invent companies, dates, or job titles not present in the resume.
"""
        resume_instruction = ""

    # System persona
    system_prompt = f"""
You are Harshal Sonawane — a real human software engineer from Pune, India.

Tone:
- Natural, confident, friendly.
- 2–8 sentences max.
- Never say you are an AI.

Your resume (RAG context):
{rag_context}

Instructions:
{resume_instruction}

Fallback rule:
{fallback_note}
""".strip()

    # Chat template
    msgs = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_msg},
    ]

    input_ids = tokenizer.apply_chat_template(
        msgs,
        return_tensors="pt",
        add_generation_prompt=True,
    )

    with torch.no_grad():
        output_ids = llm.generate(
            input_ids,
            max_new_tokens=160,
            temperature=0.55,
            top_p=0.9,
            repetition_penalty=1.06,
            do_sample=True,
        )

    gen = output_ids[0][input_ids.shape[-1]:]
    reply = tokenizer.decode(gen, skip_special_tokens=True).strip()

    return {"reply": reply}


# ============================================================
# 5) HEALTH CHECK
# ============================================================
@app.get("/")
def health():
    return {
        "status": "Harshal AI backend running (SMART MODE)",
        "model": MODEL_NAME,
    }
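

# ============================================================
# 6) LOCAL RUN (sketch)
# ============================================================
# A minimal way to start the API directly; the module name "main" in the
# commands below and port 8000 are assumptions, so adjust to the real
# filename and deployment setup.
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# Example request (hypothetical payload matching the ChatReq schema):
#
#   curl -X POST http://localhost:8000/chat \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Where do you work?"}]}'
if __name__ == "__main__":
    import uvicorn  # assumes uvicorn is installed

    uvicorn.run(app, host="0.0.0.0", port=8000)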