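# ============================================================
# Earlier iteration (Qwen2.5-0.5B, no RAG), kept commented out
# below for reference
# ============================================================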
# from fastapi import FastAPI
# from pydantic import BaseModel
# from fastapi.openapi.utils import get_openapi
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch

# app = FastAPI(
#     title="Harshal AI Backend",
#     version="1.0.0",
# )

# MODEL_NAME = "Qwen/Qwen2.5-0.5B"

# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_NAME,
#     torch_dtype=torch.float32,
#     device_map="cpu",
# )

# class ChatMessage(BaseModel):
#     messages: list

# @app.get("/")
# def home():
#     return {"message": "Harshal AI backend running with Qwen 0.5B!"}

# @app.post("/chat")
# def chat(body: ChatMessage):
#     user_msg = body.messages[-1]["content"]
#     prompt = f"User: {user_msg}\nAssistant:"

#     inputs = tokenizer(prompt, return_tensors="pt")
#     outputs = model.generate(
#         **inputs,
#         max_new_tokens=120,
#         pad_token_id=tokenizer.eos_token_id,
#         temperature=0.4,
#     )

#     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     reply = text.split("Assistant:")[-1].strip()
#     return {"reply": reply}

# @app.get("/openapi.json")
# def openapi_json():
#     return get_openapi(
#         title="Harshal AI Backend",
#         version="1.0.0",
#         routes=app.routes
#     )


from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
import os
import torch

app = FastAPI(title="Harshal AI Backend", version="1.0.0")

# CORS (Next.js frontend)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],   # permissive for local dev; restrict to the frontend origin in production
    allow_methods=["*"],
    allow_headers=["*"],
)

# ============================================================
# 1) LOAD MAIN MODEL (Phi-3 Mini — good balance of quality/speed)
# ============================================================

MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
llm = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    dtype=torch.float32,  # `dtype` replaces the deprecated `torch_dtype` argument in recent transformers releases
)
llm.eval()
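
# NOTE (assumption): float32 on CPU is predictable but slow for a ~3.8B-parameter
# model like Phi-3 Mini; on a machine with a GPU you could instead pass
# device_map="auto" and dtype=torch.float16 to from_pretrained.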

# ============================================================
# 2) LOAD EMBEDDINGS + BUILD RAG FROM resume.pdf
# ============================================================

EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
embedder = SentenceTransformer(EMBED_MODEL)

RESUME_FILE = "resume.pdf"
resume_rag = None


def chunk_text(text, max_chars=450, overlap=80):
    """Split text into overlapping character chunks so context spanning a boundary is not lost."""
    assert overlap < max_chars, "overlap must be < max_chars, or the loop never advances"
    text = " ".join(text.split())
    chunks, start = [], 0

    while start < len(text):
        end = start + max_chars
        chunks.append(text[start:end])
        start = end - overlap

    return chunks
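
# Illustrative check of the defaults (max_chars=450, overlap=80): a
# 1,000-character string splits into three chunks, each consecutive pair
# sharing 80 characters:
#
#   >>> [len(c) for c in chunk_text("x" * 1000)]
#   [450, 450, 260]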


def build_rag():
    """Reads resume.pdf → chunks → embeddings."""
    global resume_rag

    if not os.path.exists(RESUME_FILE):
        print(f"⚠ {RESUME_FILE} NOT FOUND; RAG disabled.")
        return

    reader = PdfReader(RESUME_FILE)
    full_text = ""

    for page in reader.pages:
        full_text += page.extract_text() or ""

    chunks = chunk_text(full_text)

    embeddings = embedder.encode(
        chunks, convert_to_tensor=True, normalize_embeddings=True
    )

    resume_rag = {"chunks": chunks, "embs": embeddings}
    print("✅ Resume RAG built with", len(chunks), "chunks")


build_rag()


def retrieve_rag(query, top_k=3):
    """Find most relevant resume chunks."""
    if resume_rag is None:
        return ""

    q = embedder.encode([query], convert_to_tensor=True, normalize_embeddings=True)[0]
    sims = torch.nn.functional.cosine_similarity(q.unsqueeze(0), resume_rag["embs"])
    top = torch.topk(sims, k=min(top_k, len(sims)))

    return "\n\n".join(resume_rag["chunks"][int(i)] for i in top.indices)
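
# Example (illustrative): retrieve_rag("Where does Harshal work?") returns up
# to three of the most similar resume chunks joined by blank lines, or "" when
# resume.pdf was not found at startup.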


# ============================================================
# 3) REQUEST MODELS
# ============================================================

class Message(BaseModel):
    role: str
    content: str

class ChatReq(BaseModel):
    messages: List[Message]


# ============================================================
# 4) SMART MODE CHAT ROUTE (Resume + Fallback)
# ============================================================

@app.post("/chat")
def chat(req: ChatReq):
    user_msg = req.messages[-1].content.strip()

    # RAG: take from resume
    rag_context = retrieve_rag(user_msg)

    # Detect if we found resume info
    has_relevant_rag = rag_context and len(rag_context) > 40

    # SMART RULE:
    #   If resume has relevant info → answer ONLY from resume
    #   If resume has nothing → fallback using general persona
    if has_relevant_rag:
        fallback_note = ""
        resume_instruction = """
Use ONLY the details provided in the resume context.
Do NOT invent any new companies, dates, or roles.
Stay strictly consistent with:
- Current company: Börm Bruckmeier Infotech India Pvt. Ltd.
- Past experience: SarvaTech Consultants (Internship)
- Personal projects listed in resume.
"""
    else:
        fallback_note = """
The resume does NOT mention this. 
Provide a short helpful answer based on Harshal’s background, tone, and skills — 
BUT do NOT invent companies, dates, or job titles not present in the resume.
"""
        resume_instruction = ""

    # System persona
    system_prompt = f"""
You are Harshal Sonawane — a real human software engineer from Pune, India.

Tone:
- Natural, confident, friendly.
- 2–8 sentences max.
- Never say you are an AI.

Your resume (RAG context):
{rag_context}

Instructions:
{resume_instruction}

Fallback rule:
{fallback_note}
""".strip()

    # Chat template
    msgs = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_msg},
    ]

    input_ids = tokenizer.apply_chat_template(
        msgs,
        return_tensors="pt",
        add_generation_prompt=True,
    )

    with torch.no_grad():
        output_ids = llm.generate(
            input_ids,
            max_new_tokens=160,
            temperature=0.55,
            top_p=0.9,
            repetition_penalty=1.06,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
        )

    gen = output_ids[0][input_ids.shape[-1]:]
    reply = tokenizer.decode(gen, skip_special_tokens=True).strip()

    return {"reply": reply}


# ============================================================
# 5) HEALTH CHECK
# ============================================================

@app.get("/")
def health():
    return {
        "status": "Harshal AI backend running (SMART MODE)",
        "model": MODEL_NAME
    }
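

# ============================================================
# RUNNING LOCALLY (illustrative; assumes this file is main.py)
# ============================================================
#
#   uvicorn main:app --reload --port 8000
#
# Example request against /chat (payload shape matches ChatReq):
#
#   curl -X POST http://localhost:8000/chat \
#        -H "Content-Type: application/json" \
#        -d '{"messages":[{"role":"user","content":"Where do you work?"}]}'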