Harsh123007 committed on
Commit
f06ca93
·
verified ·
1 Parent(s): c628a03

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +80 -28
main.py CHANGED
@@ -52,49 +52,101 @@
52
 
53
 
54
 
55
-
56
  from fastapi import FastAPI
57
- from fastapi.middleware.cors import CORSMiddleware
58
- from fastapi.openapi.utils import get_openapi
59
  from pydantic import BaseModel
60
- from transformers import AutoModelForCausalLM, AutoTokenizer
61
- from sentence_transformers import SentenceTransformer, util
62
- from pathlib import Path
63
- from pypdf import PdfReader
64
- import torch
65
 
66
  app = FastAPI()
67
 
68
- # Allow your Next.js frontend to call this from anywhere
69
- app.add_middleware(
70
- CORSMiddleware,
71
- allow_origins=["*"],
72
- allow_methods=["*"],
73
- allow_headers=["*"],
74
- )
75
 
76
- # -------- LLM --------
77
- LLM_MODEL = "microsoft/phi-2"
78
- tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL)
79
  model = AutoModelForCausalLM.from_pretrained(
80
- LLM_MODEL,
81
  torch_dtype=torch.float32,
82
- device_map="cpu",
83
  )
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
- # -------- Embeddings + Resume store --------
87
- EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
88
- embedder = SentenceTransformer(EMBED_MODEL)
89
 
 
90
 
91
- class ResumeStore:
92
- def __init__(self):
93
- self.chunks: list[str] = []
94
- self.embeddings = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
 
97
- RESUME = ResumeStore()
98
 
99
 
100
  def load_resume():
 
52
 
53
 
54
 
 
55
  from fastapi import FastAPI
 
 
56
  from pydantic import BaseModel
57
+ from transformers import AutoTokenizer, AutoModelForCausalLM
58
+ import torch, os
 
 
 
59
 
60
  app = FastAPI()
61
 
62
+ # ------------------------------------------
63
+ # 1) USE FASTER + SMARTER MODEL (FREE)
64
+ # ------------------------------------------
65
+ MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
66
+
67
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
68
 
 
 
 
69
  model = AutoModelForCausalLM.from_pretrained(
70
+ MODEL_NAME,
71
  torch_dtype=torch.float32,
72
+ device_map="cpu"
73
  )
74
 
75
+ # ------------------------------------------
76
+ # 2) RAG LOADING (resume embeddings)
77
+ # ------------------------------------------
78
+ if os.path.exists("vector_store.pt"):
79
+ vectors = torch.load("vector_store.pt")
80
+ else:
81
+ vectors = None
82
+
83
+ def retrieve_relevant_text(query):
84
+ if vectors is None:
85
+ return ""
86
+ # simple cosine search (fast)
87
+ qs = embed(query)
88
+ sims = torch.nn.functional.cosine_similarity(qs, vectors['embeddings'])
89
+ top_idx = sims.argmax().item()
90
+ return vectors["chunks"][top_idx]
91
+
92
+ # ------------------------------------------
93
+ # Request format
94
+ # ------------------------------------------
95
+ class ChatRequest(BaseModel):
96
+ messages: list
97
+
98
+ # ------------------------------------------
99
+ # 3) MAIN CHAT ROUTE
100
+ # ------------------------------------------
101
+ @app.post("/chat")
102
+ def chat(req: ChatRequest):
103
+
104
+ user_msg = req.messages[-1]["content"]
105
+
106
+ # RAG context
107
+ rag_text = retrieve_relevant_text(user_msg)
108
+
109
+ # Persona system prompt
110
+ system_prompt = f"""
111
+ You are Harshal Sonawane — a real human software engineer.
112
+ You speak casually, friendly, and naturally.
113
+ You NEVER say you are an AI or chatbot.
114
+
115
+ Your goals:
116
+ - Answer clearly and like a human.
117
+ - Use resume knowledge when helpful.
118
+ - Do NOT hallucinate.
119
+ - Keep responses short (2–4 sentences).
120
+ - Stay in character as Harshal.
121
 
122
+ Resume context (if relevant):
123
+ {rag_text}
124
+ """.strip()
125
 
126
+ full_prompt = f"<|system|>{system_prompt}\n<|user|>{user_msg}\n<|assistant|>"
127
 
128
+ # Generate human-like answer FAST
129
+ inputs = tokenizer(full_prompt, return_tensors="pt")
130
+
131
+ output = model.generate(
132
+ **inputs,
133
+ max_new_tokens=120,
134
+ temperature=0.6,
135
+ top_p=0.9,
136
+ repetition_penalty=1.05,
137
+ pad_token_id=tokenizer.eos_token_id
138
+ )
139
+
140
+ reply = tokenizer.decode(output[0], skip_special_tokens=True)
141
+ reply = reply.split("<|assistant|>")[-1].strip()
142
+
143
+ return {"reply": reply}
144
+
145
+ @app.get("/")
146
+ def home():
147
+ return {"message": "Harshal AI backend running smoothly!"}
148
 
149
 
 
150
 
151
 
152
  def load_resume():