umerforsure committed on
Commit
1abf617
·
1 Parent(s): 5a107ba

⚡ Switched to Flan-T5 for faster response on CPU

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -6,6 +6,7 @@ import torch
6
  import gradio as gr
7
  from PyPDF2 import PdfReader
8
  from docx import Document as DocxDocument
 
9
  from pptx import Presentation
10
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
11
  from langchain_community.vectorstores import FAISS
@@ -13,16 +14,19 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
13
  from langchain.text_splitter import RecursiveCharacterTextSplitter
14
  from langchain_core.documents import Document
15
 
16
- # Load Reasoning Model
17
- model_id = "microsoft/phi-3-mini-128k-instruct"
18
  tokenizer = AutoTokenizer.from_pretrained(model_id)
19
- model = AutoModelForCausalLM.from_pretrained(
20
- model_id,
21
- torch_dtype=torch.bfloat16,
22
- device_map="auto"
 
 
 
 
 
23
  )
24
- reasoning_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer,
25
- max_new_tokens=512, temperature=0.7, top_p=0.9)
26
 
27
  # Embedding Model
28
  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 
6
  import gradio as gr
7
  from PyPDF2 import PdfReader
8
  from docx import Document as DocxDocument
9
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
10
  from pptx import Presentation
11
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
12
  from langchain_community.vectorstores import FAISS
 
14
  from langchain.text_splitter import RecursiveCharacterTextSplitter
15
  from langchain_core.documents import Document
16
 
17
+ # Load Reasoning Model (lightweight + CPU friendly)
18
+ model_id = "google/flan-t5-base"
19
  tokenizer = AutoTokenizer.from_pretrained(model_id)
20
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
21
+
22
+ reasoning_pipeline = pipeline(
23
+ "text2text-generation",
24
+ model=model,
25
+ tokenizer=tokenizer,
26
+ max_new_tokens=512,
27
+ temperature=0.7,
28
+ top_p=0.9
29
  )
 
 
30
 
31
  # Embedding Model
32
  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")