umerforsure committed on
Commit
816e864
·
1 Parent(s): 72f13fa

🚑 Fixed UTF-8 emoji bug, extension handling, and removed extra imports

Browse files
Files changed (1) hide show
  1. app.py +12 -20
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # app.py
2
  import os
3
  import tempfile
4
  import re
@@ -6,14 +5,12 @@ import torch
6
  import gradio as gr
7
  from PyPDF2 import PdfReader
8
  from docx import Document as DocxDocument
9
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
10
  from pptx import Presentation
11
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
12
  from langchain_community.vectorstores import FAISS
13
  from langchain_community.embeddings import HuggingFaceEmbeddings
14
  from langchain.text_splitter import RecursiveCharacterTextSplitter
15
  from langchain_core.documents import Document
16
- #plus
17
 
18
  # Load Reasoning Model (lightweight + CPU friendly)
19
  model_id = "google/flan-t5-base"
@@ -57,7 +54,7 @@ def extract_text(file_path, ext):
57
  doc = DocxDocument(file_path)
58
  return "\n".join([p.text for p in doc.paragraphs])
59
  elif ext == ".txt":
60
- with open(file_path, "r", encoding="utf-8") as f:
61
  return f.read()
62
  elif ext == ".pptx":
63
  prs = Presentation(file_path)
@@ -68,14 +65,13 @@ def extract_text(file_path, ext):
68
  def process_file(file):
69
  global vectorstore
70
  try:
71
- ext = os.path.splitext(getattr(file, "name", "file.pdf"))[1].lower()
 
 
72
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
73
- # Read bytes correctly depending on file type
74
  if hasattr(file, "read"):
75
- # Hugging Face NamedString or file-like
76
  file_bytes = file.read()
77
  elif isinstance(file, str) and os.path.exists(file):
78
- # Local file path fallback
79
  with open(file, "rb") as f:
80
  file_bytes = f.read()
81
  elif isinstance(file, bytes):
@@ -97,10 +93,6 @@ def process_file(file):
97
  except Exception as e:
98
  return f"❌ Error: {str(e)}"
99
 
100
-
101
-
102
-
103
-
104
  def generate_prompt(context, question):
105
  return f"""
106
  You are a helpful academic tutor assisting a student strictly based on course slides or textbook material.
@@ -136,17 +128,17 @@ def detect_question_type(q):
136
  def post_process_output(answer_text, question):
137
  qtype = detect_question_type(question)
138
  label_map = {
139
- "definition": "\ud83d\udcd8 **Definition**",
140
- "explanation": "\ud83d\udcd8 **Explanation**",
141
- "comparison": "\ud83d\udcd8 **Comparison**",
142
- "list": "\ud83d\udcd8 **Key Points**",
143
- "general": "\ud83d\udcd8 **Insight**",
144
  }
145
  answer_text = f"{label_map.get(qtype)}\n\n{answer_text}"
146
 
147
  if len(answer_text.split()) > 80:
148
  summary = summary_pipeline(answer_text, max_length=60, min_length=25, do_sample=False)[0]['summary_text']
149
- answer_text += f"\n\n\ud83d\udcdd **Summary:** {summary.strip()}"
150
 
151
  return answer_text
152
 
@@ -175,7 +167,7 @@ def ask_question(question):
175
  with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
176
  gr.Markdown("""
177
  # 📚 AI Study Assistant
178
- Upload your lecture slide/text file, ask questions, and get intelligent answers powered by Phi-3.
179
  """)
180
 
181
  with gr.Tab("Upload & Ask"):
 
 
1
  import os
2
  import tempfile
3
  import re
 
5
  import gradio as gr
6
  from PyPDF2 import PdfReader
7
  from docx import Document as DocxDocument
 
8
  from pptx import Presentation
9
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
10
  from langchain_community.vectorstores import FAISS
11
  from langchain_community.embeddings import HuggingFaceEmbeddings
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain_core.documents import Document
 
14
 
15
  # Load Reasoning Model (lightweight + CPU friendly)
16
  model_id = "google/flan-t5-base"
 
54
  doc = DocxDocument(file_path)
55
  return "\n".join([p.text for p in doc.paragraphs])
56
  elif ext == ".txt":
57
+ with open(file_path, "r", encoding="utf-8", errors="replace") as f:
58
  return f.read()
59
  elif ext == ".pptx":
60
  prs = Presentation(file_path)
 
65
  def process_file(file):
66
  global vectorstore
67
  try:
68
+ filename = getattr(file, "name", None)
69
+ ext = os.path.splitext(filename)[1].lower() if filename else ".pdf"
70
+
71
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
 
72
  if hasattr(file, "read"):
 
73
  file_bytes = file.read()
74
  elif isinstance(file, str) and os.path.exists(file):
 
75
  with open(file, "rb") as f:
76
  file_bytes = f.read()
77
  elif isinstance(file, bytes):
 
93
  except Exception as e:
94
  return f"❌ Error: {str(e)}"
95
 
 
 
 
 
96
  def generate_prompt(context, question):
97
  return f"""
98
  You are a helpful academic tutor assisting a student strictly based on course slides or textbook material.
 
128
  def post_process_output(answer_text, question):
129
  qtype = detect_question_type(question)
130
  label_map = {
131
+ "definition": "📘 **Definition**",
132
+ "explanation": "📘 **Explanation**",
133
+ "comparison": "📘 **Comparison**",
134
+ "list": "📘 **Key Points**",
135
+ "general": "📘 **Insight**",
136
  }
137
  answer_text = f"{label_map.get(qtype)}\n\n{answer_text}"
138
 
139
  if len(answer_text.split()) > 80:
140
  summary = summary_pipeline(answer_text, max_length=60, min_length=25, do_sample=False)[0]['summary_text']
141
+ answer_text += f"\n\n📝 **Summary:** {summary.strip()}"
142
 
143
  return answer_text
144
 
 
167
  with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
168
  gr.Markdown("""
169
  # 📚 AI Study Assistant
170
+ Upload your lecture slide/text file, ask questions, and get intelligent answers powered by Flan-T5.
171
  """)
172
 
173
  with gr.Tab("Upload & Ask"):