Commit 816e864
Parent(s): 72f13fa
Fixed UTF-8 emoji bug, extension handling, and removed extra imports
app.py
CHANGED
@@ -1,4 +1,3 @@
-# app.py
 import os
 import tempfile
 import re
@@ -6,14 +5,12 @@ import torch
 import gradio as gr
 from PyPDF2 import PdfReader
 from docx import Document as DocxDocument
-from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 from pptx import Presentation
-from transformers import pipeline, AutoTokenizer,
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_core.documents import Document
-#plus

 # Load Reasoning Model (lightweight + CPU friendly)
 model_id = "google/flan-t5-base"
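The import cleanup above leaves a single `transformers` import feeding the `google/flan-t5-base` reasoning model. A minimal sketch of how that checkpoint is typically loaded with these imports; the task name and generation settings below are my assumptions, not values taken from the diff:

```python
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

model_id = "google/flan-t5-base"

# Load tokenizer and seq2seq model once at startup; flan-t5-base is small
# enough to run on CPU, matching the "CPU friendly" comment in app.py.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Wrap both in a text2text-generation pipeline for simple prompt -> answer calls.
qa_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

print(qa_pipeline("Answer briefly: what is overfitting?", max_new_tokens=64)[0]["generated_text"])
```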
@@ -57,7 +54,7 @@ def extract_text(file_path, ext):
         doc = DocxDocument(file_path)
         return "\n".join([p.text for p in doc.paragraphs])
     elif ext == ".txt":
-        with open(file_path, "r", encoding="utf-8") as f:
+        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
             return f.read()
     elif ext == ".pptx":
         prs = Presentation(file_path)
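The `errors="replace"` argument above is the UTF-8 fix named in the commit title: a strict read raises `UnicodeDecodeError` on byte sequences that are not valid UTF-8 (a truncated emoji, for example), while the tolerant read substitutes U+FFFD and keeps going. A self-contained illustration, with the broken bytes invented for the demo:

```python
import tempfile

# Create a .txt file containing bytes that are not valid UTF-8
# (a truncated 4-byte emoji sequence), then read it back both ways.
with tempfile.NamedTemporaryFile("wb", suffix=".txt", delete=False) as tmp:
    tmp.write("emoji ahead: ".encode("utf-8") + b"\xf0\x9f\x93")
    path = tmp.name

try:
    with open(path, "r", encoding="utf-8") as f:      # old behaviour
        f.read()
except UnicodeDecodeError as err:
    print("strict decode fails:", err)

with open(path, "r", encoding="utf-8", errors="replace") as f:  # new behaviour
    print("tolerant decode:", f.read())  # bad bytes become U+FFFD replacement chars
```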
@@ -68,14 +65,13 @@ def extract_text(file_path, ext):
 def process_file(file):
     global vectorstore
     try:
-
+        filename = getattr(file, "name", None)
+        ext = os.path.splitext(filename)[1].lower() if filename else ".pdf"
+
         with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
-            # Read bytes correctly depending on file type
             if hasattr(file, "read"):
-                # Hugging Face NamedString or file-like
                 file_bytes = file.read()
             elif isinstance(file, str) and os.path.exists(file):
-                # Local file path fallback
                 with open(file, "rb") as f:
                     file_bytes = f.read()
             elif isinstance(file, bytes):
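The added `filename`/`ext` lines are the extension-handling fix: Gradio may pass a file-like object with a `.name` attribute, a plain path string, or raw bytes, and `getattr(file, "name", None)` plus `os.path.splitext` covers the named cases while `.pdf` serves as a fallback suffix. A rough sketch of that dispatch in isolation; the helper name and the bytes example are mine, not the app's:

```python
import os

def guess_extension(file, default=".pdf"):
    """Best-effort extension detection for the inputs Gradio may pass.

    Handles file-like objects exposing .name, plain path strings, and raw
    bytes (which carry no name at all, hence the default suffix).
    """
    filename = getattr(file, "name", None)           # file-like / NamedString case
    if filename is None and isinstance(file, str):   # plain path case
        filename = file
    if filename:
        ext = os.path.splitext(filename)[1].lower()
        if ext:
            return ext
    return default

class FakeUpload:
    name = "lecture_03.PPTX"

print(guess_extension(FakeUpload()))       # .pptx
print(guess_extension("notes/week1.txt"))  # .txt
print(guess_extension(b"%PDF-1.7 ..."))    # .pdf fallback for raw bytes
```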
@@ -97,10 +93,6 @@ def process_file(file):
     except Exception as e:
         return f"β Error: {str(e)}"

-
-
-
-
 def generate_prompt(context, question):
     return f"""
 You are a helpful academic tutor assisting a student strictly based on course slides or textbook material.
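`process_file` goes on to update the global `vectorstore`, and the imports point to the usual LangChain indexing pieces (splitter, embeddings, FAISS). The chunk sizes and embedding model below are placeholder choices, not values from this diff, but they show the shape of that indexing step:

```python
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

def build_vectorstore(raw_text: str) -> FAISS:
    # Split the extracted slide/PDF text into overlapping chunks so retrieval
    # returns focused passages rather than whole documents.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = [Document(page_content=chunk) for chunk in splitter.split_text(raw_text)]

    # Embed the chunks with a small sentence-transformers model and index them in FAISS.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_documents(docs, embeddings)

# ask_question would then retrieve context with something like:
# context_docs = vectorstore.similarity_search(question, k=3)
```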
@@ -136,17 +128,17 @@ def detect_question_type(q):
 def post_process_output(answer_text, question):
     qtype = detect_question_type(question)
     label_map = {
-        "definition": "
-        "explanation": "
-        "comparison": "
-        "list": "
-        "general": "
+        "definition": "π **Definition**",
+        "explanation": "π **Explanation**",
+        "comparison": "π **Comparison**",
+        "list": "π **Key Points**",
+        "general": "π **Insight**",
     }
     answer_text = f"{label_map.get(qtype)}\n\n{answer_text}"

     if len(answer_text.split()) > 80:
         summary = summary_pipeline(answer_text, max_length=60, min_length=25, do_sample=False)[0]['summary_text']
-        answer_text += f"\n\n
+        answer_text += f"\n\nπ **Summary:** {summary.strip()}"

     return answer_text

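`detect_question_type` is not part of this diff, so the keyword heuristic below is only a guess at how questions might map onto the `label_map` keys; the summarisation call mirrors the `summary_pipeline(...)[0]['summary_text']` pattern visible in the hunk, with an assumed checkpoint:

```python
import re
from transformers import pipeline

def detect_question_type(q: str) -> str:
    """Hypothetical keyword heuristic; the app's real implementation may differ."""
    q = q.lower()
    if re.search(r"\bwhat is\b|\bdefine\b|\bdefinition\b", q):
        return "definition"
    if re.search(r"\bcompare\b|\bdifference\b|\bversus\b|\bvs\b", q):
        return "comparison"
    if re.search(r"\blist\b|\btypes of\b|\bexamples of\b", q):
        return "list"
    if re.search(r"\bwhy\b|\bhow\b|\bexplain\b", q):
        return "explanation"
    return "general"

# Summarisation pipeline matching the call pattern in post_process_output;
# the checkpoint choice here is an assumption.
summary_pipeline = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

long_answer = " ".join(["Gradient descent iteratively updates model parameters."] * 30)
summary = summary_pipeline(long_answer, max_length=60, min_length=25, do_sample=False)[0]["summary_text"]
print(detect_question_type("What is gradient descent?"), "->", summary)
```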
@@ -175,7 +167,7 @@ def ask_question(question):
 with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
     gr.Markdown("""
     # π AI Study Assistant
-    Upload your lecture slide/text file, ask questions, and get intelligent answers powered by
+    Upload your lecture slide/text file, ask questions, and get intelligent answers powered by Flan-T5.
     """)

     with gr.Tab("Upload & Ask"):
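Only the header of the Blocks UI appears in the last hunk. For orientation, a stripped-down sketch of how such a tab is typically wired in Gradio; the component names, labels, and placeholder callbacks are illustrative, not the app's actual layout:

```python
import gradio as gr

def process_file(file):
    return "Indexed (placeholder)"      # stand-in for the app's real process_file

def ask_question(question):
    return f"Echo: {question}"          # stand-in for the app's real ask_question

with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# AI Study Assistant")

    with gr.Tab("Upload & Ask"):
        file_input = gr.File(label="Lecture slides / notes")
        status = gr.Textbox(label="Status", interactive=False)
        question = gr.Textbox(label="Your question")
        answer = gr.Markdown()

        # Wire the callbacks: uploading builds the index, the button queries it.
        file_input.upload(process_file, inputs=file_input, outputs=status)
        gr.Button("Ask").click(ask_question, inputs=question, outputs=answer)

if __name__ == "__main__":
    demo.launch()
```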