umerforsure committed on
Commit
816e864
·
1 Parent(s): 72f13fa

🚑 Fixed UTF-8 emoji bug, extension handling, and removed extra imports

Browse files
Files changed (1) hide show
  1. app.py +12 -20
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # app.py
2
  import os
3
  import tempfile
4
  import re
@@ -6,14 +5,12 @@ import torch
6
  import gradio as gr
7
  from PyPDF2 import PdfReader
8
  from docx import Document as DocxDocument
9
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
10
  from pptx import Presentation
11
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
12
  from langchain_community.vectorstores import FAISS
13
  from langchain_community.embeddings import HuggingFaceEmbeddings
14
  from langchain.text_splitter import RecursiveCharacterTextSplitter
15
  from langchain_core.documents import Document
16
- #plus
17
 
18
  # Load Reasoning Model (lightweight + CPU friendly)
19
  model_id = "google/flan-t5-base"
@@ -57,7 +54,7 @@ def extract_text(file_path, ext):
57
  doc = DocxDocument(file_path)
58
  return "\n".join([p.text for p in doc.paragraphs])
59
  elif ext == ".txt":
60
- with open(file_path, "r", encoding="utf-8") as f:
61
  return f.read()
62
  elif ext == ".pptx":
63
  prs = Presentation(file_path)
@@ -68,14 +65,13 @@ def extract_text(file_path, ext):
68
  def process_file(file):
69
  global vectorstore
70
  try:
71
- ext = os.path.splitext(getattr(file, "name", "file.pdf"))[1].lower()
 
 
72
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
73
- # Read bytes correctly depending on file type
74
  if hasattr(file, "read"):
75
- # Hugging Face NamedString or file-like
76
  file_bytes = file.read()
77
  elif isinstance(file, str) and os.path.exists(file):
78
- # Local file path fallback
79
  with open(file, "rb") as f:
80
  file_bytes = f.read()
81
  elif isinstance(file, bytes):
@@ -97,10 +93,6 @@ def process_file(file):
97
  except Exception as e:
98
  return f"❌ Error: {str(e)}"
99
 
100
-
101
-
102
-
103
-
104
  def generate_prompt(context, question):
105
  return f"""
106
  You are a helpful academic tutor assisting a student strictly based on course slides or textbook material.
@@ -136,17 +128,17 @@ def detect_question_type(q):
136
  def post_process_output(answer_text, question):
137
  qtype = detect_question_type(question)
138
  label_map = {
139
- "definition": "\ud83d\udcd8 **Definition**",
140
- "explanation": "\ud83d\udcd8 **Explanation**",
141
- "comparison": "\ud83d\udcd8 **Comparison**",
142
- "list": "\ud83d\udcd8 **Key Points**",
143
- "general": "\ud83d\udcd8 **Insight**",
144
  }
145
  answer_text = f"{label_map.get(qtype)}\n\n{answer_text}"
146
 
147
  if len(answer_text.split()) > 80:
148
  summary = summary_pipeline(answer_text, max_length=60, min_length=25, do_sample=False)[0]['summary_text']
149
- answer_text += f"\n\n\ud83d\udcdd **Summary:** {summary.strip()}"
150
 
151
  return answer_text
152
 
@@ -175,7 +167,7 @@ def ask_question(question):
175
  with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
176
  gr.Markdown("""
177
  # 📚 AI Study Assistant
178
- Upload your lecture slide/text file, ask questions, and get intelligent answers powered by Phi-3.
179
  """)
180
 
181
  with gr.Tab("Upload & Ask"):
 
 
1
  import os
2
  import tempfile
3
  import re
 
5
  import gradio as gr
6
  from PyPDF2 import PdfReader
7
  from docx import Document as DocxDocument
 
8
  from pptx import Presentation
9
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
10
  from langchain_community.vectorstores import FAISS
11
  from langchain_community.embeddings import HuggingFaceEmbeddings
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain_core.documents import Document
 
14
 
15
  # Load Reasoning Model (lightweight + CPU friendly)
16
  model_id = "google/flan-t5-base"
 
54
  doc = DocxDocument(file_path)
55
  return "\n".join([p.text for p in doc.paragraphs])
56
  elif ext == ".txt":
57
+ with open(file_path, "r", encoding="utf-8", errors="replace") as f:
58
  return f.read()
59
  elif ext == ".pptx":
60
  prs = Presentation(file_path)
 
65
  def process_file(file):
66
  global vectorstore
67
  try:
68
+ filename = getattr(file, "name", None)
69
+ ext = os.path.splitext(filename)[1].lower() if filename else ".pdf"
70
+
71
  with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
 
72
  if hasattr(file, "read"):
 
73
  file_bytes = file.read()
74
  elif isinstance(file, str) and os.path.exists(file):
 
75
  with open(file, "rb") as f:
76
  file_bytes = f.read()
77
  elif isinstance(file, bytes):
 
93
  except Exception as e:
94
  return f"❌ Error: {str(e)}"
95
 
 
 
 
 
96
  def generate_prompt(context, question):
97
  return f"""
98
  You are a helpful academic tutor assisting a student strictly based on course slides or textbook material.
 
128
  def post_process_output(answer_text, question):
129
  qtype = detect_question_type(question)
130
  label_map = {
131
+ "definition": "📘 **Definition**",
132
+ "explanation": "📘 **Explanation**",
133
+ "comparison": "📘 **Comparison**",
134
+ "list": "📘 **Key Points**",
135
+ "general": "📘 **Insight**",
136
  }
137
  answer_text = f"{label_map.get(qtype)}\n\n{answer_text}"
138
 
139
  if len(answer_text.split()) > 80:
140
  summary = summary_pipeline(answer_text, max_length=60, min_length=25, do_sample=False)[0]['summary_text']
141
+ answer_text += f"\n\n📝 **Summary:** {summary.strip()}"
142
 
143
  return answer_text
144
 
 
167
  with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
168
  gr.Markdown("""
169
  # 📚 AI Study Assistant
170
+ Upload your lecture slide/text file, ask questions, and get intelligent answers powered by Flan-T5.
171
  """)
172
 
173
  with gr.Tab("Upload & Ask"):