# URL-To-Answer / interface.py

import gradio as gr
import requests
import time
import os
from typing import Tuple, List

# ==================== API CONFIGURATION ====================
# For Hugging Face Spaces, both Gradio and FastAPI run in the same container
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")
MAX_RETRIES = 3
RETRY_DELAY = 5
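
# Illustrative note (not part of the original file): because API_BASE_URL is read from
# the environment above, the UI can be pointed at a backend outside this container,
# for example a hypothetical remote deployment:
#
#     API_BASE_URL=https://your-backend.example.com python interface.py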


# ==================== UTILITY FUNCTIONS ====================
def check_api_health(max_attempts=3) -> Tuple[bool, dict]:
    """Check API health with retry logic"""
    for attempt in range(max_attempts):
        try:
            response = requests.get(f"{API_BASE_URL}/health_check", timeout=10)
            if response.status_code == 200:
                return True, response.json()
        except Exception:
            if attempt < max_attempts - 1:
                time.sleep(2)
    return False, {}


def make_request_with_retry(method: str, endpoint: str, **kwargs) -> requests.Response:
    """Make API request with retry logic"""
    url = f"{API_BASE_URL}{endpoint}"
    for attempt in range(MAX_RETRIES):
        try:
            if method.upper() == "GET":
                response = requests.get(url, **kwargs)
            elif method.upper() == "POST":
                response = requests.post(url, **kwargs)
            elif method.upper() == "DELETE":
                response = requests.delete(url, **kwargs)
            else:
                raise ValueError(f"Unsupported method: {method}")

            if response.status_code in [200, 201]:
                return response
            elif response.status_code == 500 and attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY)
                continue
            else:
                response.raise_for_status()
        except requests.exceptions.ConnectionError:
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY)
            else:
                raise gr.Error("❌ Cannot connect to API. Please refresh and try again.", duration=10)
        except requests.exceptions.Timeout:
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY)
            else:
                raise gr.Error("⏱️ Request timeout. Please try again.", duration=10)
        except Exception as e:
            if attempt == MAX_RETRIES - 1:
                raise gr.Error(f"❌ Error: {str(e)}", duration=10)
    raise gr.Error("❌ Maximum retries exceeded. Please try again later.", duration=10)
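
# Illustrative usage of the retry helper (mirrors how the wrappers below call it);
# the /health_check endpoint is the same one used by check_api_health above:
#
#     resp = make_request_with_retry("GET", "/health_check", timeout=10)
#     print(resp.json())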


# ==================== WEB SCRAPING FUNCTIONS ====================
def extract_links(url: str, progress=gr.Progress()) -> List[str]:
    """Extract links from URL with progress tracking"""
    if not url:
        raise gr.Error("❌ Please provide a URL", duration=5)
    progress(0, desc="Connecting to website...")
    try:
        response = make_request_with_retry(
            "POST",
            "/extract_links",
            json={"url": url},
            timeout=30
        )
        progress(1, desc="Links extracted!")
        return response.json()["unique_links"]
    except Exception as e:
        raise gr.Error(f"❌ Failed to extract links: {str(e)}", duration=10)


def extract_text(urls: List[str], progress=gr.Progress()) -> str:
    """Extract text from URLs with progress tracking"""
    progress(0, desc="Starting text extraction...")
    try:
        response = make_request_with_retry(
            "POST",
            "/extract_text",
            json=urls,
            timeout=120
        )
        progress(1, desc="Text extraction complete!")
        return response.json()["file_saved"]
    except Exception as e:
        raise gr.Error(f"❌ Failed to extract text: {str(e)}", duration=10)


def perform_rag(file_path: str, prompt: str, progress=gr.Progress()) -> dict:
    """Perform RAG with progress tracking"""
    progress(0.3, desc="Analyzing content...")
    try:
        response = make_request_with_retry(
            "POST",
            "/rag",
            json={"file_path": file_path, "prompt": prompt},
            timeout=60
        )
        progress(1, desc="Analysis complete!")
        return response.json()
    except Exception as e:
        raise gr.Error(f"❌ Failed to perform RAG: {str(e)}", duration=10)
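
# For reference: the /rag endpoint is expected to return a JSON object containing at
# least the keys consumed below in process_multiple_links / process_homepage_only,
# i.e. 'user_query', 'assistant_response', and 'sources'.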


def process_multiple_links(url: str, prompt: str, progress=gr.Progress()) -> Tuple[str, str, dict]:
    """Process multiple links with comprehensive progress tracking"""
    if not url or not prompt:
        raise gr.Error("❌ Please provide both URL and prompt", duration=5)
    try:
        progress(0, desc="🔍 Extracting links from webpage...")
        links = extract_links(url, progress)

        progress(0.3, desc=f"📄 Found {len(links)} links. Processing top 5...")
        sample_links = links[:5]
        file_path = extract_text(sample_links, progress)

        progress(0.7, desc="🤖 Generating AI response...")
        result = perform_rag(file_path, prompt, progress)

        progress(1, desc="✅ Complete!")
        status_msg = f"✅ Processed {len(sample_links)} pages from {len(links)} total links found"
        response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"
        return status_msg, response_text, result['sources']
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Processing error: {str(e)}", duration=10)


def process_homepage_only(url: str, prompt: str, progress=gr.Progress()) -> Tuple[str, str, dict]:
    """Process homepage only with progress tracking"""
    if not url or not prompt:
        raise gr.Error("❌ Please provide both URL and prompt", duration=5)
    try:
        progress(0.2, desc="📄 Extracting homepage content...")
        file_path = extract_text([url], progress)

        progress(0.6, desc="🤖 Generating AI response...")
        result = perform_rag(file_path, prompt, progress)

        progress(1, desc="✅ Complete!")
        status_msg = "✅ Processed homepage content"
        response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"
        return status_msg, response_text, result['sources']
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Processing error: {str(e)}", duration=10)


# ==================== DOCUMENT UPLOAD FUNCTIONS ====================
def upload_single_document(file, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
    """Upload single document with progress tracking"""
    if not file:
        raise gr.Error("❌ Please select a file to upload", duration=5)
    if not collection_name:
        collection_name = "default"
    try:
        progress(0.1, desc="📤 Uploading document...")
        file_path = file.name if hasattr(file, 'name') else file
        # Read the file into memory so a retried request re-sends the full payload
        # instead of an already-consumed file handle.
        with open(file_path, 'rb') as f:
            file_content = f.read()
        files = {'file': (os.path.basename(file_path), file_content)}
        params = {'collection_name': collection_name}

        progress(0.4, desc="🔄 Processing document...")
        response = make_request_with_retry(
            "POST",
            "/upload_document",
            files=files,
            params=params,
            timeout=180
        )

        progress(1, desc="✅ Upload complete!")
        result = response.json()
        status_msg = f"""✅ Successfully uploaded '{result['filename']}'
📊 Created {result['chunks_created']} chunks
🗂️ Collection: {collection_name}
📝 File Type: {result['file_type']}"""
        return status_msg, result
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Upload failed: {str(e)}", duration=10)


def upload_multiple_documents(files, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
    """Upload multiple documents with progress tracking"""
    if not files or len(files) == 0:
        raise gr.Error("❌ Please select files to upload", duration=5)
    if not collection_name:
        collection_name = "default"
    try:
        progress(0.1, desc=f"📤 Uploading {len(files)} documents...")
        files_to_upload = []
        for idx, file in enumerate(files):
            file_path = file.name if hasattr(file, 'name') else file
            with open(file_path, 'rb') as f:
                file_content = f.read()
            files_to_upload.append(
                ('files', (os.path.basename(file_path), file_content))
            )
            progress((idx + 1) / len(files) * 0.5, desc=f"Reading file {idx + 1}/{len(files)}...")

        progress(0.5, desc="🔄 Processing all documents...")
        params = {'collection_name': collection_name}
        response = make_request_with_retry(
            "POST",
            "/upload_multiple_documents",
            files=files_to_upload,
            params=params,
            timeout=300
        )

        progress(1, desc="✅ All uploads complete!")
        result = response.json()
        status_msg = f"""✅ Successfully uploaded {result['successful_uploads']} files
❌ Failed: {result['failed_uploads']}
🗂️ Collection: {collection_name}"""
        return status_msg, result
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Upload failed: {str(e)}", duration=10)


def query_rag_documents(query: str, collection_name: str, top_k: int, progress=gr.Progress()) -> Tuple[str, str, dict]:
    """Query documents with progress tracking"""
    if not query:
        raise gr.Error("❌ Please enter a query", duration=5)
    if not collection_name:
        raise gr.Error("❌ Please select a collection", duration=5)
    try:
        progress(0.3, desc="🔍 Searching documents...")
        response = make_request_with_retry(
            "POST",
            "/query_documents",
            json={
                "query": query,
                "collection_name": collection_name,
                "top_k": top_k
            },
            timeout=60
        )

        progress(0.8, desc="🤖 Generating answer...")
        result = response.json()
        progress(1, desc="✅ Complete!")

        if "I couldn't find this information" in result['answer']:
            status_msg = "⚠️ No relevant information found in documents"
        else:
            status_msg = f"✅ Found relevant information from {len(result['sources'])} sources"
        answer_text = f"**Query:** {result['query']}\n\n**Answer:** {result['answer']}"
        return status_msg, answer_text, result['sources']
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Query failed: {str(e)}", duration=10)
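
# For reference: /query_documents is expected to return a JSON object with the keys
# consumed above, i.e. 'query', 'answer', and 'sources'.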


def list_all_collections() -> Tuple[str, dict, gr.Dropdown]:
    """List all collections with error handling"""
    try:
        response = make_request_with_retry("GET", "/list_collections", timeout=10)
        result = response.json()
        collections = result['collections']
        if not collections:
            return "📂 No collections found. Upload documents to create a collection.", None, gr.Dropdown(choices=["default"], value="default")

        summary = f"📊 **Total Collections:** {len(collections)}\n\n"
        for col in collections:
            summary += f"🗂️ **{col['collection_name']}**\n"
            summary += f" - Chunks: {col['total_chunks']}\n"
            summary += f" - Dimension: {col['dimension']}\n\n"

        collection_names = [col['collection_name'] for col in collections]
        return summary, result, gr.Dropdown(choices=collection_names, value=collection_names[0] if collection_names else "default")
    except Exception as e:
        raise gr.Error(f"❌ Failed to list collections: {str(e)}", duration=10)


def delete_collection(collection_name: str) -> Tuple[str, str, dict, gr.Dropdown]:
    """Delete collection with confirmation"""
    if not collection_name:
        raise gr.Error("❌ Please select a collection to delete", duration=5)
    try:
        response = make_request_with_retry(
            "DELETE",
            f"/delete_collection/{collection_name}",
            timeout=10
        )
        status = f"✅ Successfully deleted collection '{collection_name}'"
        # Refresh collections list
        summary, result, dropdown = list_all_collections()
        return status, summary, result, dropdown
    except Exception as e:
        raise gr.Error(f"❌ Failed to delete collection: {str(e)}", duration=10)


def get_system_health() -> Tuple[str, dict]:
    """Get system health information"""
    try:
        is_healthy, health_data = check_api_health()
        if not is_healthy:
            raise gr.Error("❌ System is offline. Please refresh the page.", duration=None)

        health_summary = f"""🟢 **System Status: Healthy**

📊 **Configuration:**
- Supabase: {'✅ Configured' if health_data.get('supabase_configured') else '❌ Not Configured'}
- Groq API: {'✅ Configured' if health_data.get('groq_configured') else '❌ Not Configured'}
- Embedding Model: {health_data.get('embedding_model', 'N/A')}

📁 **Vector Stores:**
- Total Collections: {health_data.get('vector_stores', 0)}
- Total Chunks: {health_data.get('total_chunks', 0)}
- Storage Path: {health_data.get('persistent_storage', 'N/A')}

📚 **Available Collections:**
{', '.join(health_data.get('collections', [])) if health_data.get('collections') else 'None'}
"""
        return health_summary, health_data
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Health check failed: {str(e)}", duration=10)
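
# For reference: /health_check is expected to report, among other fields, the keys
# rendered in the summary above: 'supabase_configured', 'groq_configured',
# 'embedding_model', 'vector_stores', 'total_chunks', 'persistent_storage',
# and 'collections'.

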
# ==================== GRADIO UI ====================
# Custom CSS (same as before)
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
* {
font-family: 'Inter', sans-serif !important;
}
.gradio-container {
background: linear-gradient(-45deg, #0f0c29, #302b63, #24243e, #1a1a2e);
background-size: 400% 400%;
animation: gradientShift 15s ease infinite;
}
@keyframes gradientShift {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
.main-container {
backdrop-filter: blur(20px);
background: rgba(20, 20, 30, 0.85);
border-radius: 24px;
padding: 2rem;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
border: 1px solid rgba(255, 255, 255, 0.1);
animation: fadeInUp 0.8s ease;
}
@keyframes fadeInUp {
from { opacity: 0; transform: translateY(30px); }
to { opacity: 1; transform: translateY(0); }
}
.animated-title {
background: linear-gradient(135deg, #00f2fe 0%, #4facfe 50%, #00c6ff 100%);
background-size: 200% 200%;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
animation: gradientText 3s ease infinite;
font-weight: 700;
font-size: 3.5rem;
text-align: center;
margin-bottom: 0.5rem;
filter: drop-shadow(0 0 20px rgba(0, 242, 254, 0.5));
}
@keyframes gradientText {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
.floating-icon {
animation: float 3s ease-in-out infinite;
display: inline-block;
}
@keyframes float {
0%, 100% { transform: translateY(0px); }
50% { transform: translateY(-10px); }
}
textarea, input[type="text"] {
font-size: 1.1rem !important;
border-radius: 12px !important;
border: 2px solid rgba(0, 242, 254, 0.3) !important;
transition: all 0.3s ease !important;
background: rgba(30, 30, 45, 0.9) !important;
color: #e0e0e0 !important;
}
textarea:focus, input[type="text"]:focus {
border-color: #00f2fe !important;
box-shadow: 0 0 20px rgba(0, 242, 254, 0.4) !important;
transform: translateY(-2px);
background: rgba(35, 35, 50, 0.95) !important;
}
label {
font-weight: 600 !important;
color: #b0b0b0 !important;
font-size: 1.1rem !important;
}
.gr-button {
background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important;
color: #0a0a0f !important;
border: none !important;
border-radius: 12px !important;
padding: 0.75rem 2rem !important;
font-size: 1.1rem !important;
font-weight: 600 !important;
transition: all 0.3s ease !important;
box-shadow: 0 4px 15px rgba(0, 242, 254, 0.4) !important;
}
.gr-button:hover {
transform: translateY(-3px) !important;
box-shadow: 0 6px 25px rgba(0, 242, 254, 0.6) !important;
}
.output-box {
background: rgba(30, 30, 45, 0.95) !important;
border-radius: 16px !important;
border: 1px solid rgba(0, 242, 254, 0.2) !important;
backdrop-filter: blur(10px);
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3) !important;
color: #e0e0e0 !important;
padding: 1.5rem !important;
min-height: 150px !important;
}
.output-box strong {
color: #4facfe !important;
font-weight: 600 !important;
}
"""


# Build interface
with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="RAG Assistant") as interface:
    gr.HTML("""
        <div class="main-container">
            <h1 class="animated-title">
                <span class="floating-icon">🌐</span> Advanced RAG Assistant
            </h1>
            <p class="subtitle" style="color: #b0b0b0; font-size: 1.3rem; text-align: center; margin-bottom: 2rem;">
                Upload documents (PDF/Markdown) or extract content from web pages, then ask questions using AI-powered retrieval
            </p>
        </div>
    """)

    with gr.Tabs() as tabs:
        # Web Scraping Tabs
        with gr.Tab("🔗 Multiple Links Analysis"):
            with gr.Row():
                with gr.Column():
                    url_input_multi = gr.Textbox(label="🌍 Website URL", placeholder="https://example.com")
                    prompt_input_multi = gr.Textbox(label="💭 Your Question", placeholder="What is this website about?", lines=3)
                    submit_btn_multi = gr.Button("✨ Analyze Multiple Links", variant="primary")
            with gr.Row():
                with gr.Column():
                    status_output_multi = gr.Textbox(label="📊 Status", elem_classes="output-box")
                    response_output_multi = gr.Markdown(label="🤖 AI Response", elem_classes="output-box")
                    sources_output_multi = gr.JSON(label="📚 Sources", elem_classes="output-box")

            submit_btn_multi.click(
                fn=process_multiple_links,
                inputs=[url_input_multi, prompt_input_multi],
                outputs=[status_output_multi, response_output_multi, sources_output_multi]
            )

        with gr.Tab("🏠 Homepage Only Analysis"):
            with gr.Row():
                with gr.Column():
                    url_input_home = gr.Textbox(label="🌍 Website URL", placeholder="https://example.com")
                    prompt_input_home = gr.Textbox(label="💭 Your Question", placeholder="What is this website about?", lines=3)
                    submit_btn_home = gr.Button("✨ Analyze Homepage", variant="primary")
            with gr.Row():
                with gr.Column():
                    status_output_home = gr.Textbox(label="📊 Status", elem_classes="output-box")
                    response_output_home = gr.Markdown(label="🤖 AI Response", elem_classes="output-box")
                    sources_output_home = gr.JSON(label="📚 Sources", elem_classes="output-box")

            submit_btn_home.click(
                fn=process_homepage_only,
                inputs=[url_input_home, prompt_input_home],
                outputs=[status_output_home, response_output_home, sources_output_home]
            )

        # Document Upload Tab
        with gr.Tab("📄 Document Upload & Query"):
            gr.Markdown("""
### Upload PDF or Markdown documents and query them using RAG
- Supports **PDF** and **Markdown** files
- Documents are chunked and stored in a FAISS vector database
- Organize documents into collections for better management
            """)
            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 📤 Upload Documents")
                    collection_name_upload = gr.Textbox(
                        label="🗂️ Collection Name",
                        placeholder="default",
                        value="default"
                    )
                    with gr.Tab("Single File"):
                        file_upload_single = gr.File(
                            label="📁 Select Document (PDF/Markdown)",
                            file_types=[".pdf", ".md", ".txt"]
                        )
                        upload_btn_single = gr.Button("📤 Upload Single Document", variant="primary")
                        upload_status_single = gr.Textbox(label="📊 Upload Status", elem_classes="output-box")
                        upload_result_single = gr.JSON(label="📋 Upload Details", elem_classes="output-box")
                    with gr.Tab("Multiple Files"):
                        file_upload_multi = gr.File(
                            label="📁 Select Documents (PDF/Markdown)",
                            file_count="multiple",
                            file_types=[".pdf", ".md", ".txt"]
                        )
                        upload_btn_multi_doc = gr.Button("📤 Upload Multiple Documents", variant="primary")
                        upload_status_multi = gr.Textbox(label="📊 Upload Status", elem_classes="output-box")
                        upload_result_multi = gr.JSON(label="📋 Upload Details", elem_classes="output-box")
                with gr.Column():
                    gr.Markdown("#### 🔍 Query Documents")
                    refresh_btn = gr.Button("🔄 Refresh Collections", variant="secondary")
                    collection_dropdown = gr.Dropdown(
                        label="🗂️ Select Collection",
                        choices=["default"],
                        value="default"
                    )
                    query_input = gr.Textbox(
                        label="💭 Your Question",
                        placeholder="Ask a question about your documents...",
                        lines=3
                    )
                    top_k_slider = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=3,
                        step=1,
                        label="📊 Number of Sources (top-k)"
                    )
                    query_btn = gr.Button("🔍 Search Documents", variant="primary")
                    query_status = gr.Textbox(label="📊 Query Status", elem_classes="output-box")
                    query_response = gr.Markdown(label="🤖 AI Answer", elem_classes="output-box")
                    query_sources = gr.JSON(label="📚 Source Citations", elem_classes="output-box")

            # Connect buttons
            upload_btn_single.click(
                fn=upload_single_document,
                inputs=[file_upload_single, collection_name_upload],
                outputs=[upload_status_single, upload_result_single]
            )
            upload_btn_multi_doc.click(
                fn=upload_multiple_documents,
                inputs=[file_upload_multi, collection_name_upload],
                outputs=[upload_status_multi, upload_result_multi]
            )
            query_btn.click(
                fn=query_rag_documents,
                inputs=[query_input, collection_dropdown, top_k_slider],
                outputs=[query_status, query_response, query_sources]
            )

            def refresh_collections():
                _, _, dropdown = list_all_collections()
                return dropdown

            refresh_btn.click(
                fn=refresh_collections,
                outputs=[collection_dropdown]
            )

        # Collection Management Tab
        with gr.Tab("🗂️ Collection Management"):
            gr.Markdown("### Manage Your Document Collections")
            with gr.Row():
                with gr.Column():
                    list_btn = gr.Button("📋 List All Collections", variant="primary")
                    collections_output = gr.Markdown(label="📊 Collections Overview", elem_classes="output-box")
                    collections_json = gr.JSON(label="📋 Detailed Information", elem_classes="output-box")
                with gr.Column():
                    gr.Markdown("#### 🗑️ Delete Collection")
                    collection_to_delete = gr.Dropdown(label="🗂️ Select Collection to Delete", choices=["default"])
                    delete_btn = gr.Button("🗑️ Delete Collection", variant="stop")
                    delete_status = gr.Textbox(label="📊 Status", elem_classes="output-box")

            list_btn.click(
                fn=list_all_collections,
                outputs=[collections_output, collections_json, collection_to_delete]
            )
            delete_btn.click(
                fn=delete_collection,
                inputs=[collection_to_delete],
                outputs=[delete_status, collections_output, collections_json, collection_to_delete]
            )

        # System Health Tab
        with gr.Tab("⚙️ System Health"):
            gr.Markdown("### System Status & Configuration")
            health_check_btn = gr.Button("🔍 Check System Health", variant="primary")
            health_output = gr.Markdown(label="🟢 System Status", elem_classes="output-box")
            health_json = gr.JSON(label="📋 Detailed Configuration", elem_classes="output-box")

            health_check_btn.click(
                fn=get_system_health,
                outputs=[health_output, health_json]
            )

    gr.HTML("""
        <div class="note-box" style="margin-top: 2rem; background: linear-gradient(135deg, rgba(0, 242, 254, 0.08) 0%, rgba(79, 172, 254, 0.08) 100%); border-radius: 12px; padding: 1rem; border-left: 4px solid #00f2fe;">
            <p style="margin: 0; font-size: 1.05rem; color: #00c6ff;">
                ℹ️ <strong>Note:</strong> This app features automatic retry logic and progress tracking.
                If you encounter errors, the system will automatically retry. Large files may take longer to process.
            </p>
        </div>
    """)


# Launch configuration for Hugging Face Spaces
if __name__ == "__main__":
    # Check API health on startup
    is_healthy, _ = check_api_health(max_attempts=5)
    if not is_healthy:
        print("⚠️ Warning: API is not responding. The app will launch but may not work correctly.")

    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
        show_api=False
    )