# URL-To-Answer / interface.py

import gradio as gr
import requests
import time
import os
from typing import Tuple, List

# ==================== API CONFIGURATION ====================
# For Hugging Face Spaces, both Gradio and FastAPI run in the same container
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")
MAX_RETRIES = 3
RETRY_DELAY = 5
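
# Illustrative note (not part of the original file): because API_BASE_URL is read from
# the environment above, the UI can be pointed at a backend outside this container,
# for example a hypothetical remote deployment:
#
#     API_BASE_URL=https://your-backend.example.com python interface.py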


# ==================== UTILITY FUNCTIONS ====================
def check_api_health(max_attempts=3) -> Tuple[bool, dict]:
    """Check API health with retry logic"""
    for attempt in range(max_attempts):
        try:
            response = requests.get(f"{API_BASE_URL}/health_check", timeout=10)
            if response.status_code == 200:
                return True, response.json()
        except Exception:
            if attempt < max_attempts - 1:
                time.sleep(2)
    return False, {}


def make_request_with_retry(method: str, endpoint: str, **kwargs) -> requests.Response:
    """Make API request with retry logic"""
    url = f"{API_BASE_URL}{endpoint}"
    for attempt in range(MAX_RETRIES):
        try:
            if method.upper() == "GET":
                response = requests.get(url, **kwargs)
            elif method.upper() == "POST":
                response = requests.post(url, **kwargs)
            elif method.upper() == "DELETE":
                response = requests.delete(url, **kwargs)
            else:
                raise ValueError(f"Unsupported method: {method}")

            if response.status_code in [200, 201]:
                return response
            elif response.status_code == 500 and attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY)
                continue
            else:
                response.raise_for_status()
        except requests.exceptions.ConnectionError:
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY)
            else:
                raise gr.Error("❌ Cannot connect to API. Please refresh and try again.", duration=10)
        except requests.exceptions.Timeout:
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY)
            else:
                raise gr.Error("⏱️ Request timeout. Please try again.", duration=10)
        except Exception as e:
            if attempt == MAX_RETRIES - 1:
                raise gr.Error(f"❌ Error: {str(e)}", duration=10)
    raise gr.Error("❌ Maximum retries exceeded. Please try again later.", duration=10)
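
# Illustrative usage of the retry helper (mirrors how the wrappers below call it);
# the /health_check endpoint is the same one used by check_api_health above:
#
#     resp = make_request_with_retry("GET", "/health_check", timeout=10)
#     print(resp.json())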


# ==================== WEB SCRAPING FUNCTIONS ====================
def extract_links(url: str, progress=gr.Progress()) -> List[str]:
    """Extract links from URL with progress tracking"""
    if not url:
        raise gr.Error("❌ Please provide a URL", duration=5)
    progress(0, desc="Connecting to website...")
    try:
        response = make_request_with_retry(
            "POST",
            "/extract_links",
            json={"url": url},
            timeout=30
        )
        progress(1, desc="Links extracted!")
        return response.json()["unique_links"]
    except Exception as e:
        raise gr.Error(f"❌ Failed to extract links: {str(e)}", duration=10)


def extract_text(urls: List[str], progress=gr.Progress()) -> str:
    """Extract text from URLs with progress tracking"""
    progress(0, desc="Starting text extraction...")
    try:
        response = make_request_with_retry(
            "POST",
            "/extract_text",
            json=urls,
            timeout=120
        )
        progress(1, desc="Text extraction complete!")
        return response.json()["file_saved"]
    except Exception as e:
        raise gr.Error(f"❌ Failed to extract text: {str(e)}", duration=10)


def perform_rag(file_path: str, prompt: str, progress=gr.Progress()) -> dict:
    """Perform RAG with progress tracking"""
    progress(0.3, desc="Analyzing content...")
    try:
        response = make_request_with_retry(
            "POST",
            "/rag",
            json={"file_path": file_path, "prompt": prompt},
            timeout=60
        )
        progress(1, desc="Analysis complete!")
        return response.json()
    except Exception as e:
        raise gr.Error(f"❌ Failed to perform RAG: {str(e)}", duration=10)
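
# For reference: the /rag endpoint is expected to return a JSON object containing at
# least the keys consumed below in process_multiple_links / process_homepage_only,
# i.e. 'user_query', 'assistant_response', and 'sources'.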


def process_multiple_links(url: str, prompt: str, progress=gr.Progress()) -> Tuple[str, str, dict]:
    """Process multiple links with comprehensive progress tracking"""
    if not url or not prompt:
        raise gr.Error("❌ Please provide both URL and prompt", duration=5)
    try:
        progress(0, desc="🔍 Extracting links from webpage...")
        links = extract_links(url, progress)

        progress(0.3, desc=f"📄 Found {len(links)} links. Processing top 5...")
        sample_links = links[:5]
        file_path = extract_text(sample_links, progress)

        progress(0.7, desc="🤖 Generating AI response...")
        result = perform_rag(file_path, prompt, progress)

        progress(1, desc="✅ Complete!")
        status_msg = f"✅ Processed {len(sample_links)} pages from {len(links)} total links found"
        response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"
        return status_msg, response_text, result['sources']
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Processing error: {str(e)}", duration=10)


def process_homepage_only(url: str, prompt: str, progress=gr.Progress()) -> Tuple[str, str, dict]:
    """Process homepage only with progress tracking"""
    if not url or not prompt:
        raise gr.Error("❌ Please provide both URL and prompt", duration=5)
    try:
        progress(0.2, desc="📄 Extracting homepage content...")
        file_path = extract_text([url], progress)

        progress(0.6, desc="🤖 Generating AI response...")
        result = perform_rag(file_path, prompt, progress)

        progress(1, desc="✅ Complete!")
        status_msg = "✅ Processed homepage content"
        response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}"
        return status_msg, response_text, result['sources']
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Processing error: {str(e)}", duration=10)


# ==================== DOCUMENT UPLOAD FUNCTIONS ====================
def upload_single_document(file, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
    """Upload single document with progress tracking"""
    if not file:
        raise gr.Error("❌ Please select a file to upload", duration=5)
    if not collection_name:
        collection_name = "default"
    try:
        progress(0.1, desc="📤 Uploading document...")
        file_path = file.name if hasattr(file, 'name') else file
        # Read the file into memory so a retried request re-sends the full payload
        # instead of an already-consumed file handle.
        with open(file_path, 'rb') as f:
            file_content = f.read()
        files = {'file': (os.path.basename(file_path), file_content)}
        params = {'collection_name': collection_name}

        progress(0.4, desc="🔄 Processing document...")
        response = make_request_with_retry(
            "POST",
            "/upload_document",
            files=files,
            params=params,
            timeout=180
        )

        progress(1, desc="✅ Upload complete!")
        result = response.json()
        status_msg = f"""✅ Successfully uploaded '{result['filename']}'
📊 Created {result['chunks_created']} chunks
🗂️ Collection: {collection_name}
📝 File Type: {result['file_type']}"""
        return status_msg, result
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Upload failed: {str(e)}", duration=10)


def upload_multiple_documents(files, collection_name: str, progress=gr.Progress()) -> Tuple[str, dict]:
    """Upload multiple documents with progress tracking"""
    if not files or len(files) == 0:
        raise gr.Error("❌ Please select files to upload", duration=5)
    if not collection_name:
        collection_name = "default"
    try:
        progress(0.1, desc=f"📤 Uploading {len(files)} documents...")
        files_to_upload = []
        for idx, file in enumerate(files):
            file_path = file.name if hasattr(file, 'name') else file
            with open(file_path, 'rb') as f:
                file_content = f.read()
            files_to_upload.append(
                ('files', (os.path.basename(file_path), file_content))
            )
            progress((idx + 1) / len(files) * 0.5, desc=f"Reading file {idx + 1}/{len(files)}...")

        progress(0.5, desc="🔄 Processing all documents...")
        params = {'collection_name': collection_name}
        response = make_request_with_retry(
            "POST",
            "/upload_multiple_documents",
            files=files_to_upload,
            params=params,
            timeout=300
        )

        progress(1, desc="✅ All uploads complete!")
        result = response.json()
        status_msg = f"""✅ Successfully uploaded {result['successful_uploads']} files
❌ Failed: {result['failed_uploads']}
🗂️ Collection: {collection_name}"""
        return status_msg, result
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Upload failed: {str(e)}", duration=10)


def query_rag_documents(query: str, collection_name: str, top_k: int, progress=gr.Progress()) -> Tuple[str, str, dict]:
    """Query documents with progress tracking"""
    if not query:
        raise gr.Error("❌ Please enter a query", duration=5)
    if not collection_name:
        raise gr.Error("❌ Please select a collection", duration=5)
    try:
        progress(0.3, desc="🔍 Searching documents...")
        response = make_request_with_retry(
            "POST",
            "/query_documents",
            json={
                "query": query,
                "collection_name": collection_name,
                "top_k": top_k
            },
            timeout=60
        )

        progress(0.8, desc="🤖 Generating answer...")
        result = response.json()
        progress(1, desc="✅ Complete!")

        if "I couldn't find this information" in result['answer']:
            status_msg = "⚠️ No relevant information found in documents"
        else:
            status_msg = f"✅ Found relevant information from {len(result['sources'])} sources"
        answer_text = f"**Query:** {result['query']}\n\n**Answer:** {result['answer']}"
        return status_msg, answer_text, result['sources']
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Query failed: {str(e)}", duration=10)
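
# For reference: /query_documents is expected to return a JSON object with the keys
# consumed above, i.e. 'query', 'answer', and 'sources'.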


def list_all_collections() -> Tuple[str, dict, gr.Dropdown]:
    """List all collections with error handling"""
    try:
        response = make_request_with_retry("GET", "/list_collections", timeout=10)
        result = response.json()
        collections = result['collections']
        if not collections:
            return "📂 No collections found. Upload documents to create a collection.", None, gr.Dropdown(choices=["default"], value="default")

        summary = f"📊 **Total Collections:** {len(collections)}\n\n"
        for col in collections:
            summary += f"🗂️ **{col['collection_name']}**\n"
            summary += f" - Chunks: {col['total_chunks']}\n"
            summary += f" - Dimension: {col['dimension']}\n\n"

        collection_names = [col['collection_name'] for col in collections]
        return summary, result, gr.Dropdown(choices=collection_names, value=collection_names[0] if collection_names else "default")
    except Exception as e:
        raise gr.Error(f"❌ Failed to list collections: {str(e)}", duration=10)


def delete_collection(collection_name: str) -> Tuple[str, str, dict, gr.Dropdown]:
    """Delete collection with confirmation"""
    if not collection_name:
        raise gr.Error("❌ Please select a collection to delete", duration=5)
    try:
        response = make_request_with_retry(
            "DELETE",
            f"/delete_collection/{collection_name}",
            timeout=10
        )
        status = f"✅ Successfully deleted collection '{collection_name}'"
        # Refresh collections list
        summary, result, dropdown = list_all_collections()
        return status, summary, result, dropdown
    except Exception as e:
        raise gr.Error(f"❌ Failed to delete collection: {str(e)}", duration=10)


def get_system_health() -> Tuple[str, dict]:
    """Get system health information"""
    try:
        is_healthy, health_data = check_api_health()
        if not is_healthy:
            raise gr.Error("❌ System is offline. Please refresh the page.", duration=None)

        health_summary = f"""🟢 **System Status: Healthy**

📊 **Configuration:**
- Supabase: {'✅ Configured' if health_data.get('supabase_configured') else '❌ Not Configured'}
- Groq API: {'✅ Configured' if health_data.get('groq_configured') else '❌ Not Configured'}
- Embedding Model: {health_data.get('embedding_model', 'N/A')}

📁 **Vector Stores:**
- Total Collections: {health_data.get('vector_stores', 0)}
- Total Chunks: {health_data.get('total_chunks', 0)}
- Storage Path: {health_data.get('persistent_storage', 'N/A')}

📚 **Available Collections:**
{', '.join(health_data.get('collections', [])) if health_data.get('collections') else 'None'}
"""
        return health_summary, health_data
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"❌ Health check failed: {str(e)}", duration=10)
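
# For reference: /health_check is expected to report, among other fields, the keys
# rendered in the summary above: 'supabase_configured', 'groq_configured',
# 'embedding_model', 'vector_stores', 'total_chunks', 'persistent_storage',
# and 'collections'.

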
# ==================== GRADIO UI ====================
# Custom CSS (same as before)
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
* {
font-family: 'Inter', sans-serif !important;
}
.gradio-container {
background: linear-gradient(-45deg, #0f0c29, #302b63, #24243e, #1a1a2e);
background-size: 400% 400%;
animation: gradientShift 15s ease infinite;
}
@keyframes gradientShift {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
.main-container {
backdrop-filter: blur(20px);
background: rgba(20, 20, 30, 0.85);
border-radius: 24px;
padding: 2rem;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
border: 1px solid rgba(255, 255, 255, 0.1);
animation: fadeInUp 0.8s ease;
}
@keyframes fadeInUp {
from { opacity: 0; transform: translateY(30px); }
to { opacity: 1; transform: translateY(0); }
}
.animated-title {
background: linear-gradient(135deg, #00f2fe 0%, #4facfe 50%, #00c6ff 100%);
background-size: 200% 200%;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
animation: gradientText 3s ease infinite;
font-weight: 700;
font-size: 3.5rem;
text-align: center;
margin-bottom: 0.5rem;
filter: drop-shadow(0 0 20px rgba(0, 242, 254, 0.5));
}
@keyframes gradientText {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
.floating-icon {
animation: float 3s ease-in-out infinite;
display: inline-block;
}
@keyframes float {
0%, 100% { transform: translateY(0px); }
50% { transform: translateY(-10px); }
}
textarea, input[type="text"] {
font-size: 1.1rem !important;
border-radius: 12px !important;
border: 2px solid rgba(0, 242, 254, 0.3) !important;
transition: all 0.3s ease !important;
background: rgba(30, 30, 45, 0.9) !important;
color: #e0e0e0 !important;
}
textarea:focus, input[type="text"]:focus {
border-color: #00f2fe !important;
box-shadow: 0 0 20px rgba(0, 242, 254, 0.4) !important;
transform: translateY(-2px);
background: rgba(35, 35, 50, 0.95) !important;
}
label {
font-weight: 600 !important;
color: #b0b0b0 !important;
font-size: 1.1rem !important;
}
.gr-button {
background: linear-gradient(135deg, #00f2fe 0%, #4facfe 100%) !important;
color: #0a0a0f !important;
border: none !important;
border-radius: 12px !important;
padding: 0.75rem 2rem !important;
font-size: 1.1rem !important;
font-weight: 600 !important;
transition: all 0.3s ease !important;
box-shadow: 0 4px 15px rgba(0, 242, 254, 0.4) !important;
}
.gr-button:hover {
transform: translateY(-3px) !important;
box-shadow: 0 6px 25px rgba(0, 242, 254, 0.6) !important;
}
.output-box {
background: rgba(30, 30, 45, 0.95) !important;
border-radius: 16px !important;
border: 1px solid rgba(0, 242, 254, 0.2) !important;
backdrop-filter: blur(10px);
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3) !important;
color: #e0e0e0 !important;
padding: 1.5rem !important;
min-height: 150px !important;
}
.output-box strong {
color: #4facfe !important;
font-weight: 600 !important;
}
"""


# Build interface
with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="RAG Assistant") as interface:
    gr.HTML("""
        <div class="main-container">
            <h1 class="animated-title">
                <span class="floating-icon">🌐</span> Advanced RAG Assistant
            </h1>
            <p class="subtitle" style="color: #b0b0b0; font-size: 1.3rem; text-align: center; margin-bottom: 2rem;">
                Upload documents (PDF/Markdown) or extract content from web pages, then ask questions using AI-powered retrieval
            </p>
        </div>
    """)

    with gr.Tabs() as tabs:
        # Web Scraping Tabs
        with gr.Tab("🔗 Multiple Links Analysis"):
            with gr.Row():
                with gr.Column():
                    url_input_multi = gr.Textbox(label="🌍 Website URL", placeholder="https://example.com")
                    prompt_input_multi = gr.Textbox(label="💭 Your Question", placeholder="What is this website about?", lines=3)
                    submit_btn_multi = gr.Button("✨ Analyze Multiple Links", variant="primary")
            with gr.Row():
                with gr.Column():
                    status_output_multi = gr.Textbox(label="📊 Status", elem_classes="output-box")
                    response_output_multi = gr.Markdown(label="🤖 AI Response", elem_classes="output-box")
                    sources_output_multi = gr.JSON(label="📚 Sources", elem_classes="output-box")

            submit_btn_multi.click(
                fn=process_multiple_links,
                inputs=[url_input_multi, prompt_input_multi],
                outputs=[status_output_multi, response_output_multi, sources_output_multi]
            )

        with gr.Tab("🏠 Homepage Only Analysis"):
            with gr.Row():
                with gr.Column():
                    url_input_home = gr.Textbox(label="🌍 Website URL", placeholder="https://example.com")
                    prompt_input_home = gr.Textbox(label="💭 Your Question", placeholder="What is this website about?", lines=3)
                    submit_btn_home = gr.Button("✨ Analyze Homepage", variant="primary")
            with gr.Row():
                with gr.Column():
                    status_output_home = gr.Textbox(label="📊 Status", elem_classes="output-box")
                    response_output_home = gr.Markdown(label="🤖 AI Response", elem_classes="output-box")
                    sources_output_home = gr.JSON(label="📚 Sources", elem_classes="output-box")

            submit_btn_home.click(
                fn=process_homepage_only,
                inputs=[url_input_home, prompt_input_home],
                outputs=[status_output_home, response_output_home, sources_output_home]
            )

        # Document Upload Tab
        with gr.Tab("📄 Document Upload & Query"):
            gr.Markdown("""
### Upload PDF or Markdown documents and query them using RAG
- Supports **PDF** and **Markdown** files
- Documents are chunked and stored in a FAISS vector database
- Organize documents into collections for better management
            """)
            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### 📤 Upload Documents")
                    collection_name_upload = gr.Textbox(
                        label="🗂️ Collection Name",
                        placeholder="default",
                        value="default"
                    )
                    with gr.Tab("Single File"):
                        file_upload_single = gr.File(
                            label="📁 Select Document (PDF/Markdown)",
                            file_types=[".pdf", ".md", ".txt"]
                        )
                        upload_btn_single = gr.Button("📤 Upload Single Document", variant="primary")
                        upload_status_single = gr.Textbox(label="📊 Upload Status", elem_classes="output-box")
                        upload_result_single = gr.JSON(label="📋 Upload Details", elem_classes="output-box")
                    with gr.Tab("Multiple Files"):
                        file_upload_multi = gr.File(
                            label="📁 Select Documents (PDF/Markdown)",
                            file_count="multiple",
                            file_types=[".pdf", ".md", ".txt"]
                        )
                        upload_btn_multi_doc = gr.Button("📤 Upload Multiple Documents", variant="primary")
                        upload_status_multi = gr.Textbox(label="📊 Upload Status", elem_classes="output-box")
                        upload_result_multi = gr.JSON(label="📋 Upload Details", elem_classes="output-box")
                with gr.Column():
                    gr.Markdown("#### 🔍 Query Documents")
                    refresh_btn = gr.Button("🔄 Refresh Collections", variant="secondary")
                    collection_dropdown = gr.Dropdown(
                        label="🗂️ Select Collection",
                        choices=["default"],
                        value="default"
                    )
                    query_input = gr.Textbox(
                        label="💭 Your Question",
                        placeholder="Ask a question about your documents...",
                        lines=3
                    )
                    top_k_slider = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=3,
                        step=1,
                        label="📊 Number of Sources (top-k)"
                    )
                    query_btn = gr.Button("🔍 Search Documents", variant="primary")
                    query_status = gr.Textbox(label="📊 Query Status", elem_classes="output-box")
                    query_response = gr.Markdown(label="🤖 AI Answer", elem_classes="output-box")
                    query_sources = gr.JSON(label="📚 Source Citations", elem_classes="output-box")

            # Connect buttons
            upload_btn_single.click(
                fn=upload_single_document,
                inputs=[file_upload_single, collection_name_upload],
                outputs=[upload_status_single, upload_result_single]
            )
            upload_btn_multi_doc.click(
                fn=upload_multiple_documents,
                inputs=[file_upload_multi, collection_name_upload],
                outputs=[upload_status_multi, upload_result_multi]
            )
            query_btn.click(
                fn=query_rag_documents,
                inputs=[query_input, collection_dropdown, top_k_slider],
                outputs=[query_status, query_response, query_sources]
            )

            def refresh_collections():
                _, _, dropdown = list_all_collections()
                return dropdown

            refresh_btn.click(
                fn=refresh_collections,
                outputs=[collection_dropdown]
            )

        # Collection Management Tab
        with gr.Tab("🗂️ Collection Management"):
            gr.Markdown("### Manage Your Document Collections")
            with gr.Row():
                with gr.Column():
                    list_btn = gr.Button("📋 List All Collections", variant="primary")
                    collections_output = gr.Markdown(label="📊 Collections Overview", elem_classes="output-box")
                    collections_json = gr.JSON(label="📋 Detailed Information", elem_classes="output-box")
                with gr.Column():
                    gr.Markdown("#### 🗑️ Delete Collection")
                    collection_to_delete = gr.Dropdown(label="🗂️ Select Collection to Delete", choices=["default"])
                    delete_btn = gr.Button("🗑️ Delete Collection", variant="stop")
                    delete_status = gr.Textbox(label="📊 Status", elem_classes="output-box")

            list_btn.click(
                fn=list_all_collections,
                outputs=[collections_output, collections_json, collection_to_delete]
            )
            delete_btn.click(
                fn=delete_collection,
                inputs=[collection_to_delete],
                outputs=[delete_status, collections_output, collections_json, collection_to_delete]
            )

        # System Health Tab
        with gr.Tab("⚙️ System Health"):
            gr.Markdown("### System Status & Configuration")
            health_check_btn = gr.Button("🔍 Check System Health", variant="primary")
            health_output = gr.Markdown(label="🟢 System Status", elem_classes="output-box")
            health_json = gr.JSON(label="📋 Detailed Configuration", elem_classes="output-box")

            health_check_btn.click(
                fn=get_system_health,
                outputs=[health_output, health_json]
            )

    gr.HTML("""
        <div class="note-box" style="margin-top: 2rem; background: linear-gradient(135deg, rgba(0, 242, 254, 0.08) 0%, rgba(79, 172, 254, 0.08) 100%); border-radius: 12px; padding: 1rem; border-left: 4px solid #00f2fe;">
            <p style="margin: 0; font-size: 1.05rem; color: #00c6ff;">
                ℹ️ <strong>Note:</strong> This app features automatic retry logic and progress tracking.
                If you encounter errors, the system will automatically retry. Large files may take longer to process.
            </p>
        </div>
    """)


# Launch configuration for Hugging Face Spaces
if __name__ == "__main__":
    # Check API health on startup
    is_healthy, _ = check_api_health(max_attempts=5)
    if not is_healthy:
        print("⚠️ Warning: API is not responding. The app will launch but may not work correctly.")

    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True,
        show_api=False
    )