"""
Hugging Face Space - Arabic EOU Detection Demo
File: app.py
This creates an interactive web demo for your model
"""
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ============================================================================
# LOAD MODEL
# ============================================================================
MODEL_NAME = "LordTenson/Saudi-EOU" # Replace with your model name
print("Loading model...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    model.eval()
    print(f"✅ Model loaded on {device}")
except Exception as e:
    print(f"Error loading model: {e}")
    print("Falling back to local model...")
    MODEL_NAME = "./arabert_eou_final"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    device = "cpu"
    model.to(device)
    model.eval()
# ============================================================================
# PREDICTION FUNCTION
# ============================================================================
def predict_eou(text, threshold=0.5):
    """
    Predict whether the text is an end-of-utterance.

    Args:
        text: Arabic text to analyze
        threshold: Confidence threshold for the EOU class

    Returns:
        Prediction message, EOU probability, and not-EOU probability
    """
    if not text or len(text.strip()) == 0:
        return "❌ Please enter some text", 0.0, 0.0

    # Tokenize
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding=True
    )

    # Move to device
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=1)

    # Get class probabilities
    not_eou_prob = probs[0][0].item()
    eou_prob = probs[0][1].item()

    # Determine result
    if eou_prob >= threshold:
        result = "✅ **END OF TURN** - Speaker has finished"
    else:
        result = "⏳ **CONTINUE** - Speaker is still talking"

    return result, eou_prob, not_eou_prob
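# Example of calling predict_eou directly (a quick sanity check; shown values are illustrative,
# not actual model output):
#   result, eou_p, not_eou_p = predict_eou("السلام عليكم كيف حالك", threshold=0.5)
#   print(result, eou_p, not_eou_p)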
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
def create_demo():
    """Create the Gradio interface"""
    with gr.Blocks(title="Arabic EOU Detection", theme=gr.themes.Soft()) as demo:
        # Header
        gr.Markdown("""
        # 🎤 Arabic End-of-Utterance Detection

        This model detects whether a speaker has finished their turn in Arabic conversations.
        It is an AraBERT model fine-tuned on Saudi-dialect conversations.

        **Use cases**: real-time voice agents, conversation systems, live transcription
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Input
                text_input = gr.Textbox(
                    label="Enter Arabic Text",
                    placeholder="مثال: السلام عليكم كيف حالك",
                    lines=3,
                    rtl=True  # Right-to-left rendering for Arabic
                )
                threshold_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Detection Threshold",
                    info="Lower = declares end-of-turn more readily; higher = requires more confidence"
                )
                submit_btn = gr.Button("🔍 Analyze", variant="primary")

            with gr.Column(scale=1):
                # Output
                result_output = gr.Markdown(label="Prediction")
                with gr.Row():
                    eou_prob = gr.Number(label="EOU Probability", precision=3)
                    not_eou_prob = gr.Number(label="Not-EOU Probability", precision=3)
        # Examples
        gr.Markdown("### 📝 Try These Examples:")
        gr.Examples(
            examples=[
                ["السلام عليكم كيف حالك", 0.5],
                ["أنا رايح", 0.5],
                ["شكراً لك والله", 0.5],
                ["يعني مثلاً", 0.5],
                ["تمام فهمت عليك", 0.5],
                ["أبي أقول لك", 0.5],
                ["والله ما أدري كيف", 0.5],
                ["خلاص انتهينا من الموضوع", 0.5],
            ],
            inputs=[text_input, threshold_slider],
            outputs=[result_output, eou_prob, not_eou_prob],
            fn=predict_eou,
            cache_examples=False,
        )
        # Model Info
        with gr.Accordion("ℹ️ Model Information", open=False):
            gr.Markdown(f"""
            ### Model Details
            - **Base Model**: aubmindlab/bert-base-arabertv2
            - **Fine-tuned on**: Saudi Arabic dialect conversations
            - **Accuracy**: 62%
            - **F1 Score**: 0.62 (balanced)
            - **Latency**: ~45 ms average

            ### How It Works
            1. The model analyzes the Arabic text
            2. It predicts the probability that the turn is complete
            3. If probability >= threshold → the turn has ended
            4. Used in real-time voice agents for natural conversations

            ### Classes
            - **EOU (End-of-Utterance)**: Speaker has finished their turn
            - **Not-EOU**: Speaker is continuing, more words expected

            ### Links
            - 🤗 [Model on Hugging Face](https://huggingface.co/{MODEL_NAME})
            - 📊 [Dataset](your-dataset-link)
            - 💻 [GitHub Repository](your-github-link)
            """)
        # Connect interface
        submit_btn.click(
            fn=predict_eou,
            inputs=[text_input, threshold_slider],
            outputs=[result_output, eou_prob, not_eou_prob]
        )
        text_input.submit(
            fn=predict_eou,
            inputs=[text_input, threshold_slider],
            outputs=[result_output, eou_prob, not_eou_prob]
        )

    return demo
# ============================================================================
# LAUNCH
# ============================================================================
if __name__ == "__main__":
    demo = create_demo()
    demo.launch()
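    # For local testing outside the Space, launch() also accepts options such as
    # server_name="0.0.0.0" or share=True (a sketch; the default call above is what the Space uses):
    #   demo.launch(server_name="0.0.0.0", share=True)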