"""
Hugging Face Space - Arabic EOU Detection Demo
File: app.py
This creates an interactive web demo for your model
"""
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ============================================================================
# LOAD MODEL
# ============================================================================
MODEL_NAME = "LordTenson/Saudi-EOU" # Replace with your model name
print("Loading model...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    model.eval()
    print(f"✅ Model loaded on {device}")
except Exception as e:
    print(f"Error loading model: {e}")
    print("Falling back to local model...")
    MODEL_NAME = "./arabert_eou_final"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    device = "cpu"
    model.to(device)
    model.eval()
# ============================================================================
# PREDICTION FUNCTION
# ============================================================================
def predict_eou(text, threshold=0.5):
    """
    Predict whether the text is an end-of-utterance.

    Args:
        text: Arabic text to analyze
        threshold: Confidence threshold for the EOU class

    Returns:
        Prediction message, EOU probability, and not-EOU probability
    """
    if not text or len(text.strip()) == 0:
        return "❌ Please enter some text", 0.0, 0.0

    # Tokenize
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding=True
    )

    # Move to device
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=1)

    # Get class probabilities
    not_eou_prob = probs[0][0].item()
    eou_prob = probs[0][1].item()

    # Determine result
    if eou_prob >= threshold:
        result = "✅ **END OF TURN** - Speaker has finished"
    else:
        result = "⏳ **CONTINUE** - Speaker is still talking"

    return result, eou_prob, not_eou_prob
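# Example of calling predict_eou directly (a quick sanity check; shown values are illustrative,
# not actual model output):
#   result, eou_p, not_eou_p = predict_eou("السلام عليكم كيف حالك", threshold=0.5)
#   print(result, eou_p, not_eou_p)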
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
def create_demo():
    """Create the Gradio interface"""
    with gr.Blocks(title="Arabic EOU Detection", theme=gr.themes.Soft()) as demo:
        # Header
        gr.Markdown("""
        # 🎤 Arabic End-of-Utterance Detection

        This model detects whether a speaker has finished their turn in Arabic conversations.
        It is an AraBERT model fine-tuned on Saudi-dialect conversations.

        **Use cases**: real-time voice agents, conversation systems, live transcription
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Input
                text_input = gr.Textbox(
                    label="Enter Arabic Text",
                    placeholder="مثال: السلام عليكم كيف حالك",
                    lines=3,
                    rtl=True  # Right-to-left rendering for Arabic
                )
                threshold_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Detection Threshold",
                    info="Lower = declares end-of-turn more readily; higher = requires more confidence"
                )
                submit_btn = gr.Button("🔍 Analyze", variant="primary")

            with gr.Column(scale=1):
                # Output
                result_output = gr.Markdown(label="Prediction")
                with gr.Row():
                    eou_prob = gr.Number(label="EOU Probability", precision=3)
                    not_eou_prob = gr.Number(label="Not-EOU Probability", precision=3)
        # Examples
        gr.Markdown("### 📝 Try These Examples:")
        gr.Examples(
            examples=[
                ["السلام عليكم كيف حالك", 0.5],
                ["أنا رايح", 0.5],
                ["شكراً لك والله", 0.5],
                ["يعني مثلاً", 0.5],
                ["تمام فهمت عليك", 0.5],
                ["أبي أقول لك", 0.5],
                ["والله ما أدري كيف", 0.5],
                ["خلاص انتهينا من الموضوع", 0.5],
            ],
            inputs=[text_input, threshold_slider],
            outputs=[result_output, eou_prob, not_eou_prob],
            fn=predict_eou,
            cache_examples=False,
        )
        # Model Info
        with gr.Accordion("ℹ️ Model Information", open=False):
            gr.Markdown(f"""
            ### Model Details
            - **Base Model**: aubmindlab/bert-base-arabertv2
            - **Fine-tuned on**: Saudi Arabic dialect conversations
            - **Accuracy**: 62%
            - **F1 Score**: 0.62 (balanced)
            - **Latency**: ~45 ms average

            ### How It Works
            1. The model analyzes the Arabic text
            2. It predicts the probability that the turn is complete
            3. If probability >= threshold → the turn has ended
            4. Used in real-time voice agents for natural conversations

            ### Classes
            - **EOU (End-of-Utterance)**: Speaker has finished their turn
            - **Not-EOU**: Speaker is continuing, more words expected

            ### Links
            - 🤗 [Model on Hugging Face](https://huggingface.co/{MODEL_NAME})
            - 📊 [Dataset](your-dataset-link)
            - 💻 [GitHub Repository](your-github-link)
            """)
        # Connect interface
        submit_btn.click(
            fn=predict_eou,
            inputs=[text_input, threshold_slider],
            outputs=[result_output, eou_prob, not_eou_prob]
        )
        text_input.submit(
            fn=predict_eou,
            inputs=[text_input, threshold_slider],
            outputs=[result_output, eou_prob, not_eou_prob]
        )

    return demo
# ============================================================================
# LAUNCH
# ============================================================================
if __name__ == "__main__":
    demo = create_demo()
    demo.launch()
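    # For local testing outside the Space, launch() also accepts options such as
    # server_name="0.0.0.0" or share=True (a sketch; the default call above is what the Space uses):
    #   demo.launch(server_name="0.0.0.0", share=True)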