Spaces:
Sleeping
Sleeping
| """ | |
| Hugging Face Space - Arabic EOU Detection Demo | |
| File: app.py | |
| This creates an interactive web demo for your model | |
| """ | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| # ============================================================================ | |
| # LOAD MODEL | |
| # ============================================================================ | |
MODEL_NAME = "LordTenson/Saudi-EOU"  # Replace with your model name


def _load_checkpoint(name, target_device):
    """Load the tokenizer and classifier stored under ``name``.

    The model is moved to ``target_device`` and put into eval mode before
    the ``(tokenizer, model)`` pair is returned.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    loaded_model.to(target_device)
    loaded_model.eval()
    return loaded_tokenizer, loaded_model


print("Loading model...")
try:
    # First attempt: the configured checkpoint, on GPU when one is available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer, model = _load_checkpoint(MODEL_NAME, device)
    print(f"โ Model loaded on {device}")
except Exception as e:
    # Any failure above switches to the local checkpoint, always on CPU.
    print(f"Error loading model: {e}")
    print("Falling back to local model...")
    MODEL_NAME = "./arabert_eou_final"
    device = "cpu"
    tokenizer, model = _load_checkpoint(MODEL_NAME, device)
| # ============================================================================ | |
| # PREDICTION FUNCTION | |
| # ============================================================================ | |
def predict_eou(text, threshold=0.5):
    """Classify a piece of Arabic text as end-of-utterance (EOU) or not.

    Args:
        text: Arabic text to analyze. ``None``, empty, or whitespace-only
            input short-circuits without running the model.
        threshold: Minimum EOU probability (inclusive) for the text to be
            reported as a finished turn.

    Returns:
        A 3-tuple ``(message, eou_prob, not_eou_prob)``: a Markdown status
        message followed by the two class probabilities as Python floats.
        For blank input both probabilities are ``0.0``.
    """
    if not text or not text.strip():
        return "โ Please enter some text", 0.0, 0.0

    # Tokenize, truncating to at most 256 tokens.
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding=True,
    )
    # The input tensors are moved to the device the model was placed on.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**encoded).logits
        probs = torch.softmax(logits, dim=1)

    # Column 0 is read as the not-EOU class and column 1 as the EOU class.
    not_eou_prob = probs[0][0].item()
    eou_prob = probs[0][1].item()

    if eou_prob >= threshold:
        result = "โ **END OF TURN** - Speaker has finished"
    else:
        result = "โณ **CONTINUE** - Speaker is still talking"
    return result, eou_prob, not_eou_prob
| # ============================================================================ | |
| # GRADIO INTERFACE | |
| # ============================================================================ | |
def create_demo():
    """Build the Gradio Blocks interface for the EOU demo.

    The layout has a header, an input column (text box, threshold slider,
    analyze button), an output column (prediction message and the two class
    probabilities), a set of clickable examples, and a collapsible model
    information panel. Both the button click and pressing Enter in the text
    box call ``predict_eou``.

    Returns:
        The constructed ``gr.Blocks`` instance; the caller launches it.
    """
    # NOTE(review): the emoji and Arabic literals below look mis-encoded
    # (mojibake) in this copy of the file — confirm against the UTF-8 source.

    # A Hub repo id gets a real URL; the local fallback path cannot be linked.
    if MODEL_NAME.startswith((".", "/")):
        model_link = f"Model loaded from local path `{MODEL_NAME}`"
    else:
        model_link = (
            f"[Model on Hugging Face](https://huggingface.co/{MODEL_NAME})"
        )

    header_md = "\n".join((
        "# ๐ค Arabic End-of-Utterance Detection",
        "This model detects whether a speaker has finished their turn in Arabic conversations.",
        "Fine-tuned AraBERT model on Saudi dialect conversations.",
        "**Use Case**: Real-time voice agents, conversation systems, live transcription",
    ))

    model_info_md = "\n".join((
        "### Model Details",
        "- **Base Model**: aubmindlab/bert-base-arabertv2",
        "- **Fine-tuned on**: Saudi Arabic dialect conversations",
        "- **Accuracy**: 62%",
        "- **F1 Score**: 0.62 (balanced)",
        "- **Latency**: ~45ms average",
        "### How It Works",
        "1. The model analyzes Arabic text",
        "2. Predicts probability of turn completion",
        # predict_eou compares with >=, so the description says so too.
        "3. If probability >= threshold โ Turn ends",
        "4. Used in real-time voice agents for natural conversations",
        "### Classes",
        "- **EOU (End-of-Utterance)**: Speaker has finished their turn",
        "- **Not-EOU**: Speaker is continuing, more words expected",
        "### Links",
        f"- ๐ค {model_link}",
        "- ๐ [Dataset](your-dataset-link)",
        "- ๐ป [GitHub Repository](your-github-link)",
    ))

    example_phrases = [
        "ุงูุณูุงู ุนูููู ููู ุญุงูู",
        "ุฃูุง ุฑุงูุญ",
        "ุดูุฑุงู ูู ูุงููู",
        "ูุนูู ู ุซูุงู",
        "ุชู ุงู ููู ุช ุนููู",
        "ุฃุจู ุฃููู ูู",
        "ูุงููู ู ุง ุฃุฏุฑู ููู",
        "ุฎูุงุต ุงูุชูููุง ู ู ุงูู ูุถูุน",
    ]

    with gr.Blocks(title="Arabic EOU Detection", theme=gr.themes.Soft()) as demo:
        # Header
        gr.Markdown(header_md)

        with gr.Row():
            with gr.Column(scale=2):
                # Input
                text_input = gr.Textbox(
                    label="Enter Arabic Text",
                    placeholder="ู ุซุงู: ุงูุณูุงู ุนูููู ููู ุญุงูู",
                    lines=3,
                    rtl=True,  # Right-to-left for Arabic
                )
                threshold_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Detection Threshold",
                    info="Lower = more sensitive, Higher = less sensitive",
                )
                submit_btn = gr.Button("๐ Analyze", variant="primary")
            with gr.Column(scale=1):
                # Output
                result_output = gr.Markdown(label="Prediction")
                with gr.Row():
                    eou_box = gr.Number(label="EOU Probability", precision=3)
                    not_eou_box = gr.Number(
                        label="Not-EOU Probability", precision=3
                    )

        # One handler spec shared by the examples, the button and Enter key.
        handler = dict(
            fn=predict_eou,
            inputs=[text_input, threshold_slider],
            outputs=[result_output, eou_box, not_eou_box],
        )

        # Examples (every phrase uses the default 0.5 threshold)
        gr.Markdown("### ๐ Try These Examples:")
        gr.Examples(
            examples=[[phrase, 0.5] for phrase in example_phrases],
            cache_examples=False,
            **handler,
        )

        # Model Info
        with gr.Accordion("โน๏ธ Model Information", open=False):
            gr.Markdown(model_info_md)

        # Connect interface
        submit_btn.click(**handler)
        text_input.submit(**handler)

    return demo
| # ============================================================================ | |
| # LAUNCH | |
| # ============================================================================ | |
if __name__ == "__main__":
    # Build the interface and start the Gradio server only when this file is
    # executed as a script; the app object stays bound to the name `demo`.
    demo = create_demo()
    demo.launch()