File size: 6,793 Bytes
a2d71c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
"""
Hugging Face Space - Arabic EOU Detection Demo
File: app.py

Builds an interactive Gradio web demo for the Arabic end-of-utterance (EOU) classifier.
"""

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# ============================================================================
# LOAD MODEL
# ============================================================================

MODEL_NAME = "LordTenson/Saudi-EOU"  # Hugging Face Hub repo id of the classifier


def _load_model(name, target_device):
    """Load the tokenizer and classifier from ``name`` and prepare them for inference.

    Args:
        name: Hub repo id or local directory passed to ``from_pretrained``.
        target_device: Torch device string the model is moved to.

    Returns:
        A ``(tokenizer, model)`` pair; the model is on ``target_device`` and
        switched to evaluation mode.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    loaded_model.to(target_device)
    loaded_model.eval()
    return loaded_tokenizer, loaded_model


print("Loading model...")
try:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer, model = _load_model(MODEL_NAME, device)
except Exception as e:
    # Deliberately broad: any failure of the primary load (download, missing
    # repo, moving to the GPU, ...) falls back to the local checkpoint on CPU.
    # If the fallback fails too, its exception propagates and aborts startup.
    print(f"Error loading model: {e}")
    print("Falling back to local model...")
    MODEL_NAME = "./arabert_eou_final"
    device = "cpu"
    tokenizer, model = _load_model(MODEL_NAME, device)

# Reported for both the primary and the fallback path.
print(f"✅ Model loaded on {device}")

# ============================================================================
# PREDICTION FUNCTION
# ============================================================================

def predict_eou(text: str, threshold: float = 0.5) -> tuple:
    """
    Classify a piece of Arabic text as end-of-utterance (EOU) or not.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text: Arabic text to analyze. ``None``, empty, or whitespace-only
            input returns early without running the model.
        threshold: EOU probability at or above which the text is reported
            as a finished turn.

    Returns:
        A ``(message, eou_prob, not_eou_prob)`` tuple: a Markdown status
        message followed by the two class probabilities as floats. For blank
        input the message asks for text and both probabilities are ``0.0``.
    """
    if not text or not text.strip():
        return "❌ Please enter some text", 0.0, 0.0

    # Tokenize; input longer than 256 tokens is truncated.
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding=True,
    )

    # The tensors must live on the same device as the model.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference without gradient tracking.
    with torch.no_grad():
        logits = model(**encoded).logits
        probs = torch.softmax(logits, dim=1)

    # Index 0 is read as not-EOU and index 1 as EOU.
    # NOTE(review): assumes the checkpoint's label order matches - confirm
    # against the model config.
    not_eou_prob = probs[0][0].item()
    eou_prob = probs[0][1].item()

    if eou_prob >= threshold:
        result = "✅ **END OF TURN** - Speaker has finished"
    else:
        result = "⏳ **CONTINUE** - Speaker is still talking"

    return result, eou_prob, not_eou_prob

# ============================================================================
# GRADIO INTERFACE
# ============================================================================

def create_demo():
    """Build the Gradio Blocks UI for the EOU detector.

    The layout has a text box and threshold slider on the left, the
    prediction message and both class probabilities on the right, a list of
    clickable example sentences, and a collapsible model-information panel.
    The Analyze button, pressing Enter in the text box, and the examples all
    call ``predict_eou`` with the same inputs and outputs.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    # MODEL_NAME can be a local path when the loader fell back to the on-disk
    # checkpoint; only a Hub repo id can be turned into a huggingface.co URL.
    if MODEL_NAME.startswith((".", "/")):
        model_link = MODEL_NAME
    else:
        model_link = f"https://huggingface.co/{MODEL_NAME}"

    example_texts = [
        "السلام عليكم كيف حالك",
        "أنا رايح",
        "شكراً لك والله",
        "يعني مثلاً",
        "تمام فهمت عليك",
        "أبي أقول لك",
        "والله ما أدري كيف",
        "خلاص انتهينا من الموضوع",
    ]

    with gr.Blocks(title="Arabic EOU Detection", theme=gr.themes.Soft()) as demo:

        # Header
        gr.Markdown("""
        # 🎤 Arabic End-of-Utterance Detection
        
        This model detects whether a speaker has finished their turn in Arabic conversations.
        Fine-tuned AraBERT model on Saudi dialect conversations.
        
        **Use Case**: Real-time voice agents, conversation systems, live transcription
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Input
                text_input = gr.Textbox(
                    label="Enter Arabic Text",
                    placeholder="مثال: السلام عليكم كيف حالك",
                    lines=3,
                    rtl=True,  # Right-to-left for Arabic
                )

                threshold_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Detection Threshold",
                    info="Lower = more sensitive, Higher = less sensitive",
                )

                submit_btn = gr.Button("🔍 Analyze", variant="primary")

            with gr.Column(scale=1):
                # Output
                result_output = gr.Markdown(label="Prediction")

                with gr.Row():
                    eou_prob_box = gr.Number(label="EOU Probability", precision=3)
                    not_eou_prob_box = gr.Number(label="Not-EOU Probability", precision=3)

        # Every trigger below feeds the same components to predict_eou.
        inputs = [text_input, threshold_slider]
        outputs = [result_output, eou_prob_box, not_eou_prob_box]

        # Examples
        gr.Markdown("### 📝 Try These Examples:")

        gr.Examples(
            examples=[[sample, 0.5] for sample in example_texts],
            inputs=inputs,
            outputs=outputs,
            fn=predict_eou,
            cache_examples=False,
        )

        # Model Info
        with gr.Accordion("ℹ️ Model Information", open=False):
            gr.Markdown(f"""
            ### Model Details
            - **Base Model**: aubmindlab/bert-base-arabertv2
            - **Fine-tuned on**: Saudi Arabic dialect conversations
            - **Accuracy**: 62%
            - **F1 Score**: 0.62 (balanced)
            - **Latency**: ~45ms average
            
            ### How It Works
            1. The model analyzes Arabic text
            2. Predicts probability of turn completion
            3. If probability ≥ threshold → Turn ends
            4. Used in real-time voice agents for natural conversations
            
            ### Classes
            - **EOU (End-of-Utterance)**: Speaker has finished their turn
            - **Not-EOU**: Speaker is continuing, more words expected
            
            ### Links
            - 🤗 [Model on Hugging Face]({model_link})
            - 📊 [Dataset](your-dataset-link)
            - 💻 [GitHub Repository](your-github-link)
            """)

        # Connect interface: button click and Enter in the text box behave alike.
        submit_btn.click(fn=predict_eou, inputs=inputs, outputs=outputs)
        text_input.submit(fn=predict_eou, inputs=inputs, outputs=outputs)

    return demo

# ============================================================================
# LAUNCH
# ============================================================================

if __name__ == "__main__":
    # Build and launch the UI only when this file is executed as a script,
    # so importing the module does not launch the app.
    demo = create_demo()
    demo.launch()