# Scraped page header: Spaces: Sleeping | File size: 6,793 Bytes | a2d71c5
"""
Hugging Face Space - Arabic EOU Detection Demo
File: app.py
This creates an interactive web demo for your model
"""
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ============================================================================
# LOAD MODEL
# ============================================================================
MODEL_NAME = "LordTenson/Saudi-EOU"  # Replace with your model name


def _load_eou_model(name, target_device):
    """Load the tokenizer and classifier for *name* and prepare them for inference.

    Args:
        name: Hub repo id or local directory passed to ``from_pretrained``.
        target_device: Torch device string the model is moved to.

    Returns:
        A ``(tokenizer, model)`` pair; the model is on *target_device* and in
        eval mode.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    loaded_model.to(target_device)
    loaded_model.eval()
    return loaded_tokenizer, loaded_model


print("Loading model...")
try:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer, model = _load_eou_model(MODEL_NAME, device)
    print(f"✅ Model loaded on {device}")
except Exception as e:
    # Top-level boundary: any failure with the primary model (download,
    # config, or moving to the device) is reported and the local checkpoint
    # is tried instead. A failure of the fallback itself propagates.
    print(f"Error loading model: {e}")
    print("Falling back to local model...")
    MODEL_NAME = "./arabert_eou_final"
    device = "cpu"  # the fallback always runs on CPU
    tokenizer, model = _load_eou_model(MODEL_NAME, device)
# ============================================================================
# PREDICTION FUNCTION
# ============================================================================
def predict_eou(text, threshold=0.5):
    """Classify Arabic text as end-of-utterance (EOU) or still in progress.

    Args:
        text: Arabic text to analyze. ``None``, empty, or whitespace-only
            input returns early without running the model.
        threshold: Minimum EOU probability (inclusive) for the text to be
            reported as a finished turn.

    Returns:
        A ``(message, eou_prob, not_eou_prob)`` tuple: a Markdown status
        message, then the softmax probability at class index 1 (EOU) and at
        class index 0 (not-EOU). Both probabilities are ``0.0`` when no text
        was supplied.
    """
    # NOTE(review): the emoji in the status strings were restored from a
    # mis-decoded copy of this file; the one on the next line is a best
    # guess -- confirm against the deployed app.
    if not text or not text.strip():
        return "❌ Please enter some text", 0.0, 0.0

    # Tokenize; inputs longer than 256 tokens are truncated.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding=True,
    )

    # The tensors must live on the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=1)

    # Single input, so row 0 holds the class probabilities.
    not_eou_prob = probs[0][0].item()
    eou_prob = probs[0][1].item()

    if eou_prob >= threshold:
        result = "✅ **END OF TURN** - Speaker has finished"
    else:
        result = "⏳ **CONTINUE** - Speaker is still talking"

    return result, eou_prob, not_eou_prob
# ============================================================================
# GRADIO INTERFACE
# ============================================================================
def create_demo():
    """Build the Gradio Blocks UI for the Arabic EOU detection demo.

    The page holds a text box and threshold slider, a prediction panel with
    the two class probabilities, clickable example inputs, and a collapsed
    model-information section. Both the button and pressing Enter in the text
    box call ``predict_eou``.

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    # NOTE(review): this file was recovered from a copy with lost indentation
    # and mis-decoded non-ASCII text. The layout nesting below is inferred
    # from the order of the ``with`` statements, and the emoji / Arabic
    # literals are reconstructed -- confirm against the original app.

    # A bare repo id used as a Markdown URL resolves relative to the page, so
    # link to the Hub explicitly; a local fallback path has no Hub page.
    if MODEL_NAME.startswith((".", "/")):
        model_link = f"`{MODEL_NAME}` (local checkpoint)"
    else:
        model_link = f"[Model on Hugging Face](https://huggingface.co/{MODEL_NAME})"

    with gr.Blocks(title="Arabic EOU Detection", theme=gr.themes.Soft()) as demo:
        # Header
        gr.Markdown("""
        # 🎤 Arabic End-of-Utterance Detection

        This model detects whether a speaker has finished their turn in Arabic conversations.
        Fine-tuned AraBERT model on Saudi dialect conversations.

        **Use Case**: Real-time voice agents, conversation systems, live transcription
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Input
                text_input = gr.Textbox(
                    label="Enter Arabic Text",
                    placeholder="مثال: السلام عليكم كيف حالك",
                    lines=3,
                    rtl=True,  # Right-to-left for Arabic
                )
                threshold_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.5,
                    step=0.05,
                    label="Detection Threshold",
                    info="Lower = more sensitive, Higher = less sensitive",
                )
                submit_btn = gr.Button("🔍 Analyze", variant="primary")

            with gr.Column(scale=1):
                # Output
                result_output = gr.Markdown(label="Prediction")
                with gr.Row():
                    eou_prob = gr.Number(label="EOU Probability", precision=3)
                    not_eou_prob = gr.Number(label="Not-EOU Probability", precision=3)

        # Examples
        gr.Markdown("### 📝 Try These Examples:")
        gr.Examples(
            examples=[
                ["السلام عليكم كيف حالك", 0.5],
                ["أنا رايح", 0.5],
                ["شكراً لك والله", 0.5],
                ["يعني مثلاً", 0.5],
                ["تمام فهمت عليك", 0.5],
                ["أبي أقول لك", 0.5],
                ["والله ما أدري كيف", 0.5],
                ["خلاص انتهينا من الموضوع", 0.5],
            ],
            inputs=[text_input, threshold_slider],
            outputs=[result_output, eou_prob, not_eou_prob],
            fn=predict_eou,
            cache_examples=False,
        )

        # Model Info
        with gr.Accordion("ℹ️ Model Information", open=False):
            gr.Markdown(f"""
            ### Model Details
            - **Base Model**: aubmindlab/bert-base-arabertv2
            - **Fine-tuned on**: Saudi Arabic dialect conversations
            - **Accuracy**: 62%
            - **F1 Score**: 0.62 (balanced)
            - **Latency**: ~45ms average

            ### How It Works
            1. The model analyzes Arabic text
            2. Predicts probability of turn completion
            3. If probability ≥ threshold → Turn ends
            4. Used in real-time voice agents for natural conversations

            ### Classes
            - **EOU (End-of-Utterance)**: Speaker has finished their turn
            - **Not-EOU**: Speaker is continuing, more words expected

            ### Links
            - 🤗 {model_link}
            - 📊 [Dataset](your-dataset-link)
            - 💻 [GitHub Repository](your-github-link)
            """)

        # Connect interface: the button and Enter in the text box run the
        # same prediction with the same inputs and outputs.
        submit_btn.click(
            fn=predict_eou,
            inputs=[text_input, threshold_slider],
            outputs=[result_output, eou_prob, not_eou_prob],
        )
        text_input.submit(
            fn=predict_eou,
            inputs=[text_input, threshold_slider],
            outputs=[result_output, eou_prob, not_eou_prob],
        )

    return demo
# ============================================================================
# LAUNCH
# ============================================================================
if __name__ == "__main__":
    # Build the interface and start the Gradio server only when this file is
    # run as a script.
    demo = create_demo()
    demo.launch()