Spaces:
Running
on
Zero
Running
on
Zero
Fix
Browse files
app.py
CHANGED
|
@@ -31,7 +31,7 @@ def transcribe(audio_path: str) -> str:
|
|
| 31 |
"""
|
| 32 |
if not audio_path:
|
| 33 |
return ""
|
| 34 |
-
|
| 35 |
data, _ = librosa.load(audio_path, sr=SAMPLE_RATE)
|
| 36 |
if len(data) > MAX_SAMPLE_SIZE:
|
| 37 |
data = data[:MAX_SAMPLE_SIZE]
|
|
@@ -52,7 +52,7 @@ custom_theme = gr.themes.Soft(
|
|
| 52 |
font=gr.themes.GoogleFont("Inter"),
|
| 53 |
text_size="lg",
|
| 54 |
spacing_size="md",
|
| 55 |
-
radius_size="lg"
|
| 56 |
).set(
|
| 57 |
button_primary_background_fill="*primary_600",
|
| 58 |
button_primary_background_fill_hover="*primary_700",
|
|
@@ -197,32 +197,24 @@ with gr.Blocks(fill_height=False) as demo:
|
|
| 197 |
<p class="header-subtitle">Advanced English Audio Transcription powered by AI</p>
|
| 198 |
</div>
|
| 199 |
""")
|
| 200 |
-
|
| 201 |
# Info banner
|
| 202 |
gr.HTML(f"""
|
| 203 |
<div class="info-banner">
|
| 204 |
ℹ️ Upload or record audio in English (max {MAX_DURATION} seconds). Supports WAV, MP3, and other common formats.
|
| 205 |
</div>
|
| 206 |
""")
|
| 207 |
-
|
| 208 |
# Main content
|
| 209 |
with gr.Group(elem_classes="main-card"):
|
| 210 |
# Audio input
|
| 211 |
audio = gr.Audio(
|
| 212 |
-
label="🎵 Audio Input",
|
| 213 |
-
type="filepath",
|
| 214 |
-
sources=["upload", "microphone"],
|
| 215 |
-
elem_classes="audio-container"
|
| 216 |
)
|
| 217 |
-
|
| 218 |
# Transcribe button
|
| 219 |
-
transcribe_btn = gr.Button(
|
| 220 |
-
|
| 221 |
-
variant="primary",
|
| 222 |
-
size="lg",
|
| 223 |
-
elem_classes="primary-button"
|
| 224 |
-
)
|
| 225 |
-
|
| 226 |
# Output
|
| 227 |
output = gr.Textbox(
|
| 228 |
label="π Transcription",
|
|
@@ -230,9 +222,9 @@ with gr.Blocks(fill_height=False) as demo:
|
|
| 230 |
lines=6,
|
| 231 |
max_lines=12,
|
| 232 |
show_copy_button=True,
|
| 233 |
-
elem_classes="transcription-output"
|
| 234 |
)
|
| 235 |
-
|
| 236 |
# Examples section
|
| 237 |
with gr.Group(elem_classes="examples-container"):
|
| 238 |
gr.Markdown("### 💡 Try These Examples")
|
|
@@ -244,7 +236,7 @@ with gr.Blocks(fill_height=False) as demo:
|
|
| 244 |
cache_examples=True,
|
| 245 |
examples_per_page=5,
|
| 246 |
)
|
| 247 |
-
|
| 248 |
# Footer
|
| 249 |
gr.HTML("""
|
| 250 |
<div class="footer-container">
|
|
@@ -254,22 +246,12 @@ with gr.Blocks(fill_height=False) as demo:
|
|
| 254 |
</p>
|
| 255 |
</div>
|
| 256 |
""")
|
| 257 |
-
|
| 258 |
# Event handlers
|
| 259 |
-
transcribe_btn.click(
|
| 260 |
-
|
| 261 |
-
inputs=audio,
|
| 262 |
-
outputs=output,
|
| 263 |
-
api_name="transcribe"
|
| 264 |
-
)
|
| 265 |
-
|
| 266 |
# Auto-transcribe on audio upload (optional - remove if you prefer manual triggering)
|
| 267 |
-
audio.change(
|
| 268 |
-
fn=transcribe,
|
| 269 |
-
inputs=audio,
|
| 270 |
-
outputs=output,
|
| 271 |
-
show_progress="minimal"
|
| 272 |
-
)
|
| 273 |
|
| 274 |
|
| 275 |
if __name__ == "__main__":
|
|
@@ -278,44 +260,7 @@ if __name__ == "__main__":
|
|
| 278 |
css=custom_css,
|
| 279 |
footer_links=[
|
| 280 |
{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
|
| 281 |
-
{"label": "Model", "url": "https://huggingface.co/kyutai/stt-2.6b-en-trfs"}
|
| 282 |
],
|
| 283 |
-
mcp_server=True
|
| 284 |
)
|
| 285 |
-
|
| 286 |
-
---
|
| 287 |
-
|
| 288 |
-
## ✨ Key Improvements
|
| 289 |
-
|
| 290 |
-
### 🎨 Modern Design
|
| 291 |
-
- **Gradient header** with clean typography
|
| 292 |
-
- **Card-based layout** for better visual hierarchy
|
| 293 |
-
- **Soft theme** with custom blue/purple gradient accent colors
|
| 294 |
-
- **Professional spacing** and rounded corners
|
| 295 |
-
|
| 296 |
-
### 📱 Mobile-First
|
| 297 |
-
- **Responsive design** that adapts to all screen sizes
|
| 298 |
-
- **Optimized padding** and font sizes for mobile
|
| 299 |
-
- **Touch-friendly** button sizes
|
| 300 |
-
- **Max-width container** for better readability on large screens
|
| 301 |
-
|
| 302 |
-
### π UX Enhancements
|
| 303 |
-
- **Info banner** explaining max duration and supported formats
|
| 304 |
-
- **Auto-transcribe** on audio upload (optional)
|
| 305 |
-
- **Copy button** on transcription output
|
| 306 |
-
- **Visual feedback** with hover effects on buttons
|
| 307 |
-
- **Clear visual hierarchy** with icons and labels
|
| 308 |
-
|
| 309 |
-
### 🎯 Minimal & Clean
|
| 310 |
-
- **Removed unnecessary Row/Column** nesting
|
| 311 |
-
- **Streamlined components** - only what's needed
|
| 312 |
-
- **Better grouping** with semantic sections
|
| 313 |
-
- **Professional footer** with proper attribution
|
| 314 |
-
|
| 315 |
-
### 🔧 Technical
|
| 316 |
-
- **Gradio 6 compliant** - all parameters in correct locations
|
| 317 |
-
- **Custom theme** with Soft base and gradient accents
|
| 318 |
-
- **Mobile-optimized CSS** with media queries
|
| 319 |
-
- **Proper error handling** with empty audio check
|
| 320 |
-
|
| 321 |
-
The redesign maintains all functionality while providing a modern, professional, mobile-friendly interface! π
|
|
|
|
| 31 |
"""
|
| 32 |
if not audio_path:
|
| 33 |
return ""
|
| 34 |
+
|
| 35 |
data, _ = librosa.load(audio_path, sr=SAMPLE_RATE)
|
| 36 |
if len(data) > MAX_SAMPLE_SIZE:
|
| 37 |
data = data[:MAX_SAMPLE_SIZE]
|
|
|
|
| 52 |
font=gr.themes.GoogleFont("Inter"),
|
| 53 |
text_size="lg",
|
| 54 |
spacing_size="md",
|
| 55 |
+
radius_size="lg",
|
| 56 |
).set(
|
| 57 |
button_primary_background_fill="*primary_600",
|
| 58 |
button_primary_background_fill_hover="*primary_700",
|
|
|
|
| 197 |
<p class="header-subtitle">Advanced English Audio Transcription powered by AI</p>
|
| 198 |
</div>
|
| 199 |
""")
|
| 200 |
+
|
| 201 |
# Info banner
|
| 202 |
gr.HTML(f"""
|
| 203 |
<div class="info-banner">
|
| 204 |
ℹ️ Upload or record audio in English (max {MAX_DURATION} seconds). Supports WAV, MP3, and other common formats.
|
| 205 |
</div>
|
| 206 |
""")
|
| 207 |
+
|
| 208 |
# Main content
|
| 209 |
with gr.Group(elem_classes="main-card"):
|
| 210 |
# Audio input
|
| 211 |
audio = gr.Audio(
|
| 212 |
+
label="🎵 Audio Input", type="filepath", sources=["upload", "microphone"], elem_classes="audio-container"
|
|
|
|
|
|
|
|
|
|
| 213 |
)
|
| 214 |
+
|
| 215 |
# Transcribe button
|
| 216 |
+
transcribe_btn = gr.Button("✨ Transcribe Audio", variant="primary", size="lg", elem_classes="primary-button")
|
| 217 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
# Output
|
| 219 |
output = gr.Textbox(
|
| 220 |
label="π Transcription",
|
|
|
|
| 222 |
lines=6,
|
| 223 |
max_lines=12,
|
| 224 |
show_copy_button=True,
|
| 225 |
+
elem_classes="transcription-output",
|
| 226 |
)
|
| 227 |
+
|
| 228 |
# Examples section
|
| 229 |
with gr.Group(elem_classes="examples-container"):
|
| 230 |
gr.Markdown("### 💡 Try These Examples")
|
|
|
|
| 236 |
cache_examples=True,
|
| 237 |
examples_per_page=5,
|
| 238 |
)
|
| 239 |
+
|
| 240 |
# Footer
|
| 241 |
gr.HTML("""
|
| 242 |
<div class="footer-container">
|
|
|
|
| 246 |
</p>
|
| 247 |
</div>
|
| 248 |
""")
|
| 249 |
+
|
| 250 |
# Event handlers
|
| 251 |
+
transcribe_btn.click(fn=transcribe, inputs=audio, outputs=output, api_name="transcribe")
|
| 252 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
# Auto-transcribe on audio upload (optional - remove if you prefer manual triggering)
|
| 254 |
+
audio.change(fn=transcribe, inputs=audio, outputs=output, show_progress="minimal")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
|
| 256 |
|
| 257 |
if __name__ == "__main__":
|
|
|
|
| 260 |
css=custom_css,
|
| 261 |
footer_links=[
|
| 262 |
{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
|
| 263 |
+
{"label": "Model", "url": "https://huggingface.co/kyutai/stt-2.6b-en-trfs"},
|
| 264 |
],
|
| 265 |
+
mcp_server=True,
|
| 266 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|