Cheeky Sparrow committed on
Commit
823e4d7
·
1 Parent(s): cc9a12f
Files changed (2) hide show
  1. app.py +0 -110
  2. requirements.txt +0 -1
app.py CHANGED
@@ -6,12 +6,6 @@ from typing import Literal, Optional
6
 
7
  import gradio as gr
8
  import torch
9
- import numpy as np
10
- import matplotlib.pyplot as plt
11
- import librosa
12
- import soundfile as sf
13
- from matplotlib.figure import Figure
14
- from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
15
 
16
  from NatureLM.config import Config
17
  from NatureLM.models.NatureLM import NatureLM
@@ -19,71 +13,6 @@ from NatureLM.utils import generate_sample_batches, prepare_sample_waveforms
19
  import spaces
20
 
21
 
22
- def create_spectrogram(audio_path: str, sr: int = None, n_fft: int = 2048, hop_length: int = 512) -> str:
23
- """
24
- Create a spectrogram visualization from an audio file.
25
-
26
- Args:
27
- audio_path: Path to the audio file
28
- sr: Sample rate (if None, uses original sample rate)
29
- n_fft: FFT window size
30
- hop_length: Hop length for STFT
31
-
32
- Returns:
33
- Path to the saved spectrogram image
34
- """
35
- try:
36
- # Load audio
37
- audio, orig_sr = sf.read(audio_path)
38
- if len(audio.shape) == 2: # stereo to mono
39
- audio = audio.mean(axis=1)
40
-
41
- # Use original sample rate if not specified
42
- if sr is None:
43
- sr = orig_sr
44
-
45
- # Create spectrogram using original sample rate
46
- D = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length, sr=orig_sr)
47
- S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
48
-
49
- # Create figure
50
- fig, ax = plt.subplots(figsize=(10, 6))
51
- img = librosa.display.specshow(S_db, sr=orig_sr, hop_length=hop_length, x_axis='time', y_axis='hz', ax=ax)
52
- ax.set_title('Spectrogram')
53
- fig.colorbar(img, ax=ax, format='%+2.0f dB')
54
-
55
- # Save to temporary file
56
- temp_path = tempfile.mktemp(suffix='.png')
57
- fig.savefig(temp_path, dpi=150, bbox_inches='tight')
58
- plt.close(fig)
59
-
60
- return temp_path
61
- except Exception as e:
62
- print(f"Error creating spectrogram: {e}")
63
- return None
64
-
65
-
66
- def audio_visualization_component(audio_path: str, view_type: str = "waveform") -> str:
67
- """
68
- Create an audio visualization component that can show either waveform or spectrogram.
69
-
70
- Args:
71
- audio_path: Path to the audio file
72
- view_type: Either "waveform" or "spectrogram"
73
-
74
- Returns:
75
- Path to the visualization image
76
- """
77
- if not audio_path:
78
- return None
79
-
80
- if view_type == "spectrogram":
81
- return create_spectrogram(audio_path)
82
- else:
83
- # For waveform, we'll use the default Gradio audio component
84
- return audio_path
85
-
86
-
87
  class ModelManager:
88
  """Manages model loading and state"""
89
 
@@ -256,18 +185,6 @@ def _chat_tab(examples):
256
  visible=True
257
  )
258
 
259
- # Add visualization options for chat
260
- with gr.Row():
261
- view_selector = gr.Radio(
262
- choices=["Waveform", "Spectrogram"],
263
- value="Waveform",
264
- label="Audio Visualization Type",
265
- interactive=True
266
- )
267
-
268
- # Add visualization display
269
- visualization_display = gr.Image(label="Audio Visualization", visible=True)
270
-
271
  chatbot = gr.Chatbot(
272
  label="Model inputs",
273
  elem_id="chatbot",
@@ -281,33 +198,6 @@ def _chat_tab(examples):
281
  send_all = gr.Button("Send all", elem_id="send-all")
282
  clear_button = gr.ClearButton(components=[chatbot, chat_input], visible=False)
283
 
284
- # Function to update audio visualization in chat
285
- def update_chat_visualization(message, view_type):
286
- if not message or not message.get("files"):
287
- return None
288
-
289
- # Get the first audio file from the message
290
- audio_files = message.get("files", [])
291
- if not audio_files:
292
- return None
293
-
294
- audio_path = audio_files[0]
295
- return audio_visualization_component(audio_path, view_type.lower())
296
-
297
- # Connect visualization update for chat
298
- view_selector.change(
299
- update_chat_visualization,
300
- [chat_input, view_selector],
301
- [visualization_display]
302
- )
303
-
304
- # Also update when audio is uploaded
305
- chat_input.change(
306
- update_chat_visualization,
307
- [chat_input, view_selector],
308
- [visualization_display]
309
- )
310
-
311
  chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
312
  bot_msg = send_all.click(
313
  bot_response,
 
6
 
7
  import gradio as gr
8
  import torch
 
 
 
 
 
 
9
 
10
  from NatureLM.config import Config
11
  from NatureLM.models.NatureLM import NatureLM
 
13
  import spaces
14
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  class ModelManager:
17
  """Manages model loading and state"""
18
 
 
185
  visible=True
186
  )
187
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  chatbot = gr.Chatbot(
189
  label="Model inputs",
190
  elem_id="chatbot",
 
198
  send_all = gr.Button("Send all", elem_id="send-all")
199
  clear_button = gr.ClearButton(components=[chatbot, chat_input], visible=False)
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
202
  bot_msg = send_all.click(
203
  bot_response,
requirements.txt CHANGED
@@ -9,7 +9,6 @@ gradio>=5.10.0
9
  google-cloud-aiplatform>=1.76.0
10
  Levenshtein>=0.25.1
11
  librosa>=0.9.2
12
- matplotlib>=3.9.0
13
  memoization>=0.4.0
14
  mir-eval>=0.7
15
  numpy>=1.26.4
 
9
  google-cloud-aiplatform>=1.76.0
10
  Levenshtein>=0.25.1
11
  librosa>=0.9.2
 
12
  memoization>=0.4.0
13
  mir-eval>=0.7
14
  numpy>=1.26.4