import io

import gradio as gr
import scipy.io.wavfile as wavfile
import torch
from transformers import AutoProcessor, SeamlessM4TModel
# Hugging Face checkpoint shared by the processor and the model below.
_CHECKPOINT = "facebook/hf-seamless-m4t-medium"

# NOTE: despite its name, ``tokenizer`` holds whatever ``AutoProcessor``
# resolves for the checkpoint, not necessarily a bare tokenizer.
tokenizer = AutoProcessor.from_pretrained(_CHECKPOINT)
model = SeamlessM4TModel.from_pretrained(_CHECKPOINT)

# Sample input string kept at module level; ``greet`` takes its own ``text``
# parameter, which shadows this name inside the function.
text = "some example text in the English language"
def greet(text, src_lang="eng", tgt_lang="eng"):
    """Synthesize speech for ``text`` and return it as WAV-encoded bytes.

    Args:
        text: Input text to be spoken.
        src_lang: Language code of ``text``, forwarded to the processor.
        tgt_lang: Language code of the generated speech, forwarded to
            ``model.generate``.

    Returns:
        The bytes of a WAV file containing the generated waveform.
    """
    # The module-level ``tokenizer`` is the checkpoint's processor; it turns
    # the raw text into the tensors the model consumes.
    inputs = tokenizer(text=text, src_lang=src_lang, return_tensors="pt")

    with torch.no_grad():
        # Speech is produced by ``generate`` (the plain forward pass has no
        # ``waveform`` output); its first element is the waveform batch.
        waveform = model.generate(**inputs, tgt_lang=tgt_lang)[0]

    # scipy needs a NumPy array, not a torch tensor; ``squeeze`` drops the
    # batch dimension of the single generated utterance.
    samples = waveform.cpu().numpy().squeeze()

    # Encode in memory: avoids a shared "tmp.wav" that concurrent requests
    # would overwrite, and leaves no file handle open.
    buffer = io.BytesIO()
    wavfile.write(buffer, rate=model.config.sampling_rate, data=samples)
    return buffer.getvalue()
# Web UI: one text box in, one audio player out, backed by ``greet``.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="audio",
)

# Start serving the interface as soon as the script runs.
iface.launch()