import os
import tempfile

import gradio as gr

from pipeline.asr_diarization import transcribe_and_align
from pipeline.mixing import mix_speech_with_background
from pipeline.mux import mux_audio_video
from pipeline.translation import translate_text
from pipeline.utils import extract_audio
from pipeline.voice_synth import synthesize_speech
|
|
def full_pipeline(video_path: str, target_language: str = "hi") -> str:
    """Dub a video clip: extract audio, transcribe, translate, synthesize, remix, mux.

    Parameters
    ----------
    video_path : str
        Path to the input video file.
    target_language : str
        Target language code for the dub. NOTE(review): currently unused —
        ``translate_text`` is called without it, so the translation target is
        whatever that helper defaults to. TODO: forward this parameter once
        ``translate_text`` accepts a language argument.

    Returns
    -------
    str
        Path to the output video with the dubbed audio track muxed in.
    """
    # Use a unique per-call temp file rather than a hard-coded "audio.wav":
    # with the Gradio UI, two concurrent requests would otherwise clobber
    # each other's extracted audio.
    fd, tmp_wav = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # only the path is needed; extract_audio writes the file
    audio_path = extract_audio(video_path, out_audio=tmp_wav)

    # Segment-aligned transcript of the original audio (ASR on CPU).
    _, aligned = transcribe_and_align(audio_path, device="cpu")

    # Translate each transcript segment and synthesize speech for it,
    # preserving the segment's original start offset (in ms) for mixing.
    speech_segments = []
    for seg in aligned["segments"]:
        translated = translate_text(seg["text"])
        synth_path = synthesize_speech(translated, speaker_id="speaker1")
        speech_segments.append(
            {"file": synth_path, "start_ms": int(seg["start"] * 1000)}
        )

    # Overlay the synthesized speech on the original background audio,
    # then mux the mixed track back onto the source video.
    final_audio = mix_speech_with_background(audio_path, speech_segments)
    return mux_audio_video(video_path, final_audio)
|
|
# Gradio UI: upload a short clip, run the dubbing pipeline, preview the result.
with gr.Blocks() as demo:
    gr.Markdown("## AI Dubbing Prototype (translation + placeholder voice)")
    video_in = gr.Video(label="Upload short video clip")
    lang = gr.Textbox(value="hi", label="Target language (currently fixed)")
    out_video = gr.Video(label="Dubbed output")
    btn = gr.Button("Run Dubbing Pipeline")
    btn.click(fn=full_pipeline, inputs=[video_in, lang], outputs=[out_video])

if __name__ == "__main__":
    # Guard the server start so importing this module (e.g. from tests or
    # another entry point) does not launch the Gradio app as a side effect.
    demo.launch()
|
|