File size: 2,318 Bytes
15a3001
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34a788d
 
 
 
15a3001
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from smolagents import tool
import tempfile
import os

@tool
def youtube_to_text(url: str) -> str:
    """
    Transcribe a YouTube video.
    First tries to retrieve official captions.
    Falls back to Whisper transcription if captions are unavailable.

    Args:
        url: Full YouTube video URL (watch, youtu.be, shorts, embed or live links)

    Returns:
        Transcribed text, or a message starting with "Error transcribing YouTube video:" when both strategies fail
    """
    # ---- Step 1: Try official YouTube transcripts ----
    try:
        from youtube_transcript_api import YouTubeTranscriptApi
        from urllib.parse import urlparse, parse_qs

        parsed = urlparse(url)
        video_id = parse_qs(parsed.query).get("v", [None])[0]

        if not video_id:
            # No "?v=" parameter: the id lives in the path for short/embed style links.
            host = (parsed.hostname or "").lower()
            segments = [part for part in parsed.path.split("/") if part]
            if host in ("youtu.be", "www.youtu.be") and segments:
                video_id = segments[0]
            elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
                video_id = segments[1]

        if video_id:
            if hasattr(YouTubeTranscriptApi, "get_transcript"):
                # Older youtube-transcript-api: static call returning a list of dicts.
                chunks = YouTubeTranscriptApi.get_transcript(video_id)
                pieces = [chunk["text"] for chunk in chunks]
            else:
                # Newer youtube-transcript-api: instance fetch() yielding snippet objects.
                pieces = [snippet.text for snippet in YouTubeTranscriptApi().fetch(video_id)]
            return " ".join(pieces)
    except Exception:
        # Deliberate best-effort: any failure here (library missing, captions
        # disabled, network error) simply means we transcribe the audio instead.
        pass

    # ---- Step 2: Fallback to Whisper transcription ----
    try:
        import whisper
        from pytubefix import YouTube

        audio_stream = YouTube(url).streams.get_audio_only()
        if audio_stream is None:
            return "Error transcribing YouTube video: no audio-only stream available"

        # A private temporary directory avoids filename clashes with other
        # downloads and is removed automatically, even when transcription fails.
        with tempfile.TemporaryDirectory() as temp_dir:
            audio_path = audio_stream.download(output_path=temp_dir)

            model = whisper.load_model("base")
            result = model.transcribe(audio_path)

        return result["text"]

    except Exception as e:
        return f"Error transcribing YouTube video: {str(e)}"

@tool
def transcribe_audio(file_path: str) -> str:
    """
    Turn a local audio recording into text with the Whisper "base" model.
    Whisper is imported lazily, so a missing installation is reported as a
    message instead of an exception.

    Args:
        file_path: The local path to the audio file to be transcribed.

    Returns:
        The transcribed text as a string, or a human-readable error message.
    """
    missing_whisper_msg = (
        "Whisper is not installed. "
        "Install it with `pip install openai-whisper` and ensure ffmpeg is available."
    )

    try:
        import whisper

        # Load, transcribe and pick the text in one go; every step stays inside
        # the try so any failure is converted to a returned message.
        return whisper.load_model("base").transcribe(file_path)["text"]
    except ImportError:
        return missing_whisper_msg
    except Exception as exc:
        return f"Error transcribing audio file: {exc!s}"