File size: 4,731 Bytes
f86436f
 
 
 
 
 
 
 
60a350c
 
 
 
 
 
 
 
f86436f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60a350c
 
 
 
 
 
 
 
 
 
 
 
 
f86436f
 
 
 
 
 
 
 
 
60a350c
 
 
 
 
f86436f
60a350c
 
 
 
 
f86436f
 
 
 
 
 
60a350c
 
 
 
 
f86436f
 
 
 
60a350c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""
Lightweight web tools for the agent: DuckDuckGo instant answers and Wikipedia search.

These do not require API keys and are suitable for Level-1 GAIA tasks that need quick factual
retrieval. They are intentionally simple and return concise text.
"""
import logging
from typing import Optional
from urllib.parse import quote

import requests

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# smolagents is an optional dependency: if importing it fails for any reason,
# `tool` is set to None and the `*_tool` names defined later in this module
# are bound to the plain functions instead of wrapped tools.
try:
    from smolagents import tool
except Exception:
    tool = None


def duckduckgo_search(query: str, region: str = "wt-wt") -> str:
    """Fetch a brief summary for a query from the DuckDuckGo Instant Answer API.

    Args:
        query: Free-text search query. Empty or whitespace-only queries are
            rejected without making a network request.
        region: Accepted for interface compatibility; it is not currently
            sent to the API.

    Returns:
        The abstract text and/or the first related-topic text (joined by a
        space and a newline) when available; otherwise the result heading,
        or "(no summary found)". Any failure (network, HTTP status, JSON
        decoding, unexpected payload shape) is reported as a
        "(duckduckgo error) ..." string instead of being raised, so callers
        always receive text.
    """
    if not query or (isinstance(query, str) and not query.strip()):
        return "(no query provided)"
    try:
        params = {"q": query, "format": "json", "no_html": 1, "skip_disambig": 1}
        resp = requests.get("https://api.duckduckgo.com/", params=params, timeout=10)
        resp.raise_for_status()
        data = resp.json()
        parts = []
        if data.get("AbstractText"):
            parts.append(data.get("AbstractText"))
        # RelatedTopics entries are either plain topics carrying "Text" or
        # named groups carrying a nested "Topics" list; take the first text
        # found in either form.
        related_text = _first_related_text(data.get("RelatedTopics") or [])
        if related_text:
            parts.append(related_text)
        if parts:
            return " \n".join(parts)
        # fallback to heading
        return data.get("Heading") or "(no summary found)"
    except Exception as e:
        # Best-effort tool: report the failure as text rather than raising.
        return f"(duckduckgo error) {e}"


def _first_related_text(topics) -> str:
    """Return the first non-empty "Text" in a RelatedTopics list, or ""."""
    for entry in topics:
        if not isinstance(entry, dict):
            continue
        text = entry.get("Text")
        if text:
            return text
        # Grouped entries keep their topics one level down under "Topics".
        nested = entry.get("Topics")
        if isinstance(nested, list):
            text = _first_related_text(nested)
            if text:
                return text
    return ""


# Export a smolagents-wrapped tool if available so the agent's interpreter
# can call `duckduckgo_search` directly when executing parsed code.
# The wrapped function carries a docstring with an `Args:` section because
# smolagents' `@tool` builds the tool description and input schema from it;
# without one the decorator fails and the plain-function fallback is used.
if tool is not None:
    try:
        @tool
        def duckduckgo_search_tool(query: str, region: str = "wt-wt") -> str:
            """Fetch a brief summary for a query from the DuckDuckGo Instant Answer API.

            Args:
                query: The search query to look up.
                region: Region code passed through to duckduckgo_search; defaults to "wt-wt".
            """
            return duckduckgo_search(query, region)
    except Exception as exc:
        # Keep the module importable, but make the degraded mode visible.
        logger.warning(
            "could not wrap duckduckgo_search as a smolagents tool: %s; using the plain function",
            exc,
        )
        duckduckgo_search_tool = duckduckgo_search
else:
    duckduckgo_search_tool = duckduckgo_search


def wikipedia_search(query: str) -> str:
    """Search Wikipedia for a query and return the summary of the top hit.

    The MediaWiki search API is queried for the best-matching page title,
    then the REST summary endpoint is fetched for that title.

    Args:
        query: Free-text search query. Empty or whitespace-only queries are
            rejected without making a network request.

    Returns:
        The page extract, or its short description, or "(no summary)".
        "(no wiki result)" when the search returns no usable title. When
        either Wikipedia request returns a non-200 status or anything raises,
        the result of duckduckgo_search(query) is returned instead.
    """
    if not query or (isinstance(query, str) and not query.strip()):
        return "(no query provided)"
    try:
        # Use browser-like headers to reduce chance of 403/blocks
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0 Safari/537.36",
            "Accept": "application/json, text/javascript, */*; q=0.01",
        }
        search_params = {"action": "query", "list": "search", "srsearch": query, "format": "json", "srlimit": 1}
        r = requests.get("https://en.wikipedia.org/w/api.php", params=search_params, timeout=10, headers=headers)
        # If API returns 403 or other non-200, fallback to DuckDuckGo
        if r.status_code != 200:
            logger.warning("wikipedia_search API returned status %s, falling back to DuckDuckGo", r.status_code)
            return duckduckgo_search(query)
        sr = r.json().get("query", {}).get("search", [])
        if not sr:
            return "(no wiki result)"
        title = sr[0].get("title")
        if not title:
            return "(no wiki result)"
        # The summary endpoint takes the title as a single path segment, so
        # every reserved character (notably "/", "?" and "#") must be
        # percent-encoded; spaces are written as underscores as in page URLs.
        encoded_title = quote(title.replace(" ", "_"), safe="")
        summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{encoded_title}"
        summary_resp = requests.get(summary_url, timeout=10, headers=headers)
        if summary_resp.status_code != 200:
            logger.warning("wikipedia summary endpoint returned status %s for %s, falling back to DuckDuckGo", summary_resp.status_code, title)
            return duckduckgo_search(query)
        summary = summary_resp.json()
        return summary.get("extract") or summary.get("description") or "(no summary)"
    except Exception as e:
        logger.warning("wikipedia_search failed: %s; falling back to DuckDuckGo", e)
        # Defensive: the fallback is expected to report its own errors as
        # text, but never let it turn this best-effort tool into a crash.
        try:
            return duckduckgo_search(query)
        except Exception:
            return f"(wikipedia error) {e}"


# Export a smolagents-wrapped tool if available so the agent's interpreter
# can call `wikipedia_search` directly when executing parsed code.
# The wrapped function carries a docstring with an `Args:` section because
# smolagents' `@tool` builds the tool description and input schema from it;
# without one the decorator fails and the plain-function fallback is used.
if tool is not None:
    try:
        @tool
        def wikipedia_search_tool(query: str) -> str:
            """Search Wikipedia for a query and return the summary of the top hit.

            Args:
                query: The search query to look up on Wikipedia.
            """
            return wikipedia_search(query)
    except Exception as exc:
        # Keep the module importable, but make the degraded mode visible.
        logger.warning(
            "could not wrap wikipedia_search as a smolagents tool: %s; using the plain function",
            exc,
        )
        wikipedia_search_tool = wikipedia_search
else:
    wikipedia_search_tool = wikipedia_search

# Public API: the plain search functions plus their `*_tool` counterparts,
# which are smolagents-wrapped when wrapping succeeds and otherwise alias
# the plain functions.
__all__ = [
    "duckduckgo_search",
    "duckduckgo_search_tool",
    "wikipedia_search",
    "wikipedia_search_tool",
]