Spaces:
Running
Running
| """ | |
| Simple browser tool to fetch and extract textual content from a webpage. | |
| Provides `visit_page(url)` which returns a short cleaned text excerpt. | |
| """ | |
| import requests | |
| import logging | |
| from typing import Optional | |
| logger = logging.getLogger(__name__) | |
| try: | |
| from bs4 import BeautifulSoup | |
| except Exception: | |
| BeautifulSoup = None | |
| try: | |
| from smolagents import tool | |
| except Exception: | |
| tool = None | |
def visit_page(url: str, max_chars: int = 2000) -> str:
    """Fetch *url* and return at most *max_chars* characters of its content.

    The page is requested with a 10 second timeout and HTTP error statuses
    are treated as failures. When BeautifulSoup is importable, ``script``,
    ``style`` and ``noscript`` elements are removed, the remaining text is
    split into lines, blank lines are dropped, and the stripped lines are
    joined with ``" \\n"`` before truncation. Without BeautifulSoup the
    leading slice of the raw response text is returned instead.

    Args:
        url: Address of the page to fetch. A falsy value short-circuits
            with a placeholder message and no request is made.
        max_chars: Upper bound used to slice the returned text.

    Returns:
        The text excerpt, ``"(no url provided)"`` for a falsy *url*, or a
        ``"(visit_page error) ..."`` message if anything raised while
        fetching or parsing (the failure is also logged as a warning).
    """
    if not url:
        return "(no url provided)"

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        html = response.text

        if BeautifulSoup is None:
            # No HTML parser available: hand back the raw markup prefix.
            return html[:max_chars]

        soup = BeautifulSoup(html, "html.parser")
        # Drop elements whose contents are not human-readable page text.
        for node in soup(["script", "style", "noscript"]):
            node.extract()

        page_text = soup.get_text(separator=" \n")
        stripped_lines = (raw.strip() for raw in page_text.splitlines())
        cleaned = " \n".join(line for line in stripped_lines if line)
        return cleaned[:max_chars]
    except Exception as exc:
        # Best-effort tool: report the failure as text rather than raising.
        logger.warning("visit_page failed for %s: %s", url, exc)
        return f"(visit_page error) {exc}"
# If smolagents is available, expose a decorated tool usable by CodeAgent.
# Otherwise, or if decoration fails, fall back to the plain function so that
# `visit_page_tool` is always defined and callable.
if tool is not None:
    try:
        @tool
        def visit_page_tool(url: str, max_chars: int = 2000) -> str:
            """Fetch a webpage and return a short cleaned text excerpt.

            Args:
                url: Address of the page to fetch.
                max_chars: Maximum number of characters to return.
            """
            return visit_page(url, max_chars)
    except Exception as exc:
        # Applying the decorator may raise (e.g. if smolagents rejects the
        # signature or docstring); keep the module importable by exposing
        # the undecorated function instead of failing at import time.
        logger.warning(
            "could not wrap visit_page as a smolagents tool: %s", exc
        )
        visit_page_tool = visit_page
else:
    visit_page_tool = visit_page

# Public API of this module.
__all__ = ["visit_page", "visit_page_tool"]