Spaces:

alfulanny
/

huggingface_ai_final

Running

huggingface_ai_final / browser_tools.py

Rename smolagents_agent.py to browser_tools.py

5ccf326 verified 8 days ago

1.71 kB

	"""
	Simple browser tool to fetch and extract textual content from a webpage.

	Provides `visit_page(url)` which returns a short cleaned text excerpt.
	"""
	import requests
	import logging
	from typing import Optional

	logger = logging.getLogger(__name__)

	# Optional dependency: when BeautifulSoup cannot be imported, visit_page
	# falls back to returning a slice of the raw response body.
	# The catch stays broad on purpose so a broken install degrades instead of
	# crashing the import of this module; the reason is logged for diagnosis.
	try:
	    from bs4 import BeautifulSoup
	except Exception as exc:
	    BeautifulSoup = None
	    logger.info("bs4 import failed (%s); visit_page will return raw HTML", exc)

	# Optional dependency: when smolagents cannot be imported, visit_page_tool
	# is simply an alias of the plain visit_page function.
	try:
	    from smolagents import tool
	except Exception as exc:
	    tool = None
	    logger.info("smolagents import failed (%s); visit_page_tool is undecorated", exc)


	def visit_page(url: str, max_chars: int = 2000) -> str:
	    """Fetch a webpage and return a cleaned plain-text excerpt.

	    Args:
	        url: Address of the page to fetch. Surrounding whitespace is ignored.
	        max_chars: Maximum number of characters of text to return.

	    Returns:
	        Up to ``max_chars`` characters of page text. When BeautifulSoup is
	        available, ``<script>``, ``<style>`` and ``<noscript>`` elements are
	        dropped and blank lines removed; otherwise the raw response body is
	        sliced. Returns ``"(no url provided)"`` for an empty or
	        whitespace-only ``url`` and ``"(visit_page error) <reason>"`` when
	        fetching or parsing raises; such failures are logged, not re-raised.
	    """
	    # Normalise before the emptiness check so a blank or padded URL never
	    # reaches the network layer. Non-string input is left as-is and handled
	    # by the error path below.
	    if isinstance(url, str):
	        url = url.strip()
	    if not url:
	        return "(no url provided)"
	    try:
	        resp = requests.get(url, timeout=10)
	        resp.raise_for_status()
	        text = resp.text
	        if BeautifulSoup is None:
	            # fallback: return first chunk of raw HTML (not ideal)
	            return text[:max_chars]
	        soup = BeautifulSoup(text, "html.parser")
	        # remove scripts and styles so only human-readable text remains
	        for node in soup(["script", "style", "noscript"]):
	            node.extract()
	        body = soup.get_text(separator=" \n")
	        stripped = (line.strip() for line in body.splitlines())
	        return " \n".join(line for line in stripped if line)[:max_chars]
	    except Exception as e:
	        # Best-effort tool: report the failure as text instead of raising.
	        logger.warning("visit_page failed for %s: %s", url, e)
	        return f"(visit_page error) {e}"


	# If smolagents is available, expose a decorated tool usable by CodeAgent
	if tool is not None:
	    try:
	        # The smolagents decorator derives the tool description and the
	        # per-argument schema from this docstring, so every parameter needs
	        # an entry under "Args:"; without it decoration raises and the
	        # fallback below is taken.
	        @tool
	        def visit_page_tool(url: str, max_chars: int = 2000) -> str:
	            """Fetch a webpage and return a cleaned plain-text excerpt of its content.

	            Args:
	                url: Address of the webpage to visit.
	                max_chars: Maximum number of characters of page text to return.
	            """
	            return visit_page(url, max_chars)
	    except Exception as exc:
	        # Keep the module importable, but record why the wrapper is missing.
	        logger.warning("could not wrap visit_page as a smolagents tool: %s", exc)
	        visit_page_tool = visit_page
	else:
	    visit_page_tool = visit_page

	__all__ = ["visit_page", "visit_page_tool"]