alfulanny committed on
Commit
5ccf326
·
verified ·
1 Parent(s): 898eb3d

Rename smolagents_agent.py to browser_tools.py

Browse files
Files changed (2) hide show
  1. browser_tools.py +58 -0
  2. smolagents_agent.py +0 -258
browser_tools.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simple browser tool to fetch and extract textual content from a webpage.
3
+
4
+ Provides `visit_page(url)` which returns a short cleaned text excerpt.
5
+ """
6
+ import requests
7
+ import logging
8
+ from typing import Optional
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ try:
13
+ from bs4 import BeautifulSoup
14
+ except Exception:
15
+ BeautifulSoup = None
16
+
17
+ try:
18
+ from smolagents import tool
19
+ except Exception:
20
+ tool = None
21
+
22
+
23
def visit_page(url: str, max_chars: int = 2000) -> str:
    """Fetch a webpage and return a cleaned plain-text excerpt of it.

    When BeautifulSoup is importable the HTML is parsed, ``script``/``style``/
    ``noscript`` elements are dropped and the remaining text is collapsed to
    its non-empty lines. Otherwise the raw response text is returned.

    Args:
        url: Address of the page to fetch. Empty or whitespace-only values
            are rejected without touching the network.
        max_chars: Maximum number of characters to return. Negative values
            are treated as 0.

    Returns:
        The excerpt, ``"(no url provided)"`` when no URL was given, or a
        ``"(visit_page error) ..."`` message when the fetch or parse failed.
        This function never raises; failures are logged as warnings.
    """
    if not url or not url.strip():
        return "(no url provided)"
    url = url.strip()
    # A negative slice bound would cut from the end instead of limiting length.
    limit = max(max_chars, 0)
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        # Without an explicit charset, requests decodes text/* responses as
        # ISO-8859-1, which garbles UTF-8 pages; use the detected encoding.
        content_type = resp.headers.get("Content-Type", "")
        if "charset" not in content_type.lower():
            resp.encoding = resp.apparent_encoding
        text = resp.text
        if BeautifulSoup is None:
            # fallback: return first chunk of raw HTML (not ideal)
            return text[:limit]
        soup = BeautifulSoup(text, "html.parser")
        # remove scripts and styles
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        body = soup.get_text(separator=" \n")
        stripped = (line.strip() for line in body.splitlines())
        lines = [line for line in stripped if line]
        return " \n".join(lines)[:limit]
    except Exception as e:
        # Deliberate catch-all: the caller always gets a string back.
        logger.warning("visit_page failed for %s: %s", url, e)
        return f"(visit_page error) {e}"
44
+
45
+
46
# If smolagents is available, expose a decorated tool usable by CodeAgent.
# Otherwise (or if decoration fails) `visit_page_tool` is the plain function.
if tool is not None:
    try:
        @tool
        def visit_page_tool(url: str, max_chars: int = 2000) -> str:
            """Fetch a webpage and return a cleaned plain-text excerpt of its content.

            Args:
                url: Address of the page to fetch.
                max_chars: Maximum number of characters of text to return.
            """
            return visit_page(url, max_chars)
    except Exception as e:
        # Do not fall back silently: a failed decoration means agents get a
        # plain function instead of a tool object.
        logger.warning("could not wrap visit_page as a smolagents tool: %s", e)
        visit_page_tool = visit_page
else:
    visit_page_tool = visit_page
57
+
58
+ __all__ = ["visit_page", "visit_page_tool"]
smolagents_agent.py DELETED
@@ -1,258 +0,0 @@
1
- import os
2
- import re
3
- from typing import Optional
4
- from dotenv import load_dotenv
5
- from smolagents import InferenceClientModel
6
-
7
-
8
- # Load environment variables
9
- load_dotenv()
10
-
11
- # -------------------------
12
- # Direct Tool-Based Agent (No Code Generation)
13
- # -------------------------
14
-
15
class OptimizedSmolagentsGAIAgent:
    """
    A direct agent that uses tools without code generation.
    This avoids all the syntax and runtime errors from generated code.

    A question is classified by keyword/pattern matching and handed to exactly
    one tool method; every tool method returns a human-readable string and
    reports failures as strings instead of raising.
    """

    # An explicit http(s) URL or a bare "www." host, up to whitespace or ")".
    _URL_PATTERN = re.compile(r'(?:https?://|www\.)[^\s\)]+', re.IGNORECASE)
    # A digit, an arithmetic operator, then a digit or "(" -- e.g. "2 + 2".
    _ARITHMETIC_PATTERN = re.compile(r'\d\s*[\+\-\*\/]\s*[\d\(]')
    # A run made only of digits, arithmetic operators, dots, parens, spaces.
    _MATH_RUN_PATTERN = re.compile(r'[\d\+\-\*\/\.\(\)\s]+')

    def __init__(self):
        # Initialize model
        self.model = self._initialize_model()

        # Available tools
        self.tools = {
            'calculator': self._safe_calculate,
            'web_search': self._safe_web_search,
            'wikipedia': self._safe_wikipedia_search,
            'visit_webpage': self._safe_visit_webpage,
            'image_analysis': self._safe_image_analysis
        }

    def _initialize_model(self):
        """Initialize model with multiple fallbacks.

        Returns:
            The first model that can be constructed, or None when HF_TOKEN is
            not set or every candidate fails.
        """
        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            print("HF_TOKEN not found. Using fallback mode.")
            return None

        # Try multiple models for reliability
        model_options = [
            "allenai/Olmo-3-7B-Instruct",
            "allenai/Olmo-3-7B-Think"
        ]

        # Keep the id and the constructed object in separate names so the
        # log messages always show the model id.
        for model_id in model_options:
            try:
                model = InferenceClientModel(
                    model_id=model_id,
                    token=hf_token,
                    timeout=30
                )
            except Exception as e:
                print(f"Failed to initialize {model_id}: {e}")
                continue
            print(f"Using model: {model_id}")
            return model

        return None

    def _classify_question(self, question: str) -> str:
        """Classify question type for appropriate tool selection.

        Args:
            question: The raw question text.

        Returns:
            One of 'webpage', 'math', 'search', 'wikipedia' or 'image'.
        """
        q_lower = question.lower()

        # Webpage questions are checked first: a URL contains "/" and often
        # "-" or "=", which must not be mistaken for arithmetic.
        if self._URL_PATTERN.search(question):
            return 'webpage'

        # Mathematical questions: an explicit keyword, or an operator that
        # actually sits between operands (a bare hyphen is not math).
        if any(word in q_lower for word in ['calculate', 'compute', 'solve']):
            return 'math'
        if self._ARITHMETIC_PATTERN.search(question):
            return 'math'

        # Web search questions
        if any(word in q_lower for word in ['search', 'find', 'recent', 'current', 'today']):
            return 'search'

        # Factual/Wikipedia questions
        if any(word in q_lower for word in ['who is', 'what is', 'when', 'where', 'history', 'biography']):
            return 'wikipedia'

        # Image questions
        if any(word in q_lower for word in ['image', 'picture', 'photo', 'visual', 'chess']):
            return 'image'

        # Default to search for general questions
        return 'search'

    def _safe_calculate(self, question: str) -> str:
        """Evaluate the arithmetic expression embedded in a question.

        Every run of arithmetic characters in the question is considered, not
        just the first one (which is usually a lone space between words).
        Runs containing an operator are tried before bare numbers.

        Args:
            question: Text that may contain an expression such as "2 + 2".

        Returns:
            The result message, a "no expression" message, or an "unable to
            calculate" message carrying the last evaluation error.
        """
        candidates = []
        for match in self._MATH_RUN_PATTERN.finditer(question):
            # Drop a sentence-ending period so "2 + 2." is not read as "2.".
            expr = match.group().strip().rstrip('.').strip()
            if re.search(r'\d', expr):
                candidates.append(expr)

        if not candidates:
            return "No clear mathematical expression found in the question."

        # Stable sort: expressions with an operator come before bare numbers.
        candidates.sort(key=lambda e: re.search(r'[\+\-\*\/]', e) is None)

        last_error = None
        for expr in candidates:
            try:
                # SECURITY NOTE: eval() on user text. The character whitelist
                # above blocks names and calls, but "**" chains can still be
                # very expensive to evaluate. Builtins are disabled as an
                # extra guard; consider an AST-based evaluator instead.
                result = eval(expr, {"__builtins__": {}}, {})
            except Exception as e:
                last_error = e
                continue
            return f"The calculation result is: {result}"
        return f"Unable to calculate: {str(last_error)}"

    def _safe_web_search(self, question: str) -> str:
        """Run a DuckDuckGo search for the question and return a short result.

        Args:
            question: The raw question text.

        Returns:
            The (truncated) search result or an error message.
        """
        try:
            from smolagents import DuckDuckGoSearchTool
            search_tool = DuckDuckGoSearchTool()

            # Clean the query for search. \w keeps accented and non-Latin
            # letters, which an ASCII-only class would delete.
            query = re.sub(r'[^\w\s]', '', question)[:100]

            result = search_tool.forward(query)
            if isinstance(result, str):
                if len(result) > 300:
                    result = result[:300] + "..."
                return f"Search results for '{query}': {result}"
            return "Search completed successfully."
        except Exception as e:
            return f"Web search error: {str(e)}"

    def _safe_wikipedia_search(self, question: str) -> str:
        """Look up a short Wikipedia summary for the subject of a question.

        Args:
            question: The raw question text.

        Returns:
            The (truncated) summary, a message when no search terms remain,
            or an error message.
        """
        try:
            # Extract search terms, keeping the original capitalisation.
            query = question.strip()
            for prefix in ('who is', 'what is'):
                if prefix in query.lower():
                    query = re.sub(re.escape(prefix), '', query, flags=re.IGNORECASE)
                    break
            # Drop sentence punctuation such as the trailing "?".
            query = query.strip().rstrip('?!. ')

            if not query:
                return "Unable to extract search terms from question."

            # Imported only once there is something to look up.
            import wikipedia

            summary = wikipedia.summary(query, sentences=2)
            if len(summary) > 200:
                summary = summary[:200] + "..."
            return f"Information about '{query}': {summary}"
        except Exception as e:
            return f"Wikipedia search error: {str(e)}"

    def _safe_visit_webpage(self, question: str) -> str:
        """Visit the first URL mentioned in a question and return its content.

        Args:
            question: Text expected to contain an http(s) or "www." address.

        Returns:
            The (truncated) page content, a message when no URL is present,
            or an error message.
        """
        try:
            # Extract URL from question
            match = self._URL_PATTERN.search(question)
            if match is None:
                return "No URL found in the question."

            # Sentence punctuation right after a link is not part of it.
            url = match.group().rstrip('.,;:!?\'"')
            if url.lower().startswith('www.'):
                url = 'https://' + url

            from smolagents import VisitWebpageTool
            visit_tool = VisitWebpageTool()

            result = visit_tool.forward(url)
            if isinstance(result, str):
                if len(result) > 200:
                    result = result[:200] + "..."
                return f"Content from {url}: {result}"
            return f"Successfully visited {url}"
        except Exception as e:
            return f"Webpage visit error: {str(e)}"

    def _safe_image_analysis(self, question: str) -> str:
        """Return a canned reply for image questions; no image is inspected.

        Args:
            question: The raw question text.

        Returns:
            A fixed message chosen by keywords found in the question.
        """
        try:
            q_lower = question.lower()

            # For chess questions
            if 'chess' in q_lower:
                return "Chess position analysis: This appears to be a chess-related question. Black's turn means black pieces need to make the next move. Without the actual board image, I cannot provide the specific move, but typical strategic considerations include developing pieces, controlling center, or_castling."

            # For general image questions
            if any(word in q_lower for word in ['image', 'picture', 'photo']):
                return "Image analysis: The question references image content that I cannot directly access. For visual analysis tasks, please describe what you can see in the image or provide specific details about the visual elements."

            return "Image processing: Unable to analyze image content directly. Please provide more details about what visual information you need."
        except Exception as e:
            return f"Image analysis error: {str(e)}"

    def _generate_direct_answer(self, question: str, question_type: str) -> str:
        """Generate direct answers without code generation.

        Args:
            question: The raw question text.
            question_type: A label produced by `_classify_question`.

        Returns:
            The string returned by the tool that handles `question_type`;
            unknown types fall back to web search.
        """
        handlers = {
            'math': self._safe_calculate,
            'search': self._safe_web_search,
            'wikipedia': self._safe_wikipedia_search,
            'webpage': self._safe_visit_webpage,
            'image': self._safe_image_analysis,
        }
        # Default fallback
        handler = handlers.get(question_type, self._safe_web_search)
        return handler(question)

    def process_question(self, question: str) -> str:
        """Process question using direct tool approach (no code generation).

        Args:
            question: The raw question text.

        Returns:
            The tool's answer, or a message describing why none was produced.
        """
        # Handle no model case
        # NOTE(review): none of the tools use self.model, so this gate only
        # blocks answers when HF_TOKEN is missing -- confirm it is intended.
        if not self.model:
            return "No language model available. Please set HF_TOKEN in environment variables."

        try:
            # Classify question type
            question_type = self._classify_question(question)

            # Generate direct answer using appropriate tool
            return self._generate_direct_answer(question, question_type)

        except Exception as e:
            error_msg = str(e)

            # Specific error handling
            if "timeout" in error_msg.lower():
                return "Request timed out. The question may be too complex. Please try a simpler question."

            if "500" in error_msg:
                return "Server error occurred. This may be a temporary issue. Please try again later."

            return f"Unable to process question: {error_msg[:200]}"
236
-
237
- # -------------------------
238
- # Test the direct tool agent
239
- # -------------------------
240
-
241
if __name__ == "__main__":
    # Manual smoke test: run a few sample questions through the agent.
    agent = OptimizedSmolagentsGAIAgent()

    test_questions = [
        "What is the capital of France?",
        "Calculate 15 + 27 * 3",
        "Who is Mercedes Sosa?",
        "Review the chess position in the image",
        "What does this webpage say: https://example.com"
    ]

    print("=== DIRECT TOOL AGENT TEST ===\n")

    for question in test_questions:
        print(f"Q: {question}")
        answer = agent.process_question(question)
        # Add the ellipsis only when the answer was actually cut off.
        preview = answer if len(answer) <= 200 else answer[:200] + "..."
        print(f"A: {preview}")
        print("-" * 50)