Spaces:
Build error
Build error
Upload folder using huggingface_hub
Browse files- Dockerfile +10 -5
- __init__.py +12 -0
- client.py +6 -0
- coding_env_client.py +60 -0
- models.py +40 -0
- openenv.yaml +8 -0
- server/Dockerfile +29 -0
- server/README.md +51 -0
- server/__init__.py +2 -0
- server/app.py +39 -0
- server/python_codeact_env.py +115 -0
- server/requirements.txt +1 -0
- server/transforms.py +94 -0
Dockerfile
CHANGED
|
@@ -10,16 +10,20 @@
|
|
| 10 |
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 11 |
FROM ghcr.io/meta-pytorch/openenv-base:latest
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
# Install dependencies
|
| 14 |
-
|
| 15 |
RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
|
| 16 |
|
| 17 |
# Copy only what's needed for this environment
|
| 18 |
-
|
| 19 |
-
|
| 20 |
|
| 21 |
# Copy README for web interface documentation
|
| 22 |
-
|
| 23 |
|
| 24 |
# Health check
|
| 25 |
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
|
@@ -27,5 +31,6 @@ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
|
| 27 |
|
| 28 |
# Run the FastAPI server
|
| 29 |
ENV ENABLE_WEB_INTERFACE=true
|
|
|
|
| 30 |
|
| 31 |
-
CMD ["uvicorn", "
|
|
|
|
| 10 |
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 11 |
FROM ghcr.io/meta-pytorch/openenv-base:latest
|
| 12 |
|
| 13 |
+
WORKDIR /app
|
| 14 |
+
|
| 15 |
+
COPY . /app
|
| 16 |
+
|
| 17 |
# Install dependencies
|
| 18 |
+
|
| 19 |
RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
|
| 20 |
|
| 21 |
# Copy only what's needed for this environment
|
| 22 |
+
|
| 23 |
+
|
| 24 |
|
| 25 |
# Copy README for web interface documentation
|
| 26 |
+
|
| 27 |
|
| 28 |
# Health check
|
| 29 |
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
|
|
|
| 31 |
|
| 32 |
# Run the FastAPI server
|
| 33 |
ENV ENABLE_WEB_INTERFACE=true
|
| 34 |
+
ENV PYTHONPATH=/app
|
| 35 |
|
| 36 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Coding Environment - A Python code execution environment."""
|
| 8 |
+
|
| 9 |
+
from .coding_env_client import CodingEnv
|
| 10 |
+
from .models import CodeAction, CodeObservation, CodeState
|
| 11 |
+
|
| 12 |
+
__all__ = ["CodeAction", "CodeObservation", "CodeState", "CodingEnv"]
|
client.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Client stub for coding_env environment."""
|
| 2 |
+
|
| 3 |
+
class coding_envEnv:
    """Placeholder client class for the coding_env environment.

    Defines no attributes or methods of its own.
    """
|
| 5 |
+
|
| 6 |
+
|
coding_env_client.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
CodingEnv
|
| 3 |
+
---------
|
| 4 |
+
Client-side wrapper for the Coding environment server.
|
| 5 |
+
Talks HTTP to a single base_url exposing: /reset, /step, and /state.
|
| 6 |
+
|
| 7 |
+
- Users instantiate CodingEnv with a base_url provided by the higher-level
|
| 8 |
+
vector/orchestration layer.
|
| 9 |
+
- Environment authors ship the Docker image that serves the HTTP API.
|
| 10 |
+
|
| 11 |
+
(Seeds, episode IDs, request IDs, capabilities can be added later in the payloads.)
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
from typing import Optional, TYPE_CHECKING
|
| 17 |
+
|
| 18 |
+
from core.client_types import StepResult
|
| 19 |
+
|
| 20 |
+
from core.http_env_client import HTTPEnvClient
|
| 21 |
+
|
| 22 |
+
from .models import CodeAction, CodeObservation, CodeState
|
| 23 |
+
|
| 24 |
+
if TYPE_CHECKING:
|
| 25 |
+
from core.containers.runtime import ContainerProvider
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class CodingEnv(HTTPEnvClient[CodeAction, CodeObservation]):
    """HTTP client for the Coding environment server.

    Supplies the HTTPEnvClient hooks that translate between the typed
    ``CodeAction`` / ``CodeObservation`` / ``CodeState`` objects and the plain
    JSON dictionaries exchanged with the server.
    """

    def _step_payload(self, action: CodeAction) -> dict:
        """Return the dict the server's /step endpoint expects under "action"."""
        return {"code": action.code}

    def _parse_result(self, payload: dict) -> StepResult[CodeObservation]:
        """
        Turn a server response into a StepResult.

        Args:
            payload: Response dict, expected to look like
                { "observation": {...}, "reward": <float|null>, "done": <bool>, "info": {...} }

        Returns:
            StepResult carrying the rebuilt CodeObservation, the reward
            (None when absent) and the done flag (False when absent).

        Raises:
            KeyError: If the payload has no "observation" entry.
        """
        observation = CodeObservation(**payload["observation"])
        reward = payload.get("reward")
        finished = bool(payload.get("done", False))
        return StepResult(observation=observation, reward=reward, done=finished)

    def _parse_state(self, payload: dict) -> CodeState:
        """
        Turn a /state response into a CodeState.

        Args:
            payload: JSON response from the /state endpoint

        Returns:
            CodeState populated with episode_id (None when absent), step_count
            (0 when absent) and last_exit_code (0 when absent)
        """
        episode_id = payload.get("episode_id")
        step_count = payload.get("step_count", 0)
        last_exit_code = payload.get("last_exit_code", 0)
        return CodeState(
            episode_id=episode_id,
            step_count=step_count,
            last_exit_code=last_exit_code,
        )
|
models.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
envs/coding_env/models.py
|
| 3 |
+
--------------------------------
|
| 4 |
+
Action/Observation types for the Coding environment.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
from typing import Any, Optional
|
| 11 |
+
|
| 12 |
+
from core.env_server import Action, Observation, State
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@dataclass
class CodeAction(Action):
    """
    One request to execute a piece of code in the environment.
    """

    # Source text to be executed.
    code: str
    # Possible future fields: 'lint': bool, 'timeout_s': float, etc.
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
@dataclass
class CodeObservation(Observation):
    """
    Outcome of one code execution in the environment.
    """

    # Captured standard output; empty by default.
    stdout: str = ""
    # Captured standard error; empty by default.
    stderr: str = ""
    # Exit code of the execution; defaults to 0.
    exit_code: int = 0
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@dataclass
class CodeState(State):
    """Environment state for the CodeAct environment, extended with the last exit code."""

    # Exit code of the most recent execution; defaults to 0.
    last_exit_code: int = 0
|
openenv.yaml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: coding_env
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: envs.coding_env.server.app:app
|
| 6 |
+
port: 8000
|
| 7 |
+
|
| 8 |
+
|
server/Dockerfile
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Use the standard openenv base image
|
| 8 |
+
# Built from: docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
|
| 9 |
+
# In GitHub Actions, this is overridden to use the GHCR base image
|
| 10 |
+
ARG BASE_IMAGE=openenv-base:latest
|
| 11 |
+
FROM ${BASE_IMAGE}
|
| 12 |
+
|
| 13 |
+
# Install dependencies
|
| 14 |
+
COPY src/envs/coding_env/server/requirements.txt /tmp/requirements.txt
|
| 15 |
+
RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
|
| 16 |
+
|
| 17 |
+
# Copy only what's needed for this environment
|
| 18 |
+
COPY src/core/ /app/src/core/
|
| 19 |
+
COPY src/envs/coding_env/ /app/src/envs/coding_env/
|
| 20 |
+
|
| 21 |
+
# Copy README for web interface documentation
|
| 22 |
+
COPY src/envs/coding_env/README.md /app/README.md
|
| 23 |
+
|
| 24 |
+
# Health check
|
| 25 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 26 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 27 |
+
|
| 28 |
+
# Run the FastAPI server
|
| 29 |
+
CMD ["uvicorn", "envs.coding_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
server/README.md
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CodingEnv HTTP Server
|
| 2 |
+
|
| 3 |
+
This directory contains the HTTP server implementation for the CodingEnvironment.
|
| 4 |
+
|
| 5 |
+
## Running Locally
|
| 6 |
+
|
| 7 |
+
### Prerequisites
|
| 8 |
+
```bash
|
| 9 |
+
pip install fastapi uvicorn
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
### Start the server
|
| 13 |
+
```bash
|
| 14 |
+
# From the project root
|
| 15 |
+
cd src
|
| 16 |
+
uvicorn envs.coding_env.server.app:app --reload --host 0.0.0.0 --port 8000
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
The server will be available at `http://localhost:8000`
|
| 20 |
+
|
| 21 |
+
### API Endpoints
|
| 22 |
+
|
| 23 |
+
- `POST /reset` - Reset the environment
|
| 24 |
+
- `POST /step` - Execute a code action
|
| 25 |
+
- `GET /state` - Get current environment state
|
| 26 |
+
- `GET /health` - Health check
|
| 27 |
+
|
| 28 |
+
### Test with curl
|
| 29 |
+
|
| 30 |
+
```bash
|
| 31 |
+
# Health check
|
| 32 |
+
curl http://localhost:8000/health
|
| 33 |
+
|
| 34 |
+
# Reset
|
| 35 |
+
curl -X POST http://localhost:8000/reset \
|
| 36 |
+
-H "Content-Type: application/json" \
|
| 37 |
+
-d '{}'
|
| 38 |
+
|
| 39 |
+
# Execute code
|
| 40 |
+
curl -X POST http://localhost:8000/step \
|
| 41 |
+
-H "Content-Type: application/json" \
|
| 42 |
+
-d '{
|
| 43 |
+
"action": {
|
| 44 |
+
"code": "print(\"Hello from HTTP!\")"
|
| 45 |
+
},
|
| 46 |
+
"timeout_s": 15
|
| 47 |
+
}'
|
| 48 |
+
|
| 49 |
+
# Get state
|
| 50 |
+
curl http://localhost:8000/state
|
| 51 |
+
```
|
server/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
server/app.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
FastAPI application for the Coding Environment.
|
| 9 |
+
|
| 10 |
+
This module creates an HTTP server that exposes the PythonCodeActEnv
|
| 11 |
+
over HTTP endpoints, making it compatible with HTTPEnvClient.
|
| 12 |
+
|
| 13 |
+
Usage:
|
| 14 |
+
# Development (with auto-reload):
|
| 15 |
+
uvicorn envs.coding_env.server.app:app --reload --host 0.0.0.0 --port 8000
|
| 16 |
+
|
| 17 |
+
# Production:
|
| 18 |
+
uvicorn envs.coding_env.server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 19 |
+
|
| 20 |
+
# Or run directly:
|
| 21 |
+
python -m envs.coding_env.server.app
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
from core.env_server import create_app
|
| 25 |
+
|
| 26 |
+
from ..models import CodeAction, CodeObservation
|
| 27 |
+
from .python_codeact_env import PythonCodeActEnv
|
| 28 |
+
|
| 29 |
+
# Module-level environment instance served by the app below
env = PythonCodeActEnv()

# Build the application object that uvicorn serves (see the usage notes in the
# module docstring); the action/observation classes are handed to create_app
app = create_app(env, CodeAction, CodeObservation, env_name="coding_env")


if __name__ == "__main__":
    # Direct execution: serve on all interfaces, port 8000
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
|
server/python_codeact_env.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Python Code Action Environment.
|
| 9 |
+
|
| 10 |
+
This module provides a server-side environment implementation for executing
|
| 11 |
+
Python code actions using PyExecutor.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import uuid
|
| 15 |
+
|
| 16 |
+
from core.env_server import Action, Environment, Observation
|
| 17 |
+
from core.tools import PyExecutor
|
| 18 |
+
|
| 19 |
+
from ..models import CodeAction, CodeObservation, CodeState
|
| 20 |
+
from .transforms import create_safe_coding_transform
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class PythonCodeActEnv(Environment):
    """
    Python Code Action Environment for executing code and tracking state.

    This environment executes Python code submitted as CodeAction during step,
    maintains the last exit code in its state, and returns results wrapped
    in CodeObservation.

    The constructor takes no arguments: the observation transform is always
    the one built by ``create_safe_coding_transform()`` and the executor is a
    default-constructed ``PyExecutor``.

    Example:
        >>> env = PythonCodeActEnv()
        >>> obs = env.reset()
        >>> action = CodeAction(code="print('Hello, World!')")
        >>> obs = env.step(action)
        >>> print(obs.stdout)  # "Hello, World!\\n"
        >>> print(obs.exit_code)  # 0
        >>> print(env.state.last_exit_code)  # 0
    """

    def __init__(
        self,
    ):
        self.transform = create_safe_coding_transform()
        self._executor = PyExecutor()
        self._state = CodeState()

    def reset(self) -> Observation:
        """
        Reset environment and start fresh execution session.

        Returns:
            Initial observation with empty stdout/stderr and exit_code=0,
            passed through ``_apply_transform``
        """
        # New episode: fresh id, zeroed step counter and exit code
        self._state = CodeState(episode_id=str(uuid.uuid4()), step_count=0)
        self._state.last_exit_code = 0

        # Replace the executor to clear any previously defined variables/functions
        self._executor = PyExecutor()

        # Rebuild the transform to clear any accumulated state
        self.transform = create_safe_coding_transform()

        # No code has run yet, so the initial observation is empty and carries
        # no "last_code" metadata
        observation = CodeObservation(
            stdout="",
            stderr="",
            exit_code=0,
        )

        return self._apply_transform(observation)

    def step(self, action: Action) -> Observation:
        """
        Execute code action and return observation.

        Args:
            action: CodeAction containing the code to execute

        Returns:
            CodeObservation with execution results (stdout, stderr, exit_code),
            passed through ``_apply_transform``

        Raises:
            ValueError: If action is not a CodeAction instance
        """
        if not isinstance(action, CodeAction):
            raise ValueError(f"Expected CodeAction, got {type(action)}")

        # Execute the code using PyExecutor
        result = self._executor.run(action.code)

        # Update state
        self._state.step_count += 1
        self._state.last_exit_code = result.exit_code

        # Create observation from execution result
        observation = CodeObservation(
            stdout=result.stdout,
            stderr=result.stderr,
            exit_code=result.exit_code,
        )

        # The safety/quality transforms only act when the submitted source is
        # available under metadata["last_code"]; without this they never see
        # the code and their penalties/bonuses can never apply.
        observation.metadata["last_code"] = action.code

        return self._apply_transform(observation)

    @property
    def state(self) -> CodeState:
        """Get current environment state including last exit code."""
        return self._state
|
server/requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
smolagents
|
server/transforms.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Transforms specific to coding environments."""
|
| 8 |
+
|
| 9 |
+
import ast
|
| 10 |
+
import re
|
| 11 |
+
|
| 12 |
+
from core.env_server.base_transforms import CompositeTransform
|
| 13 |
+
from core.env_server.interfaces import Transform
|
| 14 |
+
from core.env_server.types import Observation
|
| 15 |
+
|
| 16 |
+
from ..models import CodeObservation
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class CodeSafetyTransform(Transform):
    """Penalizes observations whose submitted code matches a dangerous pattern.

    The code is read from ``observation.metadata["last_code"]``; observations
    without that key are returned untouched.
    """

    def __init__(self, penalty: float = -1.0):
        """
        Args:
            penalty: Reward assigned when any dangerous pattern matches.
        """
        self.penalty = penalty
        # Regular expressions searched for in the submitted code, in order.
        self.dangerous_patterns = [
            r"import\s+os",
            r"import\s+subprocess",
            r"eval\(",
            r"exec\(",
            r"__import__",
            r"open\(",
        ]

    def __call__(self, observation: Observation) -> Observation:
        """
        Apply the safety check to one observation.

        Args:
            observation: Observation to inspect; anything that is not a
                CodeObservation is passed through unchanged.

        Returns:
            The same observation object. On a match its reward is set to the
            penalty and the matching pattern is stored under
            metadata["safety_violation"]; with no match a missing reward
            becomes 0.0.
        """
        if not isinstance(observation, CodeObservation):
            return observation

        if "last_code" not in observation.metadata:
            return observation

        source = observation.metadata["last_code"]
        # First pattern (in list order) that occurs anywhere in the code.
        violation = next(
            (
                candidate
                for candidate in self.dangerous_patterns
                if re.search(candidate, source)
            ),
            None,
        )

        if violation is not None:
            observation.reward = self.penalty
            observation.metadata["safety_violation"] = violation
        elif observation.reward is None:
            # Clean code and no reward assigned yet: start from neutral.
            observation.reward = 0.0

        return observation
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class CodeQualityTransform(Transform):
    """Adjusts the reward according to simple code-quality signals.

    The code is read from ``observation.metadata["last_code"]``; when that key
    is absent the quality contribution is 0.0.
    """

    def __init__(
        self,
        concise_bonus: float = 0.1,
        max_length_threshold: int = 100,
        syntax_penalty: float = -0.2,
    ):
        """
        Args:
            concise_bonus: Amount added when the stripped code is short enough.
            max_length_threshold: Maximum stripped length (inclusive) that
                still earns the concise bonus.
            syntax_penalty: Amount added when the code fails to parse.
        """
        self.concise_bonus = concise_bonus
        self.max_length_threshold = max_length_threshold
        self.syntax_penalty = syntax_penalty

    def _score(self, code: str) -> float:
        """Return the quality score for one piece of code."""
        score = 0.0

        # Bonus for concise code (length measured after stripping whitespace)
        if len(code.strip()) <= self.max_length_threshold:
            score += self.concise_bonus

        # Penalty when the code is not syntactically valid Python
        try:
            ast.parse(code)
        except SyntaxError:
            score += self.syntax_penalty

        return score

    def __call__(self, observation: Observation) -> Observation:
        """
        Add the quality score to the observation's reward.

        Args:
            observation: Observation to score; anything that is not a
                CodeObservation is passed through unchanged.

        Returns:
            The same observation object, with the quality score used as the
            reward when none was set and added to the existing reward otherwise.
        """
        if not isinstance(observation, CodeObservation):
            return observation

        if "last_code" in observation.metadata:
            delta = self._score(observation.metadata["last_code"])
        else:
            delta = 0.0

        current = observation.reward
        observation.reward = delta if current is None else current + delta

        return observation
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def create_safe_coding_transform() -> CompositeTransform:
    """Build the composite of the safety transform followed by the quality transform."""
    # Both transforms use their default settings; the list order is safety, then quality.
    stages = [CodeSafetyTransform(), CodeQualityTransform()]
    return CompositeTransform(stages)
|