zkwentz committed on
Commit
33e0bd8
·
verified ·
1 Parent(s): 6e0dc5f

Upload folder using huggingface_hub

Browse files
Dockerfile CHANGED
@@ -10,16 +10,20 @@
10
  ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
11
  FROM ghcr.io/meta-pytorch/openenv-base:latest
12
 
 
 
 
 
13
  # Install dependencies
14
- COPY src/envs/coding_env/server/requirements.txt /tmp/requirements.txt
15
  RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
16
 
17
  # Copy only what's needed for this environment
18
- COPY src/core/ /app/src/core/
19
- COPY src/envs/coding_env/ /app/src/envs/coding_env/
20
 
21
  # Copy README for web interface documentation
22
- COPY src/envs/coding_env/README.md /app/README.md
23
 
24
  # Health check
25
  HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
@@ -27,5 +31,6 @@ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
27
 
28
  # Run the FastAPI server
29
  ENV ENABLE_WEB_INTERFACE=true
 
30
 
31
- CMD ["uvicorn", "envs.coding_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
 
10
  ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
11
  FROM ghcr.io/meta-pytorch/openenv-base:latest
12
 
13
+ WORKDIR /app
14
+
15
+ COPY . /app
16
+
17
  # Install dependencies
18
+
19
  RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
20
 
21
  # Copy only what's needed for this environment
22
+
23
+
24
 
25
  # Copy README for web interface documentation
26
+
27
 
28
  # Health check
29
  HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
 
31
 
32
  # Run the FastAPI server
33
  ENV ENABLE_WEB_INTERFACE=true
34
+ ENV PYTHONPATH=/app
35
 
36
+ CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Coding Environment - A Python code execution environment."""
8
+
9
+ from .coding_env_client import CodingEnv
10
+ from .models import CodeAction, CodeObservation, CodeState
11
+
12
+ __all__ = ["CodeAction", "CodeObservation", "CodeState", "CodingEnv"]
client.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """Client stub for coding_env environment."""
2
+
3
class coding_envEnv:
    """Empty placeholder client class for the coding_env environment."""
5
+
6
+
coding_env_client.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CodingEnv
3
+ ---------
4
+ Client-side wrapper for the Coding environment server.
5
+ Talks HTTP to a single base_url exposing: /reset and /step.
6
+
7
+ - users instantiate CodingEnv with a base_url provided by the higher-level
8
+ vector/orchestration layer.
9
+ - Environment authors ship the Docker image that serves the HTTP API.
10
+
11
+ (Seeds, episode IDs, request IDs, capabilities can be added later in the payloads.)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Optional, TYPE_CHECKING
17
+
18
+ from core.client_types import StepResult
19
+
20
+ from core.http_env_client import HTTPEnvClient
21
+
22
+ from .models import CodeAction, CodeObservation, CodeState
23
+
24
+ if TYPE_CHECKING:
25
+ from core.containers.runtime import ContainerProvider
26
+
27
+
28
class CodingEnv(HTTPEnvClient[CodeAction, CodeObservation]):
    """Client for the Coding environment server.

    Supplies the (de)serialization hooks declared by ``HTTPEnvClient``.
    """

    def _step_payload(self, action: CodeAction) -> dict:
        """Return the dict the server's /step endpoint expects under "action"."""
        body = {"code": action.code}
        return body

    def _parse_result(self, payload: dict) -> StepResult[CodeObservation]:
        """
        Convert a server response into a ``StepResult``.

        Args:
            payload: Response of the form
                { "observation": {...}, "reward": <float|null>, "done": <bool>, "info": {...} }

        Returns:
            StepResult carrying the observation, the reward (possibly None)
            and the done flag (False when the key is absent).
        """
        observation_fields = payload["observation"]
        observation = CodeObservation(**observation_fields)
        reward = payload.get("reward")
        is_done = bool(payload.get("done", False))
        return StepResult(observation=observation, reward=reward, done=is_done)

    def _parse_state(self, payload: dict) -> CodeState:
        """
        Build a ``CodeState`` from the /state endpoint's JSON response.

        Args:
            payload: JSON response from /state endpoint

        Returns:
            CodeState with episode_id, step_count and last_exit_code; the two
            counters fall back to 0 when missing from the payload.
        """
        episode_id = payload.get("episode_id")
        step_count = payload.get("step_count", 0)
        last_exit_code = payload.get("last_exit_code", 0)
        return CodeState(
            episode_id=episode_id,
            step_count=step_count,
            last_exit_code=last_exit_code,
        )
models.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ envs/coding_env/models.py
3
+ --------------------------------
4
+ Action/Observation types for the Coding environment.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Any, Optional
11
+
12
+ from core.env_server import Action, Observation, State
13
+
14
+
15
@dataclass
class CodeAction(Action):
    """Action asking the environment to run one piece of code."""

    # Source text handed to the executor.
    code: str
    # Possible later additions: 'lint': bool, 'timeout_s': float, etc.
23
+
24
+
25
@dataclass
class CodeObservation(Observation):
    """Outcome of one code execution: captured output streams and exit code."""

    # All three default to the "nothing ran / success" values.
    stdout: str = ""
    stderr: str = ""
    exit_code: int = 0
34
+
35
+
36
@dataclass
class CodeState(State):
    """Environment state for CodeAct, extended with the latest exit code."""

    # Exit code of the most recent execution; 0 until something has run.
    last_exit_code: int = 0
openenv.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: coding_env
3
+ type: space
4
+ runtime: fastapi
5
+ app: envs.coding_env.server.app:app
6
+ port: 8000
7
+
8
+
server/Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Use the standard openenv base image
8
+ # Built from: docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
9
+ # In GitHub Actions, this is overridden to use the GHCR base image
10
+ ARG BASE_IMAGE=openenv-base:latest
11
+ FROM ${BASE_IMAGE}
12
+
13
+ # Install dependencies
14
+ COPY src/envs/coding_env/server/requirements.txt /tmp/requirements.txt
15
+ RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
16
+
17
+ # Copy only what's needed for this environment
18
+ COPY src/core/ /app/src/core/
19
+ COPY src/envs/coding_env/ /app/src/envs/coding_env/
20
+
21
+ # Copy README for web interface documentation
22
+ COPY src/envs/coding_env/README.md /app/README.md
23
+
24
+ # Health check
25
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
26
+ CMD curl -f http://localhost:8000/health || exit 1
27
+
28
+ # Run the FastAPI server
29
+ CMD ["uvicorn", "envs.coding_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
server/README.md ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CodingEnv HTTP Server
2
+
3
+ This directory contains the HTTP server implementation for the CodingEnvironment.
4
+
5
+ ## Running Locally
6
+
7
+ ### Prerequisites
8
+ ```bash
9
+ pip install fastapi uvicorn smolagents
10
+ ```
11
+
12
+ ### Start the server
13
+ ```bash
14
+ # From the project root
15
+ cd src
16
+ uvicorn envs.coding_env.server.app:app --reload --host 0.0.0.0 --port 8000
17
+ ```
18
+
19
+ The server will be available at `http://localhost:8000`
20
+
21
+ ### API Endpoints
22
+
23
+ - `POST /reset` - Reset the environment
24
+ - `POST /step` - Execute a code action
25
+ - `GET /state` - Get current environment state
26
+ - `GET /health` - Health check
27
+
28
+ ### Test with curl
29
+
30
+ ```bash
31
+ # Health check
32
+ curl http://localhost:8000/health
33
+
34
+ # Reset
35
+ curl -X POST http://localhost:8000/reset \
36
+ -H "Content-Type: application/json" \
37
+ -d '{}'
38
+
39
+ # Execute code
40
+ curl -X POST http://localhost:8000/step \
41
+ -H "Content-Type: application/json" \
42
+ -d '{
43
+ "action": {
44
+ "code": "print(\"Hello from HTTP!\")"
45
+ },
46
+ "timeout_s": 15
47
+ }'
48
+
49
+ # Get state
50
+ curl http://localhost:8000/state
51
+ ```
server/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
+
server/app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ FastAPI application for the Coding Environment.
9
+
10
+ This module creates an HTTP server that exposes the PythonCodeActEnv
11
+ over HTTP endpoints, making it compatible with HTTPEnvClient.
12
+
13
+ Usage:
14
+ # Development (with auto-reload):
15
+ uvicorn envs.coding_env.server.app:app --reload --host 0.0.0.0 --port 8000
16
+
17
+ # Production:
18
+ uvicorn envs.coding_env.server.app:app --host 0.0.0.0 --port 8000 --workers 4
19
+
20
+ # Or run directly:
21
+ python -m envs.coding_env.server.app
22
+ """
23
+
24
+ from core.env_server import create_app
25
+
26
+ from ..models import CodeAction, CodeObservation
27
+ from .python_codeact_env import PythonCodeActEnv
28
+
29
# Single environment instance, created at import time and shared by the app.
env = PythonCodeActEnv()

# Build the application object from the environment and its action/observation
# types; env_name identifies this environment to create_app.
app = create_app(env, CodeAction, CodeObservation, env_name="coding_env")


if __name__ == "__main__":
    # uvicorn is imported here so it is only required when the module is
    # executed directly rather than served by an external uvicorn process.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
server/python_codeact_env.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Python Code Action Environment.
9
+
10
+ This module provides a server-side environment implementation for executing
11
+ Python code actions using PyExecutor.
12
+ """
13
+
14
+ import uuid
15
+
16
+ from core.env_server import Action, Environment, Observation
17
+ from core.tools import PyExecutor
18
+
19
+ from ..models import CodeAction, CodeObservation, CodeState
20
+ from .transforms import create_safe_coding_transform
21
+
22
+
23
class PythonCodeActEnv(Environment):
    """
    Python Code Action Environment for executing code and tracking state.

    Each ``step`` runs the code carried by a ``CodeAction`` through a
    ``PyExecutor``, stores the resulting exit code on the environment state,
    and returns the captured output wrapped in a ``CodeObservation``. Every
    observation is passed through the transform built by
    ``create_safe_coding_transform`` before being returned.

    The constructor takes no arguments.

    Example:
        >>> env = PythonCodeActEnv()
        >>> obs = env.reset()
        >>> action = CodeAction(code="print('Hello, World!')")
        >>> obs = env.step(action)
        >>> print(obs.stdout)  # Hello, World!
        >>> print(obs.exit_code)  # 0
        >>> print(env.state.last_exit_code)  # 0
    """

    def __init__(
        self,
    ):
        self.transform = create_safe_coding_transform()
        self._executor = PyExecutor()
        self._state = CodeState()

    def reset(self) -> Observation:
        """
        Reset environment and start fresh execution session.

        Returns:
            Initial observation with empty stdout/stderr and exit_code=0
        """
        # Fresh state with a new episode id and zeroed counters.
        self._state = CodeState(episode_id=str(uuid.uuid4()), step_count=0)
        self._state.last_exit_code = 0

        # New executor so previously defined variables/functions are dropped.
        self._executor = PyExecutor()

        # New transform so nothing accumulated in the old one carries over.
        self.transform = create_safe_coding_transform()

        observation = CodeObservation(
            stdout="",
            stderr="",
            exit_code=0,
        )

        return self._apply_transform(observation)

    def step(self, action: Action) -> Observation:
        """
        Execute code action and return observation.

        Args:
            action: CodeAction containing the code to execute

        Returns:
            CodeObservation with execution results (stdout, stderr, exit_code)

        Raises:
            ValueError: If action is not a CodeAction instance
        """
        if not isinstance(action, CodeAction):
            raise ValueError(f"Expected CodeAction, got {type(action)}")

        # Execute the code using PyExecutor
        result = self._executor.run(action.code)

        # Update state
        self._state.step_count += 1
        self._state.last_exit_code = result.exit_code

        observation = CodeObservation(
            stdout=result.stdout,
            stderr=result.stderr,
            exit_code=result.exit_code,
        )
        # The safety/quality transforms look the submitted code up under
        # metadata["last_code"]; without this entry they never inspect it.
        observation.metadata["last_code"] = action.code

        return self._apply_transform(observation)

    @property
    def state(self) -> CodeState:
        """Get current environment state including last exit code."""
        return self._state
server/requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ smolagents
server/transforms.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Transforms specific to coding environments."""
8
+
9
+ import ast
10
+ import re
11
+
12
+ from core.env_server.base_transforms import CompositeTransform
13
+ from core.env_server.interfaces import Transform
14
+ from core.env_server.types import Observation
15
+
16
+ from ..models import CodeObservation
17
+
18
+
19
class CodeSafetyTransform(Transform):
    """Assign a penalty reward when the recorded code matches a risky pattern."""

    def __init__(self, penalty: float = -1.0):
        self.penalty = penalty
        # Regular expressions tried, in this order, against the code.
        self.dangerous_patterns = [
            r"import\s+os",
            r"import\s+subprocess",
            r"eval\(",
            r"exec\(",
            r"__import__",
            r"open\(",
        ]

    def __call__(self, observation: Observation) -> Observation:
        """
        Score the code stored under ``metadata["last_code"]``.

        Observations that are not ``CodeObservation`` instances, or that carry
        no ``"last_code"`` entry, are returned untouched. On the first matching
        pattern the reward becomes ``self.penalty`` and the pattern is stored
        under ``metadata["safety_violation"]``; with no match, a missing
        reward is set to 0.0.
        """
        if not isinstance(observation, CodeObservation):
            return observation
        if "last_code" not in observation.metadata:
            return observation

        source = observation.metadata["last_code"]
        first_hit = next(
            (candidate for candidate in self.dangerous_patterns if re.search(candidate, source)),
            None,
        )

        if first_hit is not None:
            observation.reward = self.penalty
            observation.metadata["safety_violation"] = first_hit
        elif observation.reward is None:
            observation.reward = 0.0

        return observation
49
+
50
+
51
class CodeQualityTransform(Transform):
    """Evaluates and rewards code quality metrics.

    Args:
        concise_bonus: Amount added when the stripped code is no longer than
            ``max_length_threshold`` characters.
        max_length_threshold: Maximum stripped length that still earns the
            concise bonus.
        syntax_penalty: Amount added (normally negative) when the code cannot
            be parsed.
    """

    def __init__(
        self,
        concise_bonus: float = 0.1,
        max_length_threshold: int = 100,
        syntax_penalty: float = -0.2,
    ):
        self.concise_bonus = concise_bonus
        self.max_length_threshold = max_length_threshold
        self.syntax_penalty = syntax_penalty

    def __call__(self, observation: Observation) -> Observation:
        """
        Add a quality score to the observation's reward.

        Observations that are not ``CodeObservation`` instances are returned
        untouched. The score is computed from ``metadata["last_code"]`` when
        present (0.0 otherwise) and is added to the existing reward, or
        becomes the reward when none was set.
        """
        if not isinstance(observation, CodeObservation):
            return observation

        quality_score = 0.0

        if "last_code" in observation.metadata:
            code = observation.metadata["last_code"]

            # Reward concise code
            if len(code.strip()) <= self.max_length_threshold:
                quality_score += self.concise_bonus

            # Check syntax (redundant but useful for quality assessment).
            # Before Python 3.12 ast.parse reports source containing null
            # bytes as ValueError rather than SyntaxError; treat both as
            # unparsable code instead of letting the transform crash.
            try:
                ast.parse(code)
            except (SyntaxError, ValueError):
                quality_score += self.syntax_penalty

        # Add to existing reward
        if observation.reward is None:
            observation.reward = quality_score
        else:
            observation.reward += quality_score

        return observation
90
+
91
+
92
def create_safe_coding_transform() -> CompositeTransform:
    """Return a composite holding, in order, a safety and a quality transform."""
    stages = [CodeSafetyTransform(), CodeQualityTransform()]
    return CompositeTransform(stages)