Compare commits
20 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e3123be445 | |||
| e46d5b2c13 | |||
| 34cc666105 | |||
| d6832260f9 | |||
| d2652e980f | |||
| 89cea9fd2d | |||
| 143e72c145 | |||
| 51305b3f3d | |||
| 570e52b342 | |||
| d6e874491d | |||
| dd3812dffe | |||
| 6e17630bac | |||
| 53b710b13f | |||
| 5b1e8059cb | |||
| ff16a33cdd | |||
| 7cfb9eb1f6 | |||
| c7b15f8ce1 | |||
| 7602c462ee | |||
| e38c24363c | |||
| d768b244a5 |
@@ -13,6 +13,7 @@ Core layers:
|
||||
Concrete environments:
|
||||
- terminal_test_env/: Simple file-creation tasks for testing the stack
|
||||
- hermes_swe_env/: SWE-bench style tasks with Modal sandboxes
|
||||
- endless_terminals/: Terminal tasks from HuggingFace dataset with Apptainer containers
|
||||
|
||||
Benchmarks (eval-only):
|
||||
- benchmarks/terminalbench_2/: Terminal-Bench 2.0 evaluation
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Endless Terminals Environment - Terminal task training from HuggingFace dataset."""
|
||||
|
||||
from .endless_terminals_env import EndlessTerminalsEnv, EndlessTerminalsEnvConfig
|
||||
|
||||
__all__ = ["EndlessTerminalsEnv", "EndlessTerminalsEnvConfig"]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,91 @@
|
||||
# Endless Terminals - Qwen3-4B-Instruct-2507
|
||||
# Single config for both trainer (launch_training.py) and env (endless_terminals_env.py serve)
|
||||
#
|
||||
# Usage:
|
||||
# Terminal 1: run-api
|
||||
# Terminal 2: cd tinker-atropos && python launch_training.py --config ../environments/endless_terminals/tinker_qwen.yaml
|
||||
# Terminal 3: python environments/endless_terminals/endless_terminals_env.py serve --config environments/endless_terminals/tinker_qwen.yaml
|
||||
|
||||
env:
|
||||
# Toolsets
|
||||
enabled_toolsets: ["terminal", "file"]
|
||||
|
||||
# Model / tokenizer
|
||||
tokenizer_name: "Qwen/Qwen3-4B-Instruct-2507"
|
||||
|
||||
# Agent configuration
|
||||
max_agent_turns: 16
|
||||
max_token_length: 2048
|
||||
agent_temperature: 0.6
|
||||
extra_body:
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
tool_call_parser: "hermes"
|
||||
|
||||
# Terminal backend
|
||||
terminal_backend: "docker"
|
||||
|
||||
# Dataset settings
|
||||
use_dataset: true
|
||||
dataset_name: "obiwan96/endless-terminals"
|
||||
dataset_split: "train"
|
||||
dataset_cache_dir: "~/.cache/huggingface/datasets"
|
||||
tasks_base_dir: "/Users/samherring/Desktop/Projects/Hermes-Agent/endless-terminals"
|
||||
|
||||
# Test execution
|
||||
test_timeout_s: 180
|
||||
default_docker_image: "ubuntu:22.04"
|
||||
max_concurrent_containers: 16
|
||||
|
||||
# Training configuration
|
||||
group_size: 16
|
||||
batch_size: 64 # 4 groups × 16 rollouts per step
|
||||
total_steps: 500
|
||||
steps_per_eval: 5
|
||||
min_items_sent_before_logging: 1
|
||||
ensure_scores_are_not_same: true
|
||||
max_num_workers: 2048
|
||||
worker_timeout: 3600
|
||||
inference_weight: 1.0
|
||||
eval_limit_ratio: 0.1
|
||||
rollout_server_url: "http://localhost:8000"
|
||||
|
||||
# Evaluation configuration
|
||||
num_eval_tasks: 20
|
||||
eval_split_ratio: 0.1
|
||||
|
||||
# Logging
|
||||
use_wandb: true
|
||||
wandb_name: "endless-terminals-qwen3-4b"
|
||||
|
||||
# System prompt
|
||||
system_prompt: >
|
||||
You are a skilled Linux system administrator and programmer.
|
||||
You have access to a terminal and file tools to complete system administration
|
||||
and programming tasks. Use the tools effectively to solve the given task,
|
||||
and verify your solution works correctly before finishing.
|
||||
Keep each command short and focused — break complex tasks into multiple steps
|
||||
rather than writing long one-liners.
|
||||
|
||||
tinker:
|
||||
lora_rank: 32
|
||||
learning_rate: 0.0000005
|
||||
max_token_trainer_length: 32768
|
||||
checkpoint_dir: "./temp/"
|
||||
save_checkpoint_interval: 50
|
||||
wandb_project: "endless-terminals"
|
||||
wandb_group: null
|
||||
wandb_run_name: "qwen3-4b"
|
||||
tool_call_parser: "hermes"
|
||||
|
||||
openai:
|
||||
- model_name: "Qwen/Qwen3-4B-Instruct-2507"
|
||||
base_url: "http://localhost:8001/v1"
|
||||
api_key: "x"
|
||||
weight: 1.0
|
||||
num_requests_for_eval: 64
|
||||
timeout: 600
|
||||
server_type: "sglang"
|
||||
|
||||
slurm: false
|
||||
testing: false
|
||||
@@ -298,7 +298,6 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||
return False
|
||||
|
||||
server = self.server.servers[0]
|
||||
# If the server is an OpenAI server (not VLLM/SGLang), use direct mode
|
||||
from atroposlib.envs.server_handling.openai_server import OpenAIServer
|
||||
return not isinstance(server, OpenAIServer)
|
||||
|
||||
|
||||
@@ -48,7 +48,13 @@ class HermesToolCallParser(ToolCallParser):
|
||||
if not raw_json.strip():
|
||||
continue
|
||||
|
||||
tc_data = json.loads(raw_json)
|
||||
try:
|
||||
tc_data = json.loads(raw_json)
|
||||
except json.JSONDecodeError:
|
||||
# Fix invalid backslash escapes from shell commands in JSON strings
|
||||
# e.g. \s \w \d \n (unescaped) → \\s \\w \\d \\n
|
||||
fixed = re.sub(r'\\([^"\\/bfnrtu0-9\n])', r'\\\\\1', raw_json)
|
||||
tc_data = json.loads(fixed)
|
||||
tool_calls.append(
|
||||
ChatCompletionMessageToolCall(
|
||||
id=f"call_{uuid.uuid4().hex[:8]}",
|
||||
|
||||
Reference in New Issue
Block a user