findfile-xml-step240
WIP derivative checkpoint of EssentialAI/rnj-1-instruct.
RL-trained on a file-navigation task using an XML tool-calling format.
Training Config
# Root-table keys: they appear before the first table header.
# NOTE(review): 240 matches the "step240" suffix in the checkpoint name at
# the top of this card; presumably the total number of training steps — confirm.
max_steps = 240
# NOTE(review): presumably the maximum sequence length in tokens — confirm.
seq_len = 8192
# Model identifier for the run.
# NOTE(review): this is an absolute path on the training machine, not a hub
# id, so it will not resolve on another host. Presumably these are the
# starting weights — confirm.
[model]
name = "/home/ubuntu/models/hex-toolcall-base-v2"
# Experiment-tracking target: project and run name.
# NOTE(review): presumably Weights & Biases, given the table name — confirm.
[wandb]
project = "xml-tool-thinking"
name = "findfile-v2-from-hex140"
# Orchestrator settings.
# NOTE(review): if batch_size counts rollouts, 512 / 16 gives 32 distinct
# examples per batch — confirm how the consumer interprets batch_size.
[orchestrator]
batch_size = 512
rollouts_per_example = 16
# Sampling parameters nested under the orchestrator.
# NOTE(review): max_tokens is presumably a per-generation cap — confirm.
[orchestrator.sampling]
max_tokens = 512
temperature = 1.0
# One environment entry; each `[[orchestrator.env]]` header appends an
# element to the `orchestrator.env` array.
[[orchestrator.env]]
id = "findfile_xml"

# Arguments of the environment entry above, written as a sub-table of that
# array element rather than a long single-line inline table.
# NOTE(review): data_path is an absolute path on the training machine.
[orchestrator.env.args]
max_turns = 8
hint_chars = 40
noise_dirs = 3
max_read_chars = 1500
data_path = "/home/ubuntu/dataproc/consensus_pass_v2"
# Trainer-side model options.
# NOTE(review): `ac` presumably stands for activation checkpointing, with
# `freq` as its interval — confirm against the trainer's config schema.
[trainer.model]
ac.freq = 1
# Optimizer settings.
# NOTE(review): max_norm is presumably the gradient-clipping norm; 0.001 is
# a very small value — confirm it is intentional.
[trainer.optim]
lr = 1e-6
max_norm = 0.001
# Learning-rate schedule: linear type with 5 warmup steps, 40 decay steps
# and a floor of min_lr = 0.
# NOTE(review): warmup + decay (45 steps) is far less than max_steps (240).
# Whether the decay starts right after warmup or covers the final 40 steps
# depends on the scheduler implementation — confirm.
[trainer.scheduler]
type = "linear"
warmup_steps = 5
decay_steps = 40
min_lr = 0
# Parallelism settings for inference.
# NOTE(review): `tp` is presumably the tensor-parallel degree — confirm.
[inference.parallel]
tp = 4
# Empty table: no keys are set under it here.
# NOTE(review): presumably the table's presence alone enables checkpointing
# with default settings; the config may also be truncated at this point —
# confirm.
[ckpt]
- Downloads last month
- 29
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support