config diff

Created Diff never expires
4 removals
Lines
Total
Removed
Words
Total
Removed
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
338 lines
5 additions
Lines
Total
Added
Words
Total
Added
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
339 lines
{
{
"data": {
"data": {
"value": {
"value": {
"shuffle": true,
"shuffle": true,
"image_key": "images",
"image_key": "images",
"tokenizer": null,
"tokenizer": null,
"val_files": "/root/data/gsm8k/test.parquet",
"val_files": "/root/data/gsm8k/test.parquet",
"video_key": "videos",
"video_key": "videos",
"custom_cls": {
"custom_cls": {
"name": null,
"name": null,
"path": null
"path": null
},
},
"prompt_key": "prompt",
"prompt_key": "prompt",
"truncation": "error",
"truncation": "error",
"train_files": "/root/data/gsm8k/train.parquet",
"train_files": "/root/data/gsm8k/train.parquet",
"reward_fn_key": "data_source",
"reward_fn_key": "data_source",
"val_batch_size": null,
"val_batch_size": null,
"return_raw_chat": true,
"return_raw_chat": true,
"train_batch_size": 256,
"train_batch_size": 256,
"max_prompt_length": 1024,
"max_prompt_length": 1024,
"max_response_length": 1024,
"max_response_length": 1024,
"return_raw_input_ids": false,
"return_raw_input_ids": false,
"filter_overlong_prompts": true,
"filter_overlong_prompts": true,
"filter_overlong_prompts_workers": 1
"filter_overlong_prompts_workers": 1
}
}
},
},
"_wandb": {
"_wandb": {
"value": {
"value": {
"m": [],
"m": [],
"t": {
"t": {
"1": [
"1": [
1,
1,
11,
11,
30,
30,
41,
41,
49,
49,
50,
50,
51,
51,
55,
55,
71,
71,
95,
95,
105
105
],
],
"2": [
"2": [
1,
1,
11,
11,
30,
30,
41,
41,
49,
49,
50,
50,
51,
51,
55,
55,
71,
71,
95,
95,
105
105
],
],
"3": [
"3": [
2,
13,
13,
16,
16,
23,
23,
55,
55,
61
61
],
],
"4": "3.10.12",
"4": "3.10.12",
"5": "0.19.11",
"5": "0.19.11",
"6": "4.51.1",
"6": "4.51.1",
"8": [
"8": [
5
5
],
],
"12": "0.19.11",
"12": "0.19.11",
"13": "linux-x86_64"
"13": "linux-x86_64"
},
},
"cli_version": "0.19.11",
"cli_version": "0.19.11",
"python_version": "3.10.12"
"python_version": "3.10.12"
}
}
},
},
"critic": {
"critic": {
"value": {
"value": {
"model": {
"model": {
"path": "~/models/deepseek-llm-7b-chat",
"path": "~/models/deepseek-llm-7b-chat",
"fsdp_config": {
"fsdp_config": {
"fsdp_size": -1,
"fsdp_size": -1,
"wrap_policy": {
"wrap_policy": {
"min_num_params": 0
"min_num_params": 0
},
},
"param_offload": false,
"param_offload": false,
"offload_policy": false,
"offload_policy": false,
"optimizer_offload": false,
"optimizer_offload": false,
"reshard_after_forward": true
"reshard_after_forward": true
},
},
"external_lib": null,
"external_lib": null,
"tokenizer_path": "Qwen/Qwen2.5-3B-Instruct",
"tokenizer_path": "Qwen/Qwen2.5-3B-Instruct",
"trust_remote_code": false,
"trust_remote_code": false,
"use_remove_padding": false,
"use_remove_padding": false,
"enable_gradient_checkpointing": true
"enable_gradient_checkpointing": true
},
},
"optim": {
"optim": {
"lr": 0.00001,
"lr": 0.00001,
"min_lr_ratio": null,
"min_lr_ratio": null,
"warmup_style": "constant",
"warmup_style": "constant",
"weight_decay": 0.01,
"weight_decay": 0.01,
"total_training_steps": 435,
"total_training_steps": 435,
"lr_warmup_steps_ratio": 0
"lr_warmup_steps_ratio": 0
},
},
"shuffle": false,
"shuffle": false,
"strategy": "fsdp",
"strategy": "fsdp",
"grad_clip": 1,
"grad_clip": 1,
"rollout_n": 16,
"rollout_n": 16,
"checkpoint": {
"checkpoint": {
"contents": [
"contents": [
"model",
"model",
"optimizer",
"optimizer",
"extra"
"extra"
]
]
},
},
"ppo_epochs": 1,
"ppo_epochs": 1,
"cliprange_value": 0.5,
"cliprange_value": 0.5,
"use_dynamic_bsz": false,
"use_dynamic_bsz": false,
"ppo_mini_batch_size": 256,
"ppo_mini_batch_size": 256,
"ppo_micro_batch_size": null,
"ppo_micro_batch_size": null,
"forward_micro_batch_size": null,
"forward_micro_batch_size": null,
"ppo_max_token_len_per_gpu": 32768,
"ppo_max_token_len_per_gpu": 32768,
"ppo_micro_batch_size_per_gpu": null,
"ppo_micro_batch_size_per_gpu": null,
"forward_max_token_len_per_gpu": 32768,
"forward_max_token_len_per_gpu": 32768,
"ulysses_sequence_parallel_size": 1,
"ulysses_sequence_parallel_size": 1,
"forward_micro_batch_size_per_gpu": null
"forward_micro_batch_size_per_gpu": null
}
}
},
},
"trainer": {
"trainer": {
"value": {
"value": {
"logger": [
"logger": [
"console",
"console",
"wandb"
"wandb"
],
],
"nnodes": 1,
"nnodes": 1,
"save_freq": -1,
"save_freq": -1,
"test_freq": 20,
"test_freq": 20,
"resume_mode": "auto",
"resume_mode": "auto",
"project_name": "gsm8k_async_rl",
"project_name": "gsm8k_async_rl",
"total_epochs": 15,
"total_epochs": 15,
"balance_batch": true,
"balance_batch": true,
"critic_warmup": 0,
"critic_warmup": 0,
"experiment_name": "qwen2.5-3b_rm-gsm8k-sgl-multiturn-2025-05-15-05-43",
"experiment_name": "qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-verify-n16",
"n_gpus_per_node": 8,
"n_gpus_per_node": 8,
"default_hdfs_dir": null,
"default_hdfs_dir": null,
"resume_from_path": null,
"resume_from_path": null,
"rollout_data_dir": null,
"rollout_data_dir": null,
"val_before_train": true,
"val_before_train": true,
"default_local_dir": "checkpoints/gsm8k_async_rl/qwen2.5-3b_rm-gsm8k-sgl-multiturn-2025-05-15-05-43",
"default_local_dir": "checkpoints/gsm8k_async_rl/qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-verify-n16",
"log_val_generations": 0,
"log_val_generations": 0,
"validation_data_dir": null,
"validation_data_dir": null,
"total_training_steps": null,
"total_training_steps": null,
"max_actor_ckpt_to_keep": null,
"max_actor_ckpt_to_keep": null,
"max_critic_ckpt_to_keep": null,
"max_critic_ckpt_to_keep": null,
"del_local_ckpt_after_load": false,
"del_local_ckpt_after_load": false,
"ray_wait_register_center_timeout": 300
"ray_wait_register_center_timeout": 300
}
}
},
},
"ray_init": {
"ray_init": {
"value": {
"value": {
"num_cpus": null
"num_cpus": null
}
}
},
},
"algorithm": {
"algorithm": {
"value": {
"value": {
"lam": 1,
"lam": 1,
"gamma": 1,
"gamma": 1,
"kl_ctrl": {
"kl_ctrl": {
"type": "fixed",
"type": "fixed",
"horizon": 10000,
"horizon": 10000,
"kl_coef": 0.001,
"kl_coef": 0.001,
"target_kl": 0.1
"target_kl": 0.1
},
},
"kl_penalty": "kl",
"kl_penalty": "kl",
"adv_estimator": "grpo",
"adv_estimator": "grpo",
"use_kl_in_reward": false,
"use_kl_in_reward": false,
"norm_adv_by_std_in_grpo": true
"norm_adv_by_std_in_grpo": true
}
}
},
},
"reward_model": {
"reward_model": {
"value": {
"value": {
"model": {
"model": {
"path": "~/models/FsfairX-LLaMA3-RM-v0.1",
"path": "~/models/FsfairX-LLaMA3-RM-v0.1",
"fsdp_config": {
"fsdp_config": {
"fsdp_size": -1,
"fsdp_size": -1,
"wrap_policy": {
"wrap_policy": {
"min_num_params": 0
"min_num_params": 0
},
},
"param_offload": false,
"param_offload": false,
"reshard_after_forward": true
"reshard_after_forward": true
},
},
"external_lib": null,
"external_lib": null,
"input_tokenizer": "Qwen/Qwen2.5-3B-Instruct",
"input_tokenizer": "Qwen/Qwen2.5-3B-Instruct",
"trust_remote_code": false,
"trust_remote_code": false,
"use_remove_padding": false
"use_remove_padding": false
},
},
"enable": false,
"enable": false,
"strategy": "fsdp",
"strategy": "fsdp",
"max_length": null,
"max_length": null,
"reward_manager": "naive",
"reward_manager": "naive",
"use_dynamic_bsz": false,
"use_dynamic_bsz": false,
"micro_batch_size": null,
"micro_batch_size": null,
"launch_reward_fn_async": false,
"launch_reward_fn_async": false,
"micro_batch_size_per_gpu": null,
"micro_batch_size_per_gpu": null,
"forward_max_token_len_per_gpu": 32768,
"forward_max_token_len_per_gpu": 32768,
"ulysses_sequence_parallel_size": 1
"ulysses_sequence_parallel_size": 1
}
}
},
},
"actor_rollout_ref": {
"actor_rollout_ref": {
"value": {
"value": {
"ref": {
"ref": {
"strategy": "fsdp",
"strategy": "fsdp",
"fsdp_config": {
"fsdp_config": {
"wrap_policy": {
"wrap_policy": {
"min_num_params": 0
"min_num_params": 0
},
},
"param_offload": true,
"param_offload": true,
"reshard_after_forward": true
"reshard_after_forward": true
},
},
"use_torch_compile": true,
"use_torch_compile": true,
"log_prob_use_dynamic_bsz": false,
"log_prob_use_dynamic_bsz": false,
"log_prob_micro_batch_size": null,
"log_prob_micro_batch_size": null,
"log_prob_max_token_len_per_gpu": 16384,
"log_prob_max_token_len_per_gpu": 16384,
"ulysses_sequence_parallel_size": 1,
"ulysses_sequence_parallel_size": 1,
"log_prob_micro_batch_size_per_gpu": 32
"log_prob_micro_batch_size_per_gpu": 32
},
},
"actor": {
"actor": {
"optim": {
"optim": {
"lr": 0.000001,
"lr": 0.000001,
"num_cycles": 0.5,
"num_cycles": 0.5,
"min_lr_ratio": 0,
"min_lr_ratio": 0,
"warmup_style": "constant",
"warmup_style": "constant",
"weight_decay": 0.01,
"weight_decay": 0.01,
"lr_warmup_steps": -1,
"lr_warmup_steps": -1,
"total_training_steps": 435,
"total_training_steps": 435,
"lr_warmup_steps_ratio": 0
"lr_warmup_steps_ratio": 0
},
},
"shuffle": false,
"shuffle": false,
"strategy": "fsdp",
"strategy": "fsdp",
"grad_clip": 1,
"grad_clip": 1,
"checkpoint": {
"checkpoint": {
"contents": [
"contents": [
"model",
"model",
"optimizer",
"optimizer",
"extra"
"extra"
]
]
},
},
"clip_ratio": 0.2,
"clip_ratio": 0.2,
"ppo_epochs": 1,
"ppo_epochs": 1,
"fsdp_config": {
"fsdp_config": {
"fsdp_size": -1,
"fsdp_size": -1,
"wrap_policy": {
"wrap_policy": {
"min_num_params": 0
"min_num_params": 0
},
},
"param_offload": false,
"param_offload": false,
"offload_policy": false,
"offload_policy": false,
"optimizer_offload": false,
"optimizer_offload": false,
"reshard_after_forward": true
"reshard_after_forward": true
},
},
"use_kl_loss": true,
"use_kl_loss": true,
"clip_ratio_c": 3,
"clip_ratio_c": 3,
"kl_loss_coef": 0.001,
"kl_loss_coef": 0.001,
"kl_loss_type": "low_var_kl",
"kl_loss_type": "low_var_kl",
"entropy_coeff": 0,
"entropy_coeff": 0,
"loss_agg_mode": "token-mean",
"loss_agg_mode": "token-mean",
"clip_ratio_low": 0.2,
"clip_ratio_low": 0.2,
"clip_ratio_high": 0.2,
"clip_ratio_high": 0.2,
"use_dynamic_bsz": false,
"use_dynamic_bsz": false,
"use_torch_compile": true,
"use_torch_compile": true,
"ppo_mini_batch_size": 256,
"ppo_mini_batch_size": 256,
"ppo_micro_batch_size": null,
"ppo_micro_batch_size": null,
"ppo_max_token_len_per_gpu": 16384,
"ppo_max_token_len_per_gpu": 16384,
"ppo_micro_batch_size_per_gpu": 32,
"ppo_micro_batch_size_per_gpu": 32,
"ulysses_sequence_parallel_size": 1
"ulysses_sequence_parallel_size": 1
},
},
"model": {
"model": {
"path": "Qwen/Qwen2.5-3B-Instruct",
"path": "Qwen/Qwen2.5-3B-Instruct",
"use_liger": false,
"use_liger": false,
"external_lib": null,
"external_lib": null,
"trust_remote_code": false,
"trust_remote_code": false,
"use_remove_padding": true,
"use_remove_padding": true,
"enable_gradient_checkpointing": true
"enable_gradient_checkpointing": true
},
},
"rollout": {
"rollout": {
"n": 16,
"n": 16,
"mode": "sync",
"mode": "sync",
"name": "sglang_async",
"name": "sglang_async",
"dtype": "bfloat16",
"dtype": "bfloat16",
"top_k": -1,
"top_k": -1,
"top_p": 1,
"top_p": 1,
"do_sample": true,
"do_sample": true,
"ignore_eos": false,
"ignore_eos": false,
"multi_turn": {
"multi_turn": {
"enable": true,
"enable": false,
"format": "chatml",
"format": "chatml",
"max_turns": 5,
"max_turns": 5,
"tool_config_path": "/root/verl/examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml"
"tool_config_path": null
},
},
"val_kwargs": {
"val_kwargs": {
"n": 1,
"n": 1,
"top_k": -1,
"top_k": -1,
"top_p": 1,
"top_p": 1,
"do_sample": false,
"do_sample": false,
"temperature": 0
"temperature": 0
},
},
"load_format": "dummy_dtensor",
"load_format": "dummy_dtensor",
"temperature": 1,
"temperature": 1,
"max_num_seqs": 1024,
"max_num_seqs": 1024,
"enforce_eager": true,
"enforce_eager": true,
"engine_kwargs": {
"engine_kwargs": {
"swap_space": null
"swap_space": null
},
},
"max_model_len": null,
"max_model_len": null,
"prompt_length": 1024,
"prompt_length": 1024,
"chat_scheduler": null,
"chat_scheduler": null,
"response_length": 1024,
"response_length": 1024,
"disable_log_stats": true,
"disable_log_stats": true,
"free_cache_engine": true,
"free_cache_engine": true,
"use_fire_sampling": false,
"use_fire_sampling": false,
"enable_chunked_prefill": true,
"enable_chunked_prefill": true,
"gpu_memory_utilization": 0.5,
"gpu_memory_utilization": 0.5,
"max_num_batched_tokens": 8192,
"max_num_batched_tokens": 8192,
"log_prob_use_dynamic_bsz": false,
"log_prob_use_dynamic_bsz": false,
"log_prob_micro_batch_size": null,
"log_prob_micro_batch_size": null,
"tensor_model_parallel_size": 2,
"tensor_model_parallel_size": 2,
"log_prob_max_token_len_per_gpu": 16384,
"log_prob_max_token_len_per_gpu": 16384,
"log_prob_micro_batch_size_per_gpu": 32
"log_prob_micro_batch_size_per_gpu": 32
},
},
"hybrid_engine": true
"hybrid_engine": true
}
}
},
},
"custom_reward_function": {
"custom_reward_function": {
"value": {
"value": {
"name": "compute_score",
"name": "compute_score",
"path": null
"path": null
}
}
}
}
}
}