Compare text

Find the difference between two text files

Real-time diff

Unified diff

Collapse lines

Highlight change

Syntax highlighting

Tools

Diffchecker Desktop: The most secure way to run Diffchecker. Get the Diffchecker Desktop app: your diffs never leave your computer! Get Desktop

config diff

Created 2 months ago. Diff never expires.

Lines
Total
Removed

Words
Total
Removed

To continue using this feature, upgrade to Diffchecker Pro. View Pricing

338 lines

Lines
Total
Added

Words
Total
Added

To continue using this feature, upgrade to Diffchecker Pro. View Pricing

339 lines

{

"data": {

"value": {

"shuffle": true,

"image_key": "images",

"tokenizer": null,

"val_files": "/root/data/gsm8k/test.parquet",

"video_key": "videos",

"custom_cls": {

"name": null,

"path": null

"prompt_key": "prompt",

"truncation": "error",

"train_files": "/root/data/gsm8k/train.parquet",

"reward_fn_key": "data_source",

"val_batch_size": null,

"return_raw_chat": true,

"train_batch_size": 256,

"max_prompt_length": 1024,

"max_response_length": 1024,

"return_raw_input_ids": false,

"filter_overlong_prompts": true,

"filter_overlong_prompts_workers": 1

}

"_wandb": {

"value": {

"m": [],

"t": {

"1": [

11,

30,

41,

49,

50,

51,

55,

71,

95,

105

"2": [

11,

30,

41,

49,

50,

51,

55,

71,

95,

105

"3": [

13,

16,

23,

55,

"4": "3.10.12",

"5": "0.19.11",

"6": "4.51.1",

"8": [

"12": "0.19.11",

"13": "linux-x86_64"

"cli_version": "0.19.11",

"python_version": "3.10.12"

}

"critic": {

"value": {

"model": {

"path": "~/models/deepseek-llm-7b-chat",

"fsdp_config": {

"fsdp_size": -1,

"wrap_policy": {

"min_num_params": 0

"param_offload": false,

"offload_policy": false,

"optimizer_offload": false,

"reshard_after_forward": true

"external_lib": null,

"tokenizer_path": "Qwen/Qwen2.5-3B-Instruct",

"trust_remote_code": false,

"use_remove_padding": false,

"enable_gradient_checkpointing": true

"optim": {

"lr": 0.00001,

"min_lr_ratio": null,

"warmup_style": "constant",

"weight_decay": 0.01,

"total_training_steps": 435,

"lr_warmup_steps_ratio": 0

"shuffle": false,

"strategy": "fsdp",

"grad_clip": 1,

"rollout_n": 16,

"checkpoint": {

"contents": [

"model",

"optimizer",

"extra"

]

"ppo_epochs": 1,

"cliprange_value": 0.5,

"use_dynamic_bsz": false,

"ppo_mini_batch_size": 256,

"ppo_micro_batch_size": null,

"forward_micro_batch_size": null,

"ppo_max_token_len_per_gpu": 32768,

"ppo_micro_batch_size_per_gpu": null,

"forward_max_token_len_per_gpu": 32768,

"ulysses_sequence_parallel_size": 1,

"forward_micro_batch_size_per_gpu": null

}

"trainer": {

"value": {

"logger": [

"console",

"wandb"

"nnodes": 1,

"save_freq": -1,

"test_freq": 20,

"resume_mode": "auto",

"project_name": "gsm8k_async_rl",

"total_epochs": 15,

"balance_batch": true,

"critic_warmup": 0,

"experiment_name": "qwen2.5-3b_rm-gsm8k-sgl-multiturn-2025-05-15-05-43",

"experiment_name": "qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-verify-n16",

"n_gpus_per_node": 8,

"default_hdfs_dir": null,

"resume_from_path": null,

"rollout_data_dir": null,

"val_before_train": true,

"default_local_dir": "checkpoints/gsm8k_async_rl/qwen2.5-3b_rm-gsm8k-sgl-multiturn-2025-05-15-05-43",

"default_local_dir": "checkpoints/gsm8k_async_rl/qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-verify-n16",

"log_val_generations": 0,

"validation_data_dir": null,

"total_training_steps": null,

"max_actor_ckpt_to_keep": null,

"max_critic_ckpt_to_keep": null,

"del_local_ckpt_after_load": false,

"ray_wait_register_center_timeout": 300

}

"ray_init": {

"value": {

"num_cpus": null

}

"algorithm": {

"value": {

"lam": 1,

"gamma": 1,

"kl_ctrl": {

"type": "fixed",

"horizon": 10000,

"kl_coef": 0.001,

"target_kl": 0.1

"kl_penalty": "kl",

"adv_estimator": "grpo",

"use_kl_in_reward": false,

"norm_adv_by_std_in_grpo": true

}

"reward_model": {

"value": {

"model": {

"path": "~/models/FsfairX-LLaMA3-RM-v0.1",

"fsdp_config": {

"fsdp_size": -1,

"wrap_policy": {

"min_num_params": 0

"param_offload": false,

"reshard_after_forward": true

"external_lib": null,

"input_tokenizer": "Qwen/Qwen2.5-3B-Instruct",

"trust_remote_code": false,

"use_remove_padding": false

"enable": false,

"strategy": "fsdp",

"max_length": null,

"reward_manager": "naive",

"use_dynamic_bsz": false,

"micro_batch_size": null,

"launch_reward_fn_async": false,

"micro_batch_size_per_gpu": null,

"forward_max_token_len_per_gpu": 32768,

"ulysses_sequence_parallel_size": 1

}

"actor_rollout_ref": {

"value": {

"ref": {

"strategy": "fsdp",

"fsdp_config": {

"wrap_policy": {

"min_num_params": 0

"param_offload": true,

"reshard_after_forward": true

"use_torch_compile": true,

"log_prob_use_dynamic_bsz": false,

"log_prob_micro_batch_size": null,

"log_prob_max_token_len_per_gpu": 16384,

"ulysses_sequence_parallel_size": 1,

"log_prob_micro_batch_size_per_gpu": 32

"actor": {

"optim": {

"lr": 0.000001,

"num_cycles": 0.5,

"min_lr_ratio": 0,

"warmup_style": "constant",

"weight_decay": 0.01,

"lr_warmup_steps": -1,

"total_training_steps": 435,

"lr_warmup_steps_ratio": 0

"shuffle": false,

"strategy": "fsdp",

"grad_clip": 1,

"checkpoint": {

"contents": [

"model",

"optimizer",

"extra"

]

"clip_ratio": 0.2,

"ppo_epochs": 1,

"fsdp_config": {

"fsdp_size": -1,

"wrap_policy": {

"min_num_params": 0

"param_offload": false,

"offload_policy": false,

"optimizer_offload": false,

"reshard_after_forward": true

"use_kl_loss": true,

"clip_ratio_c": 3,

"kl_loss_coef": 0.001,

"kl_loss_type": "low_var_kl",

"entropy_coeff": 0,

"loss_agg_mode": "token-mean",

"clip_ratio_low": 0.2,

"clip_ratio_high": 0.2,

"use_dynamic_bsz": false,

"use_torch_compile": true,

"ppo_mini_batch_size": 256,

"ppo_micro_batch_size": null,

"ppo_max_token_len_per_gpu": 16384,

"ppo_micro_batch_size_per_gpu": 32,

"ulysses_sequence_parallel_size": 1

"model": {

"path": "Qwen/Qwen2.5-3B-Instruct",

"use_liger": false,

"external_lib": null,

"trust_remote_code": false,

"use_remove_padding": true,

"enable_gradient_checkpointing": true

"rollout": {

"n": 16,

"mode": "sync",

"name": "sglang_async",

"dtype": "bfloat16",

"top_k": -1,

"top_p": 1,

"do_sample": true,

"ignore_eos": false,

"multi_turn": {

"enable": true,

"enable": false,

"format": "chatml",

"max_turns": 5,

"tool_config_path": "/root/verl/examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml"

"tool_config_path": null

"val_kwargs": {

"n": 1,

"top_k": -1,

"top_p": 1,

"do_sample": false,

"temperature": 0

"load_format": "dummy_dtensor",

"temperature": 1,

"max_num_seqs": 1024,

"enforce_eager": true,

"engine_kwargs": {

"swap_space": null

"max_model_len": null,

"prompt_length": 1024,

"chat_scheduler": null,

"response_length": 1024,

"disable_log_stats": true,

"free_cache_engine": true,

"use_fire_sampling": false,

"enable_chunked_prefill": true,

"gpu_memory_utilization": 0.5,

"max_num_batched_tokens": 8192,

"log_prob_use_dynamic_bsz": false,

"log_prob_micro_batch_size": null,

"tensor_model_parallel_size": 2,

"log_prob_max_token_len_per_gpu": 16384,

"log_prob_micro_batch_size_per_gpu": 32

"hybrid_engine": true

}

"custom_reward_function": {

"value": {

"name": "compute_score",

"path": null

}

Saved diffs

Original text

Open file

{
  "data": {
    "value": {
      "shuffle": true,
      "image_key": "images",
      "tokenizer": null,
      "val_files": "/root/data/gsm8k/test.parquet",
      "video_key": "videos",
      "custom_cls": {
        "name": null,
        "path": null
      },
      "prompt_key": "prompt",
      "truncation": "error",
      "train_files": "/root/data/gsm8k/train.parquet",
      "reward_fn_key": "data_source",
      "val_batch_size": null,
      "return_raw_chat": true,
      "train_batch_size": 256,
      "max_prompt_length": 1024,
      "max_response_length": 1024,
      "return_raw_input_ids": false,
      "filter_overlong_prompts": true,
      "filter_overlong_prompts_workers": 1
    }
  },
  "_wandb": {
    "value": {
      "m": [],
      "t": {
        "1": [
          1,
          11,
          30,
          41,
          49,
          50,
          51,
          55,
          71,
          95,
          105
        ],
        "2": [
          1,
          11,
          30,
          41,
          49,
          50,
          51,
          55,
          71,
          95,
          105
        ],
        "3": [
          13,
          16,
          23,
          55,
          61
        ],
        "4": "3.10.12",
        "5": "0.19.11",
        "6": "4.51.1",
        "8": [
          5
        ],
        "12": "0.19.11",
        "13": "linux-x86_64"
      },
      "cli_version": "0.19.11",
      "python_version": "3.10.12"
    }
  },
  "critic": {
    "value": {
      "model": {
        "path": "~/models/deepseek-llm-7b-chat",
        "fsdp_config": {
          "fsdp_size": -1,
          "wrap_policy": {
            "min_num_params": 0
          },
          "param_offload": false,
          "offload_policy": false,
          "optimizer_offload": false,
          "reshard_after_forward": true
        },
        "external_lib": null,
        "tokenizer_path": "Qwen/Qwen2.5-3B-Instruct",
        "trust_remote_code": false,
        "use_remove_padding": false,
        "enable_gradient_checkpointing": true
      },
      "optim": {
        "lr": 0.00001,
        "min_lr_ratio": null,
        "warmup_style": "constant",
        "weight_decay": 0.01,
        "total_training_steps": 435,
        "lr_warmup_steps_ratio": 0
      },
      "shuffle": false,
      "strategy": "fsdp",
      "grad_clip": 1,
      "rollout_n": 16,
      "checkpoint": {
        "contents": [
          "model",
          "optimizer",
          "extra"
        ]
      },
      "ppo_epochs": 1,
      "cliprange_value": 0.5,
      "use_dynamic_bsz": false,
      "ppo_mini_batch_size": 256,
      "ppo_micro_batch_size": null,
      "forward_micro_batch_size": null,
      "ppo_max_token_len_per_gpu": 32768,
      "ppo_micro_batch_size_per_gpu": null,
      "forward_max_token_len_per_gpu": 32768,
      "ulysses_sequence_parallel_size": 1,
      "forward_micro_batch_size_per_gpu": null
    }
  },
  "trainer": {
    "value": {
      "logger": [
        "console",
        "wandb"
      ],
      "nnodes": 1,
      "save_freq": -1,
      "test_freq": 20,
      "resume_mode": "auto",
      "project_name": "gsm8k_async_rl",
      "total_epochs": 15,
      "balance_batch": true,
      "critic_warmup": 0,
      "experiment_name": "qwen2.5-3b_rm-gsm8k-sgl-multiturn-2025-05-15-05-43",
      "n_gpus_per_node": 8,
      "default_hdfs_dir": null,
      "resume_from_path": null,
      "rollout_data_dir": null,
      "val_before_train": true,
      "default_local_dir": "checkpoints/gsm8k_async_rl/qwen2.5-3b_rm-gsm8k-sgl-multiturn-2025-05-15-05-43",
      "log_val_generations": 0,
      "validation_data_dir": null,
      "total_training_steps": null,
      "max_actor_ckpt_to_keep": null,
      "max_critic_ckpt_to_keep": null,
      "del_local_ckpt_after_load": false,
      "ray_wait_register_center_timeout": 300
    }
  },
  "ray_init": {
    "value": {
      "num_cpus": null
    }
  },
  "algorithm": {
    "value": {
      "lam": 1,
      "gamma": 1,
      "kl_ctrl": {
        "type": "fixed",
        "horizon": 10000,
        "kl_coef": 0.001,
        "target_kl": 0.1
      },
      "kl_penalty": "kl",
      "adv_estimator": "grpo",
      "use_kl_in_reward": false,
      "norm_adv_by_std_in_grpo": true
    }
  },
  "reward_model": {
    "value": {
      "model": {
        "path": "~/models/FsfairX-LLaMA3-RM-v0.1",
        "fsdp_config": {
          "fsdp_size": -1,
          "wrap_policy": {
            "min_num_params": 0
          },
          "param_offload": false,
          "reshard_after_forward": true
        },
        "external_lib": null,
        "input_tokenizer": "Qwen/Qwen2.5-3B-Instruct",
        "trust_remote_code": false,
        "use_remove_padding": false
      },
      "enable": false,
      "strategy": "fsdp",
      "max_length": null,
      "reward_manager": "naive",
      "use_dynamic_bsz": false,
      "micro_batch_size": null,
      "launch_reward_fn_async": false,
      "micro_batch_size_per_gpu": null,
      "forward_max_token_len_per_gpu": 32768,
      "ulysses_sequence_parallel_size": 1
    }
  },
  "actor_rollout_ref": {
    "value": {
      "ref": {
        "strategy": "fsdp",
        "fsdp_config": {
          "wrap_policy": {
            "min_num_params": 0
          },
          "param_offload": true,
          "reshard_after_forward": true
        },
        "use_torch_compile": true,
        "log_prob_use_dynamic_bsz": false,
        "log_prob_micro_batch_size": null,
        "log_prob_max_token_len_per_gpu": 16384,
        "ulysses_sequence_parallel_size": 1,
        "log_prob_micro_batch_size_per_gpu": 32
      },
      "actor": {
        "optim": {
          "lr": 0.000001,
          "num_cycles": 0.5,
          "min_lr_ratio": 0,
          "warmup_style": "constant",
          "weight_decay": 0.01,
          "lr_warmup_steps": -1,
          "total_training_steps": 435,
          "lr_warmup_steps_ratio": 0
        },
        "shuffle": false,
        "strategy": "fsdp",
        "grad_clip": 1,
        "checkpoint": {
          "contents": [
            "model",
            "optimizer",
            "extra"
          ]
        },
        "clip_ratio": 0.2,
        "ppo_epochs": 1,
        "fsdp_config": {
          "fsdp_size": -1,
          "wrap_policy": {
            "min_num_params": 0
          },
          "param_offload": false,
          "offload_policy": false,
          "optimizer_offload": false,
          "reshard_after_forward": true
        },
        "use_kl_loss": true,
        "clip_ratio_c": 3,
        "kl_loss_coef": 0.001,
        "kl_loss_type": "low_var_kl",
        "entropy_coeff": 0,
        "loss_agg_mode": "token-mean",
        "clip_ratio_low": 0.2,
        "clip_ratio_high": 0.2,
        "use_dynamic_bsz": false,
        "use_torch_compile": true,
        "ppo_mini_batch_size": 256,
        "ppo_micro_batch_size": null,
        "ppo_max_token_len_per_gpu": 16384,
        "ppo_micro_batch_size_per_gpu": 32,
        "ulysses_sequence_parallel_size": 1
      },
      "model": {
        "path": "Qwen/Qwen2.5-3B-Instruct",
        "use_liger": false,
        "external_lib": null,
        "trust_remote_code": false,
        "use_remove_padding": true,
        "enable_gradient_checkpointing": true
      },
      "rollout": {
        "n": 16,
        "mode": "sync",
        "name": "sglang_async",
        "dtype": "bfloat16",
        "top_k": -1,
        "top_p": 1,
        "do_sample": true,
        "ignore_eos": false,
        "multi_turn": {
          "enable": true,
          "format": "chatml",
          "max_turns": 5,
          "tool_config_path": "/root/verl/examples/sglang_multiturn/config/tool_config/gsm8k_tool_config.yaml"
        },
        "val_kwargs": {
          "n": 1,
          "top_k": -1,
          "top_p": 1,
          "do_sample": false,
          "temperature": 0
        },
        "load_format": "dummy_dtensor",
        "temperature": 1,
        "max_num_seqs": 1024,
        "enforce_eager": true,
        "engine_kwargs": {
          "swap_space": null
        },
        "max_model_len": null,
        "prompt_length": 1024,
        "chat_scheduler": null,
        "response_length": 1024,
        "disable_log_stats": true,
        "free_cache_engine": true,
        "use_fire_sampling": false,
        "enable_chunked_prefill": true,
        "gpu_memory_utilization": 0.5,
        "max_num_batched_tokens": 8192,
        "log_prob_use_dynamic_bsz": false,
        "log_prob_micro_batch_size": null,
        "tensor_model_parallel_size": 2,
        "log_prob_max_token_len_per_gpu": 16384,
        "log_prob_micro_batch_size_per_gpu": 32
      },
      "hybrid_engine": true
    }
  },
  "custom_reward_function": {
    "value": {
      "name": "compute_score",
      "path": null
    }
  }
}

Changed text

Open file

{
  "data": {
    "value": {
      "shuffle": true,
      "image_key": "images",
      "tokenizer": null,
      "val_files": "/root/data/gsm8k/test.parquet",
      "video_key": "videos",
      "custom_cls": {
        "name": null,
        "path": null
      },
      "prompt_key": "prompt",
      "truncation": "error",
      "train_files": "/root/data/gsm8k/train.parquet",
      "reward_fn_key": "data_source",
      "val_batch_size": null,
      "return_raw_chat": true,
      "train_batch_size": 256,
      "max_prompt_length": 1024,
      "max_response_length": 1024,
      "return_raw_input_ids": false,
      "filter_overlong_prompts": true,
      "filter_overlong_prompts_workers": 1
    }
  },
  "_wandb": {
    "value": {
      "m": [],
      "t": {
        "1": [
          1,
          11,
          30,
          41,
          49,
          50,
          51,
          55,
          71,
          95,
          105
        ],
        "2": [
          1,
          11,
          30,
          41,
          49,
          50,
          51,
          55,
          71,
          95,
          105
        ],
        "3": [
          2,
          13,
          16,
          23,
          55,
          61
        ],
        "4": "3.10.12",
        "5": "0.19.11",
        "6": "4.51.1",
        "8": [
          5
        ],
        "12": "0.19.11",
        "13": "linux-x86_64"
      },
      "cli_version": "0.19.11",
      "python_version": "3.10.12"
    }
  },
  "critic": {
    "value": {
      "model": {
        "path": "~/models/deepseek-llm-7b-chat",
        "fsdp_config": {
          "fsdp_size": -1,
          "wrap_policy": {
            "min_num_params": 0
          },
          "param_offload": false,
          "offload_policy": false,
          "optimizer_offload": false,
          "reshard_after_forward": true
        },
        "external_lib": null,
        "tokenizer_path": "Qwen/Qwen2.5-3B-Instruct",
        "trust_remote_code": false,
        "use_remove_padding": false,
        "enable_gradient_checkpointing": true
      },
      "optim": {
        "lr": 0.00001,
        "min_lr_ratio": null,
        "warmup_style": "constant",
        "weight_decay": 0.01,
        "total_training_steps": 435,
        "lr_warmup_steps_ratio": 0
      },
      "shuffle": false,
      "strategy": "fsdp",
      "grad_clip": 1,
      "rollout_n": 16,
      "checkpoint": {
        "contents": [
          "model",
          "optimizer",
          "extra"
        ]
      },
      "ppo_epochs": 1,
      "cliprange_value": 0.5,
      "use_dynamic_bsz": false,
      "ppo_mini_batch_size": 256,
      "ppo_micro_batch_size": null,
      "forward_micro_batch_size": null,
      "ppo_max_token_len_per_gpu": 32768,
      "ppo_micro_batch_size_per_gpu": null,
      "forward_max_token_len_per_gpu": 32768,
      "ulysses_sequence_parallel_size": 1,
      "forward_micro_batch_size_per_gpu": null
    }
  },
  "trainer": {
    "value": {
      "logger": [
        "console",
        "wandb"
      ],
      "nnodes": 1,
      "save_freq": -1,
      "test_freq": 20,
      "resume_mode": "auto",
      "project_name": "gsm8k_async_rl",
      "total_epochs": 15,
      "balance_batch": true,
      "critic_warmup": 0,
      "experiment_name": "qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-verify-n16",
      "n_gpus_per_node": 8,
      "default_hdfs_dir": null,
      "resume_from_path": null,
      "rollout_data_dir": null,
      "val_before_train": true,
      "default_local_dir": "checkpoints/gsm8k_async_rl/qwen2.5-3b_function_rm-gsm8k-async-sgl-multi-w-tool-verify-n16",
      "log_val_generations": 0,
      "validation_data_dir": null,
      "total_training_steps": null,
      "max_actor_ckpt_to_keep": null,
      "max_critic_ckpt_to_keep": null,
      "del_local_ckpt_after_load": false,
      "ray_wait_register_center_timeout": 300
    }
  },
  "ray_init": {
    "value": {
      "num_cpus": null
    }
  },
  "algorithm": {
    "value": {
      "lam": 1,
      "gamma": 1,
      "kl_ctrl": {
        "type": "fixed",
        "horizon": 10000,
        "kl_coef": 0.001,
        "target_kl": 0.1
      },
      "kl_penalty": "kl",
      "adv_estimator": "grpo",
      "use_kl_in_reward": false,
      "norm_adv_by_std_in_grpo": true
    }
  },
  "reward_model": {
    "value": {
      "model": {
        "path": "~/models/FsfairX-LLaMA3-RM-v0.1",
        "fsdp_config": {
          "fsdp_size": -1,
          "wrap_policy": {
            "min_num_params": 0
          },
          "param_offload": false,
          "reshard_after_forward": true
        },
        "external_lib": null,
        "input_tokenizer": "Qwen/Qwen2.5-3B-Instruct",
        "trust_remote_code": false,
        "use_remove_padding": false
      },
      "enable": false,
      "strategy": "fsdp",
      "max_length": null,
      "reward_manager": "naive",
      "use_dynamic_bsz": false,
      "micro_batch_size": null,
      "launch_reward_fn_async": false,
      "micro_batch_size_per_gpu": null,
      "forward_max_token_len_per_gpu": 32768,
      "ulysses_sequence_parallel_size": 1
    }
  },
  "actor_rollout_ref": {
    "value": {
      "ref": {
        "strategy": "fsdp",
        "fsdp_config": {
          "wrap_policy": {
            "min_num_params": 0
          },
          "param_offload": true,
          "reshard_after_forward": true
        },
        "use_torch_compile": true,
        "log_prob_use_dynamic_bsz": false,
        "log_prob_micro_batch_size": null,
        "log_prob_max_token_len_per_gpu": 16384,
        "ulysses_sequence_parallel_size": 1,
        "log_prob_micro_batch_size_per_gpu": 32
      },
      "actor": {
        "optim": {
          "lr": 0.000001,
          "num_cycles": 0.5,
          "min_lr_ratio": 0,
          "warmup_style": "constant",
          "weight_decay": 0.01,
          "lr_warmup_steps": -1,
          "total_training_steps": 435,
          "lr_warmup_steps_ratio": 0
        },
        "shuffle": false,
        "strategy": "fsdp",
        "grad_clip": 1,
        "checkpoint": {
          "contents": [
            "model",
            "optimizer",
            "extra"
          ]
        },
        "clip_ratio": 0.2,
        "ppo_epochs": 1,
        "fsdp_config": {
          "fsdp_size": -1,
          "wrap_policy": {
            "min_num_params": 0
          },
          "param_offload": false,
          "offload_policy": false,
          "optimizer_offload": false,
          "reshard_after_forward": true
        },
        "use_kl_loss": true,
        "clip_ratio_c": 3,
        "kl_loss_coef": 0.001,
        "kl_loss_type": "low_var_kl",
        "entropy_coeff": 0,
        "loss_agg_mode": "token-mean",
        "clip_ratio_low": 0.2,
        "clip_ratio_high": 0.2,
        "use_dynamic_bsz": false,
        "use_torch_compile": true,
        "ppo_mini_batch_size": 256,
        "ppo_micro_batch_size": null,
        "ppo_max_token_len_per_gpu": 16384,
        "ppo_micro_batch_size_per_gpu": 32,
        "ulysses_sequence_parallel_size": 1
      },
      "model": {
        "path": "Qwen/Qwen2.5-3B-Instruct",
        "use_liger": false,
        "external_lib": null,
        "trust_remote_code": false,
        "use_remove_padding": true,
        "enable_gradient_checkpointing": true
      },
      "rollout": {
        "n": 16,
        "mode": "sync",
        "name": "sglang_async",
        "dtype": "bfloat16",
        "top_k": -1,
        "top_p": 1,
        "do_sample": true,
        "ignore_eos": false,
        "multi_turn": {
          "enable": false,
          "format": "chatml",
          "max_turns": 5,
          "tool_config_path": null
        },
        "val_kwargs": {
          "n": 1,
          "top_k": -1,
          "top_p": 1,
          "do_sample": false,
          "temperature": 0
        },
        "load_format": "dummy_dtensor",
        "temperature": 1,
        "max_num_seqs": 1024,
        "enforce_eager": true,
        "engine_kwargs": {
          "swap_space": null
        },
        "max_model_len": null,
        "prompt_length": 1024,
        "chat_scheduler": null,
        "response_length": 1024,
        "disable_log_stats": true,
        "free_cache_engine": true,
        "use_fire_sampling": false,
        "enable_chunked_prefill": true,
        "gpu_memory_utilization": 0.5,
        "max_num_batched_tokens": 8192,
        "log_prob_use_dynamic_bsz": false,
        "log_prob_micro_batch_size": null,
        "tensor_model_parallel_size": 2,
        "log_prob_max_token_len_per_gpu": 16384,
        "log_prob_micro_batch_size_per_gpu": 32
      },
      "hybrid_engine": true
    }
  },
  "custom_reward_function": {
    "value": {
      "name": "compute_score",
      "path": null
    }
  }
}