---
# PPO training configuration (keys match a TRL-style PPOConfig).
# NOTE(review): original file had every key flattened onto one line,
# which is not parseable YAML; reformatted to block style. Values unchanged.

# Run identity
# Quoted defensively: hex run IDs are number-looking strings.
exp_name: "1faec1bd5b774ad4a761c6632b118326"
task_name: "1faec1bd5b774ad4a761c6632b118326"
seed: 42

# Model / task
model_name: gpt2
query_dataset: imdb
# Quoted: plain scalars containing ':' are fragile across parsers.
reward_model: "sentiment-analysis:lvwerra/distilbert-imdb"
is_encoder_decoder: false
is_peft_model: false

# Optimization
learning_rate: 0.001
batch_size: 32
mini_batch_size: 32
backward_batch_size: 32
global_batch_size: 32
global_backward_batch_size: 32
forward_batch_size: null
gradient_accumulation_steps: 1
gradient_checkpointing: false
max_grad_norm: null
steps: 20000
ppo_epochs: 4
total_ppo_epochs: 625
world_size: 1

# PPO objective / KL control
adap_kl_ctrl: true
init_kl_coef: 0.2
kl_penalty: kl
target: 6
target_kl: 1
horizon: 10000
gamma: 1
lam: 0.95
cliprange: 0.2
cliprange_value: 0.2
vf_coef: 0.1
ratio_threshold: 10.0
early_stopping: false
compare_steps: 1

# Reward post-processing
use_score_scaling: false
use_score_norm: false
score_clip: null
whiten_rewards: false

# Runtime / logging
optimize_cuda_cache: null
optimize_device_cache: false
remove_unused_columns: true
log_with: tensorboard
tracker_project_name: trl
project_kwargs/logging_dir: ./data/logs