---
# PPO training hyperparameters.
# NOTE(review): keys match a TRL `PPOConfig` dump (model gpt2, imdb sentiment
# reward) — confirm against the consuming trainer before renaming anything.
# Original file had the whole mapping collapsed onto one line, which is not
# valid YAML; restored here as one key per line with values unchanged.
adap_kl_ctrl: true
backward_batch_size: 32
batch_size: 32
cliprange: 0.2
cliprange_value: 0.2
compare_steps: 1
early_stopping: false
exp_name: a2b7701778d64d8395823410aec38be2
forward_batch_size: null
gamma: 1
global_backward_batch_size: 32
global_batch_size: 32
gradient_accumulation_steps: 1
gradient_checkpointing: false
horizon: 10000
init_kl_coef: 0.2
is_encoder_decoder: false
is_peft_model: false
kl_penalty: kl
lam: 0.95
learning_rate: 0.001
log_with: tensorboard
max_grad_norm: null
mini_batch_size: 32
model_name: gpt2
optimize_cuda_cache: null
optimize_device_cache: false
ppo_epochs: 4
# Slash in the key is intentional (flattened nested option) — leave as-is.
project_kwargs/logging_dir: ./data/logs
query_dataset: imdb
ratio_threshold: 10.0
remove_unused_columns: true
# Quoted: value contains colons (pipeline-task:model-id form).
reward_model: "sentiment-analysis:lvwerra/distilbert-imdb"
score_clip: null
seed: 42
steps: 20000
target: 6
target_kl: 1
task_name: a2b7701778d64d8395823410aec38be2
total_ppo_epochs: 625
tracker_project_name: trl
use_score_norm: false
use_score_scaling: false
vf_coef: 0.1
whiten_rewards: false
world_size: 1