HH q;aqA brain.Event:2R. ,tensorboard.summary.writer.event_file_writer) L"x= utqA*  objective/klks'F DvtqA*  objective/kl_coefL>#'F ivtqA*  objective/entropy6B#J/m]P vtqA*" ppo/mean_non_score_rewardoV%6 vtqA*  ppo/mean_scores=}M$B+M vtqA*  ppo/std_scores=-|'(pJ ?}tqA*  ppo/policy/entropy-V@)7_ R}tqA*  ppo/policy/approxkl?FO*)7_ ytqA*  ppo/policy/policyklww>|T)7_ tqA*  ppo/policy/clipfrac>0_ tqA*# ! ppo/policy/advantages_mean43w3$&sO ՂtqA*  ppo/returns/meant?Z%6 tqA*  ppo/returns/var1>,#wC tqA*  ppo/val/vpredc$C@e=}#wC tqA*  ppo/val/errorM@V&sO tqA*  ppo/val/clipfrac ?q~1"x= tqA*  ppo/val/mean@Z@Pq!{ tqA*  ppo/val/var$_(@+K ftqA*  ppo/val/var_explainedt74'F @tqA*  ppo/learning_rate l7p9+K tqA*  time/ppo/forward_pass0B>cЌ.W tqA*!  time/ppo/compute_rewardsx; =1 RtqA*$ " time/ppo/compute_advantagesP;,E tqA*  time/ppo/optimize_stepʁ?-)7_ tqA*  time/ppo/calc_statspb?>r$B+M 0tqA*  time/ppo/totalݳ?.%6 ύtqA*  env/reward_mean=;+3$B+M mtqA*  env/reward_std=d"x= WqA*  objective/kl?@  'F XqA*  objective/kl_coef?L>E'F )XqA*  objective/entropy*`B\b9/m]P ?XqA*" ppo/mean_non_score_rewardƼsTD%6 SXqA*  ppo/mean_scores=,$B+M fXqA*  ppo/std_scores V=S-V?(pJ aqA*  ppo/policy/entropyQ@|5)7_ (aqA*  ppo/policy/approxkl>}&#wC JgqA*  ppo/val/vpred"@_%#wC iqA*  ppo/val/error@&sO iqA*  ppo/val/clipfrac43>0pw"x= kqA*  ppo/val/meanT3@H!{ lqA*  ppo/val/var?г+K 1lqA*  ppo/val/var_explained/*gݟD'F nqA*  ppo/learning_rate l75!L{+K FnqA*  time/ppo/forward_passdB>omLJ.W _nqA*!  time/ppo/compute_rewards:!1 nqA*$ " time/ppo/compute_advantagesL$B+M pqA*  time/ppo/total֧?#,%6 rqqA*  env/reward_mean=g$B+M rqA*  env/reward_std V=Z`"x= tqA*  objective/kl@̃'F RtqA*  objective/kl_coefL>sqM'F utqA*  objective/entropyfB؜/m]P tqA*" ppo/mean_non_score_reward@`%6 tqA*  ppo/mean_scores=Zȣ$B+M tqA*  ppo/std_scoresm+= 08-db^V(pJ htqA*  ppo/policy/entropyJ@w“)7_ tqA*  ppo/policy/approxklI;)7_ +tqA*  ppo/policy/policykl@R;$i)7_ tqA*  ppo/policy/clipfracgY=B0_ tqA*# ! ppo/policy/advantages_mean.K3$ߎ&sO _#wC tqA*  ppo/val/vpredT`?2p#wC tqA*  ppo/val/errorI`@<&sO tqA*  ppo/val/clipfrac >vK("x= tqA*  ppo/val/mean@&i!{ tqA*  ppo/val/var&?ҹG+K tqA*  ppo/val/var_explainedrI"'F btqA*  ppo/learning_rate l7\+K StqA*  time/ppo/forward_passB>m.W tqA*!  time/ppo/compute_rewards@:b1 tqA*$ " time/ppo/compute_advantagesA;m,E tqA*  time/ppo/optimize_steplo?I)7_ tqA*  time/ppo/calc_stats 2>s$B+M tqA*  time/ppo/total(?8%6 tqA*  env/reward_mean= $B+M tqA*  env/reward_stdm+=v-k"x= 7qA*  objective/klW@гѤ'F a7qA*  objective/kl_coef3hL>M4'F 7qA*  objective/entropy\B"/m]P 7qA*" ppo/mean_non_score_reward %6 7qA*  ppo/mean_scoresT=$B+M 7qA*  ppo/std_scores =";-+K '7qA*  time/ppo/forward_passbD>&.W '7qA*!  time/ppo/compute_rewardsP:3mؑ1 '7qA*$ " time/ppo/compute_advantages0@;,E '7qA*  time/ppo/optimize_step"?eT)7_ '7qA*  time/ppo/calc_statsH1>y$B+M '7qA*  time/ppo/totalݮ?pKt%6 ^(7qA*  env/reward_meanT=_ņ$B+M (7qA*  env/reward_std =#]"x= \qA*  objective/klG @'F }\qA*  objective/kl_coefFL>Q:'F \qA*  objective/entropyXYBq/m]P \qA*" ppo/mean_non_score_reward%6 \qA*  ppo/mean_scoresއ=P$?k$B+M \qA*  ppo/std_scoresf=WT-(r%6 dqA*  ppo/returns/varU*>#wC dqA*  ppo/val/vpredC?q #wC XeqA*  ppo/val/error=@yF&sO eqA*  ppo/val/clipfracg>[ "x= fqA*  ppo/val/meanR?A k!{ dfqA*  ppo/val/varƷ?jK+K fqA*  ppo/val/var_explained Tpˤ7'F gqA*  ppo/learning_rate l7q D+K wgqA*  time/ppo/forward_passzC>.W gqA*!  time/ppo/compute_rewards:L1 3hqA*$ " time/ppo/compute_advantagesF;>S,E hqA*  time/ppo/optimize_stepL~?Z )7_ hqA*  time/ppo/calc_statsP1> h$$B+M =iqA*  time/ppo/total?q%6 iqA*  env/reward_meanއ=鑟n$B+M iqA*  env/reward_stdf=Z"x= .qA*  objective/kl m@?'F R.qA*  objective/kl_coef>%L>z 'F o.qA*  objective/entropyܾB/m]P .qA*" ppo/mean_non_score_reward4&+%6 .qA*  ppo/mean_scores:=tS$B+M .qA*  ppo/std_scores s=!-{%6 .qA*  ppo/returns/var@>2\#wC .qA*  ppo/val/vpred)8?Z\#wC .qA*  ppo/val/error*?{ &sO .qA*  ppo/val/clipfracffD>X\"x= .qA*  ppo/val/meanμU?e/!{ .qA*  ppo/val/var)?+K ).qA*  ppo/val/var_explained[8'F 9.qA*  ppo/learning_rate l7SL+K I.qA*  time/ppo/forward_passPIC>.W Z.qA*!  time/ppo/compute_rewards`:[<91 j.qA*$ " time/ppo/compute_advantagesE;'},E .qA*  time/ppo/optimize_step~?ybϫ)7_ .qA*  time/ppo/calc_statsPp2>l$B+M ).qA*  time/ppo/totalҮ?%6 .qA*  env/reward_mean:=4]$B+M a.qA*  env/reward_std s=*"x= 0wqA*  objective/klE,@q'F wqA*  objective/kl_coefL>Z׫'F wqA*  objective/entropyjB0uo/m]P wqA*" ppo/mean_non_score_rewardۼ=Y%6 wqA*  ppo/mean_scoresw=dq$B+M wqA*  ppo/std_scoresk<>-,C#wC EwqA*  ppo/val/vpredy>SJC#wC wqA*  ppo/val/errorJ?We&sO zwqA*  ppo/val/clipfrac33 >S"x= wqA*  ppo/val/meanjx?T!{ wqA*  ppo/val/varʟ?R*+K NwqA*  ppo/val/var_explained5('F wqA*  ppo/learning_rate l7n+K wqA*  time/ppo/forward_passC>.I9.W 5wqA*!  time/ppo/compute_rewards:D|1 wqA*$ " time/ppo/compute_advantages>;yg,E fwqA*  time/ppo/optimize_step$Q?)7_ wqA*  time/ppo/calc_stats2>_\$B+M wqA*  time/ppo/total6?4%6 ,wqA*  env/reward_meanw=yT$B+M wqA*  env/reward_stdk<>ﻛ|"x= LqA*  objective/klì?4E['F \MqA*  objective/kl_coef_K>Sk'F MqA*  objective/entropy)BJu/m]P MqA*" ppo/mean_non_score_reward&\w%6 MqA*  ppo/mean_scores =ގ&$B+M MqA*  ppo/std_scoresh=tOJ-=Q6%6 G^qA*  ppo/returns/var$>#wC q^qA*  ppo/val/vpredqY>h#wC uaqA*  ppo/val/error ?ů*Z&sO ٘D!{ fqA*  ppo/val/var?+K gqA*  ppo/val/var_explainedtaמ'F 8iqA*  ppo/learning_rate l7ֽ+K eiqA*  time/ppo/forward_passB>.W |iqA*!  time/ppo/compute_rewards:Ro1 ckqA*$ " time/ppo/compute_advantagesE;MB,E kqA*  time/ppo/optimize_step~?{)7_ kqA*  time/ppo/calc_stats'2>8I$B+M UlqA*  time/ppo/total?FGa%6 lqA*  env/reward_mean =Ѧb$B+M mqA*  env/reward_stdh=1"x= qA *  objective/kl?1Y?'F b qA *  objective/kl_coefK>'F qA *  objective/entropy B|/m]P qA *" ppo/mean_non_score_reward{ƙ:u%6 qA *  ppo/mean_scores=ȭ$B+M qA *  ppo/std_scores-p=K-$B+M R qA *  ppo/loss/totalAR<f(pJ qA *  ppo/policy/entropyzw@t)7_ qA *  ppo/policy/approxklʬ;0)7_ qA *  ppo/policy/policykl Z; l)7_ ݚ qA *  ppo/policy/clipfracY=fu+0_ qA *# ! ppo/policy/advantages_meanffvy&sO qA *  ppo/returns/mean2=~{{%6 . qA *  ppo/returns/varW=D#wC G qA *  ppo/val/vpred΃e><#wC Z qA *  ppo/val/errorp?jM&sO  qA *  ppo/val/clipfrac;/y'"x= qA *  ppo/val/meanK>ˆ !{ qA *  ppo/val/var:j?Pi+K i qA *  ppo/val/var_explainedI5@'F  qA *  ppo/learning_rate l7@+K qA *  time/ppo/forward_pass0B>#XS.W [ qA *!  time/ppo/compute_rewards:&E W1 qA *$ " time/ppo/compute_advantagesI;$<,E qA *  time/ppo/optimize_step0~?,,k)7_ < qA *  time/ppo/calc_statse2>#F/$B+M ڨ qA *  time/ppo/totalF?ؠ.%6 z qA *  env/reward_mean=A/$B+M  qA *  env/reward_std-p=r"x= bWrA *  objective/kla]?T @+'F wbWrA *  objective/kl_coefK>|'F bWrA *  objective/entropy쓝Bk /m]P cWrA *" ppo/mean_non_score_rewardP~<%6 cWrA *  ppo/mean_scores;= -$B+M 1cWrA *  ppo/std_scoresR=S,- #wC rWrA *  ppo/val/vpred'>[#wC rWrA *  ppo/val/error?"[&sO uWrA *  ppo/val/clipfrač;?&"x= uWrA *  ppo/val/meanH>]!{ 'yWrA *  ppo/val/var? !+K RyWrA *  ppo/val/var_explainedƹ~ 'F iyWrA *  ppo/learning_rate l7F+K {yWrA *  time/ppo/forward_passA>j..W zWrA *!  time/ppo/compute_rewards:'-1 {WrA *$ " time/ppo/compute_advantagesE;@c,E s|WrA *  time/ppo/optimize_stepP~?9$N)7_ }WrA *  time/ppo/calc_stats1>]y_o$B+M }WrA *  time/ppo/total? :%6 i~WrA *  env/reward_mean;=f$B+M WrA *  env/reward_stdR= "x= rA *  objective/klq.d@ v/'F ]rA *  objective/kl_coef8~K> J'F ~rA *  objective/entropyBQ q/m]P rA *" ppo/mean_non_score_reward´%6 rA *  ppo/mean_scores=xpC$B+M rA *  ppo/std_scores4=1-j$B+M rA *  ppo/loss/value ?>e$B+M rA *  ppo/loss/total#wC _rA *  ppo/val/vpred->Q#wC rA *  ppo/val/error4I?&sO XrA *  ppo/val/clipfracff;ܜ"x= rA *  ppo/val/meanI>c3!{ rA *  ppo/val/varB!?pq+K grA *  ppo/val/var_explainedcV_?'F 5rA *  ppo/learning_rate l7{>X+K ۜrA *  time/ppo/forward_passpB>.W rA *!  time/ppo/compute_rewards:lۖ1 &rA *$ " time/ppo/compute_advantages;\1,E ǞrA *  time/ppo/optimize_stepK?)7_ drA *  time/ppo/calc_stats3>̚W$B+M rA *  time/ppo/total*?Y!%6 rA *  env/reward_mean=i5$B+M <rA *  env/reward_std4=H(o"x= tP/rA *  objective/kl?[$'F P/rA *  objective/kl_coef\K>['F iP/rA *  objective/entropy aBY/m]P P/rA *" ppo/mean_non_score_rewardeai%6 P/rA *  ppo/mean_scores=nH$B+M P/rA *  ppo/std_scoresf=}V-f$B+M P/rA *  ppo/loss/value7>ܜW$B+M xP/rA *  ppo/loss/totalDPG#wC WP/rA *  ppo/val/vpredj>.W Q/rA *!  time/ppo/compute_rewards:Hv1 IQ/rA *$ " time/ppo/compute_advantagesHJ;psK,E Q/rA *  time/ppo/optimize_stepr?k)7_ Q/rA *  time/ppo/calc_stats@N1>`b$B+M ,Q/rA *  time/ppo/totalZ?ǝ@%6 Q/rA *  env/reward_mean=>F$B+M jQ/rA *  env/reward_stdf=tW