HH *JepA brain.Event:2R. ,tensorboard.summary.writer.event_file_writerQ#"x= 9{pA*  objective/kl ؗ'F {pA*  objective/kl_coefL>uUX'F ({pA*  objective/entropy.B/m]P @{pA*" ppo/mean_non_score_rewardez%6 U{pA*  ppo/mean_scoresͯ=$B+M h{pA*  ppo/std_scores+_=/l[-;U`7$B+M Y{pA*  ppo/loss/value7A8JJ$B+M ,{pA*  ppo/loss/total*?=(pJ {pA*  ppo/policy/entropyy@ <)7_ {pA*  ppo/policy/approxkluHAc)7_ ۤ{pA*  ppo/policy/policyklF@K)7_ {pA*  ppo/policy/clipfracff>,E0_ {pA*# ! ppo/policy/advantages_mean43d7u&sO {pA*  ppo/returns/mean蛞?7%6 +{pA*  ppo/returns/var>y#wC ;{pA*  ppo/val/vpred>`y#wC {pA*  ppo/val/error6AL&sO R{pA*  ppo/val/clipfrac>'w*"x= {pA*  ppo/val/meanN@ Zc!{ {pA*  ppo/val/var4@91+K D{pA*  ppo/val/var_explained'/ˆ'F ꩯ{pA*  ppo/learning_rateϸ:+K {pA*  time/ppo/forward_passP3>6d.W 4{pA*!  time/ppo/compute_rewardsԟ;Ky1 ֫{pA*$ " time/ppo/compute_advantagesx;AwU,E s{pA*  time/ppo/optimize_step?JT)7_ {pA*  time/ppo/calc_statsPl>)!$B+M {pA*  time/ppo/total/?﹂%6 E{pA*  env/reward_meanͯ=6EP$B+M 㮯{pA*  env/reward_std+_="x= sڜpA*  objective/kl{B_ 'F LtڜpA*  objective/kl_coef?L>[W'F tڜpA*  objective/entropy/B/m]P tڜpA*" ppo/mean_non_score_reward } %6 tڜpA*  ppo/mean_scores =oI$B+M uڜpA*  ppo/std_scores>u9-ɿO8(pJ G{ڜpA*  ppo/policy/entropy3?R;)7_ ~ڜpA*  ppo/policy/approxklOAH6b)7_ ڜpA*  ppo/policy/policykl|5X@P~)7_ ڜpA*  ppo/policy/clipfrac>0_ /ڜpA*# ! ppo/policy/advantages_meanff3V&sO EڜpA*  ppo/returns/meanqx%6 VڜpA*  ppo/returns/varAg#wC օڜpA*  ppo/val/vpred4{:#wC ڜpA*  ppo/val/error\kBT&sO ڜpA*  ppo/val/clipfracT>u"x= ڜpA*  ppo/val/meanj?{!{ -ڜpA*  ppo/val/vars>*L;+K ڜpA*  ppo/val/var_explained z,'F @ڜpA*  ppo/learning_rateϸ:C(T+K XڜpA*  time/ppo/forward_pass~>X5G.W ڜpA*!  time/ppo/compute_rewards:91 ڜpA*$ " time/ppo/compute_advantagesm;Pp,E GڜpA*  time/ppo/optimize_stepR?S)7_ ڜpA*  time/ppo/calc_stats0_>- T$B+M ڜpA*  time/ppo/total?J%6 ڜpA*  env/reward_mean =˜I$B+M ڜpA*  env/reward_std>C,"x= PpA*  objective/klvRB)'F xPpA*  objective/kl_coefL>쌷P'F PpA*  objective/entropyjA/m]P PpA*" ppo/mean_non_score_reward=e%6 $PpA*  ppo/mean_scorescD>}8$B+M 7PpA*  ppo/std_scores>;-{0_ PpA*# ! ppo/policy/advantages_mean2327U&sO PpA*  ppo/returns/meanzz;%6 -PpA*  ppo/returns/varPjA`H#wC PpA*  ppo/val/vpredoe#wC _PpA*  ppo/val/errorMHBV&sO PpA*  ppo/val/clipfrac43>]~"x= PpA*  ppo/val/meanT2!{ !PpA*  ppo/val/vare:4+K PpA*  ppo/val/var_explainedl''F YPpA*  ppo/learning_rateϸ:Xc+K PpA*  time/ppo/forward_pass0c>9.W PpA*!  time/ppo/compute_rewards:ei1 *PpA*$ " time/ppo/compute_advantagesV;w7,E PpA*  time/ppo/optimize_step&ӕ? m)7_ PPpA*  time/ppo/calc_statsPO>)$B+M PpA*  time/ppo/total?{(r%6 yPpA*  env/reward_meancD>ee<$B+M PpA*  env/reward_std>b2"x= 2.pA*  objective/kltEAb.'F U3.pA*  objective/kl_coefUL>ۻ'F x3.pA*  objective/entropy:N%!/m]P 3.pA*" ppo/mean_non_score_reward涝$%6 3.pA*  ppo/mean_scoresoF>@$B+M 3.pA*  ppo/std_scoresƔ;^`-.W v@.pA*!  time/ppo/compute_rewards:&1 XB.pA*$ " time/ppo/compute_advantagesU;dHVD,E B.pA*  time/ppo/optimize_step>ޕ?}d)7_ B.pA*  time/ppo/calc_stats0R>>$B+M B.pA*  time/ppo/totalq?%6 B.pA*  env/reward_meanoF>yy$B+M B.pA*  env/reward_stdƔ;t"x=  pA*  objective/klEA@'F o pA*  objective/kl_coefM>tr6'F pA*  objective/entropyO9PB =/m]P pA*" ppo/mean_non_score_rewardНs%6 pA*  ppo/mean_scores>q o$B+M pA*  ppo/std_scores9p:N-dV%6 pA*  ppo/returns/var@ x#wC pA*  ppo/val/vpred&#wC pA*  ppo/val/errorSA1&sO pA*  ppo/val/clipfracff&?^"x= pA*  ppo/val/mean f!{ pA*  ppo/val/varxp:ڽe+K pA*  ppo/val/var_explainedIK'F *pA*  ppo/learning_rateϸ:?H;+K pA*  time/ppo/forward_pass?e>)_.W $pA*!  time/ppo/compute_rewards8:Jݰ1 BpA*$ " time/ppo/compute_advantagesZ;e,E pA*  time/ppo/optimize_step$?~ )7_ pA*  time/ppo/calc_statsP>s Q$B+M pA*  time/ppo/totalN$?%6 pA*  env/reward_mean>7$B+M pA*  env/reward_std9p:o"x= A/pA*  objective/klEAg^'F /pA*  objective/kl_coef1M>U'F /pA*  objective/entropy83:/m]P /pA*" ppo/mean_non_score_rewardZꝾ46%6 /pA*  ppo/mean_scoresoF>eγv$B+M 0pA*  ppo/std_scoresƔ;J1-$B+M 66pA*  ppo/loss/total>\D!(pJ e6pA*  ppo/policy/entropyͼ:z)7_ 6pA*  ppo/policy/approxkl\.7fE)7_ 6pA*  ppo/policy/policykl/X 7(g)7_ 6pA*  ppo/policy/clipfrac0_ 6pA*# ! ppo/policy/advantages_meanff37 b&sO 6pA*  ppo/returns/meanjmL%6 6pA*  ppo/returns/varM@<#wC 8pA*  ppo/val/vpredm[a#wC 9pA*  ppo/val/error5@`_&sO :9pA*  ppo/val/clipfrac43#?z:Yy"x= L9pA*  ppo/val/mean/r!{ ]9pA*  ppo/val/varȑ:&\+K n9pA*  ppo/val/var_explained%_'F ~9pA*  ppo/learning_rateϸ:9+K 9pA*  time/ppo/forward_pass0^>{k.W 9pA*!  time/ppo/compute_rewards:mX1 X:pA*$ " time/ppo/compute_advantagesh\;K,E :pA*  time/ppo/optimize_stepz?JY )7_ ;pA*  time/ppo/calc_stats@`K>N $B+M |;pA*  time/ppo/total?) %6 ;pA*  env/reward_meanoF>@$B+M *'F %6 [$B+M >o(pJ ?.W Ak($B+M 2$B+M 6z'F -pA*  objective/entropy6:(%/m]P -pA*" ppo/mean_non_score_rewardV%6 -pA*  ppo/mean_scores>3.o$B+M -pA*  ppo/std_scores9p:3$^:-9(pJ -pA*  ppo/policy/entropy3Cf:l R)7_  -pA*  ppo/policy/approxkli-])7_ H -pA*  ppo/policy/policyklW6o)7_ -pA*  ppo/policy/clipfracP؜D0_ 0-pA*# ! ppo/policy/advantages_meanp2^\9&sO M-pA*  ppo/returns/meanz=7%6 b-pA*  ppo/returns/var\?@ð[#wC r-pA*  ppo/val/vpred6ӿ+#wC -pA*  ppo/val/errorB@F@]&sO -pA*  ppo/val/clipfracff>G`"x= -pA*  ppo/val/meancX&!{ -pA*  ppo/val/varM:+K -pA*  ppo/val/var_explainedck^w'F 0-pA*  ppo/learning_rateϸ:|l+K \-pA*  time/ppo/forward_passP]>KfX.W t-pA*!  time/ppo/compute_rewards`:a1 -pA*$ " time/ppo/compute_advantages<_;G7o,E 8-pA*  time/ppo/optimize_stepd=?]ۙ])7_ Q-pA*  time/ppo/calc_statsJ>̋$B+M -pA*  time/ppo/totalV?ߐ%6 ,-pA*  env/reward_mean>nY$B+M -pA*  env/reward_std9p:C,"x= "pA *  objective/klCA*'F  "pA *  objective/kl_coefmM>g8U'F 1 "pA *  objective/entropyN:&{/m]P F "pA *" ppo/mean_non_score_reward 7%6 Y "pA *  ppo/mean_scores>%$B+M j "pA *  ppo/std_scores9p::&T-3tx(pJ ('"pA *  ppo/policy/entropyϕ8"x= *"pA *  ppo/val/meanr!{ -"pA *  ppo/val/var3;;'/+K J-"pA *  ppo/val/var_explainedK=%Z'F c-"pA *  ppo/learning_rateϸ:+K v-"pA *  time/ppo/forward_pass\>a6.W -"pA *!  time/ppo/compute_rewards:ڃ1 -"pA *$ " time/ppo/compute_advantagesQ;L7,E P0"pA *  time/ppo/optimize_stepJL?)7_ }0"pA *  time/ppo/calc_statspI>r+$B+M 0"pA *  time/ppo/total6?}I%6 0"pA *  env/reward_mean>-D'F j qA *  objective/entropy :%o/m]P j qA *" ppo/mean_non_score_rewardP|%6 j qA *  ppo/mean_scores>l$B+M k qA *  ppo/std_scores9p:#-\D"x= Bw qA *  ppo/val/mean!{ Rw qA *  ppo/val/var;e+K dw qA *  ppo/val/var_explainedXv="U'F uw qA *  ppo/learning_rateϸ:'+K w qA *  time/ppo/forward_pass_>.W Tx qA *!  time/ppo/compute_rewardsp:DG1 x qA *$ " time/ppo/compute_advantagesa;S,E y qA *  time/ppo/optimize_step8Q?)7_ Yy qA *  time/ppo/calc_stats*H>r6T $B+M y qA *  time/ppo/total?P%6 z qA *  env/reward_mean>6 ON$B+M Yz qA *  env/reward_std9p:_K"x= qA *  objective/kl-CAR['F _qA *  objective/kl_coefM>''F qA *  objective/entropyY:q/m]P qA *" ppo/mean_non_score_rewardj#H%6 qA *  ppo/mean_scoresoF>baZ$B+M qA *  ppo/std_scoresƔ;Q&-"x= qA *  ppo/val/meanlP!{ @qA *  ppo/val/var6Xl'F %qA *  ppo/learning_rateϸ:k+K qA *  time/ppo/forward_pass@ ^>ߡ(.W qA *!  time/ppo/compute_rewards`:n1 JqA *$ " time/ppo/compute_advantagesT;3I!,E qA *  time/ppo/optimize_step|?)7_ qA *  time/ppo/calc_statspJ> "Φ$B+M dqA *  time/ppo/totalz?3%6 qA *  env/reward_meanoF>xf$B+M !qA *  env/reward_stdƔ;O%"x= %b qA *  objective/klBA݄^~'F b qA *  objective/kl_coefM>ǖO$B+M b qA *  ppo/std_scoresƔ;7-6.wC $B+M e qA *  ppo/loss/value2r?&'$B+M h qA *  ppo/loss/total=&f(pJ h qA *  ppo/policy/entropy :Xt)7_ h qA *  ppo/policy/approxkl->})7_ m qA *  ppo/policy/policykl6nb)7_ Pm qA *  ppo/policy/clipfracu.Y0_ m qA *# ! ppo/policy/advantages_mean1[ &sO m qA *  ppo/returns/meanV/%6 m qA *  ppo/returns/var"@ #wC m qA *  ppo/val/vpred*E~lf,#wC m qA *  ppo/val/error ,?&sO n qA *  ppo/val/clipfrac=T2Z"x= n qA *  ppo/val/meanU~{!{ 'n qA *  ppo/val/var=ȶ+K q qA *  ppo/val/var_explainedn>k'F %r qA *  ppo/learning_rateϸ:ݶ +K >r qA *  time/ppo/forward_passB]>8<.W Rr qA *!  time/ppo/compute_rewards:y61 dr qA *$ " time/ppo/compute_advantages a;I.?,E vr qA *  time/ppo/optimize_step~?@Z3)7_ r qA *  time/ppo/calc_statsI>N$B+M r qA *  time/ppo/total(?%6 r qA *  env/reward_meanoF>vS$B+M s qA *  env/reward_stdƔ;/N'F ,qA *  objective/entropy:@gU/m]P ,qA *" ppo/mean_non_score_reward%M@%6 ,qA *  ppo/mean_scoresoF>$B+M ,qA *  ppo/std_scoresƔ;[Y-"x= ,qA *  ppo/val/meanf93~8!{ },qA *  ppo/val/varZ=ry\+K q,qA *  ppo/val/var_explained:>_'F ,qA *  ppo/learning_rateϸ:mu+K ,qA *  time/ppo/forward_pass:`>Ek.W ,qA *!  time/ppo/compute_rewards:F1 ,qA *$ " time/ppo/compute_advantages0^;,E ,qA *  time/ppo/optimize_step??C)7_ `,qA *  time/ppo/calc_statsI>~ $B+M N,qA *  time/ppo/total )?5 %6 ~,qA *  env/reward_meanoF>jD$B+M ,qA *  env/reward_stdƔ; z"x= `8qA*  objective/kl>AA׽`'F `8qA*  objective/kl_coef?N>'F `8qA*  objective/entropy';/m]P `8qA*" ppo/mean_non_score_rewardSy8%6 `8qA*  ppo/mean_scoresoF>/ʑ$B+M `8qA*  ppo/std_scoresƔ;=-(pJ Hg8qA*  ppo/policy/entropy: !)7_ dg8qA*  ppo/policy/approxkl[033)7_ yg8qA*  ppo/policy/policyklK7x)7_ g8qA*  ppo/policy/clipfrac0_ i8qA*# ! ppo/policy/advantages_meangfg&sO i8qA*  ppo/returns/meanu)@%6 i8qA*  ppo/returns/var*@))#wC j8qA*  ppo/val/vpredξ&I#wC j8qA*  ppo/val/errortz?&sO ,j8qA*  ppo/val/clipfrac>R"x= >j8qA*  ppo/val/mean(u v!{ j8qA*  ppo/val/varWһ>N:m+K j8qA*  ppo/val/var_explained2?H'F Rk8qA*  ppo/learning_rateϸ:{+K k8qA*  time/ppo/forward_pass^>.W l8qA*!  time/ppo/compute_rewards:sh1 `l8qA*$ " time/ppo/compute_advantagesZ;,E l8qA*  time/ppo/optimize_step?͙f)7_ m8qA*  time/ppo/calc_stats I>z$B+M fm8qA*  time/ppo/totala?%6 m8qA*  env/reward_meanoF>[M>$B+M n8qA*  env/reward_stdƔ;׿'"x= .|DqA*  objective/klw A§'F |DqA*  objective/kl_coef`N>*.'F |DqA*  objective/entropy>/m]P |DqA*" ppo/mean_non_score_rewardR%6 |DqA*  ppo/mean_scores㯚>$r$B+M |DqA*  ppo/std_scores'y=9q-$B+M |DqA*  ppo/loss/value)>%M$B+M }DqA*  ppo/loss/total6{=蹩(pJ I}DqA*  ppo/policy/entropy;;`+)7_ c}DqA*  ppo/policy/approxkl>9{+֚.W }DqA*!  time/ppo/compute_rewards0: :1 }DqA*$ " time/ppo/compute_advantagesDX;0. ,E 7 }DqA*  time/ppo/optimize_step.?#l')7_ e }DqA*  time/ppo/calc_statsJ>螘$B+M O}DqA*  time/ppo/totalA?=%6 |}DqA*  env/reward_mean㯚>+C$B+M }DqA*  env/reward_std'y=?f