Fix PPO Pendulum example (#165)

albheim · web-flow · commit bc64e422fe72 · 2021-04-07T22:52:04.000+08:00
* fix action_space name conflict (a local variable shadowed the action_space function)

* add PPO Pendulum experiment to the tests
diff --git a/src/experiments/rl_envs/JuliaRL_PPO_Pendulum.jl b/src/experiments/rl_envs/JuliaRL_PPO_Pendulum.jl
@@ -14,16 +14,16 @@ function RLCore.Experiment(
     lg = TBLogger(joinpath(save_dir, "tb_log"), min_level = Logging.Info)
     rng = StableRNG(seed)
     inner_env = PendulumEnv(T = Float32, rng = rng)
-    action_space = action_space(inner_env)
-    low = action_space.low
-    high = action_space.high
+    A = action_space(inner_env)
+    low = A.left
+    high = A.right
     ns = length(state(inner_env))
 
     N_ENV = 8
     UPDATE_FREQ = 2048
     env = MultiThreadEnv([
         PendulumEnv(T = Float32, rng = StableRNG(hash(seed + i))) |>
-        ActionTransformedEnv(x -> clamp(x * 2, low, high)) for i in 1:N_ENV
+        env -> ActionTransformedEnv(env, action_mapping = x -> clamp(x * 2, low, high)) for i in 1:N_ENV
     ])
 
     init = glorot_uniform(rng)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -75,7 +75,7 @@ end
                     mean(Iterators.flatten(res.hook[1].rewards))
             end
 
-            for method in (:DDPG, :SAC, :TD3)
+            for method in (:DDPG, :SAC, :TD3, :PPO)
                 res = run(
                     Experiment(
                         Val(:JuliaRL),