From ee36effd8e34b56290d9873e80525939556e9764 Mon Sep 17 00:00:00 2001
From: Eric Liang
Date: Tue, 23 Jan 2018 10:31:19 -0800
Subject: [PATCH] [rllib] Add n-step Q learning for DQN (#1439)

* n-step
* add sample adjustm
* Oops
* fix nstep
* metric adjustment
* Sat Jan 20 23:30:34 PST 2018
* Sun Jan 21 16:40:46 PST 2018
* Mon Jan 22 22:24:57 PST 2018
---
 .travis.yml                              |  1 +
 doc/source/rllib.rst                     |  2 +-
 python/ray/rllib/dqn/dqn.py              |  5 ++-
 python/ray/rllib/dqn/dqn_evaluator.py    | 44 ++++++++++++++++++++++--
 python/ray/rllib/dqn/models.py           |  3 +-
 python/ray/rllib/test/test_evaluators.py | 18 +++++++++-
 python/ray/rllib/utils/sampler.py        |  1 +
 python/ray/tune/trial.py                 |  3 ++
 python/ray/tune/trial_runner.py          |  9 +++--
 9 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index becf1cf5f..4a1985666 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -130,6 +130,7 @@ script:
   - python -m pytest python/ray/rllib/test/test_catalog.py
   - python -m pytest python/ray/rllib/test/test_filters.py
   - python -m pytest python/ray/rllib/test/test_optimizers.py
+  - python -m pytest python/ray/rllib/test/test_evaluators.py
 
 deploy:
   provider: s3
diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst
index e482a5739..1a8b86236 100644
--- a/doc/source/rllib.rst
+++ b/doc/source/rllib.rst
@@ -184,7 +184,7 @@ environment to be configured. The return type should be an `OpenAI gym.Env