diff --git a/ci/jenkins_tests/run_rllib_tests.sh b/ci/jenkins_tests/run_rllib_tests.sh
index 273c6fbcd..5f433e2b2 100644
--- a/ci/jenkins_tests/run_rllib_tests.sh
+++ b/ci/jenkins_tests/run_rllib_tests.sh
@@ -288,6 +288,9 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
 
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_local.py
+
+docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_reproducibility.py
 
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_dependency.py
diff --git a/python/ray/rllib/agents/trainer.py b/python/ray/rllib/agents/trainer.py
index 965014c32..1d707649d 100644
--- a/python/ray/rllib/agents/trainer.py
+++ b/python/ray/rllib/agents/trainer.py
@@ -193,6 +193,10 @@ COMMON_CONFIG = {
     # Minimum env steps to optimize for per train call. This value does
     # not affect learning, only the length of iterations.
     "timesteps_per_iteration": 0,
+    # This argument, in conjunction with worker_index, sets the random seed of
+    # each worker, so that identically configured trials will have identical
+    # results. This makes experiments reproducible.
+    "seed": None,
 
     # === Offline Datasets ===
     # Specify how to generate experiences:
diff --git a/python/ray/rllib/evaluation/rollout_worker.py b/python/ray/rllib/evaluation/rollout_worker.py
index 410718c0e..d3e97ec26 100644
--- a/python/ray/rllib/evaluation/rollout_worker.py
+++ b/python/ray/rllib/evaluation/rollout_worker.py
@@ -2,6 +2,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import random
+import numpy as np
 import gym
 import logging
 import pickle
@@ -130,6 +132,7 @@ class RolloutWorker(EvaluatorInterface):
                  remote_worker_envs=False,
                  remote_env_batch_wait_ms=0,
                  soft_horizon=False,
+                 seed=None,
                  _fake_sampler=False):
         """Initialize a rollout worker.
 
@@ -215,6 +218,8 @@ class RolloutWorker(EvaluatorInterface):
                 step / reset and model inference perf.
             soft_horizon (bool): Calculate rewards but don't reset the
                 environment when the horizon is hit.
+            seed (int): Seed python's random, np, and tf with this value so
+                that each worker's exploration is unique yet reproducible.
             _fake_sampler (bool): Use a fake (inf speed) sampler for testing.
         """
 
@@ -292,6 +297,10 @@ class RolloutWorker(EvaluatorInterface):
         self.tf_sess = None
         policy_dict = _validate_and_canonicalize(policy, self.env)
         self.policies_to_train = policies_to_train or list(policy_dict.keys())
+        # set numpy and python seed
+        if seed is not None:
+            np.random.seed(seed)
+            random.seed(seed)
         if _has_tensorflow_graph(policy_dict):
             if (ray.is_initialized()
                     and ray.worker._mode() != ray.worker.LOCAL_MODE
@@ -309,6 +318,9 @@ class RolloutWorker(EvaluatorInterface):
                         config=tf.ConfigProto(
                             gpu_options=tf.GPUOptions(allow_growth=True)))
                 with self.tf_sess.as_default():
+                    # set graph-level seed
+                    if seed is not None:
+                        tf.set_random_seed(seed)
                     self.policy_map, self.preprocessors = \
                         self._build_policy_map(policy_dict, policy_config)
         else:
diff --git a/python/ray/rllib/evaluation/worker_set.py b/python/ray/rllib/evaluation/worker_set.py
index 90d3c13c2..58100d8b5 100644
--- a/python/ray/rllib/evaluation/worker_set.py
+++ b/python/ray/rllib/evaluation/worker_set.py
@@ -211,4 +211,6 @@ class WorkerSet(object):
             remote_worker_envs=config["remote_worker_envs"],
             remote_env_batch_wait_ms=config["remote_env_batch_wait_ms"],
             soft_horizon=config["soft_horizon"],
+            seed=(config["seed"] + worker_index)
+            if config["seed"] is not None else None,
             _fake_sampler=config.get("_fake_sampler", False))
diff --git a/python/ray/rllib/tests/test_reproducibility.py b/python/ray/rllib/tests/test_reproducibility.py
new file mode 100644
index 000000000..1cc034759
--- /dev/null
+++ b/python/ray/rllib/tests/test_reproducibility.py
@@ -0,0 +1,68 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+
+import ray
+from ray.rllib.agents.dqn import DQNTrainer
+from ray.tune.registry import register_env
+import numpy as np
+import gym
+
+
+class TestReproducibility(unittest.TestCase):
+    """Check that the "seed" config makes DQN training repeatable."""
+
+    def testReproducingTrajectory(self):
+        """Same seed must reproduce rewards; a different seed must not."""
+
+        class PickLargest(gym.Env):
+            def __init__(self):
+                self.observation_space = gym.spaces.Box(
+                    low=float("-inf"), high=float("inf"), shape=(4, ))
+                self.action_space = gym.spaces.Discrete(4)
+
+            def reset(self, **kwargs):
+                self.obs = np.random.randn(4)
+                return self.obs
+
+            def step(self, action):
+                # One-step episode: reward is the chosen observation entry.
+                reward = self.obs[action]
+                return self.obs, reward, True, {}
+
+        def env_creator(env_config):
+            return PickLargest()
+
+        trajs = list()
+        for trial in range(3):
+            ray.init()
+            try:
+                register_env("PickLargest", env_creator)
+                agent = DQNTrainer(
+                    env="PickLargest",
+                    config={"seed": 666 if trial in [0, 1] else 999})
+
+                trajectory = list()
+                for _ in range(8):
+                    r = agent.train()
+                    trajectory.append(r["episode_reward_max"])
+                    trajectory.append(r["episode_reward_min"])
+                trajs.append(trajectory)
+            finally:
+                # Always stop ray so a failed trial cannot leak its cluster.
+                ray.shutdown()
+
+        # trial0 and trial1 use same seed and thus
+        # expect identical trajectories.
+        self.assertEqual(trajs[0], trajs[1])
+
+        # trial1 and trial2 use different seeds and thus
+        # most rewards tend to be different.
+        diff_cnt = sum(1 for a, b in zip(trajs[1], trajs[2]) if a != b)
+        self.assertTrue(diff_cnt > 8)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)