diff --git a/python/ray/tune/utils/mock.py b/python/ray/tune/utils/mock.py
index 1ec925519..cc92fae26 100644
--- a/python/ray/tune/utils/mock.py
+++ b/python/ray/tune/utils/mock.py
@@ -102,11 +102,11 @@ class FailureInjectorCallback(Callback):
     """Adds random failure injection to the TrialExecutor."""
 
     def __init__(self,
-                 config_path="/home/ubuntu/ray_bootstrap_config.yaml",
+                 config_path="~/ray_bootstrap_config.yaml",
                  probability=0.1,
                  disable=False):
         self.probability = probability
-        self.config_path = config_path
+        self.config_path = os.path.expanduser(config_path)
         self.disable = disable
 
     def on_step_begin(self, **info):
diff --git a/release/long_running_distributed_tests/cluster.yaml b/release/long_running_distributed_tests/cluster.yaml
index a2ed252a4..152642d55 100644
--- a/release/long_running_distributed_tests/cluster.yaml
+++ b/release/long_running_distributed_tests/cluster.yaml
@@ -1,64 +1,36 @@
-# This file is generated by `ray project create`.
-
-# A unique identifier for the head node and workers of this cluster.
 cluster_name: long-running-distributed-tests
 
-# The minimum number of workers nodes to launch in addition to the head
-# node. This number should be >= 0.
 min_workers: 3
-# The maximum number of workers nodes to launch in addition to the head
-# node. This takes precedence over min_workers. min_workers defaults to 0.
 max_workers: 3
 
-# The autoscaler will scale up the cluster to this target fraction of resource
-# usage. For example, if a cluster of 10 nodes is 100% busy and
-# target_utilization is 0.8, it would resize the cluster to 13. This fraction
-# can be decreased to increase the aggressiveness of upscaling.
-# This value must be less than 1.0 for scaling to happen.
 target_utilization_fraction: 0.8
+idle_timeout_minutes: 15
 
-# If a node is idle for this many minutes, it will be removed.
-idle_timeout_minutes: 5
+docker:
+    image: anyscale/ray-ml:latest-gpu
+    container_name: ray_container
+    pull_before_run: True
 
-# Cloud-provider specific configuration.
 provider:
     type: aws
     region: us-west-2
     availability_zone: us-west-2a
     cache_stopped_nodes: False
 
-# How Ray will authenticate with newly launched nodes.
 auth:
     ssh_user: ubuntu
 
-# By default Ray creates a new private keypair, but you can also use your own.
-# If you do so, make sure to also set "KeyName" in the head and worker node
-# configurations below.
-#    ssh_private_key: /path/to/your/key.pem
-
-# Provider-specific config for the head node, e.g. instance type. By default
-# Ray will auto-configure unspecified fields such as SubnetId and KeyName.
-# For more documentation on available fields, see:
-# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
 head_node:
     InstanceType: g3.8xlarge
-    ImageId: ami-0888a3b5189309429  # DLAMI 7/1/19
-    BlockDeviceMappings:
-        - DeviceName: /dev/sda1
-          Ebs:
-              VolumeSize: 150
 
 worker_nodes:
   InstanceType: g3.8xlarge
-  ImageId: ami-0888a3b5189309429  # DLAMI 7/1/19
-  BlockDeviceMappings:
-        - DeviceName: /dev/sda1
-          Ebs:
-              VolumeSize: 150
   InstanceMarketOptions:
     MarketType: spot
 
-setup_commands: []
+setup_commands:
+  - apt-get install -y libglib2.0-0 libcudnn7=7.6.5.32-1+cuda10.1
+  - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-1.1.0.dev0-cp37-cp37m-manylinux2014_x86_64.whl
 
 # Command to start ray on the head node. You don't need to change this.
 head_start_ray_commands:
diff --git a/release/long_running_distributed_tests/run.sh b/release/long_running_distributed_tests/run.sh
index 386416a08..d0fa4a6c4 100755
--- a/release/long_running_distributed_tests/run.sh
+++ b/release/long_running_distributed_tests/run.sh
@@ -42,7 +42,7 @@ echo "commit: $commit"
 echo "branch: $ray_branch"
 echo "workload: $workload"
 
-wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux2014_x86_64.whl"
+wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp37-cp37m-manylinux2014_x86_64.whl"
 
 conda uninstall -y terminado || true
 pip install -U pip
diff --git a/release/long_running_distributed_tests/workloads/pytorch_pbt_failure.py b/release/long_running_distributed_tests/workloads/pytorch_pbt_failure.py
index 0fa94cb44..2451fe4a2 100644
--- a/release/long_running_distributed_tests/workloads/pytorch_pbt_failure.py
+++ b/release/long_running_distributed_tests/workloads/pytorch_pbt_failure.py
@@ -13,7 +13,7 @@ from ray.tune import CLIReporter
 from ray.tune.schedulers import PopulationBasedTraining
 from ray.tune.utils.util import merge_dicts
 from ray.tune.utils.mock import FailureInjectorCallback
-from ray.util.sgd.torch import TorchTrainer
+from ray.util.sgd.torch import TorchTrainer, TrainingOperator
 from ray.util.sgd.torch.resnet import ResNet18
 from ray.util.sgd.utils import BATCH_SIZE
 
@@ -74,13 +74,17 @@ def optimizer_creator(model, config):
         momentum=config.get("momentum", 0.9))
 
 
-ray.init(address="auto" if not args.smoke_test else None, _log_to_driver=True)
+ray.init(address="auto" if not args.smoke_test else None, log_to_driver=True)
 num_training_workers = 1 if args.smoke_test else 3
-TorchTrainable = TorchTrainer.as_trainable(
+
+CustomTrainingOperator = TrainingOperator.from_creators(
     model_creator=ResNet18,
-    data_creator=cifar_creator,
     optimizer_creator=optimizer_creator,
-    loss_creator=nn.CrossEntropyLoss,
+    data_creator=cifar_creator,
+    loss_creator=nn.CrossEntropyLoss)
+
+TorchTrainable = TorchTrainer.as_trainable(
+    training_operator_cls=CustomTrainingOperator,
     initialization_hook=initialization_hook,
     num_workers=num_training_workers,
     config={
diff --git a/release/rllib_tests/regression_tests/cluster.yaml b/release/rllib_tests/regression_tests/cluster.yaml
index d0aa94e8c..6a80c80d8 100644
--- a/release/rllib_tests/regression_tests/cluster.yaml
+++ b/release/rllib_tests/regression_tests/cluster.yaml
@@ -3,6 +3,11 @@ cluster_name: ray-rllib-regression-tests
 min_workers: 0
 max_workers: 0
 
+docker:
+    image: anyscale/ray-ml:latest-gpu
+    container_name: ray_container
+    pull_before_run: True
+
 # Cloud-provider specific configuration.
 provider:
     type: aws
@@ -16,24 +21,18 @@ auth:
 
 head_node:
     InstanceType: p3.16xlarge
-    ImageId: ami-07728e9e2742b0662 # Deep Learning AMI (Ubuntu 16.04)
-
-    # Set primary volume to 25 GiB
-    BlockDeviceMappings:
-        - DeviceName: /dev/sda1
-          Ebs:
-              VolumeSize: 100
-
 
 # List of shell commands to run to set up nodes.
-setup_commands: []
+setup_commands:
+    - apt-get install -y libglib2.0-0 libcudnn7=7.6.5.32-1+cuda10.1
+    - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-1.1.0.dev0-cp37-cp37m-manylinux2014_x86_64.whl
 
 # Command to start ray on the head node. You don't need to change this.
 head_start_ray_commands:
-    - source activate tensorflow_p36 && ray stop
-    - ulimit -n 65536; source activate tensorflow_p36 && OMP_NUM_THREADS=1 ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml
+    - ray stop
+    - ulimit -n 65536; OMP_NUM_THREADS=1 ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml
 
 # Command to start ray on worker nodes. You don't need to change this.
 worker_start_ray_commands:
-    - source activate tensorflow_p36 && ray stop
-    - ulimit -n 65536; source activate tensorflow_p36 && OMP_NUM_THREADS=1 ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
+    - ray stop
+    - ulimit -n 65536; OMP_NUM_THREADS=1 ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
diff --git a/release/rllib_tests/regression_tests/run.sh b/release/rllib_tests/regression_tests/run.sh
index 4a692c2e5..abbabcb04 100755
--- a/release/rllib_tests/regression_tests/run.sh
+++ b/release/rllib_tests/regression_tests/run.sh
@@ -41,15 +41,18 @@ echo "commit: $commit"
 echo "branch: $ray_branch"
 echo "workload: ignored"
 
-wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux2014_x86_64.whl"
+wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp37-cp37m-manylinux2014_x86_64.whl"
 
 conda uninstall -y terminado
-source activate tensorflow_p36 && pip install -U pip
-source activate tensorflow_p36 && pip install -U "$wheel"
-source activate tensorflow_p36 && pip install "ray[rllib]" "ray[debug]"
-source activate tensorflow_p36 && pip install torch==1.6 torchvision
-source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0
+pip install -U pip
+pip install -U "$wheel"
+pip install "ray[rllib]" "ray[debug]"
+pip install terminado
+pip install torch==1.6 torchvision
+pip install boto3==1.4.8 cython==0.29.0
+
 # Run tf learning tests.
-source activate tensorflow_p36 && rllib train -f compact-regression-tests-tf.yaml
+rllib train -f compact-regression-tests-tf.yaml
+
 # Run torch learning tests.
-source activate tensorflow_p36 && rllib train -f compact-regression-tests-torch.yaml
+rllib train -f compact-regression-tests-torch.yaml
diff --git a/release/rllib_tests/stress_tests/cluster.yaml b/release/rllib_tests/stress_tests/cluster.yaml
index e4fd2d26d..e31ecbdd9 100644
--- a/release/rllib_tests/stress_tests/cluster.yaml
+++ b/release/rllib_tests/stress_tests/cluster.yaml
@@ -1,105 +1,46 @@
-####################################################################
-# All nodes in this cluster will auto-terminate in 1 hour
-####################################################################
-
-# An unique identifier for the head node and workers of this cluster.
 cluster_name: ray-rllib-stress-tests
 
-# The minimum number of workers nodes to launch in addition to the head
-# node. This number should be >= 0.
 min_workers: 9
-
-# The maximum number of workers nodes to launch in addition to the head
-# node. This takes precedence over min_workers.
 max_workers: 9
 
-# The autoscaler will scale up the cluster to this target fraction of resource
-# usage. For example, if a cluster of 10 nodes is 100% busy and
-# target_utilization is 0.8, it would resize the cluster to 13. This fraction
-# can be decreased to increase the aggressiveness of upscaling.
-# This value must be less than 1.0 for scaling to happen.
 target_utilization_fraction: 0.8
+idle_timeout_minutes: 15
 
-# If a node is idle for this many minutes, it will be removed.
-idle_timeout_minutes: 5
+docker:
+    image: anyscale/ray-ml:latest-gpu
+    container_name: ray_container
+    pull_before_run: True
 
-# Cloud-provider specific configuration.
 provider:
     type: aws
     region: us-west-2
     availability_zone: us-west-2a
     cache_stopped_nodes: False
 
-# How Ray will authenticate with newly launched nodes.
 auth:
     ssh_user: ubuntu
-# By default Ray creates a new private keypair, but you can also use your own.
-# If you do so, make sure to also set "KeyName" in the head and worker node
-# configurations below.
-#    ssh_private_key: /path/to/your/key.pem
 
-# Provider-specific config for the head node, e.g. instance type. By default
-# Ray will auto-configure unspecified fields such as SubnetId and KeyName.
-# For more documentation on available fields, see:
-# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
 head_node:
     InstanceType: p3.16xlarge
-    ImageId: ami-07728e9e2742b0662 # Deep Learning AMI (Ubuntu 16.04)
 
-    # Set primary volume to 25 GiB
-    BlockDeviceMappings:
-        - DeviceName: /dev/sda1
-          Ebs:
-              VolumeSize: 100
-
-    # Additional options in the boto docs.
-
-# Provider-specific config for worker nodes, e.g. instance type. By default
-# Ray will auto-configure unspecified fields such as SubnetId and KeyName.
-# For more documentation on available fields, see:
-# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
 worker_nodes:
-    InstanceType: m4.16xlarge
-    ImageId: ami-07728e9e2742b0662 # Deep Learning AMI (Ubuntu 16.04)
+    InstanceType: m5.16xlarge
 
-
-    # Set primary volume to 25 GiB
-    BlockDeviceMappings:
-        - DeviceName: /dev/sda1
-          Ebs:
-              VolumeSize: 100
-
-    # Run workers on spot by default. Comment this out to use on-demand.
-    # InstanceMarketOptions:
-    #     MarketType: spot
-        # Additional options can be found in the boto docs, e.g.
-        #   SpotOptions:
-        #       MaxPrice: MAX_HOURLY_PRICE
-
-    # Additional options in the boto docs.
-
-# Files or directories to copy to the head and worker nodes. The format is a
-# dictionary from REMOTE_PATH: LOCAL_PATH, e.g.
 file_mounts: {
 #    "/path1/on/remote/machine": "/path1/on/local/machine",
 #    "/path2/on/remote/machine": "/path2/on/local/machine",
 }
 
-# List of shell commands to run to set up nodes.
-setup_commands: []
+setup_commands:
+    - apt-get install -y libglib2.0-0 libcudnn7=7.6.5.32-1+cuda10.1
+    - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-1.1.0.dev0-cp37-cp37m-manylinux2014_x86_64.whl
 
-# Custom commands that will be run on the head node after common setup.
-head_setup_commands: []
-
-# Custom commands that will be run on worker nodes after common setup.
 worker_setup_commands: []
 
-# Command to start ray on the head node. You don't need to change this.
 head_start_ray_commands:
-    - source activate tensorflow_p36 && ray stop
-    - ulimit -n 65536; source activate tensorflow_p36 && OMP_NUM_THREADS=1 ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml
+    - ray stop
+    - ulimit -n 65536; OMP_NUM_THREADS=1 ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml
 
-# Command to start ray on worker nodes. You don't need to change this.
 worker_start_ray_commands:
-    - source activate tensorflow_p36 && ray stop
-    - ulimit -n 65536; source activate tensorflow_p36 && OMP_NUM_THREADS=1 ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
+    - ray stop
+    - ulimit -n 65536; OMP_NUM_THREADS=1 ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
diff --git a/release/rllib_tests/stress_tests/run.sh b/release/rllib_tests/stress_tests/run.sh
index b038de9fb..704d013a7 100755
--- a/release/rllib_tests/stress_tests/run.sh
+++ b/release/rllib_tests/stress_tests/run.sh
@@ -42,14 +42,14 @@ echo "commit: $commit"
 echo "branch: $ray_branch"
 echo "workload: ignored"
 
-wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux2014_x86_64.whl"
+wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp37-cp37m-manylinux2014_x86_64.whl"
 
 conda uninstall -y terminado
-source activate tensorflow_p36 && pip install -U pip
-source activate tensorflow_p36 && pip install -U "$wheel"
-source activate tensorflow_p36 && pip install "ray[rllib]" "ray[debug]"
-source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0
-source activate tensorflow_p36
+pip install -U pip
+pip install -U "$wheel"
+pip install "ray[rllib]" "ray[debug]"
+pip install terminado
+pip install boto3==1.4.8 cython==0.29.0
 
 python3 wait_cluster.py
 
diff --git a/release/rllib_tests/unit_gpu_tests/cluster.yaml b/release/rllib_tests/unit_gpu_tests/cluster.yaml
index 2030bb2ac..23e59b788 100644
--- a/release/rllib_tests/unit_gpu_tests/cluster.yaml
+++ b/release/rllib_tests/unit_gpu_tests/cluster.yaml
@@ -3,6 +3,11 @@ cluster_name: ray-rllib-regression-tests
 min_workers: 0
 max_workers: 0
 
+docker:
+    image: anyscale/ray-ml:latest-gpu
+    container_name: ray_container
+    pull_before_run: True
+
 # Cloud-provider specific configuration.
 provider:
     type: aws
@@ -16,7 +21,6 @@ auth:
 
 head_node:
     InstanceType: p2.xlarge  # Cheaper 1GPU K80 instance
-    ImageId: ami-07728e9e2742b0662 # Deep Learning AMI (Ubuntu 16.04)
 
     # Set primary volume to 25 GiB
     BlockDeviceMappings:
@@ -26,14 +30,15 @@ head_node:
 
 
 # List of shell commands to run to set up nodes.
-setup_commands: []
+setup_commands:
+    - apt-get install -y libglib2.0-0 libcudnn7=7.6.5.32-1+cuda10.1 curl unzip gcc python3-dev
 
 # Command to start ray on the head node. You don't need to change this.
 head_start_ray_commands:
-    - source activate tensorflow_p36 && ray stop
-    - ulimit -n 65536; source activate tensorflow_p36 && OMP_NUM_THREADS=1 ray start --head --redis-port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml
+    - ray stop
+    - ulimit -n 65536; OMP_NUM_THREADS=1 ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml
 
 # Command to start ray on worker nodes. You don't need to change this.
 worker_start_ray_commands:
-    - source activate tensorflow_p36 && ray stop
-    - ulimit -n 65536; source activate tensorflow_p36 && OMP_NUM_THREADS=1 ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
+    - ray stop
+    - ulimit -n 65536; OMP_NUM_THREADS=1 ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
diff --git a/release/rllib_tests/unit_gpu_tests/requirements.txt b/release/rllib_tests/unit_gpu_tests/requirements.txt
index a86eb47e6..e63556bc1 100644
--- a/release/rllib_tests/unit_gpu_tests/requirements.txt
+++ b/release/rllib_tests/unit_gpu_tests/requirements.txt
@@ -4,3 +4,4 @@ torch==1.6+cu101
 torchvision==0.7.0+cu101
 boto3==1.4.8
 cython==0.29.0
+pytest
diff --git a/release/rllib_tests/unit_gpu_tests/run.sh b/release/rllib_tests/unit_gpu_tests/run.sh
index db468e789..ff93e5164 100755
--- a/release/rllib_tests/unit_gpu_tests/run.sh
+++ b/release/rllib_tests/unit_gpu_tests/run.sh
@@ -42,12 +42,33 @@ echo "commit: $commit"
 echo "branch: $ray_branch"
 echo "workload: ignored"
 
-wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux2014_x86_64.whl"
+wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp37-cp37m-manylinux2014_x86_64.whl"
 
 conda uninstall -y terminado
-source activate tensorflow_p36 && pip install -U pip
-source activate tensorflow_p36 && pip install -U "$wheel"
+pip install -U pip
+pip install -U "$wheel"
+pip install -U pytest
+pip install terminado
+pip install "torch>=1.6" torchvision  # quoted: a bare '>' would redirect stdout to a file named '=1.6'
+pip install -U tensorflow-gpu
+
+if [ -z "$commit" ]; then
+  cob="origin/$ray_branch"
+else
+  cob="$commit"
+fi
+
+git clone https://github.com/ray-project/ray.git ray
+pushd ray || exit 1  # abort if the clone is missing rather than running git/bazel in the wrong directory
+git checkout "$cob"
+
+bash ./ci/travis/install-bazel.sh
+BAZEL_PATH=$HOME/bin/bazel
 
 # Run all test cases, but with a forced num_gpus=1.
 # TODO: (sven) chose correct dir and run over all RLlib tests and example scripts!
-source activate tensorflow_p36 && export RAY_FORCE_NUM_GPUS=1 && cd ~ && python -m pytest test_attention_net_learning.py
+export RLLIB_NUM_GPUS=1 && $BAZEL_PATH test --config="ci $(./scripts/bazel_export_options)" --build_tests_only --test_tag_filters=examples_A,examples_B --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
+export RLLIB_NUM_GPUS=1 && $BAZEL_PATH test --config="ci $(./scripts/bazel_export_options)" --build_tests_only --test_tag_filters=examples_C,examples_D --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
+export RLLIB_NUM_GPUS=1 && $BAZEL_PATH test --config="ci $(./scripts/bazel_export_options)" --build_tests_only --test_tag_filters=examples_E,examples_F,examples_G,examples_H,examples_I,examples_J,examples_K,examples_L,examples_M,examples_N,examples_O,examples_P --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1  rllib/...
+export RLLIB_NUM_GPUS=1 && $BAZEL_PATH test --config="ci $(./scripts/bazel_export_options)" --build_tests_only --test_tag_filters=examples_Q,examples_R,examples_S,examples_T,examples_U,examples_V,examples_W,examples_X,examples_Y,examples_Z --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
+popd || true