From 8e76796fd0535d8fbc9934e7f6604a1abcfb119e Mon Sep 17 00:00:00 2001
From: Barak Michener <barakmich@anyscale.com>
Date: Fri, 7 Aug 2020 16:49:49 -0700
Subject: [PATCH] ci: Redo `format.sh --all` script & backfill lint fixes
 (#9956)

---
 ci/travis/check_typing_bad.py                 |   1 -
 ci/travis/check_typing_good.py                |   1 -
 ci/travis/ci.sh                               |   2 +-
 ci/travis/format.sh                           | 100 ++++++++++-------
 cpp/include/ray/api.h                         |  14 ++-
 cpp/include/ray/api/arguments.h               |   1 +
 cpp/include/ray/api/object_ref.h              |   3 +-
 cpp/include/ray/api/ray_runtime.h             |   3 +-
 cpp/include/ray/api/serializer.h              |   1 +
 cpp/include/ray/api/wait_result.h             |   1 +
 cpp/src/ray/api.cc                            |   2 +-
 cpp/src/ray/runtime/abstract_ray_runtime.cc   |   5 +-
 cpp/src/ray/runtime/abstract_ray_runtime.h    |   5 +-
 cpp/src/ray/runtime/local_mode_ray_runtime.cc |   1 +
 cpp/src/ray/runtime/local_mode_ray_runtime.h  |   1 +
 cpp/src/ray/runtime/native_ray_runtime.cc     |   1 +
 cpp/src/ray/runtime/native_ray_runtime.h      |   1 +
 .../runtime/object/local_mode_object_store.cc |   6 +-
 .../runtime/object/local_mode_object_store.h  |   2 +-
 .../ray/runtime/object/native_object_store.cc |   6 +-
 .../ray/runtime/object/native_object_store.h  |   2 +-
 cpp/src/ray/runtime/object/object_store.h     |   4 +-
 cpp/src/ray/runtime/task/invocation_spec.h    |   1 +
 .../runtime/task/local_mode_task_submitter.cc |   6 +-
 .../runtime/task/local_mode_task_submitter.h  |   1 +
 .../ray/runtime/task/native_task_submitter.cc |   2 +
 .../ray/runtime/task/native_task_submitter.h  |   1 +
 cpp/src/ray/runtime/task/task_executor.cc     |   3 +-
 cpp/src/ray/runtime/task/task_executor.h      |   1 +
 cpp/src/ray/runtime/task/task_submitter.h     |   3 +-
 cpp/src/ray/test/api_test.cc                  |   1 +
 cpp/src/ray/test/slow_function_test.cc        |   1 +
 doc/examples/newsreader/server.py             |  43 ++++----
 doc/examples/streaming/streaming.py           |  32 +++---
 doc/source/tune/_tutorials/tune-sklearn.py    |   5 +-
 python/ray/includes/metric.pxd                |  12 +--
 rllib/agents/ddpg/ddpg_tf_policy.py           |  27 ++---
 rllib/agents/ddpg/ddpg_torch_policy.py        |   3 +-
 rllib/agents/ddpg/tests/test_ddpg.py          |   8 +-
 rllib/agents/dqn/distributional_q_tf_model.py |  10 +-
 rllib/agents/dqn/dqn_tf_policy.py             |  15 ++-
 rllib/agents/dqn/dqn_torch_model.py           |  31 +++---
 rllib/agents/dqn/dqn_torch_policy.py          |  12 +--
 rllib/agents/impala/vtrace_tf.py              |  20 ++--
 rllib/agents/impala/vtrace_tf_policy.py       |  12 +--
 rllib/agents/marwil/marwil_tf_policy.py       |  18 ++--
 rllib/agents/marwil/tests/test_marwil.py      |   4 +-
 rllib/agents/mbmpo/model_ensemble.py          |   1 +
 rllib/agents/pg/pg_tf_policy.py               |   4 +-
 rllib/agents/pg/tests/test_pg.py              |  13 ++-
 rllib/agents/ppo/tests/test_ppo.py            |   1 -
 rllib/agents/qmix/qmix_policy.py              |   6 +-
 rllib/agents/sac/sac_tf_policy.py             |  63 ++++++-----
 rllib/evaluation/sample_batch_builder.py      |   2 +
 rllib/evaluation/sample_collector.py          |   9 +-
 rllib/evaluation/sampler.py                   |  59 +++++-----
 rllib/examples/env/random_env.py              |   6 +-
 rllib/examples/policy/random_policy.py        |   6 +-
 .../rollout_worker_custom_workflow.py         |   4 +-
 rllib/execution/train_ops.py                  |  16 ++-
 rllib/models/tf/layers/__init__.py            |   7 +-
 rllib/models/tf/layers/noisy_layer.py         |   9 +-
 rllib/models/tf/visionnet.py                  |   6 +-
 rllib/models/torch/modules/noisy_layer.py     |  18 ++--
 rllib/models/torch/visionnet.py               |   5 +-
 rllib/policy/dynamic_tf_policy.py             |  60 +++++------
 rllib/policy/eager_tf_policy.py               |   4 +-
 rllib/policy/policy.py                        |  25 ++---
 rllib/policy/sample_batch.py                  |  32 +++---
 .../policy/tests/test_trajectory_view_api.py  |  19 ++--
 rllib/policy/tf_policy.py                     |  34 +++---
 rllib/policy/tf_policy_template.py            |  98 ++++++++---------
 rllib/policy/torch_policy_template.py         | 101 ++++++++----------
 rllib/tests/test_catalog.py                   |   4 +-
 rllib/tests/test_eager_support.py             |  10 +-
 rllib/tests/test_multi_agent_pendulum.py      |  52 ++++-----
 rllib/tests/test_rollout_worker.py            |   2 +-
 rllib/tests/test_supported_multi_agent.py     |   5 +-
 rllib/tests/test_supported_spaces.py          |   5 +-
 rllib/utils/exploration/epsilon_greedy.py     |   4 +-
 rllib/utils/exploration/gaussian_noise.py     |   2 +-
 .../exploration/tests/test_parameter_noise.py |  15 +--
 rllib/utils/framework.py                      |  10 +-
 rllib/utils/schedules/tests/test_schedules.py |   3 +-
 rllib/utils/spaces/flexdict.py                |   1 +
 rllib/utils/test_utils.py                     |   4 +-
 rllib/utils/tf_ops.py                         |   4 +-
 src/ray/common/common_protocol.h              |   1 +
 src/ray/common/id_test.cc                     |   1 -
 src/ray/common/task/scheduling_resources.cc   |   1 -
 src/ray/common/task/task_execution_spec.cc    |   4 +-
 src/ray/common/test/client_connection_test.cc |   4 +-
 .../io_ray_runtime_actor_NativeActorHandle.cc |   2 +-
 ...ray_runtime_context_NativeWorkerContext.cc |   4 +-
 .../io_ray_runtime_gcs_GlobalStateAccessor.cc |   2 +-
 .../io_ray_runtime_metric_NativeMetric.cc     |   5 +-
 ...io_ray_runtime_object_NativeObjectStore.cc |   2 +
 .../io_ray_runtime_task_NativeTaskExecutor.cc |   4 +-
 src/ray/core_worker/lib/java/jni_utils.h      |   3 +-
 .../core_worker/test/scheduling_queue_test.cc |   2 +-
 src/ray/gcs/asio.h                            |   4 +-
 src/ray/gcs/gcs_client.h                      |   1 +
 .../test/service_based_gcs_client_test.cc     |   1 +
 .../gcs_server/gcs_redis_failure_detector.h   |   1 +
 src/ray/gcs/pubsub/test/gcs_pub_sub_test.cc   |   3 +-
 src/ray/gcs/redis_async_context.h             |   2 +
 .../gcs/store_client/redis_store_client.cc    |   1 +
 .../test/in_memory_store_client_test.cc       |   1 +
 .../test/redis_store_client_test.cc           |   1 +
 src/ray/gcs/subscription_executor.h           |   1 +
 src/ray/gcs/test/redis_gcs_client_test.cc     |   4 +-
 .../gcs/test/redis_node_info_accessor_test.cc |   1 +
 .../object_store_notification_manager.h       |   4 +-
 src/ray/object_manager/object_buffer_pool.h   |   7 +-
 src/ray/object_manager/object_manager.h       |   7 +-
 src/ray/plasma/store_exec.cc                  |   3 +-
 src/ray/raylet/raylet.h                       |   3 +-
 src/ray/raylet/reconstruction_policy.h        |   4 +-
 src/ray/raylet/reconstruction_policy_test.cc  |  11 +-
 src/ray/rpc/client_call.h                     |   5 +-
 src/ray/rpc/grpc_client.h                     |   1 +
 src/ray/rpc/server_call.h                     |   1 +
 src/ray/stats/metric_exporter.cc              |   4 +-
 src/ray/stats/metric_exporter.h               |   1 -
 src/ray/stats/metric_exporter_client.cc       |   4 +-
 src/ray/stats/metric_exporter_client_test.cc  |   6 +-
 src/ray/stats/stats.h                         |   1 -
 src/ray/stats/stats_test.cc                   |   6 +-
 src/ray/thirdparty/dlmalloc.c                 |   2 +-
 src/ray/thirdparty/sha256.c                   |   4 +-
 src/ray/util/io_service_pool.cc               |   1 +
 src/ray/util/sample_test.cc                   |   3 +-
 src/ray/util/sequencer.h                      |   1 +
 src/ray/util/signal_test.cc                   |   1 +
 src/ray/util/util.cc                          |   3 +-
 ...ray_streaming_runtime_transfer_ChannelId.h |   8 +-
 ...y_streaming_runtime_transfer_DataReader.cc |   1 +
 ...ay_streaming_runtime_transfer_DataReader.h |   3 +-
 ...ay_streaming_runtime_transfer_DataWriter.h |   3 +-
 ...reaming_runtime_transfer_TransferHandler.h |  10 +-
 streaming/src/lib/java/streaming_jni_common.h |   2 +
 streaming/src/message/priority_queue.h        |   1 +
 streaming/src/queue/utils.h                   |   1 +
 streaming/src/test/event_service_tests.cc     |   1 +
 .../src/test/message_serialization_tests.cc   |   1 -
 streaming/src/test/queue_tests_base.h         |   3 +-
 streaming/src/test/streaming_util_tests.cc    |   1 -
 147 files changed, 702 insertions(+), 636 deletions(-)

diff --git a/ci/travis/check_typing_bad.py b/ci/travis/check_typing_bad.py
index 3df119cfe..2ed95d033 100644
--- a/ci/travis/check_typing_bad.py
+++ b/ci/travis/check_typing_bad.py
@@ -1,6 +1,5 @@
 import ray
 
-
 ray.init()
 
 
diff --git a/ci/travis/check_typing_good.py b/ci/travis/check_typing_good.py
index d83bf1ce3..81128ff05 100644
--- a/ci/travis/check_typing_good.py
+++ b/ci/travis/check_typing_good.py
@@ -1,6 +1,5 @@
 import ray
 
-
 ray.init()
 
 
diff --git a/ci/travis/ci.sh b/ci/travis/ci.sh
index 4c78ee1be..c2e37cd54 100755
--- a/ci/travis/ci.sh
+++ b/ci/travis/ci.sh
@@ -313,7 +313,7 @@ lint_readme() {
 }
 
 lint_scripts() {
-  "${ROOT_DIR}"/format.sh --all
+  FORMAT_SH_PRINT_DIFF=1 "${ROOT_DIR}"/format.sh --all
 }
 
 lint_bazel() {
diff --git a/ci/travis/format.sh b/ci/travis/format.sh
index d1860976b..3c21bc9bc 100755
--- a/ci/travis/format.sh
+++ b/ci/travis/format.sh
@@ -46,11 +46,6 @@ builtin cd "$(dirname "${BASH_SOURCE:-$0}")"
 ROOT="$(git rev-parse --show-toplevel)"
 builtin cd "$ROOT" || exit 1
 
-# Add the upstream remote if it doesn't exist
-if ! git remote -v | grep -q upstream; then
-    git remote add 'upstream' 'https://github.com/ray-project/ray.git'
-fi
-
 FLAKE8_VERSION=$(flake8 --version | awk '{print $1}')
 YAPF_VERSION=$(yapf --version | awk '{print $2}')
 SHELLCHECK_VERSION=$(shellcheck --version | awk '/^version:/ {print $2}')
@@ -73,9 +68,6 @@ else
     echo "WARNING: clang-format is not installed!"
 fi
 
-# Only fetch master since that's the branch we're diffing against.
-git fetch upstream master || true
-
 SHELLCHECK_FLAGS=(
   --exclude=1090  # "Can't follow non-constant source. Use a directive to specify location."
   --exclude=1091  # "Not following {file} due to some error"
@@ -99,7 +91,16 @@ YAPF_EXCLUDES=(
     '--exclude' 'python/ray/thirdparty_files/*'
 )
 
-FLAKE8_EXCLUDES="python/ray/core/generated/,streaming/python/generated,doc/source/conf.py,python/ray/cloudpickle/,python/ray/thirdparty_files/"
+GIT_LS_EXCLUDES=(
+  ':(exclude)python/ray/cloudpickle/'
+)
+
+# TODO(barakmich): This should be cleaned up. I've at least excised the copies
+# of these arguments to this location, but the long-term answer is to actually
+# make a flake8 config file
+FLAKE8_EXCLUDE="--exclude=python/ray/core/generated/,streaming/python/generated,doc/source/conf.py,python/ray/cloudpickle/,python/ray/thirdparty_files/,python/build/,python/.eggs/"
+FLAKE8_IGNORES="--ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605"
+FLAKE8_PYX_IGNORES="--ignore=C408,E121,E123,E126,E211,E225,E226,E227,E24,E704,E999,W503,W504,W605"
 
 shellcheck_scripts() {
   shellcheck "${SHELLCHECK_FLAGS[@]}" "$@"
@@ -110,7 +111,7 @@ shellcheck_bazel() {
 }
 
 # Format specified files
-format() {
+format_files() {
     local shell_files=() python_files=() bazel_files=()
 
     local name
@@ -162,6 +163,46 @@ format() {
     fi
 }
 
+# Format all files, and print the diff to stdout for travis.
+format_all() {
+    command -v flake8 &> /dev/null;
+    HAS_FLAKE8=$?
+
+    echo "$(date)" "YAPF...."
+    git ls-files -- '*.py' "${GIT_LS_EXCLUDES[@]}" | xargs -P 10 \
+      yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}"
+    if [ $HAS_FLAKE8 ]; then
+      echo "$(date)" "Flake8...."
+      git ls-files -- '*.py' "${GIT_LS_EXCLUDES[@]}" | xargs -P 5 \
+        flake8 --inline-quotes '"' --no-avoid-escape  "$FLAKE8_EXCLUDE" "$FLAKE8_IGNORES"
+
+      git ls-files -- '*.pyx' '*.pxd' '*.pxi' "${GIT_LS_EXCLUDES[@]}" | xargs -P 5 \
+        flake8 --inline-quotes '"' --no-avoid-escape "$FLAKE8_EXCLUDE" "$FLAKE8_PYX_IGNORES"
+    fi
+
+    echo "$(date)" "clang-format...."
+    if command -v clang-format >/dev/null; then
+      git ls-files -- '*.cc' '*.h' "${GIT_LS_EXCLUDES[@]}" | xargs -P 5 clang-format -i
+    fi
+
+    if command -v shellcheck >/dev/null; then
+      echo "$(date)" "shellcheck bazel...."
+      shellcheck_bazel
+
+      local shell_files non_shell_files
+      non_shell_files=($(git ls-files -- ':(exclude)*.sh'))
+      shell_files=($(git ls-files -- '*.sh'))
+      if [ 0 -lt "${#non_shell_files[@]}" ]; then
+        shell_files+=($(git --no-pager grep -l -- '^#!\(/usr\)\?/bin/\(env \+\)\?\(ba\)\?sh' "${non_shell_files[@]}" || true))
+      fi
+      if [ 0 -lt "${#shell_files[@]}" ]; then
+        echo "$(date)" "shellcheck scripts...."
+        shellcheck_scripts "${shell_files[@]}"
+      fi
+    fi
+    echo "$(date)" "done!"
+}
+
 # Format files that differ from main branch. Ignores dirs that are not slated
 # for autoformat yet.
 format_changed() {
@@ -178,17 +219,14 @@ format_changed() {
              yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}"
         if which flake8 >/dev/null; then
             git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' | xargs -P 5 \
-                 flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES,rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605
-            # Ignore F821 for rllib flake8 checking (produces errors for type annotations using quotes (non-imported classes)).
-            git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' | xargs -P 5 \
-                 flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES" --filename="rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605,F821
+                 flake8 --inline-quotes '"' --no-avoid-escape "$FLAKE8_EXCLUDE" "$FLAKE8_IGNORES"
         fi
     fi
 
     if ! git diff --diff-filter=ACRM --quiet --exit-code "$MERGEBASE" -- '*.pyx' '*.pxd' '*.pxi' &>/dev/null; then
         if which flake8 >/dev/null; then
             git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.pyx' '*.pxd' '*.pxi' | xargs -P 5 \
-                 flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES" --ignore=C408,E121,E123,E126,E211,E225,E226,E227,E24,E704,E999,W503,W504,W605
+                 flake8 --inline-quotes '"' --no-avoid-escape "$FLAKE8_EXCLUDE" "$FLAKE8_PYX_IGNORES"
         fi
     fi
 
@@ -216,35 +254,25 @@ format_changed() {
     fi
 }
 
-# Format all files, and print the diff to stdout for travis.
-format_all() {
-    # Ignore F821 for rllib flake8 checking (produces errors for type annotations using quotes (non-imported classes)).
-    flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES,rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605
-    flake8 --inline-quotes '"' --no-avoid-escape --exclude="$FLAKE8_EXCLUDES" --filename="rllib/" --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605,F821
-
-    yapf --diff "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" test python
-
-    local shell_files
-    # shellcheck disable=SC2207
-    shell_files=($(
-      git -C "${ROOT}" ls-files --exclude-standard HEAD -- "*.sh" &&
-      { git -C "${ROOT}" --no-pager grep -l '^#!\(/usr\)\?/bin/\(env \+\)\?\(ba\)\?sh' ":(exclude)*.sh" || true; }
-    ))
-    if [ 0 -lt "${#shell_files[@]}" ]; then
-      shellcheck_scripts "${shell_files[@]}"
-    fi
-    shellcheck_bazel
-}
 
 # This flag formats individual files. --files *must* be the first command line
 # arg to use this option.
 if [ "${1-}" == '--files' ]; then
-    format "${@:2}"
+    format_files "${@:2}"
     # If `--all` is passed, then any further arguments are ignored and the
     # entire python directory is formatted.
 elif [ "${1-}" == '--all' ]; then
-    format_all
+    format_all "${@}"
+    if [ -n "${FORMAT_SH_PRINT_DIFF-}" ]; then git --no-pager diff; fi
 else
+    # Add the upstream remote if it doesn't exist
+    if ! git remote -v | grep -q upstream; then
+        git remote add 'upstream' 'https://github.com/ray-project/ray.git'
+    fi
+
+    # Only fetch master since that's the branch we're diffing against.
+    git fetch upstream master || true
+
     # Format only the files that changed in last commit.
     format_changed
 fi
diff --git a/cpp/include/ray/api.h b/cpp/include/ray/api.h
index f2aa0cf07..de97ce5dd 100644
--- a/cpp/include/ray/api.h
+++ b/cpp/include/ray/api.h
@@ -1,13 +1,14 @@
 
 #pragma once
 
-#include <memory>
-
 #include <ray/api/generated/actor_funcs.generated.h>
 #include <ray/api/generated/create_funcs.generated.h>
 #include <ray/api/generated/funcs.generated.h>
 #include <ray/api/ray_runtime.h>
+
+#include <memory>
 #include <msgpack.hpp>
+
 #include "ray/core.h"
 namespace ray {
 namespace api {
@@ -232,13 +233,10 @@ inline ActorTaskCaller<ReturnType> Ray::CallActorInternal(FuncType &actor_func,
   return ActorTaskCaller<ReturnType>(runtime_, actor.ID(), ptr, buffer);
 }
 
-#include <ray/api/generated/exec_funcs.generated.h>
-
-#include <ray/api/generated/call_funcs_impl.generated.h>
-
-#include <ray/api/generated/create_actors_impl.generated.h>
-
 #include <ray/api/generated/call_actors_impl.generated.h>
+#include <ray/api/generated/call_funcs_impl.generated.h>
+#include <ray/api/generated/create_actors_impl.generated.h>
+#include <ray/api/generated/exec_funcs.generated.h>
 
 }  // namespace api
 }  // namespace ray
diff --git a/cpp/include/ray/api/arguments.h b/cpp/include/ray/api/arguments.h
index 597184061..090c8e25d 100644
--- a/cpp/include/ray/api/arguments.h
+++ b/cpp/include/ray/api/arguments.h
@@ -2,6 +2,7 @@
 #pragma once
 
 #include <ray/api/serializer.h>
+
 #include <msgpack.hpp>
 
 namespace ray {
diff --git a/cpp/include/ray/api/object_ref.h b/cpp/include/ray/api/object_ref.h
index 54f7823ae..71500202b 100644
--- a/cpp/include/ray/api/object_ref.h
+++ b/cpp/include/ray/api/object_ref.h
@@ -2,9 +2,8 @@
 #pragma once
 
 #include <memory>
-#include <utility>
-
 #include <msgpack.hpp>
+#include <utility>
 
 #include "ray/core.h"
 
diff --git a/cpp/include/ray/api/ray_runtime.h b/cpp/include/ray/api/ray_runtime.h
index f87771cd2..ff39c5b42 100644
--- a/cpp/include/ray/api/ray_runtime.h
+++ b/cpp/include/ray/api/ray_runtime.h
@@ -1,13 +1,14 @@
 
 #pragma once
 
+#include <ray/api/wait_result.h>
+
 #include <cstdint>
 #include <memory>
 #include <msgpack.hpp>
 #include <typeinfo>
 #include <vector>
 
-#include <ray/api/wait_result.h>
 #include "ray/core.h"
 
 namespace ray {
diff --git a/cpp/include/ray/api/serializer.h b/cpp/include/ray/api/serializer.h
index c891e691d..57429611f 100644
--- a/cpp/include/ray/api/serializer.h
+++ b/cpp/include/ray/api/serializer.h
@@ -2,6 +2,7 @@
 #pragma once
 
 #include <ray/api/ray_exception.h>
+
 #include <msgpack.hpp>
 
 namespace ray {
diff --git a/cpp/include/ray/api/wait_result.h b/cpp/include/ray/api/wait_result.h
index 2fb0036eb..56528c11d 100644
--- a/cpp/include/ray/api/wait_result.h
+++ b/cpp/include/ray/api/wait_result.h
@@ -2,6 +2,7 @@
 #pragma once
 
 #include <vector>
+
 #include "ray/core.h"
 
 namespace ray {
diff --git a/cpp/src/ray/api.cc b/cpp/src/ray/api.cc
index 22a7f1d27..9c84865b4 100644
--- a/cpp/src/ray/api.cc
+++ b/cpp/src/ray/api.cc
@@ -1,7 +1,7 @@
 
 #include <ray/api.h>
-
 #include <ray/api/ray_config.h>
+
 #include "runtime/abstract_ray_runtime.h"
 
 namespace ray {
diff --git a/cpp/src/ray/runtime/abstract_ray_runtime.cc b/cpp/src/ray/runtime/abstract_ray_runtime.cc
index c7cfcb5d9..ce8ebee56 100644
--- a/cpp/src/ray/runtime/abstract_ray_runtime.cc
+++ b/cpp/src/ray/runtime/abstract_ray_runtime.cc
@@ -1,11 +1,12 @@
 
 #include "abstract_ray_runtime.h"
 
-#include <cassert>
-
 #include <ray/api.h>
 #include <ray/api/ray_config.h>
 #include <ray/api/ray_exception.h>
+
+#include <cassert>
+
 #include "../util/address_helper.h"
 #include "../util/process_helper.h"
 #include "local_mode_ray_runtime.h"
diff --git a/cpp/src/ray/runtime/abstract_ray_runtime.h b/cpp/src/ray/runtime/abstract_ray_runtime.h
index a093c1ec8..f0bd0e23e 100644
--- a/cpp/src/ray/runtime/abstract_ray_runtime.h
+++ b/cpp/src/ray/runtime/abstract_ray_runtime.h
@@ -1,11 +1,12 @@
 
 #pragma once
 
-#include <mutex>
-
 #include <ray/api/ray_config.h>
 #include <ray/api/ray_runtime.h>
+
 #include <msgpack.hpp>
+#include <mutex>
+
 #include "./object/object_store.h"
 #include "./task/task_executor.h"
 #include "./task/task_submitter.h"
diff --git a/cpp/src/ray/runtime/local_mode_ray_runtime.cc b/cpp/src/ray/runtime/local_mode_ray_runtime.cc
index 113f7b148..fa671bf70 100644
--- a/cpp/src/ray/runtime/local_mode_ray_runtime.cc
+++ b/cpp/src/ray/runtime/local_mode_ray_runtime.cc
@@ -2,6 +2,7 @@
 #include "local_mode_ray_runtime.h"
 
 #include <ray/api.h>
+
 #include "../util/address_helper.h"
 #include "./object/local_mode_object_store.h"
 #include "./object/object_store.h"
diff --git a/cpp/src/ray/runtime/local_mode_ray_runtime.h b/cpp/src/ray/runtime/local_mode_ray_runtime.h
index 281d0f571..7d0eed0ad 100644
--- a/cpp/src/ray/runtime/local_mode_ray_runtime.h
+++ b/cpp/src/ray/runtime/local_mode_ray_runtime.h
@@ -2,6 +2,7 @@
 #pragma once
 
 #include <unordered_map>
+
 #include "abstract_ray_runtime.h"
 #include "ray/core.h"
 
diff --git a/cpp/src/ray/runtime/native_ray_runtime.cc b/cpp/src/ray/runtime/native_ray_runtime.cc
index d07189b6b..e494e663d 100644
--- a/cpp/src/ray/runtime/native_ray_runtime.cc
+++ b/cpp/src/ray/runtime/native_ray_runtime.cc
@@ -2,6 +2,7 @@
 #include "native_ray_runtime.h"
 
 #include <ray/api.h>
+
 #include "../util/address_helper.h"
 #include "./object/native_object_store.h"
 #include "./object/object_store.h"
diff --git a/cpp/src/ray/runtime/native_ray_runtime.h b/cpp/src/ray/runtime/native_ray_runtime.h
index c2d907a18..2e1a8c184 100644
--- a/cpp/src/ray/runtime/native_ray_runtime.h
+++ b/cpp/src/ray/runtime/native_ray_runtime.h
@@ -2,6 +2,7 @@
 #pragma once
 
 #include <unordered_map>
+
 #include "abstract_ray_runtime.h"
 #include "ray/core.h"
 
diff --git a/cpp/src/ray/runtime/object/local_mode_object_store.cc b/cpp/src/ray/runtime/object/local_mode_object_store.cc
index cb0268116..66ca5764e 100644
--- a/cpp/src/ray/runtime/object/local_mode_object_store.cc
+++ b/cpp/src/ray/runtime/object/local_mode_object_store.cc
@@ -1,12 +1,14 @@
 
+#include "local_mode_object_store.h"
+
+#include <ray/api/ray_exception.h>
+
 #include <algorithm>
 #include <chrono>
 #include <list>
 #include <thread>
 
-#include <ray/api/ray_exception.h>
 #include "../abstract_ray_runtime.h"
-#include "local_mode_object_store.h"
 
 namespace ray {
 namespace api {
diff --git a/cpp/src/ray/runtime/object/local_mode_object_store.h b/cpp/src/ray/runtime/object/local_mode_object_store.h
index 3d4f947f4..ee5c6b0db 100644
--- a/cpp/src/ray/runtime/object/local_mode_object_store.h
+++ b/cpp/src/ray/runtime/object/local_mode_object_store.h
@@ -2,10 +2,10 @@
 #pragma once
 
 #include <unordered_map>
-#include "ray/core.h"
 
 #include "../local_mode_ray_runtime.h"
 #include "object_store.h"
+#include "ray/core.h"
 
 namespace ray {
 namespace api {
diff --git a/cpp/src/ray/runtime/object/native_object_store.cc b/cpp/src/ray/runtime/object/native_object_store.cc
index b22278abd..631c7c139 100644
--- a/cpp/src/ray/runtime/object/native_object_store.cc
+++ b/cpp/src/ray/runtime/object/native_object_store.cc
@@ -1,12 +1,14 @@
 
+#include "native_object_store.h"
+
+#include <ray/api/ray_exception.h>
+
 #include <algorithm>
 #include <chrono>
 #include <list>
 #include <thread>
 
-#include <ray/api/ray_exception.h>
 #include "../abstract_ray_runtime.h"
-#include "native_object_store.h"
 
 namespace ray {
 namespace api {
diff --git a/cpp/src/ray/runtime/object/native_object_store.h b/cpp/src/ray/runtime/object/native_object_store.h
index 0f633452c..5e0a5a1f9 100644
--- a/cpp/src/ray/runtime/object/native_object_store.h
+++ b/cpp/src/ray/runtime/object/native_object_store.h
@@ -2,10 +2,10 @@
 #pragma once
 
 #include <unordered_map>
-#include "ray/core.h"
 
 #include "../native_ray_runtime.h"
 #include "object_store.h"
+#include "ray/core.h"
 
 namespace ray {
 namespace api {
diff --git a/cpp/src/ray/runtime/object/object_store.h b/cpp/src/ray/runtime/object/object_store.h
index cc7839df4..856ede5d7 100644
--- a/cpp/src/ray/runtime/object/object_store.h
+++ b/cpp/src/ray/runtime/object/object_store.h
@@ -1,9 +1,9 @@
 
 #pragma once
 
-#include <memory>
-
 #include <ray/api/wait_result.h>
+
+#include <memory>
 #include <msgpack.hpp>
 
 namespace ray {
diff --git a/cpp/src/ray/runtime/task/invocation_spec.h b/cpp/src/ray/runtime/task/invocation_spec.h
index 9aa80a33b..b131f2fd0 100644
--- a/cpp/src/ray/runtime/task/invocation_spec.h
+++ b/cpp/src/ray/runtime/task/invocation_spec.h
@@ -2,6 +2,7 @@
 #pragma once
 
 #include <msgpack.hpp>
+
 #include "ray/core.h"
 
 namespace ray {
diff --git a/cpp/src/ray/runtime/task/local_mode_task_submitter.cc b/cpp/src/ray/runtime/task/local_mode_task_submitter.cc
index 37e20fc10..3e7864f63 100644
--- a/cpp/src/ray/runtime/task/local_mode_task_submitter.cc
+++ b/cpp/src/ray/runtime/task/local_mode_task_submitter.cc
@@ -1,11 +1,13 @@
 
+#include "local_mode_task_submitter.h"
+
+#include <ray/api/ray_exception.h>
+
 #include <boost/asio/post.hpp>
 #include <memory>
 
-#include <ray/api/ray_exception.h>
 #include "../../util/address_helper.h"
 #include "../abstract_ray_runtime.h"
-#include "local_mode_task_submitter.h"
 
 namespace ray {
 namespace api {
diff --git a/cpp/src/ray/runtime/task/local_mode_task_submitter.h b/cpp/src/ray/runtime/task/local_mode_task_submitter.h
index 858d2d989..e2470df62 100644
--- a/cpp/src/ray/runtime/task/local_mode_task_submitter.h
+++ b/cpp/src/ray/runtime/task/local_mode_task_submitter.h
@@ -3,6 +3,7 @@
 #include <boost/asio/thread_pool.hpp>
 #include <memory>
 #include <queue>
+
 #include "../local_mode_ray_runtime.h"
 #include "absl/synchronization/mutex.h"
 #include "invocation_spec.h"
diff --git a/cpp/src/ray/runtime/task/native_task_submitter.cc b/cpp/src/ray/runtime/task/native_task_submitter.cc
index a929aaece..a162035b3 100644
--- a/cpp/src/ray/runtime/task/native_task_submitter.cc
+++ b/cpp/src/ray/runtime/task/native_task_submitter.cc
@@ -1,5 +1,7 @@
 #include "native_task_submitter.h"
+
 #include <ray/api/ray_exception.h>
+
 #include "../../util/address_helper.h"
 #include "../abstract_ray_runtime.h"
 
diff --git a/cpp/src/ray/runtime/task/native_task_submitter.h b/cpp/src/ray/runtime/task/native_task_submitter.h
index 5131b5c7b..3812f3d47 100644
--- a/cpp/src/ray/runtime/task/native_task_submitter.h
+++ b/cpp/src/ray/runtime/task/native_task_submitter.h
@@ -3,6 +3,7 @@
 #include <boost/asio/thread_pool.hpp>
 #include <memory>
 #include <queue>
+
 #include "../native_ray_runtime.h"
 #include "invocation_spec.h"
 #include "ray/core.h"
diff --git a/cpp/src/ray/runtime/task/task_executor.cc b/cpp/src/ray/runtime/task/task_executor.cc
index 23f7b6657..cdbda5a1d 100644
--- a/cpp/src/ray/runtime/task/task_executor.cc
+++ b/cpp/src/ray/runtime/task/task_executor.cc
@@ -1,9 +1,10 @@
 
+#include "task_executor.h"
+
 #include <memory>
 
 #include "../../util/address_helper.h"
 #include "../abstract_ray_runtime.h"
-#include "task_executor.h"
 
 namespace ray {
 namespace api {
diff --git a/cpp/src/ray/runtime/task/task_executor.h b/cpp/src/ray/runtime/task/task_executor.h
index f7a8675a1..40e7acf3c 100644
--- a/cpp/src/ray/runtime/task/task_executor.h
+++ b/cpp/src/ray/runtime/task/task_executor.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <memory>
+
 #include "absl/synchronization/mutex.h"
 #include "invocation_spec.h"
 #include "ray/core.h"
diff --git a/cpp/src/ray/runtime/task/task_submitter.h b/cpp/src/ray/runtime/task/task_submitter.h
index 6352608a2..36e6555c3 100644
--- a/cpp/src/ray/runtime/task/task_submitter.h
+++ b/cpp/src/ray/runtime/task/task_submitter.h
@@ -1,8 +1,9 @@
 #pragma once
 
+#include <ray/api/ray_runtime.h>
+
 #include <memory>
 
-#include <ray/api/ray_runtime.h>
 #include "invocation_spec.h"
 
 namespace ray {
diff --git a/cpp/src/ray/test/api_test.cc b/cpp/src/ray/test/api_test.cc
index f10b68a79..91eeee00b 100644
--- a/cpp/src/ray/test/api_test.cc
+++ b/cpp/src/ray/test/api_test.cc
@@ -1,6 +1,7 @@
 
 #include <gtest/gtest.h>
 #include <ray/api.h>
+
 #include <future>
 #include <thread>
 
diff --git a/cpp/src/ray/test/slow_function_test.cc b/cpp/src/ray/test/slow_function_test.cc
index 7fceace71..8bc5e555a 100644
--- a/cpp/src/ray/test/slow_function_test.cc
+++ b/cpp/src/ray/test/slow_function_test.cc
@@ -1,6 +1,7 @@
 
 #include <gtest/gtest.h>
 #include <ray/api.h>
+
 #include <chrono>
 #include <thread>
 
diff --git a/doc/examples/newsreader/server.py b/doc/examples/newsreader/server.py
index 97222d620..fa9434966 100644
--- a/doc/examples/newsreader/server.py
+++ b/doc/examples/newsreader/server.py
@@ -9,7 +9,6 @@ import ray
 
 @ray.remote
 class NewsServer(object):
-
     def __init__(self):
         self.conn = sqlite3.connect("newsreader.db")
         c = self.conn.cursor()
@@ -25,29 +24,36 @@ class NewsServer(object):
         items = []
         c = self.conn.cursor()
         for item in feed.items:
-            items.append({"title": item.title,
-                          "link": item.link,
-                          "description": item.description,
-                          "description_text": item.description,
-                          "pubDate": str(item.pub_date)})
-            c.execute("""INSERT INTO news (title, link, description,
+            items.append({
+                "title": item.title,
+                "link": item.link,
+                "description": item.description,
+                "description_text": item.description,
+                "pubDate": str(item.pub_date)
+            })
+            c.execute(
+                """INSERT INTO news (title, link, description,
                          published, feed, liked) values
-                         (?, ?, ?, ?, ?, ?)""", (
-                         item.title, item.link, item.description,
-                         item.pub_date, feed.link, False))
+                         (?, ?, ?, ?, ?, ?)""",
+                (item.title, item.link, item.description, item.pub_date,
+                 feed.link, False))
         self.conn.commit()
 
-        return {"channel": {"title": feed.title,
-                            "link": feed.link,
-                            "url": feed.link},
-                "items": items}
+        return {
+            "channel": {
+                "title": feed.title,
+                "link": feed.link,
+                "url": feed.link
+            },
+            "items": items
+        }
 
     def like_item(self, url, is_faved):
         c = self.conn.cursor()
         if is_faved:
-            c.execute("UPDATE news SET liked = 1 WHERE link = ?", (url,))
+            c.execute("UPDATE news SET liked = 1 WHERE link = ?", (url, ))
         else:
-            c.execute("UPDATE news SET liked = 0 WHERE link = ?", (url,))
+            c.execute("UPDATE news SET liked = 0 WHERE link = ?", (url, ))
         self.conn.commit()
 
 
@@ -71,8 +77,9 @@ def dispatcher():
         result = ray.get(method.remote(*method_args))
         return jsonify(result)
     else:
-        return jsonify(
-            {"error": "method_name '" + method_name + "' not found"})
+        return jsonify({
+            "error": "method_name '" + method_name + "' not found"
+        })
 
 
 if __name__ == "__main__":
diff --git a/doc/examples/streaming/streaming.py b/doc/examples/streaming/streaming.py
index e4434bbe1..f5c9bd3fe 100644
--- a/doc/examples/streaming/streaming.py
+++ b/doc/examples/streaming/streaming.py
@@ -7,10 +7,13 @@ import ray
 import wikipedia
 
 parser = argparse.ArgumentParser()
-parser.add_argument("--num-mappers",
-                    help="number of mapper actors used", default=3, type=int)
-parser.add_argument("--num-reducers",
-                    help="number of reducer actors used", default=4, type=int)
+parser.add_argument(
+    "--num-mappers", help="number of mapper actors used", default=3, type=int)
+parser.add_argument(
+    "--num-reducers",
+    help="number of reducer actors used",
+    default=4,
+    type=int)
 
 
 @ray.remote
@@ -47,8 +50,10 @@ class Reducer(object):
         word_count_sum = defaultdict(lambda: 0)
         # Get the word counts for this Reducer's keys from all of the Mappers
         # and aggregate the results.
-        count_ids = [mapper.get_range.remote(article_index, self.keys)
-                     for mapper in self.mappers]
+        count_ids = [
+            mapper.get_range.remote(article_index, self.keys)
+            for mapper in self.mappers
+        ]
         # TODO(rkn): We should process these out of order using ray.wait.
         for count_id in count_ids:
             for k, v in ray.get(count_id):
@@ -78,8 +83,9 @@ if __name__ == "__main__":
             streams.append(Stream([line.strip() for line in f.readlines()]))
 
     # Partition the keys among the reducers.
-    chunks = np.array_split([chr(i) for i in range(ord("a"), ord("z") + 1)],
-                            args.num_reducers)
+    chunks = np.array_split([chr(i)
+                             for i in range(ord("a"),
+                                            ord("z") + 1)], args.num_reducers)
     keys = [[chunk[0], chunk[-1]] for chunk in chunks]
 
     # Create a number of mappers.
@@ -93,12 +99,14 @@ if __name__ == "__main__":
     while True:
         print("article index = {}".format(article_index))
         wordcounts = {}
-        counts = ray.get([reducer.next_reduce_result.remote(article_index)
-                          for reducer in reducers])
+        counts = ray.get([
+            reducer.next_reduce_result.remote(article_index)
+            for reducer in reducers
+        ])
         for count in counts:
             wordcounts.update(count)
-        most_frequent_words = heapq.nlargest(10, wordcounts,
-                                             key=wordcounts.get)
+        most_frequent_words = heapq.nlargest(
+            10, wordcounts, key=wordcounts.get)
         for word in most_frequent_words:
             print("  ", word, wordcounts[word])
         article_index += 1
diff --git a/doc/source/tune/_tutorials/tune-sklearn.py b/doc/source/tune/_tutorials/tune-sklearn.py
index d155ae5b4..7edd15938 100644
--- a/doc/source/tune/_tutorials/tune-sklearn.py
+++ b/doc/source/tune/_tutorials/tune-sklearn.py
@@ -68,10 +68,7 @@ parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}
 # As you can see, the setup here is exactly how you would do it for Scikit-Learn. Now, let's try fitting a model.
 
 tune_search = TuneGridSearchCV(
-    SGDClassifier(),
-    parameter_grid,
-    early_stopping=True,
-    max_iters=10)
+    SGDClassifier(), parameter_grid, early_stopping=True, max_iters=10)
 
 import time  # Just to compare fit times
 start = time.time()
diff --git a/python/ray/includes/metric.pxd b/python/ray/includes/metric.pxd
index 018c85231..5d705f0fd 100644
--- a/python/ray/includes/metric.pxd
+++ b/python/ray/includes/metric.pxd
@@ -11,13 +11,13 @@ cdef extern from "opencensus/tags/tag_key.h" nogil:
 cdef extern from "ray/stats/metric.h" nogil:
     cdef cppclass CMetric "ray::stats::Metric":
         CMetric(const c_string &name,
-               const c_string &description,
-               const c_string &unit,
-               const c_vector[CTagKey] &tag_keys)
+                const c_string &description,
+                const c_string &unit,
+                const c_vector[CTagKey] &tag_keys)
         c_string GetName() const
         void Record(double value)
-        void Record(double value, 
-                unordered_map[c_string, c_string] &tags)
+        void Record(double value,
+                    unordered_map[c_string, c_string] &tags)
 
     cdef cppclass CGauge "ray::stats::Gauge":
         CGauge(const c_string &name,
@@ -42,4 +42,4 @@ cdef extern from "ray/stats/metric.h" nogil:
                    const c_string &description,
                    const c_string &unit,
                    const c_vector[double] &boundaries,
-                   const c_vector[CTagKey] &tag_keys)
\ No newline at end of file
+                   const c_vector[CTagKey] &tag_keys)
diff --git a/rllib/agents/ddpg/ddpg_tf_policy.py b/rllib/agents/ddpg/ddpg_tf_policy.py
index 71f715170..12568e230 100644
--- a/rllib/agents/ddpg/ddpg_tf_policy.py
+++ b/rllib/agents/ddpg/ddpg_tf_policy.py
@@ -126,9 +126,8 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
         target_noise_clip = policy.config["target_noise_clip"]
         clipped_normal_sample = tf.clip_by_value(
             tf.random.normal(
-                tf.shape(policy_tp1),
-                stddev=policy.config["target_noise"]), -target_noise_clip,
-            target_noise_clip)
+                tf.shape(policy_tp1), stddev=policy.config["target_noise"]),
+            -target_noise_clip, target_noise_clip)
         policy_tp1_smoothed = tf.clip_by_value(
             policy_tp1 + clipped_normal_sample,
             policy.action_space.low * tf.ones_like(policy_tp1),
@@ -146,8 +145,8 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
     q_t_det_policy = model.get_q_values(model_out_t, policy_t)
 
     if twin_q:
-        twin_q_t = model.get_twin_q_values(
-            model_out_t, train_batch[SampleBatch.ACTIONS])
+        twin_q_t = model.get_twin_q_values(model_out_t,
+                                           train_batch[SampleBatch.ACTIONS])
 
     # Target q-net(s) evaluation.
     q_tp1 = policy.target_model.get_q_values(target_model_out_tp1,
@@ -278,11 +277,11 @@ def gradients_fn(policy, optimizer, loss):
     if policy.config["framework"] in ["tf2", "tfe"]:
         tape = optimizer.tape
         pol_weights = policy.model.policy_variables()
-        actor_grads_and_vars = list(zip(tape.gradient(
-            policy.actor_loss, pol_weights), pol_weights))
+        actor_grads_and_vars = list(
+            zip(tape.gradient(policy.actor_loss, pol_weights), pol_weights))
         q_weights = policy.model.q_variables()
-        critic_grads_and_vars = list(zip(tape.gradient(
-            policy.critic_loss, q_weights), q_weights))
+        critic_grads_and_vars = list(
+            zip(tape.gradient(policy.critic_loss, q_weights), q_weights))
     else:
         actor_grads_and_vars = policy._actor_optimizer.compute_gradients(
             policy.actor_loss, var_list=policy.model.policy_variables())
@@ -296,10 +295,12 @@ def gradients_fn(policy, optimizer, loss):
         clip_func = tf.identity
 
     # Save grads and vars for later use in `build_apply_op`.
-    policy._actor_grads_and_vars = [
-        (clip_func(g), v) for (g, v) in actor_grads_and_vars if g is not None]
-    policy._critic_grads_and_vars = [
-        (clip_func(g), v) for (g, v) in critic_grads_and_vars if g is not None]
+    policy._actor_grads_and_vars = [(clip_func(g), v)
+                                    for (g, v) in actor_grads_and_vars
+                                    if g is not None]
+    policy._critic_grads_and_vars = [(clip_func(g), v)
+                                     for (g, v) in critic_grads_and_vars
+                                     if g is not None]
 
     grads_and_vars = policy._actor_grads_and_vars + \
         policy._critic_grads_and_vars
diff --git a/rllib/agents/ddpg/ddpg_torch_policy.py b/rllib/agents/ddpg/ddpg_torch_policy.py
index 1b3b1e078..68d91a5c9 100644
--- a/rllib/agents/ddpg/ddpg_torch_policy.py
+++ b/rllib/agents/ddpg/ddpg_torch_policy.py
@@ -65,8 +65,7 @@ def ddpg_actor_critic_loss(policy, model, _, train_batch):
             torch.normal(
                 mean=torch.zeros(policy_tp1.size()),
                 std=policy.config["target_noise"]).to(policy_tp1.device),
-            -target_noise_clip,
-            target_noise_clip)
+            -target_noise_clip, target_noise_clip)
 
         policy_tp1_smoothed = torch.min(
             torch.max(
diff --git a/rllib/agents/ddpg/tests/test_ddpg.py b/rllib/agents/ddpg/tests/test_ddpg.py
index 49c52e942..91b499207 100644
--- a/rllib/agents/ddpg/tests/test_ddpg.py
+++ b/rllib/agents/ddpg/tests/test_ddpg.py
@@ -405,7 +405,9 @@ class TestDDPG(unittest.TestCase):
         policy_t = sigmoid(2.0 * fc(
             relu(
                 fc(model_out_t, weights[ks[1]], weights[ks[0]], framework=fw)),
-            weights[ks[5]], weights[ks[4]], framework=fw))
+            weights[ks[5]],
+            weights[ks[4]],
+            framework=fw))
         # Get policy output for t+1 (target model).
         policy_tp1 = sigmoid(2.0 * fc(
             relu(
@@ -413,7 +415,9 @@ class TestDDPG(unittest.TestCase):
                    weights[ks[3]],
                    weights[ks[2]],
                    framework=fw)),
-            weights[ks[7]], weights[ks[6]], framework=fw))
+            weights[ks[7]],
+            weights[ks[6]],
+            framework=fw))
         # Assume no smooth target policy.
         policy_tp1_smoothed = policy_tp1
 
diff --git a/rllib/agents/dqn/distributional_q_tf_model.py b/rllib/agents/dqn/distributional_q_tf_model.py
index 61045ae1c..169fa1ed6 100644
--- a/rllib/agents/dqn/distributional_q_tf_model.py
+++ b/rllib/agents/dqn/distributional_q_tf_model.py
@@ -74,8 +74,7 @@ class DistributionalQTFModel(TFModelV2):
                 for i in range(len(q_hiddens)):
                     if use_noisy:
                         action_out = NoisyLayer(
-                            "{}hidden_{}".format(prefix, i),
-                            q_hiddens[i],
+                            "{}hidden_{}".format(prefix, i), q_hiddens[i],
                             sigma0)(action_out)
                     elif add_layer_norm:
                         action_out = tf.keras.layers.Dense(
@@ -135,8 +134,7 @@ class DistributionalQTFModel(TFModelV2):
             for i in range(len(q_hiddens)):
                 if use_noisy:
                     state_out = NoisyLayer(
-                        "{}dueling_hidden_{}".format(prefix, i),
-                        q_hiddens[i],
+                        "{}dueling_hidden_{}".format(prefix, i), q_hiddens[i],
                         sigma0)(state_out)
                 else:
                     state_out = tf.keras.layers.Dense(
@@ -160,8 +158,8 @@ class DistributionalQTFModel(TFModelV2):
         self.register_variables(self.q_value_head.variables)
 
         if dueling:
-            state_out = build_state_score(
-                name + "/state_value/", self.model_out)
+            state_out = build_state_score(name + "/state_value/",
+                                          self.model_out)
             self.state_value_head = tf.keras.Model(self.model_out, state_out)
             self.register_variables(self.state_value_head.variables)
 
diff --git a/rllib/agents/dqn/dqn_tf_policy.py b/rllib/agents/dqn/dqn_tf_policy.py
index 46ec8e28e..b2a30088a 100644
--- a/rllib/agents/dqn/dqn_tf_policy.py
+++ b/rllib/agents/dqn/dqn_tf_policy.py
@@ -231,8 +231,8 @@ def build_q_losses(policy, model, _, train_batch):
                 train_batch[SampleBatch.NEXT_OBS],
                 explore=False)
         q_tp1_best_using_online_net = tf.argmax(q_tp1_using_online_net, 1)
-        q_tp1_best_one_hot_selection = tf.one_hot(
-            q_tp1_best_using_online_net, policy.action_space.n)
+        q_tp1_best_one_hot_selection = tf.one_hot(q_tp1_best_using_online_net,
+                                                  policy.action_space.n)
         q_tp1_best = tf.reduce_sum(q_tp1 * q_tp1_best_one_hot_selection, 1)
         q_dist_tp1_best = tf.reduce_sum(
             q_dist_tp1 * tf.expand_dims(q_tp1_best_one_hot_selection, -1), 1)
@@ -246,9 +246,9 @@ def build_q_losses(policy, model, _, train_batch):
     policy.q_loss = QLoss(
         q_t_selected, q_logits_t_selected, q_tp1_best, q_dist_tp1_best,
         train_batch[PRIO_WEIGHTS], train_batch[SampleBatch.REWARDS],
-        tf.cast(train_batch[SampleBatch.DONES], tf.float32), config["gamma"],
-        config["n_step"], config["num_atoms"],
-        config["v_min"], config["v_max"])
+        tf.cast(train_batch[SampleBatch.DONES],
+                tf.float32), config["gamma"], config["n_step"],
+        config["num_atoms"], config["v_min"], config["v_max"])
 
     return policy.q_loss.loss
 
@@ -378,9 +378,8 @@ def postprocess_nstep_and_prio(policy, batch, other_agent=None, episode=None):
             batch[SampleBatch.CUR_OBS], batch[SampleBatch.ACTIONS],
             batch[SampleBatch.REWARDS], batch[SampleBatch.NEXT_OBS],
             batch[SampleBatch.DONES], batch[PRIO_WEIGHTS])
-        new_priorities = (
-            np.abs(convert_to_numpy(td_errors)) +
-            policy.config["prioritized_replay_eps"])
+        new_priorities = (np.abs(convert_to_numpy(td_errors)) +
+                          policy.config["prioritized_replay_eps"])
         batch.data[PRIO_WEIGHTS] = new_priorities
 
     return batch
diff --git a/rllib/agents/dqn/dqn_torch_model.py b/rllib/agents/dqn/dqn_torch_model.py
index d10c4f013..6fcf6f077 100644
--- a/rllib/agents/dqn/dqn_torch_model.py
+++ b/rllib/agents/dqn/dqn_torch_model.py
@@ -72,12 +72,16 @@ class DQNTorchModel(TorchModelV2, nn.Module):
                 advantage_module.add_module(
                     "dueling_A_{}".format(i),
                     NoisyLayer(
-                        ins, n, sigma0=self.sigma0,
+                        ins,
+                        n,
+                        sigma0=self.sigma0,
                         activation=dueling_activation))
                 value_module.add_module(
                     "dueling_V_{}".format(i),
                     NoisyLayer(
-                        ins, n, sigma0=self.sigma0,
+                        ins,
+                        n,
+                        sigma0=self.sigma0,
                         activation=dueling_activation))
             else:
                 advantage_module.add_module(
@@ -88,25 +92,26 @@ class DQNTorchModel(TorchModelV2, nn.Module):
                     SlimFC(ins, n, activation_fn=dueling_activation))
                 # Add LayerNorm after each Dense.
                 if add_layer_norm:
-                    advantage_module.add_module(
-                        "LayerNorm_A_{}".format(i), nn.LayerNorm(n))
-                    value_module.add_module(
-                        "LayerNorm_V_{}".format(i), nn.LayerNorm(n))
+                    advantage_module.add_module("LayerNorm_A_{}".format(i),
+                                                nn.LayerNorm(n))
+                    value_module.add_module("LayerNorm_V_{}".format(i),
+                                            nn.LayerNorm(n))
             ins = n
 
         # Actual Advantages layer (nodes=num-actions).
         if use_noisy:
-            advantage_module.add_module("A", NoisyLayer(
-                ins,
-                self.action_space.n * self.num_atoms,
-                sigma0,
-                activation=None))
+            advantage_module.add_module(
+                "A",
+                NoisyLayer(
+                    ins,
+                    self.action_space.n * self.num_atoms,
+                    sigma0,
+                    activation=None))
         elif q_hiddens:
             advantage_module.add_module(
                 "A",
                 SlimFC(
-                    ins, action_space.n * self.num_atoms,
-                    activation_fn=None))
+                    ins, action_space.n * self.num_atoms, activation_fn=None))
 
         self.advantage_module = advantage_module
 
diff --git a/rllib/agents/dqn/dqn_torch_policy.py b/rllib/agents/dqn/dqn_torch_policy.py
index 2965d1ac2..d1669b73b 100644
--- a/rllib/agents/dqn/dqn_torch_policy.py
+++ b/rllib/agents/dqn/dqn_torch_policy.py
@@ -212,8 +212,8 @@ def build_q_losses(policy, model, _, train_batch):
         is_training=True)
 
     # Q scores for actions which we know were selected in the given state.
-    one_hot_selection = F.one_hot(
-        train_batch[SampleBatch.ACTIONS], policy.action_space.n)
+    one_hot_selection = F.one_hot(train_batch[SampleBatch.ACTIONS],
+                                  policy.action_space.n)
     q_t_selected = torch.sum(
         torch.where(q_t > -float("inf"), q_t, torch.tensor(0.0)) *
         one_hot_selection, 1)
@@ -230,8 +230,8 @@ def build_q_losses(policy, model, _, train_batch):
                 explore=False,
                 is_training=True)
         q_tp1_best_using_online_net = torch.argmax(q_tp1_using_online_net, 1)
-        q_tp1_best_one_hot_selection = F.one_hot(
-            q_tp1_best_using_online_net, policy.action_space.n)
+        q_tp1_best_one_hot_selection = F.one_hot(q_tp1_best_using_online_net,
+                                                 policy.action_space.n)
         q_tp1_best = torch.sum(
             torch.where(q_tp1 > -float("inf"), q_tp1, torch.tensor(0.0)) *
             q_tp1_best_one_hot_selection, 1)
@@ -250,8 +250,8 @@ def build_q_losses(policy, model, _, train_batch):
         q_t_selected, q_logits_t_selected, q_tp1_best, q_probs_tp1_best,
         train_batch[PRIO_WEIGHTS], train_batch[SampleBatch.REWARDS],
         train_batch[SampleBatch.DONES].float(), config["gamma"],
-        config["n_step"], config["num_atoms"],
-        config["v_min"], config["v_max"])
+        config["n_step"], config["num_atoms"], config["v_min"],
+        config["v_max"])
 
     return policy.q_loss.loss
 
diff --git a/rllib/agents/impala/vtrace_tf.py b/rllib/agents/impala/vtrace_tf.py
index ab184a236..19b3d3472 100644
--- a/rllib/agents/impala/vtrace_tf.py
+++ b/rllib/agents/impala/vtrace_tf.py
@@ -222,10 +222,12 @@ def multi_from_logits(behaviour_policy_logits,
         behaviour_policy_logits[i].shape.assert_has_rank(3)
         target_policy_logits[i].shape.assert_has_rank(3)
 
-    with tf1.name_scope(name, values=[
-        behaviour_policy_logits, target_policy_logits, actions,
-        discounts, rewards, values, bootstrap_value
-    ]):
+    with tf1.name_scope(
+            name,
+            values=[
+                behaviour_policy_logits, target_policy_logits, actions,
+                discounts, rewards, values, bootstrap_value
+            ]):
         target_action_log_probs = multi_log_probs_from_logits_and_actions(
             target_policy_logits, actions, dist_class, model)
 
@@ -330,16 +332,16 @@ def from_importance_weights(log_rhos,
     if clip_pg_rho_threshold is not None:
         clip_pg_rho_threshold.shape.assert_has_rank(0)
 
-    with tf1.name_scope(name, values=[
-        log_rhos, discounts, rewards, values, bootstrap_value
-    ]):
+    with tf1.name_scope(
+            name,
+            values=[log_rhos, discounts, rewards, values, bootstrap_value]):
         rhos = tf.math.exp(log_rhos)
         if clip_rho_threshold is not None:
             clipped_rhos = tf.minimum(
                 clip_rho_threshold, rhos, name="clipped_rhos")
 
-            tf1.summary.histogram(
-                    "clipped_rhos_1000", tf.minimum(1000.0, rhos))
+            tf1.summary.histogram("clipped_rhos_1000", tf.minimum(
+                1000.0, rhos))
             tf1.summary.scalar(
                 "num_of_clipped_rhos",
                 tf.reduce_sum(
diff --git a/rllib/agents/impala/vtrace_tf_policy.py b/rllib/agents/impala/vtrace_tf_policy.py
index a6e11a940..fb68617d3 100644
--- a/rllib/agents/impala/vtrace_tf_policy.py
+++ b/rllib/agents/impala/vtrace_tf_policy.py
@@ -259,13 +259,13 @@ def choose_optimizer(policy, config):
             return tf1.train.AdamOptimizer(policy.cur_lr)
     else:
         if tfv == 2:
-            return tf.keras.optimizers.RMSprop(
-                policy.cur_lr, config["decay"], config["momentum"],
-                config["epsilon"])
+            return tf.keras.optimizers.RMSprop(policy.cur_lr, config["decay"],
+                                               config["momentum"],
+                                               config["epsilon"])
         else:
-            return tf1.train.RMSPropOptimizer(
-                policy.cur_lr, config["decay"], config["momentum"],
-                config["epsilon"])
+            return tf1.train.RMSPropOptimizer(policy.cur_lr, config["decay"],
+                                              config["momentum"],
+                                              config["epsilon"])
 
 
 def clip_gradients(policy, optimizer, loss):
diff --git a/rllib/agents/marwil/marwil_tf_policy.py b/rllib/agents/marwil/marwil_tf_policy.py
index a148025a1..d9db41be1 100644
--- a/rllib/agents/marwil/marwil_tf_policy.py
+++ b/rllib/agents/marwil/marwil_tf_policy.py
@@ -40,23 +40,21 @@ class ReweightedImitationLoss:
         # update averaged advantage norm
         if policy.config["framework"] in ["tf2", "tfe"]:
             policy._ma_adv_norm.assign_add(
-                1e-6 * (tf.reduce_mean(
-                    tf.math.square(adv)) - policy._ma_adv_norm))
+                1e-6 *
+                (tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
             # Exponentially weighted advantages.
-            exp_advs = tf.math.exp(
-                beta * tf.math.divide(
-                    adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
+            exp_advs = tf.math.exp(beta * tf.math.divide(
+                adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
         else:
             update_adv_norm = tf1.assign_add(
                 ref=policy._ma_adv_norm,
-                value=1e-6 * (
-                    tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
+                value=1e-6 *
+                (tf.reduce_mean(tf.math.square(adv)) - policy._ma_adv_norm))
 
             # exponentially weighted advantages
             with tf1.control_dependencies([update_adv_norm]):
-                exp_advs = tf.math.exp(
-                    beta * tf.math.divide(
-                        adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
+                exp_advs = tf.math.exp(beta * tf.math.divide(
+                    adv, 1e-8 + tf.math.sqrt(policy._ma_adv_norm)))
 
         # log\pi_\theta(a|s)
         logprobs = action_dist.logp(actions)
diff --git a/rllib/agents/marwil/tests/test_marwil.py b/rllib/agents/marwil/tests/test_marwil.py
index 19dcefeb9..49a223da1 100644
--- a/rllib/agents/marwil/tests/test_marwil.py
+++ b/rllib/agents/marwil/tests/test_marwil.py
@@ -28,8 +28,8 @@ class TestMARWIL(unittest.TestCase):
         rllib_dir = Path(__file__).parent.parent.parent.parent
         print("rllib dir={}".format(rllib_dir))
         data_file = os.path.join(rllib_dir, "tests/data/cartpole/large.json")
-        print("data_file={} exists={}".format(
-            data_file, os.path.isfile(data_file)))
+        print("data_file={} exists={}".format(data_file,
+                                              os.path.isfile(data_file)))
 
         config = marwil.DEFAULT_CONFIG.copy()
         config["num_workers"] = 0  # Run locally.
diff --git a/rllib/agents/mbmpo/model_ensemble.py b/rllib/agents/mbmpo/model_ensemble.py
index 7caada9dd..bf37cff5a 100644
--- a/rllib/agents/mbmpo/model_ensemble.py
+++ b/rllib/agents/mbmpo/model_ensemble.py
@@ -59,6 +59,7 @@ class TDModel(nn.Module):
 
 
 if torch:
+
     class TDDataset(torch.utils.data.Dataset):
         def __init__(self, dataset: SampleBatchType, norms):
             self.count = dataset.count
diff --git a/rllib/agents/pg/pg_tf_policy.py b/rllib/agents/pg/pg_tf_policy.py
index 944556281..3b1aa8374 100644
--- a/rllib/agents/pg/pg_tf_policy.py
+++ b/rllib/agents/pg/pg_tf_policy.py
@@ -26,8 +26,8 @@ def pg_tf_loss(policy, model, dist_class, train_batch):
     logits, _ = model.from_batch(train_batch)
     action_dist = dist_class(logits, model)
     return -tf.reduce_mean(
-        action_dist.logp(train_batch[SampleBatch.ACTIONS]) *
-        tf.cast(train_batch[Postprocessing.ADVANTAGES], dtype=tf.float32))
+        action_dist.logp(train_batch[SampleBatch.ACTIONS]) * tf.cast(
+            train_batch[Postprocessing.ADVANTAGES], dtype=tf.float32))
 
 
 PGTFPolicy = build_tf_policy(
diff --git a/rllib/agents/pg/tests/test_pg.py b/rllib/agents/pg/tests/test_pg.py
index 3f4cb23cb..500e29cbd 100644
--- a/rllib/agents/pg/tests/test_pg.py
+++ b/rllib/agents/pg/tests/test_pg.py
@@ -77,13 +77,12 @@ class TestPG(unittest.TestCase):
                     feed_dict=policy._get_loss_inputs_dict(
                         train_batch, shuffle=False))
             else:
-                results = (
-                    pg.pg_tf_loss if fw in ["tf2", "tfe"] else pg.pg_torch_loss
-                )(
-                    policy,
-                    policy.model,
-                    dist_class=dist_cls,
-                    train_batch=train_batch)
+                results = (pg.pg_tf_loss
+                           if fw in ["tf2", "tfe"] else pg.pg_torch_loss)(
+                               policy,
+                               policy.model,
+                               dist_class=dist_cls,
+                               train_batch=train_batch)
 
             # Calculate expected results.
             if fw != "torch":
diff --git a/rllib/agents/ppo/tests/test_ppo.py b/rllib/agents/ppo/tests/test_ppo.py
index 13aa9a5d5..61afe95dd 100644
--- a/rllib/agents/ppo/tests/test_ppo.py
+++ b/rllib/agents/ppo/tests/test_ppo.py
@@ -17,7 +17,6 @@ from ray.rllib.utils.numpy import fc
 from ray.rllib.utils.test_utils import check, framework_iterator, \
     check_compute_single_action
 
-
 # Fake CartPole episode of n time steps.
 FAKE_BATCH = {
     SampleBatch.CUR_OBS: np.array(
diff --git a/rllib/agents/qmix/qmix_policy.py b/rllib/agents/qmix/qmix_policy.py
index 5633c2077..cc13ff46b 100644
--- a/rllib/agents/qmix/qmix_policy.py
+++ b/rllib/agents/qmix/qmix_policy.py
@@ -280,14 +280,14 @@ class QMixTorchPolicy(Policy):
             masked_q_values = q_values.clone()
             masked_q_values[avail == 0.0] = -float("inf")
             masked_q_values_folded = torch.reshape(
-                masked_q_values,
-                [-1] + list(masked_q_values.shape)[2:])
+                masked_q_values, [-1] + list(masked_q_values.shape)[2:])
             actions, _ = self.exploration.get_exploration_action(
                 action_distribution=TorchCategorical(masked_q_values_folded),
                 timestep=timestep,
                 explore=explore)
             actions = torch.reshape(
-                actions, list(masked_q_values.shape)[:-1]).cpu().numpy()
+                actions,
+                list(masked_q_values.shape)[:-1]).cpu().numpy()
             hiddens = [s.cpu().numpy() for s in hiddens]
 
         return tuple(actions.transpose([1, 0])), hiddens, {}
diff --git a/rllib/agents/sac/sac_tf_policy.py b/rllib/agents/sac/sac_tf_policy.py
index 3c52eba99..987b73ced 100644
--- a/rllib/agents/sac/sac_tf_policy.py
+++ b/rllib/agents/sac/sac_tf_policy.py
@@ -231,10 +231,8 @@ def sac_actor_critic_loss(policy, model, _, train_batch):
             y_true=q_t_selected_target, y_pred=q_t_selected)
     ]
     if policy.config["twin_q"]:
-        critic_loss.append(
-            0.5 * tf.keras.losses.MSE(
-                y_true=q_t_selected_target,
-                y_pred=twin_q_t_selected))
+        critic_loss.append(0.5 * tf.keras.losses.MSE(
+            y_true=q_t_selected_target, y_pred=twin_q_t_selected))
 
     # Alpha- and actor losses.
     # Note: In the papers, alpha is used directly, here we take the log.
@@ -281,25 +279,27 @@ def gradients_fn(policy, optimizer, loss):
     if policy.config["framework"] in ["tf2", "tfe"]:
         tape = optimizer.tape
         pol_weights = policy.model.policy_variables()
-        actor_grads_and_vars = list(zip(tape.gradient(
-            policy.actor_loss, pol_weights), pol_weights))
+        actor_grads_and_vars = list(
+            zip(tape.gradient(policy.actor_loss, pol_weights), pol_weights))
         q_weights = policy.model.q_variables()
         if policy.config["twin_q"]:
             half_cutoff = len(q_weights) // 2
-            grads_1 = tape.gradient(
-                policy.critic_loss[0], q_weights[:half_cutoff])
-            grads_2 = tape.gradient(
-                policy.critic_loss[1], q_weights[half_cutoff:])
+            grads_1 = tape.gradient(policy.critic_loss[0],
+                                    q_weights[:half_cutoff])
+            grads_2 = tape.gradient(policy.critic_loss[1],
+                                    q_weights[half_cutoff:])
             critic_grads_and_vars = \
                 list(zip(grads_1, q_weights[:half_cutoff])) + \
                 list(zip(grads_2, q_weights[half_cutoff:]))
         else:
-            critic_grads_and_vars = list(zip(tape.gradient(
-                policy.critic_loss[0], q_weights), q_weights))
+            critic_grads_and_vars = list(
+                zip(
+                    tape.gradient(policy.critic_loss[0], q_weights),
+                    q_weights))
 
         alpha_vars = [policy.model.log_alpha]
-        alpha_grads_and_vars = list(zip(tape.gradient(
-            policy.alpha_loss, alpha_vars), alpha_vars))
+        alpha_grads_and_vars = list(
+            zip(tape.gradient(policy.alpha_loss, alpha_vars), alpha_vars))
     # Tf1.x: Use optimizer.compute_gradients()
     else:
         actor_grads_and_vars = policy._actor_optimizer.compute_gradients(
@@ -327,12 +327,15 @@ def gradients_fn(policy, optimizer, loss):
         clip_func = tf.identity
 
     # Save grads and vars for later use in `build_apply_op`.
-    policy._actor_grads_and_vars = [
-        (clip_func(g), v) for (g, v) in actor_grads_and_vars if g is not None]
-    policy._critic_grads_and_vars = [
-        (clip_func(g), v) for (g, v) in critic_grads_and_vars if g is not None]
-    policy._alpha_grads_and_vars = [
-        (clip_func(g), v) for (g, v) in alpha_grads_and_vars if g is not None]
+    policy._actor_grads_and_vars = [(clip_func(g), v)
+                                    for (g, v) in actor_grads_and_vars
+                                    if g is not None]
+    policy._critic_grads_and_vars = [(clip_func(g), v)
+                                     for (g, v) in critic_grads_and_vars
+                                     if g is not None]
+    policy._alpha_grads_and_vars = [(clip_func(g), v)
+                                    for (g, v) in alpha_grads_and_vars
+                                    if g is not None]
 
     grads_and_vars = (
         policy._actor_grads_and_vars + policy._critic_grads_and_vars +
@@ -391,15 +394,13 @@ class ActorCriticOptimizerMixin:
             self._actor_optimizer = tf.keras.optimizers.Adam(
                 learning_rate=config["optimization"]["actor_learning_rate"])
             self._critic_optimizer = [
-                tf.keras.optimizers.Adam(
-                    learning_rate=config["optimization"][
-                        "critic_learning_rate"])
+                tf.keras.optimizers.Adam(learning_rate=config["optimization"][
+                    "critic_learning_rate"])
             ]
             if config["twin_q"]:
                 self._critic_optimizer.append(
-                    tf.keras.optimizers.Adam(
-                        learning_rate=config["optimization"][
-                            "critic_learning_rate"]))
+                    tf.keras.optimizers.Adam(learning_rate=config[
+                        "optimization"]["critic_learning_rate"]))
             self._alpha_optimizer = tf.keras.optimizers.Adam(
                 learning_rate=config["optimization"]["entropy_learning_rate"])
         else:
@@ -407,15 +408,13 @@ class ActorCriticOptimizerMixin:
             self._actor_optimizer = tf1.train.AdamOptimizer(
                 learning_rate=config["optimization"]["actor_learning_rate"])
             self._critic_optimizer = [
-                tf1.train.AdamOptimizer(
-                    learning_rate=config["optimization"][
-                        "critic_learning_rate"])
+                tf1.train.AdamOptimizer(learning_rate=config["optimization"][
+                    "critic_learning_rate"])
             ]
             if config["twin_q"]:
                 self._critic_optimizer.append(
-                    tf1.train.AdamOptimizer(
-                        learning_rate=config["optimization"][
-                            "critic_learning_rate"]))
+                    tf1.train.AdamOptimizer(learning_rate=config[
+                        "optimization"]["critic_learning_rate"]))
             self._alpha_optimizer = tf1.train.AdamOptimizer(
                 learning_rate=config["optimization"]["entropy_learning_rate"])
 
diff --git a/rllib/evaluation/sample_batch_builder.py b/rllib/evaluation/sample_batch_builder.py
index bf1888d07..ae68d2f5a 100644
--- a/rllib/evaluation/sample_batch_builder.py
+++ b/rllib/evaluation/sample_batch_builder.py
@@ -27,6 +27,7 @@ def to_float_array(v: List[Any]) -> np.ndarray:
 
 # TODO(sven): Remove the following class once we switch to trajectory view API.
 
+
 @PublicAPI
 class SampleBatchBuilder:
     """Util to build a SampleBatch incrementally.
@@ -76,6 +77,7 @@ class SampleBatchBuilder:
 
 # TODO(sven): Remove the following class once we switch to trajectory view API.
 
+
 @DeveloperAPI
 class MultiAgentSampleBatchBuilder:
     """Util to build SampleBatches for each policy in a multi-agent env.
diff --git a/rllib/evaluation/sample_collector.py b/rllib/evaluation/sample_collector.py
index 6880e8bd0..af17fae9d 100644
--- a/rllib/evaluation/sample_collector.py
+++ b/rllib/evaluation/sample_collector.py
@@ -60,12 +60,9 @@ class _SampleCollector(metaclass=ABCMeta):
         raise NotImplementedError
 
     @abstractmethod
-    def add_action_reward_next_obs(
-            self,
-            episode_id: EpisodeID,
-            agent_id: AgentID,
-            policy_id: PolicyID,
-            values: Dict[str, TensorType]) -> None:
+    def add_action_reward_next_obs(self, episode_id: EpisodeID,
+                                   agent_id: AgentID, policy_id: PolicyID,
+                                   values: Dict[str, TensorType]) -> None:
         """Add the given dictionary (row) of values to this collector.
 
         The incoming data (`values`) must include action, reward, done, and
diff --git a/rllib/evaluation/sampler.py b/rllib/evaluation/sampler.py
index 402b25769..0cb9f9b93 100644
--- a/rllib/evaluation/sampler.py
+++ b/rllib/evaluation/sampler.py
@@ -373,26 +373,26 @@ class AsyncSampler(threading.Thread, SamplerInput):
         return extra
 
 
-def _env_runner(worker: "RolloutWorker",
-                base_env: BaseEnv,
-                extra_batch_callback: Callable[[SampleBatchType], None],
-                policies: Dict[PolicyID, Policy],
-                policy_mapping_fn: Callable[[AgentID], PolicyID],
-                rollout_fragment_length: int,
-                horizon: int,
-                preprocessors: Dict[PolicyID, Preprocessor],
-                obs_filters: Dict[PolicyID, Filter],
-                clip_rewards: bool,
-                clip_actions: bool,
-                pack_multiple_episodes_in_batch: bool,
-                callbacks: "DefaultCallbacks",
-                tf_sess: Optional["tf.Session"],
-                perf_stats: _PerfStats,
-                soft_horizon: bool,
-                no_done_at_end: bool,
-                observation_fn: "ObservationFunction",
-                _use_trajectory_view_api: bool = False
-                ) -> Iterable[SampleBatchType]:
+def _env_runner(
+        worker: "RolloutWorker",
+        base_env: BaseEnv,
+        extra_batch_callback: Callable[[SampleBatchType], None],
+        policies: Dict[PolicyID, Policy],
+        policy_mapping_fn: Callable[[AgentID], PolicyID],
+        rollout_fragment_length: int,
+        horizon: int,
+        preprocessors: Dict[PolicyID, Preprocessor],
+        obs_filters: Dict[PolicyID, Filter],
+        clip_rewards: bool,
+        clip_actions: bool,
+        pack_multiple_episodes_in_batch: bool,
+        callbacks: "DefaultCallbacks",
+        tf_sess: Optional["tf.Session"],
+        perf_stats: _PerfStats,
+        soft_horizon: bool,
+        no_done_at_end: bool,
+        observation_fn: "ObservationFunction",
+        _use_trajectory_view_api: bool = False) -> Iterable[SampleBatchType]:
     """This implements the common experience collection logic.
 
     Args:
@@ -571,18 +571,23 @@ def _env_runner(worker: "RolloutWorker",
 
 
 def _process_observations(
-        worker: "RolloutWorker", base_env: BaseEnv,
+        worker: "RolloutWorker",
+        base_env: BaseEnv,
         policies: Dict[PolicyID, Policy],
         batch_builder_pool: List[MultiAgentSampleBatchBuilder],
         active_episodes: Dict[str, MultiAgentEpisode],
         unfiltered_obs: Dict[EnvID, Dict[AgentID, EnvObsType]],
         rewards: Dict[EnvID, Dict[AgentID, float]],
         dones: Dict[EnvID, Dict[AgentID, bool]],
-        infos: Dict[EnvID, Dict[AgentID, EnvInfoDict]], horizon: int,
+        infos: Dict[EnvID, Dict[AgentID, EnvInfoDict]],
+        horizon: int,
         preprocessors: Dict[PolicyID, Preprocessor],
-        obs_filters: Dict[PolicyID, Filter], rollout_fragment_length: int,
-        pack_multiple_episodes_in_batch: bool, callbacks: "DefaultCallbacks",
-        soft_horizon: bool, no_done_at_end: bool,
+        obs_filters: Dict[PolicyID, Filter],
+        rollout_fragment_length: int,
+        pack_multiple_episodes_in_batch: bool,
+        callbacks: "DefaultCallbacks",
+        soft_horizon: bool,
+        no_done_at_end: bool,
         observation_fn: "ObservationFunction",
         _use_trajectory_view_api: bool = False
 ) -> Tuple[Set[EnvID], Dict[PolicyID, List[PolicyEvalData]], List[Union[
@@ -931,8 +936,8 @@ def _do_policy_eval(
 def _process_policy_eval_results(
         *,
         to_eval: Dict[PolicyID, List[PolicyEvalData]],
-        eval_results: Dict[PolicyID, Tuple[
-            TensorStructType, StateBatch, dict]],
+        eval_results: Dict[PolicyID, Tuple[TensorStructType, StateBatch,
+                                           dict]],
         active_episodes: Dict[str, MultiAgentEpisode],
         active_envs: Set[int],
         off_policy_actions: MultiEnvDict,
diff --git a/rllib/examples/env/random_env.py b/rllib/examples/env/random_env.py
index 6167ec882..7f8a2ef18 100644
--- a/rllib/examples/env/random_env.py
+++ b/rllib/examples/env/random_env.py
@@ -52,9 +52,9 @@ class RandomEnv(gym.Env):
             done = True
         # Max not reached yet -> Sample done via p_done.
         else:
-            done = bool(np.random.choice(
-                [True, False], p=[self.p_done, 1.0 - self.p_done]
-            ))
+            done = bool(
+                np.random.choice(
+                    [True, False], p=[self.p_done, 1.0 - self.p_done]))
 
         return self.observation_space.sample(), \
             float(self.reward_space.sample()), done, {}
diff --git a/rllib/examples/policy/random_policy.py b/rllib/examples/policy/random_policy.py
index 6081768d4..0ae04627c 100644
--- a/rllib/examples/policy/random_policy.py
+++ b/rllib/examples/policy/random_policy.py
@@ -18,8 +18,10 @@ class RandomPolicy(Policy):
         if self.config.get("ignore_action_bounds", False) and \
                 isinstance(self.action_space, Box):
             self.action_space_for_sampling = Box(
-                -float("inf"), float("inf"),
-                shape=self.action_space.shape, dtype=self.action_space.dtype)
+                -float("inf"),
+                float("inf"),
+                shape=self.action_space.shape,
+                dtype=self.action_space.dtype)
         else:
             self.action_space_for_sampling = self.action_space
 
diff --git a/rllib/examples/rollout_worker_custom_workflow.py b/rllib/examples/rollout_worker_custom_workflow.py
index 830efba71..5fb412816 100644
--- a/rllib/examples/rollout_worker_custom_workflow.py
+++ b/rllib/examples/rollout_worker_custom_workflow.py
@@ -44,8 +44,8 @@ class CustomPolicy(Policy):
                         episodes=None,
                         **kwargs):
         # return random actions
-        return np.array([self.action_space.sample()
-                         for _ in obs_batch]), [], {}
+        return np.array(
+            [self.action_space.sample() for _ in obs_batch]), [], {}
 
     def learn_on_batch(self, samples):
         # implement your learning code here
diff --git a/rllib/execution/train_ops.py b/rllib/execution/train_ops.py
index 5a3fd0cf0..edc8ea934 100644
--- a/rllib/execution/train_ops.py
+++ b/rllib/execution/train_ops.py
@@ -138,10 +138,8 @@ class TrainTFMultiGPU:
         with self.workers.local_worker().tf_sess.graph.as_default():
             with self.workers.local_worker().tf_sess.as_default():
                 for policy_id in self.policies:
-                    policy = self.workers.local_worker().get_policy(
-                        policy_id)
-                    with tf1.variable_scope(
-                            policy_id, reuse=tf1.AUTO_REUSE):
+                    policy = self.workers.local_worker().get_policy(policy_id)
+                    with tf1.variable_scope(policy_id, reuse=tf1.AUTO_REUSE):
                         if policy._state_inputs:
                             rnn_inputs = policy._state_inputs + [
                                 policy._seq_lens
@@ -150,12 +148,10 @@ class TrainTFMultiGPU:
                             rnn_inputs = []
                         self.optimizers[policy_id] = (
                             LocalSyncParallelOptimizer(
-                                policy._optimizer,
-                                self.devices,
-                                [v for _, v in policy._loss_inputs],
-                                rnn_inputs,
-                                self.per_device_batch_size,
-                                policy.copy))
+                                policy._optimizer, self.devices,
+                                [v
+                                 for _, v in policy._loss_inputs], rnn_inputs,
+                                self.per_device_batch_size, policy.copy))
 
                 self.sess = self.workers.local_worker().tf_sess
                 self.sess.run(tf1.global_variables_initializer())
diff --git a/rllib/models/tf/layers/__init__.py b/rllib/models/tf/layers/__init__.py
index 887853895..68ae2ea53 100644
--- a/rllib/models/tf/layers/__init__.py
+++ b/rllib/models/tf/layers/__init__.py
@@ -6,9 +6,6 @@ from ray.rllib.models.tf.layers.skip_connection import SkipConnection
 from ray.rllib.models.tf.layers.multi_head_attention import MultiHeadAttention
 
 __all__ = [
-    "GRUGate",
-    "MultiHeadAttention",
-    "NoisyLayer",
-    "RelativeMultiHeadAttention",
-    "SkipConnection"
+    "GRUGate", "MultiHeadAttention", "NoisyLayer",
+    "RelativeMultiHeadAttention", "SkipConnection"
 ]
diff --git a/rllib/models/tf/layers/noisy_layer.py b/rllib/models/tf/layers/noisy_layer.py
index 9fa570db5..6e6a32c68 100644
--- a/rllib/models/tf/layers/noisy_layer.py
+++ b/rllib/models/tf/layers/noisy_layer.py
@@ -16,11 +16,7 @@ class NoisyLayer(tf.keras.layers.Layer if tf else object):
     vanish along the training procedure
     """
 
-    def __init__(self,
-                 prefix,
-                 out_size,
-                 sigma0,
-                 activation="relu"):
+    def __init__(self, prefix, out_size, sigma0, activation="relu"):
         """Initializes a NoisyLayer object.
 
         Args:
@@ -53,8 +49,7 @@ class NoisyLayer(tf.keras.layers.Layer if tf else object):
             trainable=True,
             tf_name=self.prefix + "_sigma_w",
             shape=[in_size, self.out_size],
-            dtype=tf.float32
-        )
+            dtype=tf.float32)
 
         self.sigma_b = get_variable(
             value=tf.keras.initializers.Constant(
diff --git a/rllib/models/tf/visionnet.py b/rllib/models/tf/visionnet.py
index 86136515d..e8278ebda 100644
--- a/rllib/models/tf/visionnet.py
+++ b/rllib/models/tf/visionnet.py
@@ -81,9 +81,9 @@ class VisionNetwork(TFModelV2):
                         "Given `conv_filters` ({}) do not result in a [B, 1, "
                         "1, {} (`num_outputs`)] shape (but in {})! Please "
                         "adjust your Conv2D stack such that the dims 1 and 2 "
-                        "are both 1.".format(
-                            self.model_config["conv_filters"],
-                            self.num_outputs, list(conv_out.shape)))
+                        "are both 1.".format(self.model_config["conv_filters"],
+                                             self.num_outputs,
+                                             list(conv_out.shape)))
 
             # num_outputs not known -> Flatten, then set self.num_outputs
             # to the resulting number of nodes.
diff --git a/rllib/models/torch/modules/noisy_layer.py b/rllib/models/torch/modules/noisy_layer.py
index 2e10d8e0d..67d92fc15 100644
--- a/rllib/models/torch/modules/noisy_layer.py
+++ b/rllib/models/torch/modules/noisy_layer.py
@@ -67,20 +67,22 @@ class NoisyLayer(nn.Module):
             trainable=True)
 
     def forward(self, inputs):
-        epsilon_in = self._f_epsilon(torch.normal(
-            mean=torch.zeros([self.in_size]),
-            std=torch.ones([self.in_size])))
-        epsilon_out = self._f_epsilon(torch.normal(
-            mean=torch.zeros([self.out_size]),
-            std=torch.ones([self.out_size])))
+        epsilon_in = self._f_epsilon(
+            torch.normal(
+                mean=torch.zeros([self.in_size]),
+                std=torch.ones([self.in_size])))
+        epsilon_out = self._f_epsilon(
+            torch.normal(
+                mean=torch.zeros([self.out_size]),
+                std=torch.ones([self.out_size])))
         epsilon_w = torch.matmul(
             torch.unsqueeze(epsilon_in, -1),
             other=torch.unsqueeze(epsilon_out, 0))
         epsilon_b = epsilon_out
 
         action_activation = torch.matmul(
-            inputs, self.w + self.sigma_w * epsilon_w
-        ) + self.b + self.sigma_b * epsilon_b
+            inputs, self.w +
+            self.sigma_w * epsilon_w) + self.b + self.sigma_b * epsilon_b
 
         if self.activation is not None:
             action_activation = self.activation(action_activation)
diff --git a/rllib/models/torch/visionnet.py b/rllib/models/torch/visionnet.py
index b9aff6a9e..ae54f0835 100644
--- a/rllib/models/torch/visionnet.py
+++ b/rllib/models/torch/visionnet.py
@@ -158,9 +158,8 @@ class VisionNetwork(TorchModelV2, nn.Module):
                     "Given `conv_filters` ({}) do not result in a [B, {} "
                     "(`num_outputs`), 1, 1] shape (but in {})! Please adjust "
                     "your Conv2D stack such that the last 2 dims are both "
-                    "1.".format(
-                        self.model_config["conv_filters"], self.num_outputs,
-                        list(conv_out.shape)))
+                    "1.".format(self.model_config["conv_filters"],
+                                self.num_outputs, list(conv_out.shape)))
             logits = conv_out.squeeze(3)
             logits = logits.squeeze(2)
 
diff --git a/rllib/policy/dynamic_tf_policy.py b/rllib/policy/dynamic_tf_policy.py
index 559c70ff2..722a53248 100644
--- a/rllib/policy/dynamic_tf_policy.py
+++ b/rllib/policy/dynamic_tf_policy.py
@@ -47,36 +47,36 @@ class DynamicTFPolicy(TFPolicy):
     """
 
     @DeveloperAPI
-    def __init__(self,
-                 obs_space: gym.spaces.Space,
-                 action_space: gym.spaces.Space,
-                 config: TrainerConfigDict,
-                 loss_fn: Callable[
-                     [Policy, ModelV2, type, SampleBatch], TensorType],
-                 *,
-                 stats_fn: Optional[Callable[[Policy, SampleBatch],
-                                             Dict[str, TensorType]]] = None,
-                 grad_stats_fn: Optional[Callable[
-                     [Policy, SampleBatch, ModelGradients],
-                     Dict[str, TensorType]]] = None,
-                 before_loss_init: Optional[Callable[
-                     [Policy, gym.spaces.Space, gym.spaces.Space,
-                      TrainerConfigDict], None]] = None,
-                 make_model: Optional[Callable[
-                     [Policy, gym.spaces.Space, gym.spaces.Space,
-                      TrainerConfigDict], ModelV2]] = None,
-                 action_sampler_fn: Optional[Callable[
-                     [TensorType, List[TensorType]], Tuple[
-                      TensorType, TensorType]]] = None,
-                 action_distribution_fn: Optional[Callable[
-                     [Policy, ModelV2, TensorType, TensorType, TensorType],
-                     Tuple[TensorType, type, List[TensorType]]]] = None,
-                 existing_inputs: Optional[Dict[
-                     str, "tf1.placeholder"]] = None,
-                 existing_model: Optional[ModelV2] = None,
-                 get_batch_divisibility_req: Optional[Callable[
-                     [Policy], int]] = None,
-                 obs_include_prev_action_reward: bool = True):
+    def __init__(
+            self,
+            obs_space: gym.spaces.Space,
+            action_space: gym.spaces.Space,
+            config: TrainerConfigDict,
+            loss_fn: Callable[[Policy, ModelV2, type, SampleBatch],
+                              TensorType],
+            *,
+            stats_fn: Optional[Callable[[Policy, SampleBatch], Dict[
+                str, TensorType]]] = None,
+            grad_stats_fn: Optional[Callable[[
+                Policy, SampleBatch, ModelGradients
+            ], Dict[str, TensorType]]] = None,
+            before_loss_init: Optional[Callable[[
+                Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
+            ], None]] = None,
+            make_model: Optional[Callable[[
+                Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
+            ], ModelV2]] = None,
+            action_sampler_fn: Optional[Callable[[
+                TensorType, List[TensorType]
+            ], Tuple[TensorType, TensorType]]] = None,
+            action_distribution_fn: Optional[Callable[[
+                Policy, ModelV2, TensorType, TensorType, TensorType
+            ], Tuple[TensorType, type, List[TensorType]]]] = None,
+            existing_inputs: Optional[Dict[str, "tf1.placeholder"]] = None,
+            existing_model: Optional[ModelV2] = None,
+            get_batch_divisibility_req: Optional[Callable[[Policy],
+                                                          int]] = None,
+            obs_include_prev_action_reward: bool = True):
         """Initialize a dynamic TF policy.
 
         Arguments:
diff --git a/rllib/policy/eager_tf_policy.py b/rllib/policy/eager_tf_policy.py
index a8bf265e2..65da4e146 100644
--- a/rllib/policy/eager_tf_policy.py
+++ b/rllib/policy/eager_tf_policy.py
@@ -641,8 +641,8 @@ def build_eager_tf_policy(name,
                 dummy_batch["seq_lens"] = np.array([1], dtype=np.int32)
 
             # Convert everything to tensors.
-            dummy_batch = tf.nest.map_structure(
-                tf1.convert_to_tensor, dummy_batch)
+            dummy_batch = tf.nest.map_structure(tf1.convert_to_tensor,
+                                                dummy_batch)
 
             # for IMPALA which expects a certain sample batch size.
             def tile_to(tensor, n):
diff --git a/rllib/policy/policy.py b/rllib/policy/policy.py
index 15380e2d4..920d40a1b 100644
--- a/rllib/policy/policy.py
+++ b/rllib/policy/policy.py
@@ -46,11 +46,8 @@ class Policy(metaclass=ABCMeta):
     """
 
     @DeveloperAPI
-    def __init__(
-            self,
-            observation_space: gym.spaces.Space,
-            action_space: gym.spaces.Space,
-            config: TrainerConfigDict):
+    def __init__(self, observation_space: gym.spaces.Space,
+                 action_space: gym.spaces.Space, config: TrainerConfigDict):
         """Initialize the graph.
 
         This is the standard constructor for policies. The policy
@@ -181,9 +178,9 @@ class Policy(metaclass=ABCMeta):
             episodes = [episode]
         if state is not None:
             state_batch = [
-                s.unsqueeze(0) if torch and isinstance(s, torch.Tensor) else
-                np.expand_dims(s, 0)
-                for s in state
+                s.unsqueeze(0)
+                if torch and isinstance(s, torch.Tensor) else np.expand_dims(
+                    s, 0) for s in state
             ]
 
         out = self.compute_actions(
@@ -261,10 +258,10 @@ class Policy(metaclass=ABCMeta):
             actions: Union[List[TensorType], TensorType],
             obs_batch: Union[List[TensorType], TensorType],
             state_batches: Optional[List[TensorType]] = None,
-            prev_action_batch: Optional[
-                Union[List[TensorType], TensorType]] = None,
-            prev_reward_batch: Optional[
-                Union[List[TensorType], TensorType]] = None) -> TensorType:
+            prev_action_batch: Optional[Union[List[TensorType],
+                                              TensorType]] = None,
+            prev_reward_batch: Optional[Union[List[
+                TensorType], TensorType]] = None) -> TensorType:
         """Computes the log-prob/likelihood for a given action and observation.
 
         Args:
@@ -309,8 +306,8 @@ class Policy(metaclass=ABCMeta):
     def postprocess_trajectory(
             self,
             sample_batch: SampleBatch,
-            other_agent_batches: Optional[
-                Dict[AgentID, Tuple["Policy", SampleBatch]]] = None,
+            other_agent_batches: Optional[Dict[AgentID, Tuple[
+                "Policy", SampleBatch]]] = None,
             episode: Optional["MultiAgentEpisode"] = None) -> SampleBatch:
         """Implements algorithm-specific trajectory postprocessing.
 
diff --git a/rllib/policy/sample_batch.py b/rllib/policy/sample_batch.py
index 17cfb1e7f..37fecb002 100644
--- a/rllib/policy/sample_batch.py
+++ b/rllib/policy/sample_batch.py
@@ -305,10 +305,9 @@ class SampleBatch:
         self.data[key] = item
 
     @DeveloperAPI
-    def compress(
-            self,
-            bulk: bool = False,
-            columns: Set[str] = frozenset(["obs", "new_obs"])) -> None:
+    def compress(self,
+                 bulk: bool = False,
+                 columns: Set[str] = frozenset(["obs", "new_obs"])) -> None:
         """Compresses the data buffers (by column) in place.
 
         Args:
@@ -327,10 +326,9 @@ class SampleBatch:
                         [pack(o) for o in self.data[key]])
 
     @DeveloperAPI
-    def decompress_if_needed(
-            self,
-            columns: Set[str] = frozenset(
-                ["obs", "new_obs"])) -> "SampleBatch":
+    def decompress_if_needed(self,
+                             columns: Set[str] = frozenset(
+                                 ["obs", "new_obs"])) -> "SampleBatch":
         """Decompresses data buffers (per column if not compressed) in place.
 
         Args:
@@ -374,8 +372,7 @@ class MultiAgentBatch:
     """
 
     @PublicAPI
-    def __init__(self,
-                 policy_batches: Dict[PolicyID, SampleBatch],
+    def __init__(self, policy_batches: Dict[PolicyID, SampleBatch],
                  env_steps: int):
         """Initialize a MultiAgentBatch object.
 
@@ -541,11 +538,9 @@ class MultiAgentBatch:
         return sum(b.size_bytes() for b in self.policy_batches.values())
 
     @DeveloperAPI
-    def compress(
-            self,
-            bulk: bool = False,
-            columns: Set[str] = frozenset(
-                ["obs", "new_obs"])) -> None:
+    def compress(self,
+                 bulk: bool = False,
+                 columns: Set[str] = frozenset(["obs", "new_obs"])) -> None:
         """Compresses each policy batch (per column) in place.
 
         Args:
@@ -558,10 +553,9 @@ class MultiAgentBatch:
             batch.compress(bulk=bulk, columns=columns)
 
     @DeveloperAPI
-    def decompress_if_needed(
-            self,
-            columns: Set[str] = frozenset(
-                ["obs", "new_obs"])) -> "MultiAgentBatch":
+    def decompress_if_needed(self,
+                             columns: Set[str] = frozenset(
+                                 ["obs", "new_obs"])) -> "MultiAgentBatch":
         """Decompresses each policy batch (per column), if already compressed.
 
         Args:
diff --git a/rllib/policy/tests/test_trajectory_view_api.py b/rllib/policy/tests/test_trajectory_view_api.py
index 9d728f4b7..91e10650e 100644
--- a/rllib/policy/tests/test_trajectory_view_api.py
+++ b/rllib/policy/tests/test_trajectory_view_api.py
@@ -25,8 +25,9 @@ class TestTrajectoryViewAPI(unittest.TestCase):
             assert len(view_req_model) == 1
             assert len(view_req_policy) == 6
             for key in [
-                SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
-                SampleBatch.DONES, SampleBatch.NEXT_OBS, SampleBatch.VF_PREDS
+                    SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
+                    SampleBatch.DONES, SampleBatch.NEXT_OBS,
+                    SampleBatch.VF_PREDS
             ]:
                 assert key in view_req_policy
                 # None of the view cols has a special underlying data_col,
@@ -53,9 +54,10 @@ class TestTrajectoryViewAPI(unittest.TestCase):
             assert len(view_req_model) == 3  # obs, prev_a, prev_r
             assert len(view_req_policy) == 8
             for key in [
-                SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
-                SampleBatch.DONES, SampleBatch.NEXT_OBS, SampleBatch.VF_PREDS,
-                SampleBatch.PREV_ACTIONS, SampleBatch.PREV_REWARDS
+                    SampleBatch.OBS, SampleBatch.ACTIONS, SampleBatch.REWARDS,
+                    SampleBatch.DONES, SampleBatch.NEXT_OBS,
+                    SampleBatch.VF_PREDS, SampleBatch.PREV_ACTIONS,
+                    SampleBatch.PREV_REWARDS
             ]:
                 assert key in view_req_policy
 
@@ -65,9 +67,10 @@ class TestTrajectoryViewAPI(unittest.TestCase):
                 elif key == SampleBatch.PREV_REWARDS:
                     assert view_req_policy[key].data_col == SampleBatch.REWARDS
                     assert view_req_policy[key].shift == -1
-                elif key not in [SampleBatch.NEXT_OBS,
-                                 SampleBatch.PREV_ACTIONS,
-                                 SampleBatch.PREV_REWARDS]:
+                elif key not in [
+                        SampleBatch.NEXT_OBS, SampleBatch.PREV_ACTIONS,
+                        SampleBatch.PREV_REWARDS
+                ]:
                     assert view_req_policy[key].data_col is None
                 else:
                     assert view_req_policy[key].data_col == SampleBatch.OBS
diff --git a/rllib/policy/tf_policy.py b/rllib/policy/tf_policy.py
index 84154a43f..ac9b768f8 100644
--- a/rllib/policy/tf_policy.py
+++ b/rllib/policy/tf_policy.py
@@ -239,8 +239,7 @@ class TFPolicy(Policy):
         """Returns whether the loss function has been initialized."""
         return self._loss is not None
 
-    def _initialize_loss(self,
-                         loss: TensorType,
+    def _initialize_loss(self, loss: TensorType,
                          loss_inputs: List[Tuple[str, TensorType]]) -> None:
         """Initializes the loss op from given loss tensor and placeholders.
 
@@ -264,8 +263,10 @@ class TFPolicy(Policy):
             self._loss = loss
 
         self._optimizer = self.optimizer()
-        self._grads_and_vars = [(g, v) for (g, v) in self.gradients(
-            self._optimizer, self._loss) if g is not None]
+        self._grads_and_vars = [
+            (g, v) for (g, v) in self.gradients(self._optimizer, self._loss)
+            if g is not None
+        ]
         self._grads = [g for (g, v) in self._grads_and_vars]
 
         # TODO(sven/ekl): Deprecate support for v1 models.
@@ -336,10 +337,10 @@ class TFPolicy(Policy):
             actions: Union[List[TensorType], TensorType],
             obs_batch: Union[List[TensorType], TensorType],
             state_batches: Optional[List[TensorType]] = None,
-            prev_action_batch: Optional[
-                Union[List[TensorType], TensorType]] = None,
-            prev_reward_batch: Optional[
-                Union[List[TensorType], TensorType]] = None) -> TensorType:
+            prev_action_batch: Optional[Union[List[TensorType],
+                                              TensorType]] = None,
+            prev_reward_batch: Optional[Union[List[
+                TensorType], TensorType]] = None) -> TensorType:
 
         if self._log_likelihood is None:
             raise ValueError("Cannot compute log-prob/likelihood w/o a "
@@ -378,8 +379,8 @@ class TFPolicy(Policy):
 
     @override(Policy)
     @DeveloperAPI
-    def learn_on_batch(self, postprocessed_batch: SampleBatch) -> Dict[
-            str, TensorType]:
+    def learn_on_batch(
+            self, postprocessed_batch: SampleBatch) -> Dict[str, TensorType]:
         assert self.loss_initialized()
         builder = TFRunBuilder(self._sess, "learn_on_batch")
         fetches = self._build_learn_on_batch(builder, postprocessed_batch)
@@ -457,7 +458,8 @@ class TFPolicy(Policy):
 
     @override(Policy)
     @DeveloperAPI
-    def export_checkpoint(self, export_dir: str,
+    def export_checkpoint(self,
+                          export_dir: str,
                           filename_prefix: str = "model") -> None:
         """Export tensorflow checkpoint to export_dir."""
         try:
@@ -573,8 +575,7 @@ class TFPolicy(Policy):
             return tf1.train.AdamOptimizer()
 
     @DeveloperAPI
-    def gradients(self,
-                  optimizer: "tf.keras.optimizers.Optimizer",
+    def gradients(self, optimizer: "tf.keras.optimizers.Optimizer",
                   loss: TensorType) -> List[Tuple[TensorType, TensorType]]:
         """Override this for a custom gradient computation behavior.
 
@@ -816,8 +817,7 @@ class LearningRateSchedule:
 
     @DeveloperAPI
     def __init__(self, lr, lr_schedule):
-        self.cur_lr = tf1.get_variable(
-            "lr", initializer=lr, trainable=False)
+        self.cur_lr = tf1.get_variable("lr", initializer=lr, trainable=False)
         if lr_schedule is None:
             self.lr_schedule = ConstantSchedule(lr, framework=None)
         else:
@@ -843,7 +843,9 @@ class EntropyCoeffSchedule:
     @DeveloperAPI
     def __init__(self, entropy_coeff, entropy_coeff_schedule):
         self.entropy_coeff = get_variable(
-            entropy_coeff, framework="tf", tf_name="entropy_coeff",
+            entropy_coeff,
+            framework="tf",
+            tf_name="entropy_coeff",
             trainable=False)
 
         if entropy_coeff_schedule is None:
diff --git a/rllib/policy/tf_policy_template.py b/rllib/policy/tf_policy_template.py
index 910ef9c94..806e4fb80 100644
--- a/rllib/policy/tf_policy_template.py
+++ b/rllib/policy/tf_policy_template.py
@@ -13,58 +13,52 @@ from ray.rllib.utils.types import ModelGradients, TensorType, TrainerConfigDict
 
 
 @DeveloperAPI
-def build_tf_policy(name: str,
-                    *,
-                    loss_fn: Callable[
-                        [Policy, ModelV2, type, SampleBatch], TensorType],
-                    get_default_config: Optional[
-                        Callable[[None], TrainerConfigDict]] = None,
-                    postprocess_fn: Optional[Callable[
-                        [Policy, SampleBatch, List[SampleBatch],
-                         "MultiAgentEpisode"], None]] = None,
-                    stats_fn: Optional[Callable[
-                        [Policy, SampleBatch], Dict[str, TensorType]]] = None,
-                    optimizer_fn: Optional[Callable[
-                        [Policy, TrainerConfigDict],
-                        "tf.keras.optimizers.Optimizer"]] = None,
-                    gradients_fn: Optional[Callable[
-                        [Policy, "tf.keras.optimizers.Optimizer",
-                         TensorType], ModelGradients]] = None,
-                    apply_gradients_fn: Optional[Callable[
-                        [Policy, "tf.keras.optimizers.Optimizer",
-                         ModelGradients], "tf.Operation"]] = None,
-                    grad_stats_fn: Optional[Callable[
-                        [Policy, SampleBatch, ModelGradients],
-                        Dict[str, TensorType]]] = None,
-                    extra_action_fetches_fn: Optional[Callable[
-                        [Policy], Dict[str, TensorType]]] = None,
-                    extra_learn_fetches_fn: Optional[Callable[
-                        [Policy], Dict[str, TensorType]]] = None,
-                    validate_spaces: Optional[Callable[
-                        [Policy, gym.Space, gym.Space, TrainerConfigDict],
-                        None]] = None,
-                    before_init: Optional[Callable[
-                        [Policy, gym.Space, gym.Space, TrainerConfigDict],
-                        None]] = None,
-                    before_loss_init: Optional[Callable[
-                        [Policy, gym.spaces.Space, gym.spaces.Space,
-                         TrainerConfigDict], None]] = None,
-                    after_init: Optional[Callable[
-                        [Policy, gym.Space, gym.Space, TrainerConfigDict],
-                        None]] = None,
-                    make_model: Optional[Callable[
-                        [Policy, gym.spaces.Space, gym.spaces.Space,
-                         TrainerConfigDict], ModelV2]] = None,
-                    action_sampler_fn: Optional[Callable[
-                        [TensorType, List[TensorType]], Tuple[
-                            TensorType, TensorType]]] = None,
-                    action_distribution_fn: Optional[Callable[
-                        [Policy, ModelV2, TensorType, TensorType, TensorType],
-                        Tuple[TensorType, type, List[TensorType]]]] = None,
-                    mixins: Optional[List[type]] = None,
-                    get_batch_divisibility_req: Optional[Callable[
-                        [Policy], int]] = None,
-                    obs_include_prev_action_reward: bool = True):
+def build_tf_policy(
+        name: str,
+        *,
+        loss_fn: Callable[[Policy, ModelV2, type, SampleBatch], TensorType],
+        get_default_config: Optional[Callable[[None],
+                                              TrainerConfigDict]] = None,
+        postprocess_fn: Optional[Callable[[
+            Policy, SampleBatch, List[SampleBatch], "MultiAgentEpisode"
+        ], None]] = None,
+        stats_fn: Optional[Callable[[Policy, SampleBatch], Dict[
+            str, TensorType]]] = None,
+        optimizer_fn: Optional[Callable[[
+            Policy, TrainerConfigDict
+        ], "tf.keras.optimizers.Optimizer"]] = None,
+        gradients_fn: Optional[Callable[[
+            Policy, "tf.keras.optimizers.Optimizer", TensorType
+        ], ModelGradients]] = None,
+        apply_gradients_fn: Optional[Callable[[
+            Policy, "tf.keras.optimizers.Optimizer", ModelGradients
+        ], "tf.Operation"]] = None,
+        grad_stats_fn: Optional[Callable[[Policy, SampleBatch, ModelGradients],
+                                         Dict[str, TensorType]]] = None,
+        extra_action_fetches_fn: Optional[Callable[[Policy], Dict[
+            str, TensorType]]] = None,
+        extra_learn_fetches_fn: Optional[Callable[[Policy], Dict[
+            str, TensorType]]] = None,
+        validate_spaces: Optional[Callable[
+            [Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
+        before_init: Optional[Callable[
+            [Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
+        before_loss_init: Optional[Callable[[
+            Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
+        ], None]] = None,
+        after_init: Optional[Callable[
+            [Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
+        make_model: Optional[Callable[[
+            Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
+        ], ModelV2]] = None,
+        action_sampler_fn: Optional[Callable[[TensorType, List[
+            TensorType]], Tuple[TensorType, TensorType]]] = None,
+        action_distribution_fn: Optional[Callable[[
+            Policy, ModelV2, TensorType, TensorType, TensorType
+        ], Tuple[TensorType, type, List[TensorType]]]] = None,
+        mixins: Optional[List[type]] = None,
+        get_batch_divisibility_req: Optional[Callable[[Policy], int]] = None,
+        obs_include_prev_action_reward: bool = True):
     """Helper function for creating a dynamic tf policy at runtime.
 
     Functions will be run in this order to initialize the policy:
diff --git a/rllib/policy/torch_policy_template.py b/rllib/policy/torch_policy_template.py
index 8e8b44e0b..3c2fa2527 100644
--- a/rllib/policy/torch_policy_template.py
+++ b/rllib/policy/torch_policy_template.py
@@ -19,62 +19,51 @@ torch, _ = try_import_torch()
 
 
 @DeveloperAPI
-def build_torch_policy(name: str,
-                       *,
-                       loss_fn: Callable[
-                           [Policy, ModelV2, type, SampleBatch], TensorType],
-                       get_default_config: Optional[Callable[
-                           [], TrainerConfigDict]] = None,
-                       stats_fn: Optional[Callable[
-                           [Policy, SampleBatch],
-                           Dict[str, TensorType]]] = None,
-                       postprocess_fn: Optional[Callable[
-                            [Policy, SampleBatch, List[SampleBatch],
-                             "MultiAgentEpisode"], None]] = None,
-                       extra_action_out_fn: Optional[Callable[
-                           [Policy, Dict[str, TensorType], List[TensorType],
-                            ModelV2, TorchDistributionWrapper],
-                           Dict[str, TensorType]]] = None,
-                       extra_grad_process_fn: Optional[Callable[
-                           [Policy, "torch.optim.Optimizer", TensorType],
-                           Dict[str, TensorType]]] = None,
-                       # TODO: (sven) Replace "fetches" with "process".
-                       extra_learn_fetches_fn: Optional[Callable[
-                           [Policy], Dict[str, TensorType]]] = None,
-                       optimizer_fn: Optional[Callable[
-                           [Policy, TrainerConfigDict],
-                           "torch.optim.Optimizer"]] = None,
-                       validate_spaces: Optional[Callable[
-                           [Policy, gym.Space, gym.Space, TrainerConfigDict],
-                           None]] = None,
-                       before_init: Optional[Callable[
-                           [Policy, gym.Space, gym.Space, TrainerConfigDict],
-                           None]] = None,
-                       after_init: Optional[Callable[
-                           [Policy, gym.Space, gym.Space, TrainerConfigDict],
-                           None]] = None,
-                       action_sampler_fn: Optional[Callable[
-                           [TensorType, List[TensorType]], Tuple[
-                               TensorType, TensorType]]] = None,
-                       action_distribution_fn: Optional[Callable[
-                           [Policy, ModelV2, TensorType, TensorType,
-                            TensorType],
-                           Tuple[TensorType, type, List[TensorType]]]] = None,
-                       make_model: Optional[Callable[
-                           [Policy, gym.spaces.Space, gym.spaces.Space,
-                            TrainerConfigDict], ModelV2]] = None,
-                       make_model_and_action_dist: Optional[Callable[
-                           [Policy, gym.spaces.Space, gym.spaces.Space,
-                            TrainerConfigDict],
-                           Tuple[ModelV2, TorchDistributionWrapper]]] = None,
-                       apply_gradients_fn: Optional[Callable[
-                           [Policy, "torch.optim.Optimizer"], None]] = None,
-                       mixins: Optional[List[type]] = None,
-                       training_view_requirements_fn: Optional[Callable[
-                           [], Dict[str, ViewRequirement]]] = None,
-                       get_batch_divisibility_req: Optional[Callable[
-                           [Policy], int]] = None
-                       ):
+def build_torch_policy(
+        name: str,
+        *,
+        loss_fn: Callable[[Policy, ModelV2, type, SampleBatch], TensorType],
+        get_default_config: Optional[Callable[[], TrainerConfigDict]] = None,
+        stats_fn: Optional[Callable[[Policy, SampleBatch], Dict[
+            str, TensorType]]] = None,
+        postprocess_fn: Optional[Callable[[
+            Policy, SampleBatch, List[SampleBatch], "MultiAgentEpisode"
+        ], None]] = None,
+        extra_action_out_fn: Optional[Callable[[
+            Policy, Dict[str, TensorType], List[TensorType], ModelV2,
+            TorchDistributionWrapper
+        ], Dict[str, TensorType]]] = None,
+        extra_grad_process_fn: Optional[Callable[[
+            Policy, "torch.optim.Optimizer", TensorType
+        ], Dict[str, TensorType]]] = None,
+        # TODO: (sven) Replace "fetches" with "process".
+        extra_learn_fetches_fn: Optional[Callable[[Policy], Dict[
+            str, TensorType]]] = None,
+        optimizer_fn: Optional[Callable[[Policy, TrainerConfigDict],
+                                        "torch.optim.Optimizer"]] = None,
+        validate_spaces: Optional[Callable[
+            [Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
+        before_init: Optional[Callable[
+            [Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
+        after_init: Optional[Callable[
+            [Policy, gym.Space, gym.Space, TrainerConfigDict], None]] = None,
+        action_sampler_fn: Optional[Callable[[TensorType, List[
+            TensorType]], Tuple[TensorType, TensorType]]] = None,
+        action_distribution_fn: Optional[Callable[[
+            Policy, ModelV2, TensorType, TensorType, TensorType
+        ], Tuple[TensorType, type, List[TensorType]]]] = None,
+        make_model: Optional[Callable[[
+            Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
+        ], ModelV2]] = None,
+        make_model_and_action_dist: Optional[Callable[[
+            Policy, gym.spaces.Space, gym.spaces.Space, TrainerConfigDict
+        ], Tuple[ModelV2, TorchDistributionWrapper]]] = None,
+        apply_gradients_fn: Optional[Callable[
+            [Policy, "torch.optim.Optimizer"], None]] = None,
+        mixins: Optional[List[type]] = None,
+        training_view_requirements_fn: Optional[Callable[[], Dict[
+            str, ViewRequirement]]] = None,
+        get_batch_divisibility_req: Optional[Callable[[Policy], int]] = None):
     """Helper function for creating a torch policy class at runtime.
 
     Args:
diff --git a/rllib/tests/test_catalog.py b/rllib/tests/test_catalog.py
index 6298bf20d..cf0069dea 100644
--- a/rllib/tests/test_catalog.py
+++ b/rllib/tests/test_catalog.py
@@ -147,7 +147,7 @@ class ModelCatalogTest(unittest.TestCase):
         self.assertEqual(param_shape, action_space.shape)
 
         # test the class works as a distribution
-        dist_input = tf1.placeholder(tf.float32, (None,) + param_shape)
+        dist_input = tf1.placeholder(tf.float32, (None, ) + param_shape)
         model = Model()
         model.model_config = model_config
         dist = dist_cls(dist_input, model=model)
@@ -161,7 +161,7 @@ class ModelCatalogTest(unittest.TestCase):
         dist_cls, param_shape = ModelCatalog.get_action_dist(
             action_space, model_config)
         self.assertEqual(param_shape, (3, ))
-        dist_input = tf1.placeholder(tf.float32, (None,) + param_shape)
+        dist_input = tf1.placeholder(tf.float32, (None, ) + param_shape)
         model.model_config = model_config
         dist = dist_cls(dist_input, model=model)
         self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
diff --git a/rllib/tests/test_eager_support.py b/rllib/tests/test_eager_support.py
index 55b7cf97c..95e6c69fc 100644
--- a/rllib/tests/test_eager_support.py
+++ b/rllib/tests/test_eager_support.py
@@ -76,7 +76,10 @@ class TestEagerSupportPG(unittest.TestCase):
 
     def test_impala(self):
         check_support(
-            "IMPALA", {"num_workers": 1, "num_gpus": 0}, test_eager=True)
+            "IMPALA", {
+                "num_workers": 1,
+                "num_gpus": 0
+            }, test_eager=True)
 
 
 class TestEagerSupportOffPolicy(unittest.TestCase):
@@ -130,5 +133,6 @@ if __name__ == "__main__":
     # None for all unittest.TestCase classes in this file.
     import pytest
     class_ = sys.argv[1] if len(sys.argv) > 1 else None
-    sys.exit(pytest.main(
-        ["-v", __file__ + ("" if class_ is None else "::" + class_)]))
+    sys.exit(
+        pytest.main(
+            ["-v", __file__ + ("" if class_ is None else "::" + class_)]))
diff --git a/rllib/tests/test_multi_agent_pendulum.py b/rllib/tests/test_multi_agent_pendulum.py
index 7afa9f624..4db4fd9ae 100644
--- a/rllib/tests/test_multi_agent_pendulum.py
+++ b/rllib/tests/test_multi_agent_pendulum.py
@@ -21,32 +21,34 @@ class TestMultiAgentPendulum(unittest.TestCase):
 
         # Test for both torch and tf.
         for fw in framework_iterator(frameworks=["torch", "tf"]):
-            trials = run_experiments({
-                "test": {
-                    "run": "PPO",
-                    "env": "multi_agent_pendulum",
-                    "stop": {
-                        "timesteps_total": 500000,
-                        "episode_reward_mean": -300.0,
-                    },
-                    "config": {
-                        "train_batch_size": 2048,
-                        "vf_clip_param": 10.0,
-                        "num_workers": 0,
-                        "num_envs_per_worker": 10,
-                        "lambda": 0.1,
-                        "gamma": 0.95,
-                        "lr": 0.0003,
-                        "sgd_minibatch_size": 64,
-                        "num_sgd_iter": 10,
-                        "model": {
-                            "fcnet_hiddens": [128, 128],
+            trials = run_experiments(
+                {
+                    "test": {
+                        "run": "PPO",
+                        "env": "multi_agent_pendulum",
+                        "stop": {
+                            "timesteps_total": 500000,
+                            "episode_reward_mean": -300.0,
                         },
-                        "batch_mode": "complete_episodes",
-                        "framework": fw,
-                    },
-                }
-            }, verbose=1)
+                        "config": {
+                            "train_batch_size": 2048,
+                            "vf_clip_param": 10.0,
+                            "num_workers": 0,
+                            "num_envs_per_worker": 10,
+                            "lambda": 0.1,
+                            "gamma": 0.95,
+                            "lr": 0.0003,
+                            "sgd_minibatch_size": 64,
+                            "num_sgd_iter": 10,
+                            "model": {
+                                "fcnet_hiddens": [128, 128],
+                            },
+                            "batch_mode": "complete_episodes",
+                            "framework": fw,
+                        },
+                    }
+                },
+                verbose=1)
             if trials[0].last_result["episode_reward_mean"] < -300.0:
                 raise ValueError("Did not get to -200 reward",
                                  trials[0].last_result)
diff --git a/rllib/tests/test_rollout_worker.py b/rllib/tests/test_rollout_worker.py
index 038d47e22..e2c8255a6 100644
--- a/rllib/tests/test_rollout_worker.py
+++ b/rllib/tests/test_rollout_worker.py
@@ -278,7 +278,7 @@ class TestRolloutWorker(unittest.TestCase):
 
     def test_action_clipping(self):
         from ray.rllib.examples.env.random_env import RandomEnv
-        action_space = gym.spaces.Box(-2.0, 1.0, (3,))
+        action_space = gym.spaces.Box(-2.0, 1.0, (3, ))
 
         # Clipping: True (clip between Policy's action_space.low/high),
         ev = RolloutWorker(
diff --git a/rllib/tests/test_supported_multi_agent.py b/rllib/tests/test_supported_multi_agent.py
index be4681a19..7e7eecc41 100644
--- a/rllib/tests/test_supported_multi_agent.py
+++ b/rllib/tests/test_supported_multi_agent.py
@@ -125,5 +125,6 @@ if __name__ == "__main__":
     # One can specify the specific TestCase class to run.
     # None for all unittest.TestCase classes in this file.
     class_ = sys.argv[1] if len(sys.argv) > 1 else None
-    sys.exit(pytest.main(
-        ["-v", __file__ + ("" if class_ is None else "::" + class_)]))
+    sys.exit(
+        pytest.main(
+            ["-v", __file__ + ("" if class_ is None else "::" + class_)]))
diff --git a/rllib/tests/test_supported_spaces.py b/rllib/tests/test_supported_spaces.py
index 0dbfddefe..c8e14df72 100644
--- a/rllib/tests/test_supported_spaces.py
+++ b/rllib/tests/test_supported_spaces.py
@@ -203,5 +203,6 @@ if __name__ == "__main__":
     # One can specify the specific TestCase class to run.
     # None for all unittest.TestCase classes in this file.
     class_ = sys.argv[1] if len(sys.argv) > 1 else None
-    sys.exit(pytest.main(
-        ["-v", __file__ + ("" if class_ is None else "::" + class_)]))
+    sys.exit(
+        pytest.main(
+            ["-v", __file__ + ("" if class_ is None else "::" + class_)]))
diff --git a/rllib/utils/exploration/epsilon_greedy.py b/rllib/utils/exploration/epsilon_greedy.py
index edd347068..3558d73b3 100644
--- a/rllib/utils/exploration/epsilon_greedy.py
+++ b/rllib/utils/exploration/epsilon_greedy.py
@@ -99,8 +99,8 @@ class EpsilonGreedy(Exploration):
             tf.random.categorical(random_valid_action_logits, 1), axis=1)
 
         chose_random = tf.random.uniform(
-            tf.stack([batch_size]),
-            minval=0, maxval=1, dtype=tf.float32) < epsilon
+            tf.stack([batch_size]), minval=0, maxval=1,
+            dtype=tf.float32) < epsilon
 
         action = tf.cond(
             pred=tf.constant(explore, dtype=tf.bool)
diff --git a/rllib/utils/exploration/gaussian_noise.py b/rllib/utils/exploration/gaussian_noise.py
index 53046348a..e8c6a6ce8 100644
--- a/rllib/utils/exploration/gaussian_noise.py
+++ b/rllib/utils/exploration/gaussian_noise.py
@@ -154,7 +154,7 @@ class GaussianNoise(Exploration):
                 scale = self.scale_schedule(self.last_timestep)
                 gaussian_sample = scale * torch.normal(
                     mean=torch.zeros(det_actions.size()), std=self.stddev).to(
-                    self.device)
+                        self.device)
                 action = torch.min(
                     torch.max(
                         det_actions + gaussian_sample,
diff --git a/rllib/utils/exploration/tests/test_parameter_noise.py b/rllib/utils/exploration/tests/test_parameter_noise.py
index 9d7e7bcb4..db8c1e84d 100644
--- a/rllib/utils/exploration/tests/test_parameter_noise.py
+++ b/rllib/utils/exploration/tests/test_parameter_noise.py
@@ -9,23 +9,18 @@ from ray.rllib.utils.test_utils import check, framework_iterator
 class TestParameterNoise(unittest.TestCase):
     def test_ddpg_parameter_noise(self):
         self.do_test_parameter_noise_exploration(
-            ddpg.DDPGTrainer,
-            ddpg.DEFAULT_CONFIG,
-            "Pendulum-v0", {},
+            ddpg.DDPGTrainer, ddpg.DEFAULT_CONFIG, "Pendulum-v0", {},
             np.array([1.0, 0.0, -1.0]))
 
     def test_dqn_parameter_noise(self):
         self.do_test_parameter_noise_exploration(
-            dqn.DQNTrainer,
-            dqn.DEFAULT_CONFIG,
-            "FrozenLake-v0", {
+            dqn.DQNTrainer, dqn.DEFAULT_CONFIG, "FrozenLake-v0", {
                 "is_slippery": False,
                 "map_name": "4x4"
-            },
-            np.array(0))
+            }, np.array(0))
 
-    def do_test_parameter_noise_exploration(
-            self, trainer_cls, config, env, env_config, obs):
+    def do_test_parameter_noise_exploration(self, trainer_cls, config, env,
+                                            env_config, obs):
         """Tests, whether an Agent works with ParameterNoise."""
         core_config = config.copy()
         core_config["num_workers"] = 0  # Run locally.
diff --git a/rllib/utils/framework.py b/rllib/utils/framework.py
index 5763e8169..9346b792b 100644
--- a/rllib/utils/framework.py
+++ b/rllib/utils/framework.py
@@ -200,9 +200,13 @@ def get_variable(value,
             if isinstance(value, float) else tf.int32
             if isinstance(value, int) else None)
         return tf.compat.v1.get_variable(
-            tf_name, initializer=value, dtype=dtype, trainable=trainable,
-            **({} if shape is None else {"shape": shape})
-        )
+            tf_name,
+            initializer=value,
+            dtype=dtype,
+            trainable=trainable,
+            **({} if shape is None else {
+                "shape": shape
+            }))
     elif framework == "torch" and torch_tensor is True:
         torch, _ = try_import_torch()
         var_ = torch.from_numpy(value)
diff --git a/rllib/utils/schedules/tests/test_schedules.py b/rllib/utils/schedules/tests/test_schedules.py
index 16b52006b..3764d2de2 100644
--- a/rllib/utils/schedules/tests/test_schedules.py
+++ b/rllib/utils/schedules/tests/test_schedules.py
@@ -53,7 +53,8 @@ class TestSchedules(unittest.TestCase):
     def test_polynomial_schedule(self):
         ts = [0, 5, 10, 100, 90, 2, 1, 99, 23, 1000]
         expected = [
-            0.5 + (2.0 - 0.5) * (1.0 - min(t, 100) / 100)**2 for t in ts]
+            0.5 + (2.0 - 0.5) * (1.0 - min(t, 100) / 100)**2 for t in ts
+        ]
         config = dict(
             type="ray.rllib.utils.schedules.polynomial_schedule."
             "PolynomialSchedule",
diff --git a/rllib/utils/spaces/flexdict.py b/rllib/utils/spaces/flexdict.py
index f9bf5fdcd..de84ebe27 100644
--- a/rllib/utils/spaces/flexdict.py
+++ b/rllib/utils/spaces/flexdict.py
@@ -12,6 +12,7 @@ class FlexDict(gym.spaces.Dict):
        space['key'] = spaces.Box(4,)
     See also: documentation for gym.spaces.Dict
     """
+
     def __init__(self, spaces=None, **spaces_kwargs):
         err = "Use either Dict(spaces=dict(...)) or Dict(foo=x, bar=z)"
         assert (spaces is None) or (not spaces_kwargs), err
diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py
index 9826c6a0a..007bed40a 100644
--- a/rllib/utils/test_utils.py
+++ b/rllib/utils/test_utils.py
@@ -288,8 +288,8 @@ def check_compute_single_action(trainer,
             method_to_test = trainer.compute_action
             # Get the obs-space from Workers.env (not Policy) due to possible
             # pre-processor up front.
-            worker_set = getattr(
-                trainer, "workers", getattr(trainer, "_workers", None))
+            worker_set = getattr(trainer, "workers",
+                                 getattr(trainer, "_workers", None))
             assert worker_set
             if isinstance(worker_set, list):
                 obs_space = trainer.get_policy().observation_space
diff --git a/rllib/utils/tf_ops.py b/rllib/utils/tf_ops.py
index 9db6a145d..5e429ea95 100644
--- a/rllib/utils/tf_ops.py
+++ b/rllib/utils/tf_ops.py
@@ -34,8 +34,8 @@ def minimize_and_clip(optimizer, objective, var_list, clip_val=10.0):
 
     if tf.executing_eagerly():
         tape = optimizer.tape
-        grads_and_vars = list(zip(list(
-            tape.gradient(objective, var_list)), var_list))
+        grads_and_vars = list(
+            zip(list(tape.gradient(objective, var_list)), var_list))
     else:
         grads_and_vars = optimizer.compute_gradients(
             objective, var_list=var_list)
diff --git a/src/ray/common/common_protocol.h b/src/ray/common/common_protocol.h
index d7dccf734..8ff224922 100644
--- a/src/ray/common/common_protocol.h
+++ b/src/ray/common/common_protocol.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <flatbuffers/flatbuffers.h>
+
 #include <unordered_set>
 
 #include "ray/common/id.h"
diff --git a/src/ray/common/id_test.cc b/src/ray/common/id_test.cc
index a436ed501..dbff2638e 100644
--- a/src/ray/common/id_test.cc
+++ b/src/ray/common/id_test.cc
@@ -13,7 +13,6 @@
 // limitations under the License.
 
 #include "gtest/gtest.h"
-
 #include "ray/common/common_protocol.h"
 #include "ray/common/task/task_spec.h"
 
diff --git a/src/ray/common/task/scheduling_resources.cc b/src/ray/common/task/scheduling_resources.cc
index fb69eb315..68503e846 100644
--- a/src/ray/common/task/scheduling_resources.cc
+++ b/src/ray/common/task/scheduling_resources.cc
@@ -4,7 +4,6 @@
 #include <sstream>
 
 #include "absl/container/flat_hash_map.h"
-
 #include "ray/common/bundle_spec.h"
 #include "ray/util/logging.h"
 
diff --git a/src/ray/common/task/task_execution_spec.cc b/src/ray/common/task/task_execution_spec.cc
index 3be3d9042..cf6422b58 100644
--- a/src/ray/common/task/task_execution_spec.cc
+++ b/src/ray/common/task/task_execution_spec.cc
@@ -1,7 +1,7 @@
-#include <sstream>
-
 #include "ray/common/task/task_execution_spec.h"
 
+#include <sstream>
+
 namespace ray {
 
 size_t TaskExecutionSpecification::NumForwards() const {
diff --git a/src/ray/common/test/client_connection_test.cc b/src/ray/common/test/client_connection_test.cc
index 431b651e4..77ded00ae 100644
--- a/src/ray/common/test/client_connection_test.cc
+++ b/src/ray/common/test/client_connection_test.cc
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "ray/common/client_connection.h"
+
 #include <boost/asio.hpp>
 #include <boost/asio/error.hpp>
 #include <list>
@@ -20,8 +22,6 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-#include "ray/common/client_connection.h"
-
 namespace ray {
 namespace raylet {
 
diff --git a/src/ray/core_worker/lib/java/io_ray_runtime_actor_NativeActorHandle.cc b/src/ray/core_worker/lib/java/io_ray_runtime_actor_NativeActorHandle.cc
index b7b6d0ee1..d149e4313 100644
--- a/src/ray/core_worker/lib/java/io_ray_runtime_actor_NativeActorHandle.cc
+++ b/src/ray/core_worker/lib/java/io_ray_runtime_actor_NativeActorHandle.cc
@@ -16,11 +16,11 @@
 
 #include <jni.h>
 
+#include "jni_utils.h"
 #include "ray/common/id.h"
 #include "ray/core_worker/actor_handle.h"
 #include "ray/core_worker/common.h"
 #include "ray/core_worker/core_worker.h"
-#include "jni_utils.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/src/ray/core_worker/lib/java/io_ray_runtime_context_NativeWorkerContext.cc b/src/ray/core_worker/lib/java/io_ray_runtime_context_NativeWorkerContext.cc
index 6edc03d80..c0833817c 100644
--- a/src/ray/core_worker/lib/java/io_ray_runtime_context_NativeWorkerContext.cc
+++ b/src/ray/core_worker/lib/java/io_ray_runtime_context_NativeWorkerContext.cc
@@ -13,11 +13,13 @@
 // limitations under the License.
 
 #include "io_ray_runtime_context_NativeWorkerContext.h"
+
 #include <jni.h>
+
+#include "jni_utils.h"
 #include "ray/common/id.h"
 #include "ray/core_worker/context.h"
 #include "ray/core_worker/core_worker.h"
-#include "jni_utils.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/src/ray/core_worker/lib/java/io_ray_runtime_gcs_GlobalStateAccessor.cc b/src/ray/core_worker/lib/java/io_ray_runtime_gcs_GlobalStateAccessor.cc
index 25101c4c0..dfb8b7c65 100644
--- a/src/ray/core_worker/lib/java/io_ray_runtime_gcs_GlobalStateAccessor.cc
+++ b/src/ray/core_worker/lib/java/io_ray_runtime_gcs_GlobalStateAccessor.cc
@@ -16,8 +16,8 @@
 
 #include <jni.h>
 
-#include "ray/core_worker/common.h"
 #include "jni_utils.h"
+#include "ray/core_worker/common.h"
 #include "ray/gcs/gcs_client/global_state_accessor.h"
 
 #ifdef __cplusplus
diff --git a/src/ray/core_worker/lib/java/io_ray_runtime_metric_NativeMetric.cc b/src/ray/core_worker/lib/java/io_ray_runtime_metric_NativeMetric.cc
index bacb089c3..9ea70cffe 100644
--- a/src/ray/core_worker/lib/java/io_ray_runtime_metric_NativeMetric.cc
+++ b/src/ray/core_worker/lib/java/io_ray_runtime_metric_NativeMetric.cc
@@ -13,13 +13,14 @@
 // limitations under the License.
 
 #include "io_ray_runtime_metric_NativeMetric.h"
-#include "jni_utils.h"
-#include "ray/stats/metric.h"
 
 #include <jni.h>
 
 #include <algorithm>
+
+#include "jni_utils.h"
 #include "opencensus/tags/tag_key.h"
+#include "ray/stats/metric.h"
 
 using TagKeyType = opencensus::tags::TagKey;
 using TagsType = std::vector<std::pair<opencensus::tags::TagKey, std::string>>;
diff --git a/src/ray/core_worker/lib/java/io_ray_runtime_object_NativeObjectStore.cc b/src/ray/core_worker/lib/java/io_ray_runtime_object_NativeObjectStore.cc
index b806c369d..81bbaeea9 100644
--- a/src/ray/core_worker/lib/java/io_ray_runtime_object_NativeObjectStore.cc
+++ b/src/ray/core_worker/lib/java/io_ray_runtime_object_NativeObjectStore.cc
@@ -13,7 +13,9 @@
 // limitations under the License.
 
 #include "io_ray_runtime_object_NativeObjectStore.h"
+
 #include <jni.h>
+
 #include "jni_utils.h"
 #include "ray/common/id.h"
 #include "ray/core_worker/common.h"
diff --git a/src/ray/core_worker/lib/java/io_ray_runtime_task_NativeTaskExecutor.cc b/src/ray/core_worker/lib/java/io_ray_runtime_task_NativeTaskExecutor.cc
index 851bb31c6..b1db4f451 100644
--- a/src/ray/core_worker/lib/java/io_ray_runtime_task_NativeTaskExecutor.cc
+++ b/src/ray/core_worker/lib/java/io_ray_runtime_task_NativeTaskExecutor.cc
@@ -13,11 +13,13 @@
 // limitations under the License.
 
 #include "io_ray_runtime_task_NativeTaskExecutor.h"
+
 #include <jni.h>
+
+#include "jni_utils.h"
 #include "ray/common/id.h"
 #include "ray/core_worker/common.h"
 #include "ray/core_worker/core_worker.h"
-#include "jni_utils.h"
 #include "ray/raylet_client/raylet_client.h"
 
 #ifdef __cplusplus
diff --git a/src/ray/core_worker/lib/java/jni_utils.h b/src/ray/core_worker/lib/java/jni_utils.h
index 0467ba856..f7bb7e2d3 100644
--- a/src/ray/core_worker/lib/java/jni_utils.h
+++ b/src/ray/core_worker/lib/java/jni_utils.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <jni.h>
+
 #include <algorithm>
 
 #include "ray/common/buffer.h"
@@ -346,7 +347,7 @@ inline jobject NativeVectorToJavaList(
       env->NewObject(java_array_list_class, java_array_list_init_with_capacity,
                      (jint)native_vector.size());
   RAY_CHECK_JAVA_EXCEPTION(env);
-  for (auto it = native_vector.begin(); it != native_vector.end(); ++it){
+  for (auto it = native_vector.begin(); it != native_vector.end(); ++it) {
     auto element = element_converter(env, *it);
     env->CallVoidMethod(java_list, java_list_add, element);
     RAY_CHECK_JAVA_EXCEPTION(env);
diff --git a/src/ray/core_worker/test/scheduling_queue_test.cc b/src/ray/core_worker/test/scheduling_queue_test.cc
index 98d38d3af..0c5da0dea 100644
--- a/src/ray/core_worker/test/scheduling_queue_test.cc
+++ b/src/ray/core_worker/test/scheduling_queue_test.cc
@@ -13,8 +13,8 @@
 // limitations under the License.
 
 #include <thread>
-#include "gtest/gtest.h"
 
+#include "gtest/gtest.h"
 #include "ray/common/test_util.h"
 #include "ray/core_worker/transport/direct_actor_transport.h"
 
diff --git a/src/ray/gcs/asio.h b/src/ray/gcs/asio.h
index 264672a86..873907c84 100644
--- a/src/ray/gcs/asio.h
+++ b/src/ray/gcs/asio.h
@@ -35,12 +35,12 @@
 #pragma once
 
 #include <stdio.h>
-#include <iostream>
-#include <string>
 
 #include <boost/asio.hpp>
 #include <boost/asio/error.hpp>
 #include <boost/bind.hpp>
+#include <iostream>
+#include <string>
 
 #include "ray/gcs/redis_async_context.h"
 
diff --git a/src/ray/gcs/gcs_client.h b/src/ray/gcs/gcs_client.h
index 33cb0dd0d..d88389c46 100644
--- a/src/ray/gcs/gcs_client.h
+++ b/src/ray/gcs/gcs_client.h
@@ -18,6 +18,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+
 #include "ray/common/status.h"
 #include "ray/gcs/accessor.h"
 #include "ray/util/logging.h"
diff --git a/src/ray/gcs/gcs_client/test/service_based_gcs_client_test.cc b/src/ray/gcs/gcs_client/test/service_based_gcs_client_test.cc
index a7f5141a2..c49044686 100644
--- a/src/ray/gcs/gcs_client/test/service_based_gcs_client_test.cc
+++ b/src/ray/gcs/gcs_client/test/service_based_gcs_client_test.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "ray/gcs/gcs_client/service_based_gcs_client.h"
+
 #include "gtest/gtest.h"
 #include "ray/common/test_util.h"
 #include "ray/gcs/gcs_client/service_based_accessor.h"
diff --git a/src/ray/gcs/gcs_server/gcs_redis_failure_detector.h b/src/ray/gcs/gcs_server/gcs_redis_failure_detector.h
index 8580c50ef..78425b183 100644
--- a/src/ray/gcs/gcs_server/gcs_redis_failure_detector.h
+++ b/src/ray/gcs/gcs_server/gcs_redis_failure_detector.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <boost/asio.hpp>
+
 #include "ray/gcs/redis_context.h"
 
 namespace ray {
diff --git a/src/ray/gcs/pubsub/test/gcs_pub_sub_test.cc b/src/ray/gcs/pubsub/test/gcs_pub_sub_test.cc
index 5e03dc2a7..1f96daffb 100644
--- a/src/ray/gcs/pubsub/test/gcs_pub_sub_test.cc
+++ b/src/ray/gcs/pubsub/test/gcs_pub_sub_test.cc
@@ -12,11 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "ray/gcs/pubsub/gcs_pub_sub.h"
+
 #include <memory>
 
 #include "gtest/gtest.h"
 #include "ray/common/test_util.h"
-#include "ray/gcs/pubsub/gcs_pub_sub.h"
 
 namespace ray {
 
diff --git a/src/ray/gcs/redis_async_context.h b/src/ray/gcs/redis_async_context.h
index 6647cc279..5d0a95b74 100644
--- a/src/ray/gcs/redis_async_context.h
+++ b/src/ray/gcs/redis_async_context.h
@@ -15,7 +15,9 @@
 #pragma once
 
 #include <stdarg.h>
+
 #include <mutex>
+
 #include "ray/common/status.h"
 
 // These are forward declarations from hiredis.
diff --git a/src/ray/gcs/store_client/redis_store_client.cc b/src/ray/gcs/store_client/redis_store_client.cc
index f20270fd3..8f971e124 100644
--- a/src/ray/gcs/store_client/redis_store_client.cc
+++ b/src/ray/gcs/store_client/redis_store_client.cc
@@ -15,6 +15,7 @@
 #include "ray/gcs/store_client/redis_store_client.h"
 
 #include <functional>
+
 #include "ray/common/ray_config.h"
 #include "ray/gcs/redis_context.h"
 #include "ray/util/logging.h"
diff --git a/src/ray/gcs/store_client/test/in_memory_store_client_test.cc b/src/ray/gcs/store_client/test/in_memory_store_client_test.cc
index 3f84b8e40..5fa5f597d 100644
--- a/src/ray/gcs/store_client/test/in_memory_store_client_test.cc
+++ b/src/ray/gcs/store_client/test/in_memory_store_client_test.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "ray/gcs/store_client/in_memory_store_client.h"
+
 #include "ray/gcs/store_client/test/store_client_test_base.h"
 
 namespace ray {
diff --git a/src/ray/gcs/store_client/test/redis_store_client_test.cc b/src/ray/gcs/store_client/test/redis_store_client_test.cc
index afb570a57..f1ee83352 100644
--- a/src/ray/gcs/store_client/test/redis_store_client_test.cc
+++ b/src/ray/gcs/store_client/test/redis_store_client_test.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "ray/gcs/store_client/redis_store_client.h"
+
 #include "ray/common/test_util.h"
 #include "ray/gcs/redis_client.h"
 #include "ray/gcs/store_client/test/store_client_test_base.h"
diff --git a/src/ray/gcs/subscription_executor.h b/src/ray/gcs/subscription_executor.h
index 2ecde7791..1a63029b0 100644
--- a/src/ray/gcs/subscription_executor.h
+++ b/src/ray/gcs/subscription_executor.h
@@ -17,6 +17,7 @@
 #include <atomic>
 #include <list>
 #include <mutex>
+
 #include "ray/gcs/callback.h"
 #include "ray/gcs/tables.h"
 
diff --git a/src/ray/gcs/test/redis_gcs_client_test.cc b/src/ray/gcs/test/redis_gcs_client_test.cc
index 4bfa3eef7..fd5b43d74 100644
--- a/src/ray/gcs/test/redis_gcs_client_test.cc
+++ b/src/ray/gcs/test/redis_gcs_client_test.cc
@@ -12,12 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "gtest/gtest.h"
+#include "ray/gcs/redis_gcs_client.h"
 
+#include "gtest/gtest.h"
 #include "ray/common/ray_config.h"
 #include "ray/common/test_util.h"
 #include "ray/gcs/pb_util.h"
-#include "ray/gcs/redis_gcs_client.h"
 #include "ray/gcs/tables.h"
 
 extern "C" {
diff --git a/src/ray/gcs/test/redis_node_info_accessor_test.cc b/src/ray/gcs/test/redis_node_info_accessor_test.cc
index 2975f8df8..e46e0d242 100644
--- a/src/ray/gcs/test/redis_node_info_accessor_test.cc
+++ b/src/ray/gcs/test/redis_node_info_accessor_test.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include <memory>
+
 #include "gtest/gtest.h"
 #include "ray/gcs/redis_accessor.h"
 #include "ray/gcs/redis_gcs_client.h"
diff --git a/src/ray/object_manager/notification/object_store_notification_manager.h b/src/ray/object_manager/notification/object_store_notification_manager.h
index 623da175b..a10fa8777 100644
--- a/src/ray/object_manager/notification/object_store_notification_manager.h
+++ b/src/ray/object_manager/notification/object_store_notification_manager.h
@@ -15,14 +15,12 @@
 #ifndef RAY_OBJECT_STORE_NOTIFICATION_MANAGER_H
 #define RAY_OBJECT_STORE_NOTIFICATION_MANAGER_H
 
+#include <boost/asio.hpp>
 #include <iostream>
 #include <memory>
 #include <vector>
 
-#include <boost/asio.hpp>
-
 #include "absl/synchronization/mutex.h"
-
 #include "ray/common/id.h"
 #include "ray/common/status.h"
 #include "ray/object_manager/format/object_manager_generated.h"
diff --git a/src/ray/object_manager/object_buffer_pool.h b/src/ray/object_manager/object_buffer_pool.h
index e5c036795..8b7017675 100644
--- a/src/ray/object_manager/object_buffer_pool.h
+++ b/src/ray/object_manager/object_buffer_pool.h
@@ -14,15 +14,14 @@
 
 #pragma once
 
+#include <boost/asio.hpp>
+#include <boost/asio/error.hpp>
+#include <boost/bind.hpp>
 #include <list>
 #include <memory>
 #include <mutex>
 #include <vector>
 
-#include <boost/asio.hpp>
-#include <boost/asio/error.hpp>
-#include <boost/bind.hpp>
-
 #include "ray/common/id.h"
 #include "ray/common/status.h"
 #include "ray/object_manager/plasma/client.h"
diff --git a/src/ray/object_manager/object_manager.h b/src/ray/object_manager/object_manager.h
index 4928ea7af..92d7b4f91 100644
--- a/src/ray/object_manager/object_manager.h
+++ b/src/ray/object_manager/object_manager.h
@@ -15,6 +15,9 @@
 #pragma once
 
 #include <algorithm>
+#include <boost/asio.hpp>
+#include <boost/asio/error.hpp>
+#include <boost/bind.hpp>
 #include <cstdint>
 #include <deque>
 #include <map>
@@ -23,10 +26,6 @@
 #include <random>
 #include <thread>
 
-#include <boost/asio.hpp>
-#include <boost/asio/error.hpp>
-#include <boost/bind.hpp>
-
 #include "absl/time/clock.h"
 #include "ray/common/id.h"
 #include "ray/common/ray_config.h"
diff --git a/src/ray/plasma/store_exec.cc b/src/ray/plasma/store_exec.cc
index 4e0a89389..f1c140db6 100644
--- a/src/ray/plasma/store_exec.cc
+++ b/src/ray/plasma/store_exec.cc
@@ -1,11 +1,10 @@
+#include <gflags/gflags.h>
 #include <signal.h>
 
 #include <chrono>
 #include <iostream>
 #include <thread>
 
-#include <gflags/gflags.h>
-
 #include "ray/object_manager/plasma/store_runner.h"
 
 #ifdef __linux__
diff --git a/src/ray/raylet/raylet.h b/src/ray/raylet/raylet.h
index 938b73c8c..fe7a6632f 100644
--- a/src/ray/raylet/raylet.h
+++ b/src/ray/raylet/raylet.h
@@ -14,10 +14,9 @@
 
 #pragma once
 
-#include <list>
-
 #include <boost/asio.hpp>
 #include <boost/asio/error.hpp>
+#include <list>
 
 // clang-format off
 #include "ray/raylet/node_manager.h"
diff --git a/src/ray/raylet/reconstruction_policy.h b/src/ray/raylet/reconstruction_policy.h
index f3eb8fa93..5d70268f7 100644
--- a/src/ray/raylet/reconstruction_policy.h
+++ b/src/ray/raylet/reconstruction_policy.h
@@ -14,15 +14,13 @@
 
 #pragma once
 
+#include <boost/asio.hpp>
 #include <functional>
 #include <unordered_map>
 #include <unordered_set>
 
-#include <boost/asio.hpp>
-
 #include "ray/common/id.h"
 #include "ray/gcs/tables.h"
-
 #include "ray/object_manager/object_directory.h"
 
 namespace ray {
diff --git a/src/ray/raylet/reconstruction_policy_test.cc b/src/ray/raylet/reconstruction_policy_test.cc
index b792d1512..ef7008fa2 100644
--- a/src/ray/raylet/reconstruction_policy_test.cc
+++ b/src/ray/raylet/reconstruction_policy_test.cc
@@ -12,21 +12,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "ray/raylet/reconstruction_policy.h"
+
+#include <boost/asio.hpp>
 #include <list>
 
 #include "absl/time/clock.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-
-#include <boost/asio.hpp>
-
 #include "ray/gcs/callback.h"
 #include "ray/gcs/redis_accessor.h"
-
-#include "ray/raylet/format/node_manager_generated.h"
-#include "ray/raylet/reconstruction_policy.h"
-
 #include "ray/object_manager/object_directory.h"
+#include "ray/raylet/format/node_manager_generated.h"
 
 namespace ray {
 
diff --git a/src/ray/rpc/client_call.h b/src/ray/rpc/client_call.h
index 23194d105..9e834e160 100644
--- a/src/ray/rpc/client_call.h
+++ b/src/ray/rpc/client_call.h
@@ -15,9 +15,10 @@
 #pragma once
 
 #include <grpcpp/grpcpp.h>
-#include <boost/asio.hpp>
-#include "absl/synchronization/mutex.h"
 
+#include <boost/asio.hpp>
+
+#include "absl/synchronization/mutex.h"
 #include "ray/common/grpc_util.h"
 #include "ray/common/status.h"
 
diff --git a/src/ray/rpc/grpc_client.h b/src/ray/rpc/grpc_client.h
index c49244e48..a01e23237 100644
--- a/src/ray/rpc/grpc_client.h
+++ b/src/ray/rpc/grpc_client.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <grpcpp/grpcpp.h>
+
 #include <boost/asio.hpp>
 
 #include "ray/common/grpc_util.h"
diff --git a/src/ray/rpc/server_call.h b/src/ray/rpc/server_call.h
index 62295093a..7f84f601a 100644
--- a/src/ray/rpc/server_call.h
+++ b/src/ray/rpc/server_call.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <grpcpp/grpcpp.h>
+
 #include <boost/asio.hpp>
 
 #include "ray/common/grpc_util.h"
diff --git a/src/ray/stats/metric_exporter.cc b/src/ray/stats/metric_exporter.cc
index da69ec94f..bdf9f48c9 100644
--- a/src/ray/stats/metric_exporter.cc
+++ b/src/ray/stats/metric_exporter.cc
@@ -12,10 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <future>
-
 #include "ray/stats/metric_exporter.h"
 
+#include <future>
+
 namespace ray {
 namespace stats {
 
diff --git a/src/ray/stats/metric_exporter.h b/src/ray/stats/metric_exporter.h
index e07dcfff8..e3e44bae9 100644
--- a/src/ray/stats/metric_exporter.h
+++ b/src/ray/stats/metric_exporter.h
@@ -16,7 +16,6 @@
 #include "absl/memory/memory.h"
 #include "opencensus/stats/stats.h"
 #include "opencensus/tags/tag_key.h"
-
 #include "ray/stats/metric.h"
 #include "ray/stats/metric_exporter_client.h"
 #include "ray/util/logging.h"
diff --git a/src/ray/stats/metric_exporter_client.cc b/src/ray/stats/metric_exporter_client.cc
index 7a1779536..4272b4fdb 100644
--- a/src/ray/stats/metric_exporter_client.cc
+++ b/src/ray/stats/metric_exporter_client.cc
@@ -12,10 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <algorithm>
-
 #include "ray/stats/metric_exporter_client.h"
 
+#include <algorithm>
+
 namespace ray {
 namespace stats {
 
diff --git a/src/ray/stats/metric_exporter_client_test.cc b/src/ray/stats/metric_exporter_client_test.cc
index 455f020c6..6ce53ccb9 100644
--- a/src/ray/stats/metric_exporter_client_test.cc
+++ b/src/ray/stats/metric_exporter_client_test.cc
@@ -12,8 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
+#include "ray/stats/metric_exporter_client.h"
 
 #include <chrono>
 #include <iostream>
@@ -21,10 +20,11 @@
 #include <vector>
 
 #include "absl/memory/memory.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
 #include "opencensus/stats/internal/delta_producer.h"
 #include "opencensus/stats/internal/stats_exporter_impl.h"
 #include "ray/stats/metric_exporter.h"
-#include "ray/stats/metric_exporter_client.h"
 #include "ray/stats/stats.h"
 
 namespace ray {
diff --git a/src/ray/stats/stats.h b/src/ray/stats/stats.h
index 4f001b7aa..2f85f2549 100644
--- a/src/ray/stats/stats.h
+++ b/src/ray/stats/stats.h
@@ -19,7 +19,6 @@
 #include <unordered_map>
 
 #include "absl/synchronization/mutex.h"
-
 #include "opencensus/stats/internal/delta_producer.h"
 #include "opencensus/stats/stats.h"
 #include "opencensus/tags/tag_key.h"
diff --git a/src/ray/stats/stats_test.cc b/src/ray/stats/stats_test.cc
index 01f5dfc50..24013009a 100644
--- a/src/ray/stats/stats_test.cc
+++ b/src/ray/stats/stats_test.cc
@@ -12,8 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
+#include "ray/stats/stats.h"
 
 #include <chrono>
 #include <iostream>
@@ -21,7 +20,8 @@
 #include <vector>
 
 #include "absl/memory/memory.h"
-#include "ray/stats/stats.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
 
 namespace ray {
 
diff --git a/src/ray/thirdparty/dlmalloc.c b/src/ray/thirdparty/dlmalloc.c
index 6fb0fb00f..62a2a0400 100644
--- a/src/ray/thirdparty/dlmalloc.c
+++ b/src/ray/thirdparty/dlmalloc.c
@@ -542,8 +542,8 @@ MAX_RELEASE_CHECK_RATE   default: 4095 unless not HAVE_MMAP
 #endif  /* WIN32 */
 #ifdef WIN32
 #define WIN32_LEAN_AND_MEAN
-#include <windows.h>
 #include <tchar.h>
+#include <windows.h>
 #define HAVE_MMAP 1
 #define HAVE_MORECORE 0
 #define LACKS_UNISTD_H
diff --git a/src/ray/thirdparty/sha256.c b/src/ray/thirdparty/sha256.c
index 0d8077964..3bc1da789 100644
--- a/src/ray/thirdparty/sha256.c
+++ b/src/ray/thirdparty/sha256.c
@@ -13,9 +13,9 @@
 *********************************************************************/
 
 /*************************** HEADER FILES ***************************/
-#include <stdlib.h>
-#include <memory.h>
 #include "ray/thirdparty/sha256.h"
+#include <memory.h>
+#include <stdlib.h>
 
 /****************************** MACROS ******************************/
 #define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b))))
diff --git a/src/ray/util/io_service_pool.cc b/src/ray/util/io_service_pool.cc
index 6b56062b1..fdf31405c 100644
--- a/src/ray/util/io_service_pool.cc
+++ b/src/ray/util/io_service_pool.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "ray/util/io_service_pool.h"
+
 #include "ray/util/logging.h"
 
 namespace ray {
diff --git a/src/ray/util/sample_test.cc b/src/ray/util/sample_test.cc
index 32780045b..76fc9182b 100644
--- a/src/ray/util/sample_test.cc
+++ b/src/ray/util/sample_test.cc
@@ -12,10 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "ray/util/sample.h"
+
 #include <vector>
 
 #include "gtest/gtest.h"
-#include "ray/util/sample.h"
 
 namespace ray {
 
diff --git a/src/ray/util/sequencer.h b/src/ray/util/sequencer.h
index 3631ce728..9405b3703 100644
--- a/src/ray/util/sequencer.h
+++ b/src/ray/util/sequencer.h
@@ -17,6 +17,7 @@
 #include <deque>
 #include <functional>
 #include <unordered_map>
+
 #include "absl/synchronization/mutex.h"
 
 namespace ray {
diff --git a/src/ray/util/signal_test.cc b/src/ray/util/signal_test.cc
index f57432f26..bb10931e6 100644
--- a/src/ray/util/signal_test.cc
+++ b/src/ray/util/signal_test.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include <signal.h>
+
 #include <cstdlib>
 #include <iostream>
 
diff --git a/src/ray/util/util.cc b/src/ray/util/util.cc
index df49304b5..09de3ed35 100644
--- a/src/ray/util/util.cc
+++ b/src/ray/util/util.cc
@@ -7,11 +7,10 @@
 #endif
 
 #include <algorithm>
+#include <boost/asio/generic/stream_protocol.hpp>
 #include <sstream>
 #include <string>
 #include <vector>
-
-#include <boost/asio/generic/stream_protocol.hpp>
 #ifndef _WIN32
 #include <boost/asio/local/stream_protocol.hpp>
 #endif
diff --git a/streaming/src/lib/java/io_ray_streaming_runtime_transfer_ChannelId.h b/streaming/src/lib/java/io_ray_streaming_runtime_transfer_ChannelId.h
index 8bd0c0aab..839617026 100644
--- a/streaming/src/lib/java/io_ray_streaming_runtime_transfer_ChannelId.h
+++ b/streaming/src/lib/java/io_ray_streaming_runtime_transfer_ChannelId.h
@@ -14,16 +14,16 @@ extern "C" {
  * Method:    createNativeId
  * Signature: (J)J
  */
-JNIEXPORT jlong JNICALL Java_io_ray_streaming_runtime_transfer_ChannelId_createNativeId
-  (JNIEnv *, jclass, jlong);
+JNIEXPORT jlong JNICALL
+Java_io_ray_streaming_runtime_transfer_ChannelId_createNativeId(JNIEnv *, jclass, jlong);
 
 /*
  * Class:     io_ray_streaming_runtime_transfer_ChannelId
  * Method:    destroyNativeId
  * Signature: (J)V
  */
-JNIEXPORT void JNICALL Java_io_ray_streaming_runtime_transfer_ChannelId_destroyNativeId
-  (JNIEnv *, jclass, jlong);
+JNIEXPORT void JNICALL
+Java_io_ray_streaming_runtime_transfer_ChannelId_destroyNativeId(JNIEnv *, jclass, jlong);
 
 #ifdef __cplusplus
 }
diff --git a/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataReader.cc b/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataReader.cc
index 6431127df..bcbe68d39 100644
--- a/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataReader.cc
+++ b/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataReader.cc
@@ -1,6 +1,7 @@
 #include "io_ray_streaming_runtime_transfer_DataReader.h"
 
 #include <cstdlib>
+
 #include "data_reader.h"
 #include "runtime_context.h"
 #include "streaming_jni_common.h"
diff --git a/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataReader.h b/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataReader.h
index b9f8a0196..221fe3105 100644
--- a/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataReader.h
+++ b/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataReader.h
@@ -10,7 +10,8 @@ extern "C" {
 /*
  * Class:     io_ray_streaming_runtime_transfer_DataReader
  * Method:    createDataReaderNative
- * Signature: (Lio/ray/streaming/runtime/transfer/ChannelCreationParametersBuilder;[[B[J[JJZ[BZ)J
+ * Signature:
+ * (Lio/ray/streaming/runtime/transfer/ChannelCreationParametersBuilder;[[B[J[JJZ[BZ)J
  */
 JNIEXPORT jlong JNICALL
 Java_io_ray_streaming_runtime_transfer_DataReader_createDataReaderNative(
diff --git a/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataWriter.h b/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataWriter.h
index dddcafdf7..d445638b9 100644
--- a/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataWriter.h
+++ b/streaming/src/lib/java/io_ray_streaming_runtime_transfer_DataWriter.h
@@ -10,7 +10,8 @@ extern "C" {
 /*
  * Class:     io_ray_streaming_runtime_transfer_DataWriter
  * Method:    createWriterNative
- * Signature: (Lio/ray/streaming/runtime/transfer/ChannelCreationParametersBuilder;[[B[JJ[BZ)J
+ * Signature:
+ * (Lio/ray/streaming/runtime/transfer/ChannelCreationParametersBuilder;[[B[JJ[BZ)J
  */
 JNIEXPORT jlong JNICALL
 Java_io_ray_streaming_runtime_transfer_DataWriter_createWriterNative(
diff --git a/streaming/src/lib/java/io_ray_streaming_runtime_transfer_TransferHandler.h b/streaming/src/lib/java/io_ray_streaming_runtime_transfer_TransferHandler.h
index 320b5c009..24517d7c4 100644
--- a/streaming/src/lib/java/io_ray_streaming_runtime_transfer_TransferHandler.h
+++ b/streaming/src/lib/java/io_ray_streaming_runtime_transfer_TransferHandler.h
@@ -12,16 +12,18 @@ extern "C" {
  * Method:    createWriterClientNative
  * Signature: (J)J
  */
-JNIEXPORT jlong JNICALL Java_io_ray_streaming_runtime_transfer_TransferHandler_createWriterClientNative
-  (JNIEnv *, jobject);
+JNIEXPORT jlong JNICALL
+Java_io_ray_streaming_runtime_transfer_TransferHandler_createWriterClientNative(JNIEnv *,
+                                                                                jobject);
 
 /*
  * Class:     io_ray_streaming_runtime_transfer_TransferHandler
  * Method:    createReaderClientNative
  * Signature: (J)J
  */
-JNIEXPORT jlong JNICALL Java_io_ray_streaming_runtime_transfer_TransferHandler_createReaderClientNative
-  (JNIEnv *, jobject);
+JNIEXPORT jlong JNICALL
+Java_io_ray_streaming_runtime_transfer_TransferHandler_createReaderClientNative(JNIEnv *,
+                                                                                jobject);
 
 /*
  * Class:     io_ray_streaming_runtime_transfer_TransferHandler
diff --git a/streaming/src/lib/java/streaming_jni_common.h b/streaming/src/lib/java/streaming_jni_common.h
index b66088309..996d58d21 100644
--- a/streaming/src/lib/java/streaming_jni_common.h
+++ b/streaming/src/lib/java/streaming_jni_common.h
@@ -1,7 +1,9 @@
 #pragma once
 
 #include <jni.h>
+
 #include <string>
+
 #include "channel.h"
 #include "ray/core_worker/common.h"
 #include "util/streaming_logging.h"
diff --git a/streaming/src/message/priority_queue.h b/streaming/src/message/priority_queue.h
index 994dc2c88..6d53bd240 100644
--- a/streaming/src/message/priority_queue.h
+++ b/streaming/src/message/priority_queue.h
@@ -3,6 +3,7 @@
 #include <algorithm>
 #include <memory>
 #include <vector>
+
 #include "util/streaming_logging.h"
 
 namespace ray {
diff --git a/streaming/src/queue/utils.h b/streaming/src/queue/utils.h
index a532e0744..3838f2f66 100644
--- a/streaming/src/queue/utils.h
+++ b/streaming/src/queue/utils.h
@@ -3,6 +3,7 @@
 #include <chrono>
 #include <future>
 #include <thread>
+
 #include "ray/util/util.h"
 
 namespace ray {
diff --git a/streaming/src/test/event_service_tests.cc b/streaming/src/test/event_service_tests.cc
index a7f4b1233..39e2793aa 100644
--- a/streaming/src/test/event_service_tests.cc
+++ b/streaming/src/test/event_service_tests.cc
@@ -1,4 +1,5 @@
 #include <thread>
+
 #include "event_service.h"
 #include "gtest/gtest.h"
 
diff --git a/streaming/src/test/message_serialization_tests.cc b/streaming/src/test/message_serialization_tests.cc
index 349a44155..b94064591 100644
--- a/streaming/src/test/message_serialization_tests.cc
+++ b/streaming/src/test/message_serialization_tests.cc
@@ -2,7 +2,6 @@
 #include <string>
 
 #include "gtest/gtest.h"
-
 #include "message/message.h"
 #include "message/message_bundle.h"
 
diff --git a/streaming/src/test/queue_tests_base.h b/streaming/src/test/queue_tests_base.h
index 920e24d55..afa1fb7af 100644
--- a/streaming/src/test/queue_tests_base.h
+++ b/streaming/src/test/queue_tests_base.h
@@ -1,10 +1,9 @@
 #pragma once
 
+#include "hiredis/hiredis.h"
 #include "ray/common/test_util.h"
 #include "ray/util/filesystem.h"
 
-#include "hiredis/hiredis.h"
-
 namespace ray {
 namespace streaming {
 
diff --git a/streaming/src/test/streaming_util_tests.cc b/streaming/src/test/streaming_util_tests.cc
index a633065e5..27e2ce7d8 100644
--- a/streaming/src/test/streaming_util_tests.cc
+++ b/streaming/src/test/streaming_util_tests.cc
@@ -1,5 +1,4 @@
 #include "gtest/gtest.h"
-
 #include "util/streaming_util.h"
 
 using namespace ray;