diff --git a/.travis.yml b/.travis.yml
index 8b86fd6b6..41cd5d13b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -53,7 +53,7 @@ matrix:
         - sphinx-build -W -b html -d _build/doctrees source _build/html
         - cd ..
         # Run Python linting, ignore dict vs {} (C408), others are defaults
-        - flake8 --exclude=python/ray/core/generated/,doc/source/conf.py,python/ray/cloudpickle/ --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605
+        - flake8 --inline-quotes '"' --exclude=python/ray/core/generated/,doc/source/conf.py,python/ray/cloudpickle/ --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605
         - ./ci/travis/format.sh --all
         # Make sure that the README is formatted properly.
         - cd python
diff --git a/ci/travis/format.sh b/ci/travis/format.sh
index b6b94f4f6..956143d21 100755
--- a/ci/travis/format.sh
+++ b/ci/travis/format.sh
@@ -53,15 +53,14 @@ format_changed() {
              yapf --in-place "${YAPF_EXCLUDES[@]}" "${YAPF_FLAGS[@]}"
         if which flake8 >/dev/null; then
             git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' | xargs -P 5 \
-                 flake8 --exclude=python/ray/core/generated/,doc/source/conf.py,python/ray/cloudpickle/ \
-                    --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605
+                 flake8 --inline-quotes '"' --exclude=python/ray/core/generated/,doc/source/conf.py,python/ray/cloudpickle/ --ignore=C408,E121,E123,E126,E226,E24,E704,W503,W504,W605
         fi
     fi
 
     if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.pyx' '*.pxd' '*.pxi' &>/dev/null; then
         if which flake8 >/dev/null; then
             git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.pyx' '*.pxd' '*.pxi' | xargs -P 5 \
-                 flake8 --ignore=E211,E225,E226,E227,E999
+                 flake8 --inline-quotes '"' --exclude=python/ray/core/generated/,doc/source/conf.py,python/ray/cloudpickle/ --ignore=C408,E121,E123,E126,E211,E225,E226,E227,E24,E704,E999,W503,W504,W605
         fi
     fi
 
diff --git a/ci/travis/install-dependencies.sh b/ci/travis/install-dependencies.sh
index 13d472a55..d0f9c6988 100755
--- a/ci/travis/install-dependencies.sh
+++ b/ci/travis/install-dependencies.sh
@@ -57,7 +57,7 @@ elif [[ "$LINT" == "1" ]]; then
   bash miniconda.sh -b -p $HOME/miniconda
   export PATH="$HOME/miniconda/bin:$PATH"
   # Install Python linting tools.
-  pip install -q flake8 flake8-comprehensions
+  pip install -q flake8 flake8-comprehensions flake8-quotes
 elif [[ "$LINUX_WHEELS" == "1" ]]; then
   sudo apt-get install docker
   sudo usermod -a -G docker travis
diff --git a/examples/cython/cython_main.py b/examples/cython/cython_main.py
index 1d8b9baf4..612ee0249 100644
--- a/examples/cython/cython_main.py
+++ b/examples/cython/cython_main.py
@@ -26,7 +26,7 @@ def run_func(func, *args, **kwargs):
     return result
 
 
-@click.group(context_settings={'help_option_names': ['-h', '--help']})
+@click.group(context_settings={"help_option_names": ["-h", "--help"]})
 def cli():
     """Working with Cython actors and functions in Ray"""
 
diff --git a/examples/parameter_server/model.py b/examples/parameter_server/model.py
index 6387f1bc6..dc0c15b49 100644
--- a/examples/parameter_server/model.py
+++ b/examples/parameter_server/model.py
@@ -38,16 +38,16 @@ class SimpleCNN(object):
             # Build the graph for the deep net
             self.y_conv, self.keep_prob = deepnn(self.x)
 
-            with tf.name_scope('loss'):
+            with tf.name_scope("loss"):
                 cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                     labels=self.y_, logits=self.y_conv)
             self.cross_entropy = tf.reduce_mean(cross_entropy)
 
-            with tf.name_scope('adam_optimizer'):
+            with tf.name_scope("adam_optimizer"):
                 self.optimizer = tf.train.AdamOptimizer(learning_rate)
                 self.train_step = self.optimizer.minimize(self.cross_entropy)
 
-            with tf.name_scope('accuracy'):
+            with tf.name_scope("accuracy"):
                 correct_prediction = tf.equal(
                     tf.argmax(self.y_conv, 1), tf.argmax(self.y_, 1))
                 correct_prediction = tf.cast(correct_prediction, tf.float32)
@@ -133,32 +133,32 @@ def deepnn(x):
     # Reshape to use within a convolutional neural net.
     # Last dimension is for "features" - there is only one here, since images
     # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
-    with tf.name_scope('reshape'):
+    with tf.name_scope("reshape"):
         x_image = tf.reshape(x, [-1, 28, 28, 1])
 
     # First convolutional layer - maps one grayscale image to 32 feature maps.
-    with tf.name_scope('conv1'):
+    with tf.name_scope("conv1"):
         W_conv1 = weight_variable([5, 5, 1, 32])
         b_conv1 = bias_variable([32])
         h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
 
     # Pooling layer - downsamples by 2X.
-    with tf.name_scope('pool1'):
+    with tf.name_scope("pool1"):
         h_pool1 = max_pool_2x2(h_conv1)
 
     # Second convolutional layer -- maps 32 feature maps to 64.
-    with tf.name_scope('conv2'):
+    with tf.name_scope("conv2"):
         W_conv2 = weight_variable([5, 5, 32, 64])
         b_conv2 = bias_variable([64])
         h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
 
     # Second pooling layer.
-    with tf.name_scope('pool2'):
+    with tf.name_scope("pool2"):
         h_pool2 = max_pool_2x2(h_conv2)
 
     # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
     # is down to 7x7x64 feature maps -- maps this to 1024 features.
-    with tf.name_scope('fc1'):
+    with tf.name_scope("fc1"):
         W_fc1 = weight_variable([7 * 7 * 64, 1024])
         b_fc1 = bias_variable([1024])
 
@@ -167,12 +167,12 @@ def deepnn(x):
 
     # Dropout - controls the complexity of the model, prevents co-adaptation of
     # features.
-    with tf.name_scope('dropout'):
+    with tf.name_scope("dropout"):
         keep_prob = tf.placeholder(tf.float32)
         h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
 
     # Map the 1024 features to 10 classes, one for each digit
-    with tf.name_scope('fc2'):
+    with tf.name_scope("fc2"):
         W_fc2 = weight_variable([1024, 10])
         b_fc2 = bias_variable([10])
 
@@ -182,13 +182,13 @@ def deepnn(x):
 
 def conv2d(x, W):
     """conv2d returns a 2d convolution layer with full stride."""
-    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
+    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")
 
 
 def max_pool_2x2(x):
     """max_pool_2x2 downsamples a feature map by 2X."""
     return tf.nn.max_pool(
-        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
+        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
 
 
 def weight_variable(shape):
diff --git a/examples/resnet/resnet_model.py b/examples/resnet/resnet_model.py
index 7fa6dde69..06d19e64d 100644
--- a/examples/resnet/resnet_model.py
+++ b/examples/resnet/resnet_model.py
@@ -21,9 +21,9 @@ import ray
 import ray.experimental.tf_utils
 
 HParams = namedtuple(
-    'HParams', 'batch_size, num_classes, min_lrn_rate, lrn_rate, '
-    'num_residual_units, use_bottleneck, weight_decay_rate, '
-    'relu_leakiness, optimizer, num_gpus')
+    "HParams", "batch_size, num_classes, min_lrn_rate, lrn_rate, "
+    "num_residual_units, use_bottleneck, weight_decay_rate, "
+    "relu_leakiness, optimizer, num_gpus")
 
 
 class ResNet(object):
@@ -50,7 +50,7 @@ class ResNet(object):
         """Build a whole graph for the model."""
         self.global_step = tf.Variable(0, trainable=False)
         self._build_model()
-        if self.mode == 'train':
+        if self.mode == "train":
             self._build_train_op()
         else:
             # Additional initialization for the test network.
@@ -65,8 +65,8 @@ class ResNet(object):
     def _build_model(self):
         """Build the core model within the graph."""
 
-        with tf.variable_scope('init'):
-            x = self._conv('init_conv', self._images, 3, 3, 16,
+        with tf.variable_scope("init"):
+            x = self._conv("init_conv", self._images, 3, 3, 16,
                            self._stride_arr(1))
 
         strides = [1, 2, 2]
@@ -78,46 +78,46 @@ class ResNet(object):
             res_func = self._residual
             filters = [16, 16, 32, 64]
 
-        with tf.variable_scope('unit_1_0'):
+        with tf.variable_scope("unit_1_0"):
             x = res_func(x, filters[0], filters[1], self._stride_arr(
                 strides[0]), activate_before_residual[0])
         for i in range(1, self.hps.num_residual_units):
-            with tf.variable_scope('unit_1_%d' % i):
+            with tf.variable_scope("unit_1_%d" % i):
                 x = res_func(x, filters[1], filters[1], self._stride_arr(1),
                              False)
 
-        with tf.variable_scope('unit_2_0'):
+        with tf.variable_scope("unit_2_0"):
             x = res_func(x, filters[1], filters[2], self._stride_arr(
                 strides[1]), activate_before_residual[1])
         for i in range(1, self.hps.num_residual_units):
-            with tf.variable_scope('unit_2_%d' % i):
+            with tf.variable_scope("unit_2_%d" % i):
                 x = res_func(x, filters[2], filters[2], self._stride_arr(1),
                              False)
 
-        with tf.variable_scope('unit_3_0'):
+        with tf.variable_scope("unit_3_0"):
             x = res_func(x, filters[2], filters[3], self._stride_arr(
                 strides[2]), activate_before_residual[2])
         for i in range(1, self.hps.num_residual_units):
-            with tf.variable_scope('unit_3_%d' % i):
+            with tf.variable_scope("unit_3_%d" % i):
                 x = res_func(x, filters[3], filters[3], self._stride_arr(1),
                              False)
-        with tf.variable_scope('unit_last'):
-            x = self._batch_norm('final_bn', x)
+        with tf.variable_scope("unit_last"):
+            x = self._batch_norm("final_bn", x)
             x = self._relu(x, self.hps.relu_leakiness)
             x = self._global_avg_pool(x)
 
-        with tf.variable_scope('logit'):
+        with tf.variable_scope("logit"):
             logits = self._fully_connected(x, self.hps.num_classes)
             self.predictions = tf.nn.softmax(logits)
 
-        with tf.variable_scope('costs'):
+        with tf.variable_scope("costs"):
             xent = tf.nn.softmax_cross_entropy_with_logits(
                 logits=logits, labels=self.labels)
-            self.cost = tf.reduce_mean(xent, name='xent')
+            self.cost = tf.reduce_mean(xent, name="xent")
             self.cost += self._decay()
 
-            if self.mode == 'eval':
-                tf.summary.scalar('cost', self.cost)
+            if self.mode == "eval":
+                tf.summary.scalar("cost", self.cost)
 
     def _build_train_op(self):
         """Build training specific ops for the graph."""
@@ -127,11 +127,11 @@ class ResNet(object):
         values = [0.1, 0.01, 0.001, 0.0001]
         self.lrn_rate = tf.train.piecewise_constant(self.global_step,
                                                     boundaries, values)
-        tf.summary.scalar('learning rate', self.lrn_rate)
+        tf.summary.scalar("learning rate", self.lrn_rate)
 
-        if self.hps.optimizer == 'sgd':
+        if self.hps.optimizer == "sgd":
             optimizer = tf.train.GradientDescentOptimizer(self.lrn_rate)
-        elif self.hps.optimizer == 'mom':
+        elif self.hps.optimizer == "mom":
             optimizer = tf.train.MomentumOptimizer(self.lrn_rate, 0.9)
 
         apply_op = optimizer.minimize(self.cost, global_step=self.global_step)
@@ -146,27 +146,27 @@ class ResNet(object):
             params_shape = [x.get_shape()[-1]]
 
             beta = tf.get_variable(
-                'beta',
+                "beta",
                 params_shape,
                 tf.float32,
                 initializer=tf.constant_initializer(0.0, tf.float32))
             gamma = tf.get_variable(
-                'gamma',
+                "gamma",
                 params_shape,
                 tf.float32,
                 initializer=tf.constant_initializer(1.0, tf.float32))
 
-            if self.mode == 'train':
-                mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
+            if self.mode == "train":
+                mean, variance = tf.nn.moments(x, [0, 1, 2], name="moments")
 
                 moving_mean = tf.get_variable(
-                    'moving_mean',
+                    "moving_mean",
                     params_shape,
                     tf.float32,
                     initializer=tf.constant_initializer(0.0, tf.float32),
                     trainable=False)
                 moving_variance = tf.get_variable(
-                    'moving_variance',
+                    "moving_variance",
                     params_shape,
                     tf.float32,
                     initializer=tf.constant_initializer(1.0, tf.float32),
@@ -180,13 +180,13 @@ class ResNet(object):
                         moving_variance, variance, 0.9))
             else:
                 mean = tf.get_variable(
-                    'moving_mean',
+                    "moving_mean",
                     params_shape,
                     tf.float32,
                     initializer=tf.constant_initializer(0.0, tf.float32),
                     trainable=False)
                 variance = tf.get_variable(
-                    'moving_variance',
+                    "moving_variance",
                     params_shape,
                     tf.float32,
                     initializer=tf.constant_initializer(1.0, tf.float32),
@@ -208,27 +208,27 @@ class ResNet(object):
                   activate_before_residual=False):
         """Residual unit with 2 sub layers."""
         if activate_before_residual:
-            with tf.variable_scope('shared_activation'):
-                x = self._batch_norm('init_bn', x)
+            with tf.variable_scope("shared_activation"):
+                x = self._batch_norm("init_bn", x)
                 x = self._relu(x, self.hps.relu_leakiness)
                 orig_x = x
         else:
-            with tf.variable_scope('residual_only_activation'):
+            with tf.variable_scope("residual_only_activation"):
                 orig_x = x
-                x = self._batch_norm('init_bn', x)
+                x = self._batch_norm("init_bn", x)
                 x = self._relu(x, self.hps.relu_leakiness)
 
-        with tf.variable_scope('sub1'):
-            x = self._conv('conv1', x, 3, in_filter, out_filter, stride)
+        with tf.variable_scope("sub1"):
+            x = self._conv("conv1", x, 3, in_filter, out_filter, stride)
 
-        with tf.variable_scope('sub2'):
-            x = self._batch_norm('bn2', x)
+        with tf.variable_scope("sub2"):
+            x = self._batch_norm("bn2", x)
             x = self._relu(x, self.hps.relu_leakiness)
-            x = self._conv('conv2', x, 3, out_filter, out_filter, [1, 1, 1, 1])
+            x = self._conv("conv2", x, 3, out_filter, out_filter, [1, 1, 1, 1])
 
-        with tf.variable_scope('sub_add'):
+        with tf.variable_scope("sub_add"):
             if in_filter != out_filter:
-                orig_x = tf.nn.avg_pool(orig_x, stride, stride, 'VALID')
+                orig_x = tf.nn.avg_pool(orig_x, stride, stride, "VALID")
                 orig_x = tf.pad(
                     orig_x,
                     [[0, 0], [0, 0], [0, 0], [(out_filter - in_filter) // 2,
@@ -245,34 +245,34 @@ class ResNet(object):
                              activate_before_residual=False):
         """Bottleneck residual unit with 3 sub layers."""
         if activate_before_residual:
-            with tf.variable_scope('common_bn_relu'):
-                x = self._batch_norm('init_bn', x)
+            with tf.variable_scope("common_bn_relu"):
+                x = self._batch_norm("init_bn", x)
                 x = self._relu(x, self.hps.relu_leakiness)
                 orig_x = x
         else:
-            with tf.variable_scope('residual_bn_relu'):
+            with tf.variable_scope("residual_bn_relu"):
                 orig_x = x
-                x = self._batch_norm('init_bn', x)
+                x = self._batch_norm("init_bn", x)
                 x = self._relu(x, self.hps.relu_leakiness)
 
-        with tf.variable_scope('sub1'):
-            x = self._conv('conv1', x, 1, in_filter, out_filter / 4, stride)
+        with tf.variable_scope("sub1"):
+            x = self._conv("conv1", x, 1, in_filter, out_filter / 4, stride)
 
-        with tf.variable_scope('sub2'):
-            x = self._batch_norm('bn2', x)
+        with tf.variable_scope("sub2"):
+            x = self._batch_norm("bn2", x)
             x = self._relu(x, self.hps.relu_leakiness)
-            x = self._conv('conv2', x, 3, out_filter / 4, out_filter / 4,
+            x = self._conv("conv2", x, 3, out_filter / 4, out_filter / 4,
                            [1, 1, 1, 1])
 
-        with tf.variable_scope('sub3'):
-            x = self._batch_norm('bn3', x)
+        with tf.variable_scope("sub3"):
+            x = self._batch_norm("bn3", x)
             x = self._relu(x, self.hps.relu_leakiness)
-            x = self._conv('conv3', x, 1, out_filter / 4, out_filter,
+            x = self._conv("conv3", x, 1, out_filter / 4, out_filter,
                            [1, 1, 1, 1])
 
-        with tf.variable_scope('sub_add'):
+        with tf.variable_scope("sub_add"):
             if in_filter != out_filter:
-                orig_x = self._conv('project', orig_x, 1, in_filter,
+                orig_x = self._conv("project", orig_x, 1, in_filter,
                                     out_filter, stride)
             x += orig_x
 
@@ -282,7 +282,7 @@ class ResNet(object):
         """L2 weight decay loss."""
         costs = []
         for var in tf.trainable_variables():
-            if var.op.name.find(r'DW') > 0:
+            if var.op.name.find(r"DW") > 0:
                 costs.append(tf.nn.l2_loss(var))
 
         return tf.multiply(self.hps.weight_decay_rate, tf.add_n(costs))
@@ -292,24 +292,24 @@ class ResNet(object):
         with tf.variable_scope(name):
             n = filter_size * filter_size * out_filters
             kernel = tf.get_variable(
-                'DW', [filter_size, filter_size, in_filters, out_filters],
+                "DW", [filter_size, filter_size, in_filters, out_filters],
                 tf.float32,
                 initializer=tf.random_normal_initializer(
                     stddev=np.sqrt(2.0 / n)))
-            return tf.nn.conv2d(x, kernel, strides, padding='SAME')
+            return tf.nn.conv2d(x, kernel, strides, padding="SAME")
 
     def _relu(self, x, leakiness=0.0):
         """Relu, with optional leaky support."""
-        return tf.where(tf.less(x, 0.0), leakiness * x, x, name='leaky_relu')
+        return tf.where(tf.less(x, 0.0), leakiness * x, x, name="leaky_relu")
 
     def _fully_connected(self, x, out_dim):
         """FullyConnected layer for final output."""
         x = tf.reshape(x, [self.hps.batch_size, -1])
         w = tf.get_variable(
-            'DW', [x.get_shape()[1], out_dim],
+            "DW", [x.get_shape()[1], out_dim],
             initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
         b = tf.get_variable(
-            'biases', [out_dim], initializer=tf.constant_initializer())
+            "biases", [out_dim], initializer=tf.constant_initializer())
         return tf.nn.xw_plus_b(x, w, b)
 
     def _global_avg_pool(self, x):
diff --git a/python/ray/actor.py b/python/ray/actor.py
index 027d5feb6..7953308df 100644
--- a/python/ray/actor.py
+++ b/python/ray/actor.py
@@ -741,26 +741,26 @@ def make_actor(cls, num_cpus, num_gpus, resources, max_reconstructions):
 ray.worker.global_worker.make_actor = make_actor
 
 CheckpointContext = namedtuple(
-    'CheckpointContext',
+    "CheckpointContext",
     [
         # Actor's ID.
-        'actor_id',
+        "actor_id",
         # Number of tasks executed since last checkpoint.
-        'num_tasks_since_last_checkpoint',
+        "num_tasks_since_last_checkpoint",
         # Time elapsed since last checkpoint, in milliseconds.
-        'time_elapsed_ms_since_last_checkpoint',
+        "time_elapsed_ms_since_last_checkpoint",
     ],
 )
 """A namedtuple that contains information about actor's last checkpoint."""
 
 Checkpoint = namedtuple(
-    'Checkpoint',
+    "Checkpoint",
     [
         # ID of this checkpoint.
-        'checkpoint_id',
+        "checkpoint_id",
         # The timestamp at which this checkpoint was saved,
         # represented as milliseconds elapsed since Unix epoch.
-        'timestamp',
+        "timestamp",
     ],
 )
 """A namedtuple that represents a checkpoint."""
@@ -856,7 +856,7 @@ def get_checkpoints_for_actor(actor_id):
         return []
     checkpoints = [
         Checkpoint(checkpoint_id, timestamp) for checkpoint_id, timestamp in
-        zip(checkpoint_info['CheckpointIds'], checkpoint_info['Timestamps'])
+        zip(checkpoint_info["CheckpointIds"], checkpoint_info["Timestamps"])
     ]
     return sorted(
         checkpoints,
diff --git a/python/ray/autoscaler/autoscaler.py b/python/ray/autoscaler/autoscaler.py
index f946acd73..d12c0adab 100644
--- a/python/ray/autoscaler/autoscaler.py
+++ b/python/ray/autoscaler/autoscaler.py
@@ -746,7 +746,7 @@ def hash_runtime_conf(file_mounts, extra_objs):
     def add_content_hashes(path):
         def add_hash_of_file(fpath):
             with open(fpath, "rb") as f:
-                for chunk in iter(lambda: f.read(2**20), b''):
+                for chunk in iter(lambda: f.read(2**20), b""):
                     hasher.update(chunk)
 
         path = os.path.expanduser(path)
diff --git a/python/ray/autoscaler/aws/config.py b/python/ray/autoscaler/aws/config.py
index 22268ea27..5fbadfdfa 100644
--- a/python/ray/autoscaler/aws/config.py
+++ b/python/ray/autoscaler/aws/config.py
@@ -173,7 +173,7 @@ def _configure_subnet(config):
             "on instance launch unless you set 'use_internal_ips': True in "
             "the 'provider' config.")
     if "availability_zone" in config["provider"]:
-        azs = config["provider"]["availability_zone"].split(',')
+        azs = config["provider"]["availability_zone"].split(",")
         subnets = [s for s in subnets if s.availability_zone in azs]
         if not subnets:
             raise Exception(
@@ -315,11 +315,11 @@ def _get_key(key_name, config):
 
 
 def _client(name, config):
-    boto_config = Config(retries={'max_attempts': BOTO_MAX_RETRIES})
+    boto_config = Config(retries={"max_attempts": BOTO_MAX_RETRIES})
     return boto3.client(name, config["provider"]["region"], config=boto_config)
 
 
 def _resource(name, config):
-    boto_config = Config(retries={'max_attempts': BOTO_MAX_RETRIES})
+    boto_config = Config(retries={"max_attempts": BOTO_MAX_RETRIES})
     return boto3.resource(
         name, config["provider"]["region"], config=boto_config)
diff --git a/python/ray/autoscaler/aws/node_provider.py b/python/ray/autoscaler/aws/node_provider.py
index 8a05a4f7f..9d0bf4228 100644
--- a/python/ray/autoscaler/aws/node_provider.py
+++ b/python/ray/autoscaler/aws/node_provider.py
@@ -36,7 +36,7 @@ def from_aws_format(tags):
 class AWSNodeProvider(NodeProvider):
     def __init__(self, provider_config, cluster_name):
         NodeProvider.__init__(self, provider_config, cluster_name)
-        config = Config(retries={'max_attempts': BOTO_MAX_RETRIES})
+        config = Config(retries={"max_attempts": BOTO_MAX_RETRIES})
         self.ec2 = boto3.resource(
             "ec2", region_name=provider_config["region"], config=config)
 
diff --git a/python/ray/experimental/sgd/modified_allreduce.py b/python/ray/experimental/sgd/modified_allreduce.py
index adf79a060..7e224a769 100644
--- a/python/ray/experimental/sgd/modified_allreduce.py
+++ b/python/ray/experimental/sgd/modified_allreduce.py
@@ -31,27 +31,27 @@ from tensorflow.contrib.all_reduce.python import all_reduce
 
 logger = logging.getLogger(__name__)
 
-AllReduceSpecTuple = pycoll.namedtuple('AllReduceSpecTuple',
-                                       'alg shards limit')
+AllReduceSpecTuple = pycoll.namedtuple("AllReduceSpecTuple",
+                                       "alg shards limit")
 
 
 def parse_general_int(s):
     """Parse integer with power-of-2 suffix eg. 32k."""
-    mo = re.match(r'(\d+)([KkMGT]?)$', s)
+    mo = re.match(r"(\d+)([KkMGT]?)$", s)
     if mo:
         i, suffix = mo.group(1, 2)
         v = int(i)
         if suffix:
-            if suffix == 'K' or suffix == 'k':
+            if suffix == "K" or suffix == "k":
                 v *= 1024
-            elif suffix == 'M':
+            elif suffix == "M":
                 v *= (1024 * 1024)
-            elif suffix == 'G':
+            elif suffix == "G":
                 v *= (1024 * 1024 * 1024)
-            elif suffix == 'T':
+            elif suffix == "T":
                 v *= (1024 * 1024 * 1024 * 1024)
             else:
-                raise ValueError('invalid integer string %s' % s)
+                raise ValueError("invalid integer string %s" % s)
         return v
     else:
         v = int(s)
@@ -81,37 +81,37 @@ def parse_all_reduce_spec(all_reduce_spec):
   Not all syntactically correct specifications are supported.
   Examples of supported all_reduce_spec strings, with semantics explained:
 
-    'xring' == apply ring all-reduce to all tensors
-    'xring#2' == apply ring all-reduce to all tensors, using two simultaneous
+    "xring" == apply ring all-reduce to all tensors
+    "xring#2" == apply ring all-reduce to all tensors, using two simultaneous
             transfer rings, each operating on 1/2 of each tensor.
-    'nccl'  == apply NCCL all-reduce to all tensors (only works within
+    "nccl"  == apply NCCL all-reduce to all tensors (only works within
             a single worker process where all devices are GPUs)
-    'nccl/xring' == apply NCCL all-reduce to all tensors within each worker
+    "nccl/xring" == apply NCCL all-reduce to all tensors within each worker
             to produce at least one full-reduced (locally) value,
             then apply ring all-reduce to one such value from each
             worker, then apply NCCL broadcast to propagate those globally
             reduced values back to every device within each worker.
-    'pscpu' == Shuffle reduce using worker CPUs as the gather devices: each
+    "pscpu" == Shuffle reduce using worker CPUs as the gather devices: each
             distributed tensor is reduced by copying all instances to
             one of the worker CPUs, computing the reduction there, then
             copying back to each participating device.  Tensor reductions
             are assigned to specific CPUs round-robin.
-    'psgpu#4' == Arrange all GPUs across all workers into groups of 4.
+    "psgpu#4" == Arrange all GPUs across all workers into groups of 4.
             Each distributed tensor is shuffle reduced against one
             such group of 4 GPUs, selected round-robin.  That is, each
             tensor is split across 4 shards for the reduction.
-    'pscpu:2k:pscpu#2:64k:xring' == Apply single-shard pscpu to
+    "pscpu:2k:pscpu#2:64k:xring" == Apply single-shard pscpu to
             tensors of size <= 2048 elements, apply 2-shard pscpu to
             tensors up to size 64k elements, apply xring to larger tensors.
-    'pscpu/pscpu#2' == Use shuffle gather to locally reduce each tensor on
+    "pscpu/pscpu#2" == Use shuffle gather to locally reduce each tensor on
             the worker's CPU, then use 2-shard shuffle to reduce those
             locally reduced tensors across workers (on the worker CPUs), then
             scatter the globally reduced values locally from each worker CPU.
   """
-    range_parts = all_reduce_spec.split(':') + ['-1']
+    range_parts = all_reduce_spec.split(":") + ["-1"]
     if len(range_parts) % 2:
         raise ValueError(
-            'all_reduce_spec not well formed: %s' % all_reduce_spec)
+            "all_reduce_spec not well formed: %s" % all_reduce_spec)
     limit = 0
     spec = []
     alg = None
@@ -124,26 +124,26 @@ def parse_all_reduce_spec(all_reduce_spec):
                     AllReduceSpecTuple(alg=alg, shards=shards, limit=limit))
             except ValueError:
                 raise ValueError(
-                    'all_reduce_spec (%s) contains non-integer range %s' %
+                    "all_reduce_spec (%s) contains non-integer range %s" %
                     (all_reduce_spec, range_part))
         else:
             alg = range_part
-            alg_parts = range_part.split('#')
+            alg_parts = range_part.split("#")
             alg = alg_parts[0]
             if len(alg_parts) > 1:
                 try:
                     shards = int(alg_parts[1])
                 except ValueError:
                     raise ValueError(
-                        'all_reduce_spec (%s) contains non-integer '
-                        'shards %s' % all_reduce_spec, alg_parts[1])
+                        "all_reduce_spec (%s) contains non-integer "
+                        "shards %s" % (all_reduce_spec, alg_parts[1]))
             else:
                 shards = 1
             if alg not in [
-                    'nccl', 'nccl/xring', 'nccl/rechd', 'nccl/pscpu', 'xring',
-                    'pscpu', 'psgpu', 'pscpu/pscpu'
+                    "nccl", "nccl/xring", "nccl/rechd", "nccl/pscpu", "xring",
+                    "pscpu", "psgpu", "pscpu/pscpu"
             ]:
-                raise ValueError('all_reduce_spec (%s) contains invalid alg %s'
+                raise ValueError("all_reduce_spec (%s) contains invalid alg %s"
                                  % (all_reduce_spec, alg))
     return spec
 
@@ -152,19 +152,19 @@ def build_all_reduce_device_prefixes(job_name, num_tasks):
     """Build list of device prefix names for all_reduce.
 
   Args:
-    job_name: 'worker', 'ps' or 'localhost'.
+    job_name: "worker", "ps" or "localhost".
     num_tasks: number of jobs across which device names should be generated.
 
   Returns:
      A list of device name prefix strings. Each element spells out the full
      host name without adding the device.
-     e.g. '/job:worker/task:0'
+     e.g. "/job:worker/task:0"
   """
-    if job_name != 'localhost':
-        return ['/job:%s/task:%d' % (job_name, d) for d in range(0, num_tasks)]
+    if job_name != "localhost":
+        return ["/job:%s/task:%d" % (job_name, d) for d in range(0, num_tasks)]
     else:
         assert num_tasks == 1
-        return ['/job:%s' % job_name]
+        return ["/job:%s" % job_name]
 
 
 def group_device_names(devices, group_size):
@@ -186,7 +186,7 @@ def group_device_names(devices, group_size):
     num_devices = len(devices)
     if group_size > num_devices:
         raise ValueError(
-            'only %d devices, but group_size=%d' % (num_devices, group_size))
+            "only %d devices, but group_size=%d" % (num_devices, group_size))
     num_groups = (
         num_devices // group_size + (1 if
                                      (num_devices % group_size != 0) else 0))
@@ -303,30 +303,30 @@ def sum_grad_and_var_all_reduce(grad_and_vars,
                                 aux_devices=None,
                                 num_shards=1):
     """Apply all-reduce algorithm over specified gradient tensors."""
-    with tf.name_scope('allreduce'):
+    with tf.name_scope("allreduce"):
         # Note that each grad_and_vars looks like the following:
         #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
         scaled_grads = [g for g, _ in grad_and_vars]
-        if alg == 'nccl':
+        if alg == "nccl":
             from tensorflow.python.ops import nccl_ops
             summed_grads = nccl_ops.all_sum(scaled_grads)
-        elif alg == 'simple':
+        elif alg == "simple":
             summed_grads = build_reduce_sum(scaled_grads)
-        elif alg == 'trivial':
+        elif alg == "trivial":
             summed_grads = build_trivial_sum(scaled_grads)
-        elif alg == 'xring':
+        elif alg == "xring":
             summed_grads = all_reduce.build_ring_all_reduce(
                 scaled_grads, num_workers, num_shards, gpu_indices, tf.add)
-        elif alg == 'nccl/xring':
+        elif alg == "nccl/xring":
             summed_grads = all_reduce.build_nccl_then_ring(
                 scaled_grads, num_shards, tf.add)
-        elif alg == 'nccl/rechd':
+        elif alg == "nccl/rechd":
             summed_grads = all_reduce.build_nccl_then_recursive_hd(
                 scaled_grads, tf.add)
-        elif alg == 'nccl/pscpu':
+        elif alg == "nccl/pscpu":
             summed_grads = all_reduce.build_nccl_then_shuffle(
                 scaled_grads, aux_devices, tf.add, tf.add_n)
-        elif alg == 'pscpu/pscpu':
+        elif alg == "pscpu/pscpu":
             summed_grads = all_reduce.build_shuffle_then_shuffle(
                 scaled_grads,
                 aux_devices,
@@ -334,11 +334,11 @@ def sum_grad_and_var_all_reduce(grad_and_vars,
                 # for the second level.
                 [aux_devices[0]],
                 tf.add_n)
-        elif alg in ['pscpu', 'psgpu']:
+        elif alg in ["pscpu", "psgpu"]:
             summed_grads = all_reduce.build_shuffle_all_reduce(
                 scaled_grads, aux_devices, tf.add_n)
         else:
-            raise ValueError('unsupported all_reduce alg: ', alg)
+            raise ValueError("unsupported all_reduce alg: ", alg)
 
         result = []
         for (_, v), g in zip(grad_and_vars, summed_grads):
@@ -385,17 +385,17 @@ def sum_gradients_all_reduce(dev_prefixes,
   Returns:
     list of reduced tensors, packing values
   """
-    alg_contains_shuffle = contains_any(alg, ['pscpu', 'psgpu'])
-    is_hierarchical = '/' in alg
-    if 'pscpu' in alg:
-        aux_devices = [prefix + '/cpu:0' for prefix in dev_prefixes]
-    elif 'psgpu' in alg:
+    alg_contains_shuffle = contains_any(alg, ["pscpu", "psgpu"])
+    is_hierarchical = "/" in alg
+    if "pscpu" in alg:
+        aux_devices = [prefix + "/cpu:0" for prefix in dev_prefixes]
+    elif "psgpu" in alg:
         aux_devices = [
-            prefix + '/gpu:%d' % i for i in range(len(gpu_indices))
+            prefix + "/gpu:%d" % i for i in range(len(gpu_indices))
             for prefix in dev_prefixes
         ]
     else:
-        aux_devices = ['/job:localhost/cpu:0']
+        aux_devices = ["/job:localhost/cpu:0"]
     aux_device_groups = group_device_names(
         aux_devices, num_shards if alg_contains_shuffle else 1)
     group_index = 0
@@ -406,8 +406,8 @@ def sum_gradients_all_reduce(dev_prefixes,
     else:
         packing = None
     new_tower_grads = []
-    if alg == 'better':
-        raw_devices = ['/gpu:%i' % (i) for i in gpu_indices]
+    if alg == "better":
+        raw_devices = ["/gpu:%i" % (i) for i in gpu_indices]
         agg_grads = aggregate_gradients_using_copy_with_device_selection(
             tower_grads, raw_devices)
         for arr in tower_grads:
@@ -427,12 +427,12 @@ def sum_gradients_all_reduce(dev_prefixes,
 
 
 def print_stats(sizes):
-    def sizeof_fmt(num, suffix='B'):
-        for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
+    def sizeof_fmt(num, suffix="B"):
+        for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
             if abs(num) < 1024.0:
                 return "%3.1f%s%s" % (num, unit, suffix)
             num /= 1024.0
-        return "%.1f%s%s" % (num, 'Yi', suffix)
+        return "%.1f%s%s" % (num, "Yi", suffix)
 
     stats = {
         "avg": np.mean(sizes),
@@ -482,7 +482,7 @@ def extract_ranges(index_list, range_size_limit=32):
     return ranges, singles
 
 
-GradPackTuple = pycoll.namedtuple('GradPackTuple', 'indices vars shapes')
+GradPackTuple = pycoll.namedtuple("GradPackTuple", "indices vars shapes")
 
 
 def pack_range(key, packing, grad_vars, rng):
@@ -503,7 +503,7 @@ def pack_range(key, packing, grad_vars, rng):
     members = []
     variables = []
     restore_shapes = []
-    with tf.name_scope('pack'):
+    with tf.name_scope("pack"):
         for g, v in to_pack:
             variables.append(v)
             restore_shapes.append(g.shape)
@@ -531,7 +531,7 @@ def unpack_grad_tuple(gv, gpt):
   """
     elt_widths = [x.num_elements() for x in gpt.shapes]
     with tf.device(gv[0][0].device):
-        with tf.name_scope('unpack'):
+        with tf.name_scope("unpack"):
             splits = tf.split(gv[0], elt_widths)
             unpacked_gv = []
             for idx, s in enumerate(splits):
@@ -595,9 +595,9 @@ def pack_small_tensors(tower_grads, max_bytes=0):
                 "https://github.com/ray-project/ray/issues/3136")
             new_gv_list = []
             for r in small_ranges:
-                key = '%d:%d' % (dev_idx, len(new_gv_list))
+                key = "%d:%d" % (dev_idx, len(new_gv_list))
                 new_gv_list.append((pack_range(key, packing, gv_list, r),
-                                    'packing_var_placeholder'))
+                                    "packing_var_placeholder"))
             for i in large_indices:
                 new_gv_list.append(gv_list[i])
             new_tower_grads.append(new_gv_list)
@@ -627,7 +627,7 @@ def unpack_small_tensors(tower_grads, packing):
     for dev_idx, gv_list in enumerate(tower_grads):
         new_gv_list = gv_list[num_packed:]
         for i in xrange(0, num_packed):
-            k = '%d:%d' % (dev_idx, i)
+            k = "%d:%d" % (dev_idx, i)
             gpt = packing[k]
             gv = unpack_grad_tuple(gv_list[i], gpt)
             for gi, idx in enumerate(gpt.indices):
diff --git a/python/ray/experimental/sgd/tfbench/convnet_builder.py b/python/ray/experimental/sgd/tfbench/convnet_builder.py
index e59085e5f..4936fb10d 100644
--- a/python/ray/experimental/sgd/tfbench/convnet_builder.py
+++ b/python/ray/experimental/sgd/tfbench/convnet_builder.py
@@ -37,7 +37,7 @@ class ConvNetBuilder(object):
                  input_nchan,
                  phase_train,
                  use_tf_layers,
-                 data_format='NCHW',
+                 data_format="NCHW",
                  dtype=tf.float32,
                  variable_dtype=tf.float32):
         self.top_layer = input_op
@@ -49,9 +49,9 @@ class ConvNetBuilder(object):
         self.variable_dtype = variable_dtype
         self.counts = defaultdict(lambda: 0)
         self.use_batch_norm = False
-        self.batch_norm_config = {}  # 'decay': 0.997, 'scale': True}
-        self.channel_pos = ('channels_last'
-                            if data_format == 'NHWC' else 'channels_first')
+        self.batch_norm_config = {}  # "decay": 0.997, "scale": True}
+        self.channel_pos = ("channels_last"
+                            if data_format == "NHWC" else "channels_first")
         self.aux_top_layer = None
         self.aux_top_size = 0
 
@@ -63,7 +63,7 @@ class ConvNetBuilder(object):
 
     ```python
     network = ConvNetBuilder(...)
-    with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
+    with tf.variable_scope("cg", custom_getter=network.get_custom_getter()):
       network.conv(...)
       # Call more methods of network here
     ```
@@ -77,10 +77,10 @@ class ConvNetBuilder(object):
         def inner_custom_getter(getter, *args, **kwargs):
             if not self.use_tf_layers:
                 return getter(*args, **kwargs)
-            requested_dtype = kwargs['dtype']
+            requested_dtype = kwargs["dtype"]
             if not (requested_dtype == tf.float32
                     and self.variable_dtype == tf.float16):
-                kwargs['dtype'] = self.variable_dtype
+                kwargs["dtype"] = self.variable_dtype
             var = getter(*args, **kwargs)
             if var.dtype.base_dtype != requested_dtype:
                 var = tf.cast(var, requested_dtype)
@@ -92,7 +92,7 @@ class ConvNetBuilder(object):
     def switch_to_aux_top_layer(self):
         """Context that construct cnn in the auxiliary arm."""
         if self.aux_top_layer is None:
-            raise RuntimeError('Empty auxiliary top layer in the network.')
+            raise RuntimeError("Empty auxiliary top layer in the network.")
         saved_top_layer = self.top_layer
         saved_top_size = self.top_size
         self.top_layer = self.aux_top_layer
@@ -124,12 +124,12 @@ class ConvNetBuilder(object):
                 kernel_size[0], kernel_size[1], num_channels_in, filters
             ]
             weights = self.get_variable(
-                'conv2d/kernel',
+                "conv2d/kernel",
                 weights_shape,
                 self.variable_dtype,
                 self.dtype,
                 initializer=kernel_initializer)
-            if self.data_format == 'NHWC':
+            if self.data_format == "NHWC":
                 strides = [1] + strides + [1]
             else:
                 strides = [1, 1] + strides
@@ -146,12 +146,12 @@ class ConvNetBuilder(object):
              k_width,
              d_height=1,
              d_width=1,
-             mode='SAME',
+             mode="SAME",
              input_layer=None,
              num_channels_in=None,
              use_batch_norm=None,
              stddev=None,
-             activation='relu',
+             activation="relu",
              bias=0.0):
         """Construct a conv2d layer on top of cnn."""
         if input_layer is None:
@@ -161,13 +161,13 @@ class ConvNetBuilder(object):
         kernel_initializer = None
         if stddev is not None:
             kernel_initializer = tf.truncated_normal_initializer(stddev=stddev)
-        name = 'conv' + str(self.counts['conv'])
-        self.counts['conv'] += 1
+        name = "conv" + str(self.counts["conv"])
+        self.counts["conv"] += 1
         with tf.variable_scope(name):
             strides = [1, d_height, d_width, 1]
-            if self.data_format == 'NCHW':
+            if self.data_format == "NCHW":
                 strides = [strides[0], strides[3], strides[1], strides[2]]
-            if mode != 'SAME_RESNET':
+            if mode != "SAME_RESNET":
                 conv = self._conv2d_impl(
                     input_layer,
                     num_channels_in,
@@ -184,7 +184,7 @@ class ConvNetBuilder(object):
                         num_out_channels,
                         kernel_size=[k_height, k_width],
                         strides=[d_height, d_width],
-                        padding='SAME',
+                        padding="SAME",
                         kernel_initializer=kernel_initializer)
                 else:
                     rate = 1  # Unused (for 'a trous' convolutions)
@@ -198,7 +198,7 @@ class ConvNetBuilder(object):
                     pad_w_end = kernel_width_effective - 1 - pad_w_beg
                     padding = [[0, 0], [pad_h_beg, pad_h_end],
                                [pad_w_beg, pad_w_end], [0, 0]]
-                    if self.data_format == 'NCHW':
+                    if self.data_format == "NCHW":
                         padding = [
                             padding[0], padding[3], padding[1], padding[2]
                         ]
@@ -209,14 +209,14 @@ class ConvNetBuilder(object):
                         num_out_channels,
                         kernel_size=[k_height, k_width],
                         strides=[d_height, d_width],
-                        padding='VALID',
+                        padding="VALID",
                         kernel_initializer=kernel_initializer)
             if use_batch_norm is None:
                 use_batch_norm = self.use_batch_norm
             if not use_batch_norm:
                 if bias is not None:
                     biases = self.get_variable(
-                        'biases', [num_out_channels],
+                        "biases", [num_out_channels],
                         self.variable_dtype,
                         self.dtype,
                         initializer=tf.constant_initializer(bias))
@@ -230,14 +230,14 @@ class ConvNetBuilder(object):
                 self.top_layer = conv
                 self.top_size = num_out_channels
                 biased = self.batch_norm(**self.batch_norm_config)
-            if activation == 'relu':
+            if activation == "relu":
                 conv1 = tf.nn.relu(biased)
-            elif activation == 'linear' or activation is None:
+            elif activation == "linear" or activation is None:
                 conv1 = biased
-            elif activation == 'tanh':
+            elif activation == "tanh":
                 conv1 = tf.nn.tanh(biased)
             else:
-                raise KeyError('Invalid activation type \'%s\'' % activation)
+                raise KeyError("Invalid activation type \"%s\"" % activation)
             self.top_layer = conv1
             self.top_size = num_out_channels
             return conv1
@@ -258,7 +258,7 @@ class ConvNetBuilder(object):
                 data_format=self.channel_pos,
                 name=name)
         else:
-            if self.data_format == 'NHWC':
+            if self.data_format == "NHWC":
                 ksize = [1, k_height, k_width, 1]
                 strides = [1, d_height, d_width, 1]
             else:
@@ -279,11 +279,11 @@ class ConvNetBuilder(object):
               k_width,
               d_height=2,
               d_width=2,
-              mode='VALID',
+              mode="VALID",
               input_layer=None,
               num_channels_in=None):
         """Construct a max pooling layer."""
-        return self._pool('mpool', pooling_layers.max_pooling2d, k_height,
+        return self._pool("mpool", pooling_layers.max_pooling2d, k_height,
                           k_width, d_height, d_width, mode, input_layer,
                           num_channels_in)
 
@@ -292,11 +292,11 @@ class ConvNetBuilder(object):
               k_width,
               d_height=2,
               d_width=2,
-              mode='VALID',
+              mode="VALID",
               input_layer=None,
               num_channels_in=None):
         """Construct an average pooling layer."""
-        return self._pool('apool', pooling_layers.average_pooling2d, k_height,
+        return self._pool("apool", pooling_layers.average_pooling2d, k_height,
                           k_width, d_height, d_width, mode, input_layer,
                           num_channels_in)
 
@@ -313,33 +313,33 @@ class ConvNetBuilder(object):
                num_channels_in=None,
                bias=0.0,
                stddev=None,
-               activation='relu'):
+               activation="relu"):
         if input_layer is None:
             input_layer = self.top_layer
         if num_channels_in is None:
             num_channels_in = self.top_size
-        name = 'affine' + str(self.counts['affine'])
-        self.counts['affine'] += 1
+        name = "affine" + str(self.counts["affine"])
+        self.counts["affine"] += 1
         with tf.variable_scope(name):
-            init_factor = 2. if activation == 'relu' else 1.
+            init_factor = 2. if activation == "relu" else 1.
             stddev = stddev or np.sqrt(init_factor / num_channels_in)
             kernel = self.get_variable(
-                'weights', [num_channels_in, num_out_channels],
+                "weights", [num_channels_in, num_out_channels],
                 self.variable_dtype,
                 self.dtype,
                 initializer=tf.truncated_normal_initializer(stddev=stddev))
             biases = self.get_variable(
-                'biases', [num_out_channels],
+                "biases", [num_out_channels],
                 self.variable_dtype,
                 self.dtype,
                 initializer=tf.constant_initializer(bias))
             logits = tf.nn.xw_plus_b(input_layer, kernel, biases)
-            if activation == 'relu':
+            if activation == "relu":
                 affine1 = tf.nn.relu(logits, name=name)
-            elif activation == 'linear' or activation is None:
+            elif activation == "linear" or activation is None:
                 affine1 = logits
             else:
-                raise KeyError('Invalid activation type \'%s\'' % activation)
+                raise KeyError("Invalid activation type \"%s\"" % activation)
             self.top_layer = affine1
             self.top_size = num_out_channels
             return affine1
@@ -360,34 +360,34 @@ class ConvNetBuilder(object):
                 for lx, layer in enumerate(col):
                     ltype, args = layer[0], layer[1:]
                     kwargs = {
-                        'input_layer': input_layer,
-                        'num_channels_in': in_size
+                        "input_layer": input_layer,
+                        "num_channels_in": in_size
                     } if lx == 0 else {}
-                    if ltype == 'conv':
+                    if ltype == "conv":
                         self.conv(*args, **kwargs)
-                    elif ltype == 'mpool':
+                    elif ltype == "mpool":
                         self.mpool(*args, **kwargs)
-                    elif ltype == 'apool':
+                    elif ltype == "apool":
                         self.apool(*args, **kwargs)
-                    elif ltype == 'share':
+                    elif ltype == "share":
                         self.top_layer = col_layers[c - 1][lx]
                         self.top_size = col_layer_sizes[c - 1][lx]
                     else:
                         raise KeyError(
-                            'Invalid layer type for inception module: \'%s\'' %
+                            "Invalid layer type for inception module: \"%s\"" %
                             ltype)
                     col_layers[c].append(self.top_layer)
                     col_layer_sizes[c].append(self.top_size)
-            catdim = 3 if self.data_format == 'NHWC' else 1
+            catdim = 3 if self.data_format == "NHWC" else 1
             self.top_layer = tf.concat([layers[-1] for layers in col_layers],
                                        catdim)
             self.top_size = sum(sizes[-1] for sizes in col_layer_sizes)
             return self.top_layer
 
     def spatial_mean(self, keep_dims=False):
-        name = 'spatial_mean' + str(self.counts['spatial_mean'])
-        self.counts['spatial_mean'] += 1
-        axes = [1, 2] if self.data_format == 'NHWC' else [2, 3]
+        name = "spatial_mean" + str(self.counts["spatial_mean"])
+        self.counts["spatial_mean"] += 1
+        axes = [1, 2] if self.data_format == "NHWC" else [2, 3]
         self.top_layer = tf.reduce_mean(
             self.top_layer, axes, keep_dims=keep_dims, name=name)
         return self.top_layer
@@ -397,7 +397,7 @@ class ConvNetBuilder(object):
             input_layer = self.top_layer
         else:
             self.top_size = None
-        name = 'dropout' + str(self.counts['dropout'])
+        name = "dropout" + str(self.counts["dropout"])
         with tf.variable_scope(name):
             if not self.phase_train:
                 keep_prob = 1.0
@@ -412,27 +412,27 @@ class ConvNetBuilder(object):
                                    epsilon):
         """Batch normalization on `input_layer` without tf.layers."""
         shape = input_layer.shape
-        num_channels = shape[3] if self.data_format == 'NHWC' else shape[1]
+        num_channels = shape[3] if self.data_format == "NHWC" else shape[1]
         beta = self.get_variable(
-            'beta', [num_channels],
+            "beta", [num_channels],
             tf.float32,
             tf.float32,
             initializer=tf.zeros_initializer())
         if use_scale:
             gamma = self.get_variable(
-                'gamma', [num_channels],
+                "gamma", [num_channels],
                 tf.float32,
                 tf.float32,
                 initializer=tf.ones_initializer())
         else:
             gamma = tf.constant(1.0, tf.float32, [num_channels])
         moving_mean = tf.get_variable(
-            'moving_mean', [num_channels],
+            "moving_mean", [num_channels],
             tf.float32,
             initializer=tf.zeros_initializer(),
             trainable=False)
         moving_variance = tf.get_variable(
-            'moving_variance', [num_channels],
+            "moving_variance", [num_channels],
             tf.float32,
             initializer=tf.ones_initializer(),
             trainable=False)
@@ -475,8 +475,8 @@ class ConvNetBuilder(object):
             input_layer = self.top_layer
         else:
             self.top_size = None
-        name = 'batchnorm' + str(self.counts['batchnorm'])
-        self.counts['batchnorm'] += 1
+        name = "batchnorm" + str(self.counts["batchnorm"])
+        self.counts["batchnorm"] += 1
 
         with tf.variable_scope(name) as scope:
             if self.use_tf_layers:
@@ -494,14 +494,14 @@ class ConvNetBuilder(object):
                                                      epsilon)
         self.top_layer = bn
         self.top_size = bn.shape[
-            3] if self.data_format == 'NHWC' else bn.shape[1]
+            3] if self.data_format == "NHWC" else bn.shape[1]
         self.top_size = int(self.top_size)
         return bn
 
     def lrn(self, depth_radius, bias, alpha, beta):
         """Adds a local response normalization layer."""
-        name = 'lrn' + str(self.counts['lrn'])
-        self.counts['lrn'] += 1
+        name = "lrn" + str(self.counts["lrn"])
+        self.counts["lrn"] += 1
         self.top_layer = tf.nn.lrn(
             self.top_layer, depth_radius, bias, alpha, beta, name=name)
         return self.top_layer
diff --git a/python/ray/experimental/sgd/tfbench/model.py b/python/ray/experimental/sgd/tfbench/model.py
index c33c50247..c5ebb30d8 100644
--- a/python/ray/experimental/sgd/tfbench/model.py
+++ b/python/ray/experimental/sgd/tfbench/model.py
@@ -63,7 +63,7 @@ class Model(object):
         return self.learning_rate
 
     def add_inference(self, unused_cnn):
-        raise ValueError('Must be implemented in derived classes')
+        raise ValueError("Must be implemented in derived classes")
 
     def skip_final_affine_layer(self):
         """Returns if the caller of this class should skip the final affine
@@ -82,11 +82,11 @@ class Model(object):
                       nclass=1001,
                       image_depth=3,
                       data_type=tf.float32,
-                      data_format='NCHW',
+                      data_format="NCHW",
                       use_tf_layers=True,
                       fp16_vars=False):
         """Returns logits and aux_logits from images."""
-        if data_format == 'NCHW':
+        if data_format == "NCHW":
             images = tf.transpose(images, [0, 3, 1, 2])
         var_type = tf.float32
         if data_type == tf.float16 and fp16_vars:
@@ -95,17 +95,17 @@ class Model(object):
             images, image_depth, phase_train, use_tf_layers, data_format,
             data_type, var_type)
         with tf.variable_scope(
-                'cg', custom_getter=network.get_custom_getter()):
+                "cg", custom_getter=network.get_custom_getter()):
             self.add_inference(network)
             # Add the final fully-connected class layer
-            logits = (network.affine(nclass, activation='linear')
+            logits = (network.affine(nclass, activation="linear")
                       if not self.skip_final_affine_layer() else
                       network.top_layer)
             aux_logits = None
             if network.aux_top_layer is not None:
                 with network.switch_to_aux_top_layer():
                     aux_logits = network.affine(
-                        nclass, activation='linear', stddev=0.001)
+                        nclass, activation="linear", stddev=0.001)
         if data_type == tf.float16:
             # TODO(reedwm): Determine if we should do this cast here.
             logits = tf.cast(logits, tf.float32)
diff --git a/python/ray/experimental/sgd/tfbench/model_config.py b/python/ray/experimental/sgd/tfbench/model_config.py
index 387bc0345..ffa2f9064 100644
--- a/python/ray/experimental/sgd/tfbench/model_config.py
+++ b/python/ray/experimental/sgd/tfbench/model_config.py
@@ -18,31 +18,31 @@
 from . import resnet_model
 
 _model_name_to_imagenet_model = {
-    'resnet50': resnet_model.create_resnet50_model,
-    'resnet50_v2': resnet_model.create_resnet50_v2_model,
-    'resnet101': resnet_model.create_resnet101_model,
-    'resnet101_v2': resnet_model.create_resnet101_v2_model,
-    'resnet152': resnet_model.create_resnet152_model,
-    'resnet152_v2': resnet_model.create_resnet152_v2_model,
+    "resnet50": resnet_model.create_resnet50_model,
+    "resnet50_v2": resnet_model.create_resnet50_v2_model,
+    "resnet101": resnet_model.create_resnet101_model,
+    "resnet101_v2": resnet_model.create_resnet101_v2_model,
+    "resnet152": resnet_model.create_resnet152_model,
+    "resnet152_v2": resnet_model.create_resnet152_v2_model,
 }
 
 _model_name_to_cifar_model = {}
 
 
 def _get_model_map(dataset_name):
-    if 'cifar10' == dataset_name:
+    if "cifar10" == dataset_name:
         return _model_name_to_cifar_model
-    elif dataset_name in ('imagenet', 'synthetic'):
+    elif dataset_name in ("imagenet", "synthetic"):
         return _model_name_to_imagenet_model
     else:
-        raise ValueError('Invalid dataset name: %s' % dataset_name)
+        raise ValueError("Invalid dataset name: %s" % dataset_name)
 
 
 def get_model_config(model_name, dataset):
     """Map model name to model network configuration."""
     model_map = _get_model_map(dataset.name)
     if model_name not in model_map:
-        raise ValueError('Invalid model name \'%s\' for dataset \'%s\'' %
+        raise ValueError("Invalid model name \"%s\" for dataset \"%s\"" %
                          (model_name, dataset.name))
     else:
         return model_map[model_name]()
@@ -52,6 +52,6 @@ def register_model(model_name, dataset_name, model_func):
     """Register a new model that can be obtained with `get_model_config`."""
     model_map = _get_model_map(dataset_name)
     if model_name in model_map:
-        raise ValueError('Model "%s" is already registered for dataset "%s"' %
-                         (model_name, dataset_name))
+        raise ValueError("Model \"%s\" is already registered for dataset "
+                         "\"%s\"" % (model_name, dataset_name))
     model_map[model_name] = model_func
diff --git a/python/ray/experimental/sgd/tfbench/resnet_model.py b/python/ray/experimental/sgd/tfbench/resnet_model.py
index 59052ed57..d89f7ee7f 100644
--- a/python/ray/experimental/sgd/tfbench/resnet_model.py
+++ b/python/ray/experimental/sgd/tfbench/resnet_model.py
@@ -47,7 +47,7 @@ def bottleneck_block_v1(cnn, depth, depth_bottleneck, stride):
   """
     input_layer = cnn.top_layer
     in_size = cnn.top_size
-    name_key = 'resnet_v1'
+    name_key = "resnet_v1"
     name = name_key + str(cnn.counts[name_key])
     cnn.counts[name_key] += 1
 
@@ -91,7 +91,7 @@ def bottleneck_block_v1(cnn, depth, depth_bottleneck, stride):
             3,
             1,
             1,
-            mode='SAME_RESNET',
+            mode="SAME_RESNET",
             use_batch_norm=True,
             bias=None)
         res = cnn.conv(
@@ -116,7 +116,7 @@ def bottleneck_block_v2(cnn, depth, depth_bottleneck, stride):
   """
     input_layer = cnn.top_layer
     in_size = cnn.top_size
-    name_key = 'resnet_v2'
+    name_key = "resnet_v2"
     name = name_key + str(cnn.counts[name_key])
     cnn.counts[name_key] += 1
 
@@ -162,7 +162,7 @@ def bottleneck_block_v2(cnn, depth, depth_bottleneck, stride):
             3,
             1,
             1,
-            mode='SAME_RESNET',
+            mode="SAME_RESNET",
             use_batch_norm=True,
             bias=None)
         res = cnn.conv(
@@ -216,7 +216,7 @@ def residual_block(cnn, depth, stride, pre_activation):
             input_layer=input_layer,
             num_channels_in=in_size)
         padding = (depth - in_size) // 2
-        if cnn.channel_pos == 'channels_last':
+        if cnn.channel_pos == "channels_last":
             shortcut = tf.pad(shortcut,
                               [[0, 0], [0, 0], [0, 0], [padding, padding]])
         else:
@@ -263,30 +263,30 @@ class ResnetModel(model_lib.Model):
 
     def __init__(self, model, layer_counts):
         default_batch_sizes = {
-            'resnet50': 64,
-            'resnet101': 32,
-            'resnet152': 32,
-            'resnet50_v2': 64,
-            'resnet101_v2': 32,
-            'resnet152_v2': 32,
+            "resnet50": 64,
+            "resnet101": 32,
+            "resnet152": 32,
+            "resnet50_v2": 64,
+            "resnet101_v2": 32,
+            "resnet152_v2": 32,
         }
         batch_size = default_batch_sizes.get(model, 32)
         super(ResnetModel, self).__init__(model, 224, batch_size, 0.005,
                                           layer_counts)
-        self.pre_activation = 'v2' in model
+        self.pre_activation = "v2" in model
 
     def add_inference(self, cnn):
         if self.layer_counts is None:
             raise ValueError(
-                'Layer counts not specified for %s' % self.get_model())
+                "Layer counts not specified for %s" % self.get_model())
         cnn.use_batch_norm = True
         cnn.batch_norm_config = {
-            'decay': 0.997,
-            'epsilon': 1e-5,
-            'scale': True
+            "decay": 0.997,
+            "epsilon": 1e-5,
+            "scale": True
         }
-        cnn.conv(64, 7, 7, 2, 2, mode='SAME_RESNET', use_batch_norm=True)
-        cnn.mpool(3, 3, 2, 2, mode='SAME')
+        cnn.conv(64, 7, 7, 2, 2, mode="SAME_RESNET", use_batch_norm=True)
+        cnn.mpool(3, 3, 2, 2, mode="SAME")
         for _ in xrange(self.layer_counts[0]):
             bottleneck_block(cnn, 256, 64, 1, self.pre_activation)
         for i in xrange(self.layer_counts[1]):
@@ -308,27 +308,27 @@ class ResnetModel(model_lib.Model):
 
 
 def create_resnet50_model():
-    return ResnetModel('resnet50', (3, 4, 6, 3))
+    return ResnetModel("resnet50", (3, 4, 6, 3))
 
 
 def create_resnet50_v2_model():
-    return ResnetModel('resnet50_v2', (3, 4, 6, 3))
+    return ResnetModel("resnet50_v2", (3, 4, 6, 3))
 
 
 def create_resnet101_model():
-    return ResnetModel('resnet101', (3, 4, 23, 3))
+    return ResnetModel("resnet101", (3, 4, 23, 3))
 
 
 def create_resnet101_v2_model():
-    return ResnetModel('resnet101_v2', (3, 4, 23, 3))
+    return ResnetModel("resnet101_v2", (3, 4, 23, 3))
 
 
 def create_resnet152_model():
-    return ResnetModel('resnet152', (3, 8, 36, 3))
+    return ResnetModel("resnet152", (3, 8, 36, 3))
 
 
 def create_resnet152_v2_model():
-    return ResnetModel('resnet152_v2', (3, 8, 36, 3))
+    return ResnetModel("resnet152_v2", (3, 8, 36, 3))
 
 
 class ResnetCifar10Model(model_lib.Model):
@@ -342,17 +342,17 @@ class ResnetCifar10Model(model_lib.Model):
   """
 
     def __init__(self, model, layer_counts):
-        self.pre_activation = 'v2' in model
+        self.pre_activation = "v2" in model
         super(ResnetCifar10Model, self).__init__(model, 32, 128, 0.1,
                                                  layer_counts)
 
     def add_inference(self, cnn):
         if self.layer_counts is None:
             raise ValueError(
-                'Layer counts not specified for %s' % self.get_model())
+                "Layer counts not specified for %s" % self.get_model())
 
         cnn.use_batch_norm = True
-        cnn.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}
+        cnn.batch_norm_config = {"decay": 0.9, "epsilon": 1e-5, "scale": True}
         if self.pre_activation:
             cnn.conv(16, 3, 3, 1, 1, use_batch_norm=True)
         else:
@@ -383,40 +383,40 @@ class ResnetCifar10Model(model_lib.Model):
 
 
 def create_resnet20_cifar_model():
-    return ResnetCifar10Model('resnet20', (3, 3, 3))
+    return ResnetCifar10Model("resnet20", (3, 3, 3))
 
 
 def create_resnet20_v2_cifar_model():
-    return ResnetCifar10Model('resnet20_v2', (3, 3, 3))
+    return ResnetCifar10Model("resnet20_v2", (3, 3, 3))
 
 
 def create_resnet32_cifar_model():
-    return ResnetCifar10Model('resnet32_v2', (5, 5, 5))
+    return ResnetCifar10Model("resnet32", (5, 5, 5))  # not the v2 variant
 
 
 def create_resnet32_v2_cifar_model():
-    return ResnetCifar10Model('resnet32_v2', (5, 5, 5))
+    return ResnetCifar10Model("resnet32_v2", (5, 5, 5))
 
 
 def create_resnet44_cifar_model():
-    return ResnetCifar10Model('resnet44', (7, 7, 7))
+    return ResnetCifar10Model("resnet44", (7, 7, 7))
 
 
 def create_resnet44_v2_cifar_model():
-    return ResnetCifar10Model('resnet44_v2', (7, 7, 7))
+    return ResnetCifar10Model("resnet44_v2", (7, 7, 7))
 
 
 def create_resnet56_cifar_model():
-    return ResnetCifar10Model('resnet56', (9, 9, 9))
+    return ResnetCifar10Model("resnet56", (9, 9, 9))
 
 
 def create_resnet56_v2_cifar_model():
-    return ResnetCifar10Model('resnet56_v2', (9, 9, 9))
+    return ResnetCifar10Model("resnet56_v2", (9, 9, 9))
 
 
 def create_resnet110_cifar_model():
-    return ResnetCifar10Model('resnet110', (18, 18, 18))
+    return ResnetCifar10Model("resnet110", (18, 18, 18))
 
 
 def create_resnet110_v2_cifar_model():
-    return ResnetCifar10Model('resnet110_v2', (18, 18, 18))
+    return ResnetCifar10Model("resnet110_v2", (18, 18, 18))
diff --git a/python/ray/experimental/sgd/tfbench/test_model.py b/python/ray/experimental/sgd/tfbench/test_model.py
index ab625143c..99900c43b 100644
--- a/python/ray/experimental/sgd/tfbench/test_model.py
+++ b/python/ray/experimental/sgd/tfbench/test_model.py
@@ -25,17 +25,17 @@ class TFBenchModel(Model):
             dtype=tf.float32,
             mean=127,
             stddev=60,
-            name='synthetic_images')
+            name="synthetic_images")
 
         # Minor hack to avoid H2D copy when using synthetic data
         inputs = tf.contrib.framework.local_variable(
-            images, name='gpu_cached_images')
+            images, name="gpu_cached_images")
         labels = tf.random_uniform(
             labels_shape,
             minval=0,
             maxval=999,
             dtype=tf.int32,
-            name='synthetic_labels')
+            name="synthetic_labels")
 
         model = model_config.get_model_config("resnet101", MockDataset())
         logits, aux = model.build_network(
@@ -44,7 +44,7 @@ class TFBenchModel(Model):
             logits=logits, labels=labels)
 
         # Implement model interface
-        self.loss = tf.reduce_mean(loss, name='xentropy-loss')
+        self.loss = tf.reduce_mean(loss, name="xentropy-loss")
         self.optimizer = tf.train.GradientDescentOptimizer(1e-6)
 
         self.variables = ray_tf_utils.TensorFlowVariables(
diff --git a/python/ray/ray_constants.py b/python/ray/ray_constants.py
index da9a26e98..6fd7d5b64 100644
--- a/python/ray/ray_constants.py
+++ b/python/ray/ray_constants.py
@@ -100,7 +100,7 @@ LOGGER_FORMAT = (
     "%(asctime)s\t%(levelname)s %(filename)s:%(lineno)s -- %(message)s")
 LOGGER_FORMAT_HELP = "The logging format. default='{}'".format(LOGGER_FORMAT)
 LOGGER_LEVEL = "info"
-LOGGER_LEVEL_CHOICES = ['debug', 'info', 'warning', 'error', 'critical']
+LOGGER_LEVEL_CHOICES = ["debug", "info", "warning", "error", "critical"]
 LOGGER_LEVEL_HELP = ("The logging level threshold, choices=['debug', 'info',"
                      " 'warning', 'error', 'critical'], default='info'")
 
diff --git a/python/ray/remote_function.py b/python/ray/remote_function.py
index dff3c7801..c26dd5884 100644
--- a/python/ray/remote_function.py
+++ b/python/ray/remote_function.py
@@ -43,7 +43,7 @@ class RemoteFunction(object):
         self._function = function
         self._function_descriptor = FunctionDescriptor.from_function(function)
         self._function_name = (
-            self._function.__module__ + '.' + self._function.__name__)
+            self._function.__module__ + "." + self._function.__name__)
         self._num_cpus = (DEFAULT_REMOTE_FUNCTION_CPUS
                           if num_cpus is None else num_cpus)
         self._num_gpus = num_gpus
diff --git a/python/ray/rllib/agents/ars/ars.py b/python/ray/rllib/agents/ars/ars.py
index 9136bf393..b91e4ef9f 100644
--- a/python/ray/rllib/agents/ars/ars.py
+++ b/python/ray/rllib/agents/ars/ars.py
@@ -108,7 +108,7 @@ class Worker(object):
             self.env,
             timestep_limit=timestep_limit,
             add_noise=add_noise,
-            offset=self.config['offset'])
+            offset=self.config["offset"])
         return rollout_rewards, rollout_length
 
     def do_rollouts(self, params, timestep_limit=None):
diff --git a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py
index bd05a843c..ad4f879e7 100644
--- a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py
+++ b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py
@@ -563,7 +563,7 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):
         # No need to add any noise on LayerNorm parameters
         for var in pnet_params:
             noise_var = tf.get_variable(
-                name=var.name.split(':')[0] + "_noise",
+                name=var.name.split(":")[0] + "_noise",
                 shape=var.shape,
                 initializer=tf.constant_initializer(.0),
                 trainable=False)
diff --git a/python/ray/rllib/agents/dqn/dqn_policy_graph.py b/python/ray/rllib/agents/dqn/dqn_policy_graph.py
index 1b5bb4624..6a226d237 100644
--- a/python/ray/rllib/agents/dqn/dqn_policy_graph.py
+++ b/python/ray/rllib/agents/dqn/dqn_policy_graph.py
@@ -524,7 +524,7 @@ class DQNPolicyGraph(LearningRateSchedule, DQNPostprocessing, TFPolicyGraph):
         # No need to add any noise on LayerNorm parameters
         for var in pnet_params:
             noise_var = tf.get_variable(
-                name=var.name.split(':')[0] + "_noise",
+                name=var.name.split(":")[0] + "_noise",
                 shape=var.shape,
                 initializer=tf.constant_initializer(.0),
                 trainable=False)
diff --git a/python/ray/rllib/agents/impala/vtrace.py b/python/ray/rllib/agents/impala/vtrace.py
index 4a5c8a4a6..238b30d99 100644
--- a/python/ray/rllib/agents/impala/vtrace.py
+++ b/python/ray/rllib/agents/impala/vtrace.py
@@ -38,12 +38,12 @@ import tensorflow as tf
 
 nest = tf.contrib.framework.nest
 
-VTraceFromLogitsReturns = collections.namedtuple('VTraceFromLogitsReturns', [
-    'vs', 'pg_advantages', 'log_rhos', 'behaviour_action_log_probs',
-    'target_action_log_probs'
+VTraceFromLogitsReturns = collections.namedtuple("VTraceFromLogitsReturns", [
+    "vs", "pg_advantages", "log_rhos", "behaviour_action_log_probs",
+    "target_action_log_probs"
 ])
 
-VTraceReturns = collections.namedtuple('VTraceReturns', 'vs pg_advantages')
+VTraceReturns = collections.namedtuple("VTraceReturns", "vs pg_advantages")
 
 
 def log_probs_from_logits_and_actions(policy_logits, actions):
@@ -100,7 +100,7 @@ def from_logits(behaviour_policy_logits,
                 bootstrap_value,
                 clip_rho_threshold=1.0,
                 clip_pg_rho_threshold=1.0,
-                name='vtrace_from_logits'):
+                name="vtrace_from_logits"):
     """multi_from_logits wrapper used only for tests"""
 
     res = multi_from_logits(
@@ -133,7 +133,7 @@ def multi_from_logits(behaviour_policy_logits,
                       bootstrap_value,
                       clip_rho_threshold=1.0,
                       clip_pg_rho_threshold=1.0,
-                      name='vtrace_from_logits'):
+                      name="vtrace_from_logits"):
     r"""V-trace for softmax policies.
 
   Calculates V-trace actor critic targets for softmax polices as described in
@@ -251,7 +251,7 @@ def from_importance_weights(log_rhos,
                             bootstrap_value,
                             clip_rho_threshold=1.0,
                             clip_pg_rho_threshold=1.0,
-                            name='vtrace_from_importance_weights'):
+                            name="vtrace_from_importance_weights"):
     r"""V-trace from log importance weights.
 
   Calculates V-trace actor critic targets as described in
@@ -323,19 +323,19 @@ def from_importance_weights(log_rhos,
         rhos = tf.exp(log_rhos)
         if clip_rho_threshold is not None:
             clipped_rhos = tf.minimum(
-                clip_rho_threshold, rhos, name='clipped_rhos')
+                clip_rho_threshold, rhos, name="clipped_rhos")
 
-            tf.summary.histogram('clipped_rhos_1000', tf.minimum(1000.0, rhos))
+            tf.summary.histogram("clipped_rhos_1000", tf.minimum(1000.0, rhos))
             tf.summary.scalar(
-                'num_of_clipped_rhos',
+                "num_of_clipped_rhos",
                 tf.reduce_sum(
                     tf.cast(
                         tf.equal(clipped_rhos, clip_rho_threshold), tf.int32)))
-            tf.summary.scalar('size_of_clipped_rhos', tf.size(clipped_rhos))
+            tf.summary.scalar("size_of_clipped_rhos", tf.size(clipped_rhos))
         else:
             clipped_rhos = rhos
 
-        cs = tf.minimum(1.0, rhos, name='cs')
+        cs = tf.minimum(1.0, rhos, name="cs")
         # Append bootstrapped value to get [v1, ..., v_t+1]
         values_t_plus_1 = tf.concat(
             [values[1:], tf.expand_dims(bootstrap_value, 0)], axis=0)
@@ -362,19 +362,19 @@ def from_importance_weights(log_rhos,
             initializer=initial_values,
             parallel_iterations=1,
             back_prop=False,
-            name='scan')
+            name="scan")
         # Reverse the results back to original order.
-        vs_minus_v_xs = tf.reverse(vs_minus_v_xs, [0], name='vs_minus_v_xs')
+        vs_minus_v_xs = tf.reverse(vs_minus_v_xs, [0], name="vs_minus_v_xs")
 
         # Add V(x_s) to get v_s.
-        vs = tf.add(vs_minus_v_xs, values, name='vs')
+        vs = tf.add(vs_minus_v_xs, values, name="vs")
 
         # Advantage for policy gradient.
         vs_t_plus_1 = tf.concat(
             [vs[1:], tf.expand_dims(bootstrap_value, 0)], axis=0)
         if clip_pg_rho_threshold is not None:
             clipped_pg_rhos = tf.minimum(
-                clip_pg_rho_threshold, rhos, name='clipped_pg_rhos')
+                clip_pg_rho_threshold, rhos, name="clipped_pg_rhos")
         else:
             clipped_pg_rhos = rhos
         pg_advantages = (
diff --git a/python/ray/rllib/agents/impala/vtrace_test.py b/python/ray/rllib/agents/impala/vtrace_test.py
index f74798fff..145ed4e7a 100644
--- a/python/ray/rllib/agents/impala/vtrace_test.py
+++ b/python/ray/rllib/agents/impala/vtrace_test.py
@@ -85,7 +85,7 @@ def _ground_truth_calculation(discounts, log_rhos, rewards, values,
 
 class LogProbsFromLogitsAndActionsTest(tf.test.TestCase,
                                        parameterized.TestCase):
-    @parameterized.named_parameters(('Batch1', 1), ('Batch2', 2))
+    @parameterized.named_parameters(("Batch1", 1), ("Batch2", 2))
     def test_log_probs_from_logits_and_actions(self, batch_size):
         """Tests log_probs_from_logits_and_actions."""
         seq_len = 7
@@ -117,7 +117,7 @@ class LogProbsFromLogitsAndActionsTest(tf.test.TestCase,
 
 
 class VtraceTest(tf.test.TestCase, parameterized.TestCase):
-    @parameterized.named_parameters(('Batch1', 1), ('Batch5', 5))
+    @parameterized.named_parameters(("Batch1", 1), ("Batch5", 5))
     def test_vtrace(self, batch_size):
         """Tests V-trace against ground truth data calculated in python."""
         seq_len = 5
@@ -129,15 +129,15 @@ class VtraceTest(tf.test.TestCase, parameterized.TestCase):
         log_rhos = _shaped_arange(seq_len, batch_size) / (batch_size * seq_len)
         log_rhos = 5 * (log_rhos - 0.5)  # [0.0, 1.0) -> [-2.5, 2.5).
         values = {
-            'log_rhos': log_rhos,
+            "log_rhos": log_rhos,
             # T, B where B_i: [0.9 / (i+1)] * T
-            'discounts': np.array([[0.9 / (b + 1) for b in range(batch_size)]
+            "discounts": np.array([[0.9 / (b + 1) for b in range(batch_size)]
                                    for _ in range(seq_len)]),
-            'rewards': _shaped_arange(seq_len, batch_size),
-            'values': _shaped_arange(seq_len, batch_size) / batch_size,
-            'bootstrap_value': _shaped_arange(batch_size) + 1.0,
-            'clip_rho_threshold': 3.7,
-            'clip_pg_rho_threshold': 2.2,
+            "rewards": _shaped_arange(seq_len, batch_size),
+            "values": _shaped_arange(seq_len, batch_size) / batch_size,
+            "bootstrap_value": _shaped_arange(batch_size) + 1.0,
+            "clip_rho_threshold": 3.7,
+            "clip_pg_rho_threshold": 2.2,
         }
 
         output = vtrace.from_importance_weights(**values)
@@ -149,7 +149,7 @@ class VtraceTest(tf.test.TestCase, parameterized.TestCase):
         for a, b in zip(ground_truth_v, output_v):
             self.assertAllClose(a, b)
 
-    @parameterized.named_parameters(('Batch1', 1), ('Batch2', 2))
+    @parameterized.named_parameters(("Batch1", 1), ("Batch2", 2))
     def test_vtrace_from_logits(self, batch_size):
         """Tests V-trace calculated from logits."""
         seq_len = 5
@@ -161,16 +161,16 @@ class VtraceTest(tf.test.TestCase, parameterized.TestCase):
         # deal with that.
         placeholders = {
             # T, B, NUM_ACTIONS
-            'behaviour_policy_logits': tf.placeholder(
+            "behaviour_policy_logits": tf.placeholder(
                 dtype=tf.float32, shape=[None, None, None]),
             # T, B, NUM_ACTIONS
-            'target_policy_logits': tf.placeholder(
+            "target_policy_logits": tf.placeholder(
                 dtype=tf.float32, shape=[None, None, None]),
-            'actions': tf.placeholder(dtype=tf.int32, shape=[None, None]),
-            'discounts': tf.placeholder(dtype=tf.float32, shape=[None, None]),
-            'rewards': tf.placeholder(dtype=tf.float32, shape=[None, None]),
-            'values': tf.placeholder(dtype=tf.float32, shape=[None, None]),
-            'bootstrap_value': tf.placeholder(dtype=tf.float32, shape=[None]),
+            "actions": tf.placeholder(dtype=tf.int32, shape=[None, None]),
+            "discounts": tf.placeholder(dtype=tf.float32, shape=[None, None]),
+            "rewards": tf.placeholder(dtype=tf.float32, shape=[None, None]),
+            "values": tf.placeholder(dtype=tf.float32, shape=[None, None]),
+            "bootstrap_value": tf.placeholder(dtype=tf.float32, shape=[None]),
         }
 
         from_logits_output = vtrace.from_logits(
@@ -179,25 +179,25 @@ class VtraceTest(tf.test.TestCase, parameterized.TestCase):
             **placeholders)
 
         target_log_probs = vtrace.log_probs_from_logits_and_actions(
-            placeholders['target_policy_logits'], placeholders['actions'])
+            placeholders["target_policy_logits"], placeholders["actions"])
         behaviour_log_probs = vtrace.log_probs_from_logits_and_actions(
-            placeholders['behaviour_policy_logits'], placeholders['actions'])
+            placeholders["behaviour_policy_logits"], placeholders["actions"])
         log_rhos = target_log_probs - behaviour_log_probs
         ground_truth = (log_rhos, behaviour_log_probs, target_log_probs)
 
         values = {
-            'behaviour_policy_logits': _shaped_arange(seq_len, batch_size,
+            "behaviour_policy_logits": _shaped_arange(seq_len, batch_size,
                                                       num_actions),
-            'target_policy_logits': _shaped_arange(seq_len, batch_size,
+            "target_policy_logits": _shaped_arange(seq_len, batch_size,
                                                    num_actions),
-            'actions': np.random.randint(
+            "actions": np.random.randint(
                 0, num_actions - 1, size=(seq_len, batch_size)),
-            'discounts': np.array(  # T, B where B_i: [0.9 / (i+1)] * T
+            "discounts": np.array(  # T, B where B_i: [0.9 / (i+1)] * T
                 [[0.9 / (b + 1) for b in range(batch_size)]
                  for _ in range(seq_len)]),
-            'rewards': _shaped_arange(seq_len, batch_size),
-            'values': _shaped_arange(seq_len, batch_size) / batch_size,
-            'bootstrap_value': _shaped_arange(batch_size) + 1.0,  # B
+            "rewards": _shaped_arange(seq_len, batch_size),
+            "values": _shaped_arange(seq_len, batch_size) / batch_size,
+            "bootstrap_value": _shaped_arange(batch_size) + 1.0,  # B
         }
 
         feed_dict = {placeholders[k]: v for k, v in values.items()}
@@ -211,10 +211,10 @@ class VtraceTest(tf.test.TestCase, parameterized.TestCase):
         # Calculate V-trace using the ground truth logits.
         from_iw = vtrace.from_importance_weights(
             log_rhos=ground_truth_log_rhos,
-            discounts=values['discounts'],
-            rewards=values['rewards'],
-            values=values['values'],
-            bootstrap_value=values['bootstrap_value'],
+            discounts=values["discounts"],
+            rewards=values["rewards"],
+            values=values["values"],
+            bootstrap_value=values["bootstrap_value"],
             clip_rho_threshold=clip_rho_threshold,
             clip_pg_rho_threshold=clip_pg_rho_threshold)
 
@@ -234,14 +234,14 @@ class VtraceTest(tf.test.TestCase, parameterized.TestCase):
     def test_higher_rank_inputs_for_importance_weights(self):
         """Checks support for additional dimensions in inputs."""
         placeholders = {
-            'log_rhos': tf.placeholder(
+            "log_rhos": tf.placeholder(
                 dtype=tf.float32, shape=[None, None, 1]),
-            'discounts': tf.placeholder(
+            "discounts": tf.placeholder(
                 dtype=tf.float32, shape=[None, None, 1]),
-            'rewards': tf.placeholder(
+            "rewards": tf.placeholder(
                 dtype=tf.float32, shape=[None, None, 42]),
-            'values': tf.placeholder(dtype=tf.float32, shape=[None, None, 42]),
-            'bootstrap_value': tf.placeholder(
+            "values": tf.placeholder(dtype=tf.float32, shape=[None, None, 42]),
+            "bootstrap_value": tf.placeholder(
                 dtype=tf.float32, shape=[None, 42])
         }
         output = vtrace.from_importance_weights(**placeholders)
@@ -250,19 +250,19 @@ class VtraceTest(tf.test.TestCase, parameterized.TestCase):
     def test_inconsistent_rank_inputs_for_importance_weights(self):
         """Test one of many possible errors in shape of inputs."""
         placeholders = {
-            'log_rhos': tf.placeholder(
+            "log_rhos": tf.placeholder(
                 dtype=tf.float32, shape=[None, None, 1]),
-            'discounts': tf.placeholder(
+            "discounts": tf.placeholder(
                 dtype=tf.float32, shape=[None, None, 1]),
-            'rewards': tf.placeholder(
+            "rewards": tf.placeholder(
                 dtype=tf.float32, shape=[None, None, 42]),
-            'values': tf.placeholder(dtype=tf.float32, shape=[None, None, 42]),
+            "values": tf.placeholder(dtype=tf.float32, shape=[None, None, 42]),
             # Should be [None, 42].
-            'bootstrap_value': tf.placeholder(dtype=tf.float32, shape=[None])
+            "bootstrap_value": tf.placeholder(dtype=tf.float32, shape=[None])
         }
-        with self.assertRaisesRegexp(ValueError, 'must have rank 2'):
+        with self.assertRaisesRegexp(ValueError, "must have rank 2"):
             vtrace.from_importance_weights(**placeholders)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     tf.test.main()
diff --git a/python/ray/rllib/agents/mock.py b/python/ray/rllib/agents/mock.py
index 560573af7..8cd8c08b4 100644
--- a/python/ray/rllib/agents/mock.py
+++ b/python/ray/rllib/agents/mock.py
@@ -36,12 +36,12 @@ class _MockTrainer(Trainer):
 
     def _save(self, checkpoint_dir):
         path = os.path.join(checkpoint_dir, "mock_agent.pkl")
-        with open(path, 'wb') as f:
+        with open(path, "wb") as f:
             pickle.dump(self.info, f)
         return path
 
     def _restore(self, checkpoint_path):
-        with open(checkpoint_path, 'rb') as f:
+        with open(checkpoint_path, "rb") as f:
             info = pickle.load(f)
         self.info = info
         self.restored = True
diff --git a/python/ray/rllib/env/atari_wrappers.py b/python/ray/rllib/env/atari_wrappers.py
index e222a8140..347bc6167 100644
--- a/python/ray/rllib/env/atari_wrappers.py
+++ b/python/ray/rllib/env/atari_wrappers.py
@@ -85,7 +85,7 @@ class NoopResetEnv(gym.Wrapper):
         self.noop_max = noop_max
         self.override_num_noops = None
         self.noop_action = 0
-        assert env.unwrapped.get_action_meanings()[0] == 'NOOP'
+        assert env.unwrapped.get_action_meanings()[0] == "NOOP"
 
     def reset(self, **kwargs):
         """ Do no-op action for a number of steps in [1, noop_max]."""
@@ -121,7 +121,7 @@ class FireResetEnv(gym.Wrapper):
 
         For environments that are fixed until firing."""
         gym.Wrapper.__init__(self, env)
-        assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
+        assert env.unwrapped.get_action_meanings()[1] == "FIRE"
         assert len(env.unwrapped.get_action_meanings()) >= 3
 
     def reset(self, **kwargs):
@@ -278,10 +278,10 @@ def wrap_deepmind(env, dim=84, framestack=True):
     """
     env = MonitorEnv(env)
     env = NoopResetEnv(env, noop_max=30)
-    if 'NoFrameskip' in env.spec.id:
+    if "NoFrameskip" in env.spec.id:
         env = MaxAndSkipEnv(env, skip=4)
     env = EpisodicLifeEnv(env)
-    if 'FIRE' in env.unwrapped.get_action_meanings():
+    if "FIRE" in env.unwrapped.get_action_meanings():
         env = FireResetEnv(env)
     env = WarpFrame(env, dim)
     # env = ScaledFloatFrame(env)  # TODO: use for dqn?
diff --git a/python/ray/rllib/evaluation/metrics.py b/python/ray/rllib/evaluation/metrics.py
index 6b1c766c4..aea321b74 100644
--- a/python/ray/rllib/evaluation/metrics.py
+++ b/python/ray/rllib/evaluation/metrics.py
@@ -114,8 +114,8 @@ def summarize_episodes(episodes, new_episodes, num_dropped):
         min_reward = min(episode_rewards)
         max_reward = max(episode_rewards)
     else:
-        min_reward = float('nan')
-        max_reward = float('nan')
+        min_reward = float("nan")
+        max_reward = float("nan")
     avg_reward = np.mean(episode_rewards)
     avg_length = np.mean(episode_lengths)
 
diff --git a/python/ray/rllib/examples/cartpole_lstm.py b/python/ray/rllib/examples/cartpole_lstm.py
index 39996e6cb..681647872 100644
--- a/python/ray/rllib/examples/cartpole_lstm.py
+++ b/python/ray/rllib/examples/cartpole_lstm.py
@@ -20,8 +20,8 @@ parser.add_argument("--run", type=str, default="PPO")
 
 class CartPoleStatelessEnv(gym.Env):
     metadata = {
-        'render.modes': ['human', 'rgb_array'],
-        'video.frames_per_second': 60
+        "render.modes": ["human", "rgb_array"],
+        "video.frames_per_second": 60
     }
 
     def __init__(self):
@@ -102,7 +102,7 @@ class CartPoleStatelessEnv(gym.Env):
         rv = np.r_[self.state[0], self.state[2]]
         return rv
 
-    def render(self, mode='human'):
+    def render(self, mode="human"):
         screen_width = 600
         screen_height = 400
 
@@ -149,7 +149,7 @@ class CartPoleStatelessEnv(gym.Env):
         self.carttrans.set_translation(cartx, carty)
         self.poletrans.set_rotation(-x[2])
 
-        return self.viewer.render(return_rgb_array=mode == 'rgb_array')
+        return self.viewer.render(return_rgb_array=mode == "rgb_array")
 
     def close(self):
         if self.viewer:
diff --git a/python/ray/rllib/optimizers/segment_tree.py b/python/ray/rllib/optimizers/segment_tree.py
index e09ed4723..64e8b29f7 100644
--- a/python/ray/rllib/optimizers/segment_tree.py
+++ b/python/ray/rllib/optimizers/segment_tree.py
@@ -138,7 +138,7 @@ class SumSegmentTree(SegmentTree):
 class MinSegmentTree(SegmentTree):
     def __init__(self, capacity):
         super(MinSegmentTree, self).__init__(
-            capacity=capacity, operation=min, neutral_element=float('inf'))
+            capacity=capacity, operation=min, neutral_element=float("inf"))
 
     def min(self, start=0, end=None):
         """Returns min(arr[start], ...,  arr[end])"""
diff --git a/python/ray/rllib/rollout.py b/python/ray/rllib/rollout.py
index af12e9c59..2bb25f5c4 100755
--- a/python/ray/rllib/rollout.py
+++ b/python/ray/rllib/rollout.py
@@ -86,7 +86,7 @@ def run(args, parser):
                 "Could not find params.pkl in either the checkpoint dir or "
                 "its parent directory.")
     else:
-        with open(config_path, 'rb') as f:
+        with open(config_path, "rb") as f:
             config = pickle.load(f)
     if "num_workers" in config:
         config["num_workers"] = min(2, config["num_workers"])
diff --git a/python/ray/rllib/setup-rllib-dev.py b/python/ray/rllib/setup-rllib-dev.py
index e31f80490..7495a74a3 100755
--- a/python/ray/rllib/setup-rllib-dev.py
+++ b/python/ray/rllib/setup-rllib-dev.py
@@ -40,7 +40,7 @@ if __name__ == "__main__":
         formatter_class=argparse.RawDescriptionHelpFormatter,
         description="Setup dev.")
     parser.add_argument(
-        "--yes", action='store_true', help="Don't ask for confirmation.")
+        "--yes", action="store_true", help="Don't ask for confirmation.")
     args = parser.parse_args()
 
     do_link("rllib", force=args.yes)
diff --git a/python/ray/rllib/train.py b/python/ray/rllib/train.py
index 539a857bc..6a5141699 100755
--- a/python/ray/rllib/train.py
+++ b/python/ray/rllib/train.py
@@ -79,7 +79,7 @@ def create_parser(parser_creator=None):
         "--env", default=None, type=str, help="The gym environment to use.")
     parser.add_argument(
         "--queue-trials",
-        action='store_true',
+        action="store_true",
         help=(
             "Whether to queue trials when the cluster does not currently have "
             "enough resources to launch one. This should be set to True when "
diff --git a/python/ray/rllib/utils/filter.py b/python/ray/rllib/utils/filter.py
index 6fd677131..b0b27706a 100644
--- a/python/ray/rllib/utils/filter.py
+++ b/python/ray/rllib/utils/filter.py
@@ -110,7 +110,7 @@ class RunningStat(object):
         self._S = S
 
     def __repr__(self):
-        return '(n={}, mean_mean={}, mean_std={})'.format(
+        return "(n={}, mean_mean={}, mean_std={})".format(
             self.n, np.mean(self.mean), np.mean(self.std))
 
     @property
@@ -234,7 +234,7 @@ class MeanStdFilter(Filter):
         return x
 
     def __repr__(self):
-        return 'MeanStdFilter({}, {}, {}, {}, {}, {})'.format(
+        return "MeanStdFilter({}, {}, {}, {}, {}, {})".format(
             self.shape, self.demean, self.destd, self.clip, self.rs,
             self.buffer)
 
@@ -268,7 +268,7 @@ class ConcurrentMeanStdFilter(MeanStdFilter):
         return other
 
     def __repr__(self):
-        return 'ConcurrentMeanStdFilter({}, {}, {}, {}, {}, {})'.format(
+        return "ConcurrentMeanStdFilter({}, {}, {}, {}, {}, {})".format(
             self.shape, self.demean, self.destd, self.clip, self.rs,
             self.buffer)
 
diff --git a/python/ray/rllib/utils/policy_server.py b/python/ray/rllib/utils/policy_server.py
index 04dafc8ac..ad8da0bf4 100644
--- a/python/ray/rllib/utils/policy_server.py
+++ b/python/ray/rllib/utils/policy_server.py
@@ -61,7 +61,7 @@ class PolicyServer(ThreadingMixIn, HTTPServer):
 def _make_handler(external_env):
     class Handler(SimpleHTTPRequestHandler):
         def do_POST(self):
-            content_len = int(self.headers.get('Content-Length'), 0)
+            content_len = int(self.headers.get("Content-Length", 0))
             raw_body = self.rfile.read(content_len)
             parsed_input = pickle.loads(raw_body)
             try:
diff --git a/python/ray/tests/test_autoscaler.py b/python/ray/tests/test_autoscaler.py
index 98cda7e4a..6f0ac9f85 100644
--- a/python/ray/tests/test_autoscaler.py
+++ b/python/ray/tests/test_autoscaler.py
@@ -388,7 +388,7 @@ class AutoscalingTest(unittest.TestCase):
         rtc1.clear()
         autoscaler.update()
         # Synchronization: wait for launchy thread to be blocked on rtc1
-        if hasattr(rtc1, '_cond'):  # Python 3.5
+        if hasattr(rtc1, "_cond"):  # Python 3.5
             waiters = rtc1._cond._waiters
         else:  # Python 2.7
             waiters = rtc1._Event__cond._Condition__waiters
@@ -539,9 +539,9 @@ class AutoscalingTest(unittest.TestCase):
 
     def testReportsConfigFailures(self):
         config = copy.deepcopy(SMALL_CLUSTER)
-        config['provider']['type'] = 'external'
+        config["provider"]["type"] = "external"
         config = fillout_defaults(config)
-        config['provider']['type'] = 'mock'
+        config["provider"]["type"] = "mock"
         config_path = self.write_config(config)
         self.provider = MockProvider()
         runner = MockProcessRunner(fail_cmds=["cmd1"])
diff --git a/python/ray/tests/test_basic.py b/python/ray/tests/test_basic.py
index 065fee609..38799d8ad 100644
--- a/python/ray/tests/test_basic.py
+++ b/python/ray/tests/test_basic.py
@@ -2678,7 +2678,7 @@ def test_raylet_is_robust_to_random_messages(ray_start_regular):
     # Try to bring down the node manager:
     s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     s.connect((node_manager_address, node_manager_port))
-    s.send(1000 * b'asdf')
+    s.send(1000 * b"asdf")
 
     @ray.remote
     def f():
diff --git a/python/ray/tests/test_credis.py b/python/ray/tests/test_credis.py
index 751a25a4f..a64b4020b 100644
--- a/python/ray/tests/test_credis.py
+++ b/python/ray/tests/test_credis.py
@@ -24,18 +24,18 @@ class CredisTest(unittest.TestCase):
 
     def test_credis_started(self):
         assert "redis_address" in self.config
-        primary = parse_client(self.config['redis_address'])
+        primary = parse_client(self.config["redis_address"])
         assert primary.ping() is True
-        member = primary.lrange('RedisShards', 0, -1)[0]
+        member = primary.lrange("RedisShards", 0, -1)[0]
         shard = parse_client(member.decode())
 
-        # Check that primary has loaded credis' master module.
-        chain = primary.execute_command('MASTER.GET_CHAIN')
+        # Check that primary has loaded credis's master module.
+        chain = primary.execute_command("MASTER.GET_CHAIN")
         assert len(chain) == 1
 
         # Check that the shard has loaded credis' member module.
         assert chain[0] == member
-        assert shard.execute_command('MEMBER.SN') == -1
+        assert shard.execute_command("MEMBER.SN") == -1
 
 
 if __name__ == "__main__":
diff --git a/python/ray/tests/test_failure.py b/python/ray/tests/test_failure.py
index c6739a4fe..8fb58e576 100644
--- a/python/ray/tests/test_failure.py
+++ b/python/ray/tests/test_failure.py
@@ -702,7 +702,7 @@ def test_connect_with_disconnected_node(shutdown_only):
     # This node is killed by SIGTERM, ray_monitor will not mark it again.
     removing_node = cluster.add_node(num_cpus=0, _internal_config=config)
     cluster.remove_node(removing_node, allow_graceful=True)
-    with pytest.raises(Exception, match=('Timing out of wait.')):
+    with pytest.raises(Exception, match=("Timing out of wait.")):
         wait_for_errors(ray_constants.REMOVED_NODE_ERROR, 3, timeout=2)
     # There is no connection error to a dead node.
     info = relevant_errors(ray_constants.RAYLET_CONNECTION_ERROR)
diff --git a/python/ray/tests/test_monitors.py b/python/ray/tests/test_monitors.py
index 486d8ca02..d588732c1 100644
--- a/python/ray/tests/test_monitors.py
+++ b/python/ray/tests/test_monitors.py
@@ -79,7 +79,7 @@ def _test_cleanup_on_driver_exit(num_redis_shards):
 
         ray.shutdown()
 
-    success = multiprocessing.Value('b', False)
+    success = multiprocessing.Value("b", False)
     driver = multiprocessing.Process(target=Driver, args=(success, ))
     driver.start()
     # Wait for client to exit.
diff --git a/python/ray/tests/utils.py b/python/ray/tests/utils.py
index 3485b2638..22146e89f 100644
--- a/python/ray/tests/utils.py
+++ b/python/ray/tests/utils.py
@@ -41,7 +41,7 @@ def run_and_get_output(command):
         p = subprocess.Popen(command, stdout=tmp, stderr=tmp)
         if p.wait() != 0:
             raise RuntimeError("ray start did not terminate properly")
-        with open(tmp.name, 'r') as f:
+        with open(tmp.name, "r") as f:
             result = f.readlines()
             return "\n".join(result)
 
diff --git a/python/ray/tune/automl/search_policy.py b/python/ray/tune/automl/search_policy.py
index e2fcb2116..a35ccb462 100644
--- a/python/ray/tune/automl/search_policy.py
+++ b/python/ray/tune/automl/search_policy.py
@@ -84,11 +84,11 @@ class AutoMLSearcher(SearchAlgorithm):
 
         for exp in self.experiment_list:
             for param_config, extra_arg in zip(raw_param_list, extra_arg_list):
-                tag = ''
+                tag = ""
                 new_spec = copy.deepcopy(exp.spec)
                 for path, value in param_config.items():
-                    tag += '%s=%s-' % (path.split('.')[-1], value)
-                    deep_insert(path.split('.'), value, new_spec['config'])
+                    tag += "%s=%s-" % (path.split(".")[-1], value)
+                    deep_insert(path.split("."), value, new_spec["config"])
 
                 trial = create_trial_from_spec(
                     new_spec, exp.name, self._parser, experiment_tag=tag)
diff --git a/python/ray/tune/automl/search_space.py b/python/ray/tune/automl/search_space.py
index 8de1906f6..65db657e8 100644
--- a/python/ray/tune/automl/search_space.py
+++ b/python/ray/tune/automl/search_space.py
@@ -67,7 +67,7 @@ class ContinuousSpace(ParameterSpace):
     certain distribution such as linear.
     """
 
-    LINEAR = 'linear'
+    LINEAR = "linear"
 
     # TODO: logspace
 
diff --git a/python/ray/tune/automlboard/backend/collector.py b/python/ray/tune/automlboard/backend/collector.py
index d382cd629..5566f4799 100644
--- a/python/ray/tune/automlboard/backend/collector.py
+++ b/python/ray/tune/automlboard/backend/collector.py
@@ -63,9 +63,9 @@ class CollectorService(object):
         """Initialize logger settings."""
         logger = logging.getLogger("AutoMLBoard")
         handler = logging.StreamHandler()
-        formatter = logging.Formatter('[%(levelname)s %(asctime)s] '
-                                      '%(filename)s: %(lineno)d  '
-                                      '%(message)s')
+        formatter = logging.Formatter("[%(levelname)s %(asctime)s] "
+                                      "%(filename)s: %(lineno)d  "
+                                      "%(message)s")
         handler.setFormatter(formatter)
         logger.setLevel(log_level)
         logger.addHandler(handler)
@@ -294,7 +294,7 @@ class Collector(Thread):
         meta = parse_json(meta_file)
 
         if not meta:
-            job_name = job_dir.split('/')[-1]
+            job_name = job_dir.split("/")[-1]
             user = os.environ.get("USER", None)
             meta = {
                 "job_id": job_name,
@@ -325,7 +325,7 @@ class Collector(Thread):
         meta = parse_json(meta_file)
 
         if not meta:
-            job_id = expr_dir.split('/')[-2]
+            job_id = expr_dir.split("/")[-2]
             trial_id = expr_dir[-8:]
             params = parse_json(os.path.join(expr_dir, EXPR_PARARM_FILE))
             meta = {
diff --git a/python/ray/tune/automlboard/common/utils.py b/python/ray/tune/automlboard/common/utils.py
index 9de1118ac..551afbfe3 100644
--- a/python/ray/tune/automlboard/common/utils.py
+++ b/python/ray/tune/automlboard/common/utils.py
@@ -19,9 +19,9 @@ def dump_json(json_info, json_file, overwrite=True):
         overwrite(boolean)
     """
     if overwrite:
-        mode = 'w'
+        mode = "w"
     else:
-        mode = 'w+'
+        mode = "w+"
 
     try:
         with open(json_file, mode) as f:
@@ -45,7 +45,7 @@ def parse_json(json_file):
         return None
 
     try:
-        with open(json_file, 'r') as f:
+        with open(json_file, "r") as f:
             info_str = f.readlines()
             info_str = "".join(info_str)
             json_info = json.loads(info_str)
@@ -76,11 +76,11 @@ def parse_multiple_json(json_file, offset=None):
         return json_info_list
 
     try:
-        with open(json_file, 'r') as f:
+        with open(json_file, "r") as f:
             if offset:
                 f.seek(offset)
             for line in f:
-                if line[-1] != '\n':
+                if line[-1] != "\n":
                     # Incomplete line
                     break
                 json_info = json.loads(line)
@@ -94,7 +94,7 @@ def parse_multiple_json(json_file, offset=None):
 
 def timestamp2date(timestamp):
     """Convert a timestamp to date."""
-    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))
+    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp))
 
 
 def unicode2str(content):
@@ -109,4 +109,4 @@ def unicode2str(content):
     elif isinstance(content, int) or isinstance(content, float):
         return content
     else:
-        return content.encode('utf-8')
+        return content.encode("utf-8")
diff --git a/python/ray/tune/automlboard/frontend/query.py b/python/ray/tune/automlboard/frontend/query.py
index 75b9fa431..75ea0cc94 100644
--- a/python/ray/tune/automlboard/frontend/query.py
+++ b/python/ray/tune/automlboard/frontend/query.py
@@ -36,7 +36,7 @@ def query_job(request):
         "success_trials": 4
     }
     """
-    job_id = request.GET.get('job_id')
+    job_id = request.GET.get("job_id")
     jobs = JobRecord.objects.filter(job_id=job_id)
     trials = TrialRecord.objects.filter(job_id=job_id)
 
@@ -68,7 +68,7 @@ def query_job(request):
             "progress": progress
         }
         resp = json.dumps(result)
-    return HttpResponse(resp, content_type='application/json;charset=utf-8')
+    return HttpResponse(resp, content_type="application/json;charset=utf-8")
 
 
 def query_trial(request):
@@ -90,10 +90,10 @@ def query_trial(request):
         "trial_id": "2067R2ZD",
     }
     """
-    trial_id = request.GET.get('trial_id')
+    trial_id = request.GET.get("trial_id")
     trials = TrialRecord.objects \
         .filter(trial_id=trial_id) \
-        .order_by('-start_time')
+        .order_by("-start_time")
     if len(trials) == 0:
         resp = "Unkonwn trial id %s.\n" % trials
     else:
@@ -107,4 +107,4 @@ def query_trial(request):
             "params": trial.params
         }
         resp = json.dumps(result)
-    return HttpResponse(resp, content_type='application/json;charset=utf-8')
+    return HttpResponse(resp, content_type="application/json;charset=utf-8")
diff --git a/python/ray/tune/automlboard/frontend/urls.py b/python/ray/tune/automlboard/frontend/urls.py
index 672176f66..118fb77ce 100644
--- a/python/ray/tune/automlboard/frontend/urls.py
+++ b/python/ray/tune/automlboard/frontend/urls.py
@@ -29,10 +29,10 @@ import ray.tune.automlboard.frontend.view as view
 import ray.tune.automlboard.frontend.query as query
 
 urlpatterns = [
-    url(r'^admin/', admin.site.urls),
-    url(r'^$', view.index),
-    url(r'^job$', view.job),
-    url(r'^trial$', view.trial),
-    url(r'^query_job', query.query_job),
-    url(r'^query_trial', query.query_trial)
+    url(r"^admin/", admin.site.urls),
+    url(r"^$", view.index),
+    url(r"^job$", view.job),
+    url(r"^trial$", view.trial),
+    url(r"^query_job", query.query_job),
+    url(r"^query_trial", query.query_trial)
 ]
diff --git a/python/ray/tune/automlboard/frontend/view.py b/python/ray/tune/automlboard/frontend/view.py
index e3ff4636a..4101c0212 100644
--- a/python/ray/tune/automlboard/frontend/view.py
+++ b/python/ray/tune/automlboard/frontend/view.py
@@ -16,8 +16,8 @@ import datetime
 
 def index(request):
     """View for the home page."""
-    recent_jobs = JobRecord.objects.order_by('-start_time')[0:100]
-    recent_trials = TrialRecord.objects.order_by('-start_time')[0:500]
+    recent_jobs = JobRecord.objects.order_by("-start_time")[0:100]
+    recent_trials = TrialRecord.objects.order_by("-start_time")[0:500]
 
     total_num = len(recent_trials)
     running_num = sum(t.trial_status == Trial.RUNNING for t in recent_trials)
@@ -29,31 +29,31 @@ def index(request):
     for recent_job in recent_jobs:
         job_records.append(get_job_info(recent_job))
     context = {
-        'log_dir': AUTOMLBOARD_LOG_DIR,
-        'reload_interval': AUTOMLBOARD_RELOAD_INTERVAL,
-        'recent_jobs': job_records,
-        'job_num': len(job_records),
-        'trial_num': total_num,
-        'running_num': running_num,
-        'success_num': success_num,
-        'failed_num': failed_num
+        "log_dir": AUTOMLBOARD_LOG_DIR,
+        "reload_interval": AUTOMLBOARD_RELOAD_INTERVAL,
+        "recent_jobs": job_records,
+        "job_num": len(job_records),
+        "trial_num": total_num,
+        "running_num": running_num,
+        "success_num": success_num,
+        "failed_num": failed_num
     }
-    return render(request, 'index.html', context)
+    return render(request, "index.html", context)
 
 
 def job(request):
     """View for a single job."""
-    job_id = request.GET.get('job_id')
-    recent_jobs = JobRecord.objects.order_by('-start_time')[0:100]
+    job_id = request.GET.get("job_id")
+    recent_jobs = JobRecord.objects.order_by("-start_time")[0:100]
     recent_trials = TrialRecord.objects \
         .filter(job_id=job_id) \
-        .order_by('-start_time')
+        .order_by("-start_time")
     trial_records = []
     for recent_trial in recent_trials:
         trial_records.append(get_trial_info(recent_trial))
     current_job = JobRecord.objects \
         .filter(job_id=job_id) \
-        .order_by('-start_time')[0]
+        .order_by("-start_time")[0]
 
     if len(trial_records) > 0:
         param_keys = trial_records[0]["params"].keys()
@@ -63,38 +63,38 @@ def job(request):
     # TODO: support custom metrics here
     metric_keys = ["episode_reward", "accuracy", "loss"]
     context = {
-        'current_job': get_job_info(current_job),
-        'recent_jobs': recent_jobs,
-        'recent_trials': trial_records,
-        'param_keys': param_keys,
-        'param_num': len(param_keys),
-        'metric_keys': metric_keys,
-        'metric_num': len(metric_keys)
+        "current_job": get_job_info(current_job),
+        "recent_jobs": recent_jobs,
+        "recent_trials": trial_records,
+        "param_keys": param_keys,
+        "param_num": len(param_keys),
+        "metric_keys": metric_keys,
+        "metric_num": len(metric_keys)
     }
-    return render(request, 'job.html', context)
+    return render(request, "job.html", context)
 
 
 def trial(request):
     """View for a single trial."""
-    job_id = request.GET.get('job_id')
-    trial_id = request.GET.get('trial_id')
+    job_id = request.GET.get("job_id")
+    trial_id = request.GET.get("trial_id")
     recent_trials = TrialRecord.objects \
         .filter(job_id=job_id) \
-        .order_by('-start_time')
+        .order_by("-start_time")
     recent_results = ResultRecord.objects \
         .filter(trial_id=trial_id) \
-        .order_by('-date')[0:2000]
+        .order_by("-date")[0:2000]
     current_trial = TrialRecord.objects \
         .filter(trial_id=trial_id) \
-        .order_by('-start_time')[0]
+        .order_by("-start_time")[0]
     context = {
-        'job_id': job_id,
-        'trial_id': trial_id,
-        'current_trial': current_trial,
-        'recent_results': recent_results,
-        'recent_trials': recent_trials
+        "job_id": job_id,
+        "trial_id": trial_id,
+        "current_trial": current_trial,
+        "recent_results": recent_results,
+        "recent_trials": recent_trials
     }
-    return render(request, 'trial.html', context)
+    return render(request, "trial.html", context)
 
 
 def get_job_info(current_job):
@@ -133,7 +133,7 @@ def get_job_info(current_job):
 
 def get_trial_info(current_trial):
     """Get job information for current trial."""
-    if current_trial.end_time and ('_' in current_trial.end_time):
+    if current_trial.end_time and ("_" in current_trial.end_time):
         # end time is parsed from result.json and the format
         # is like: yyyy-mm-dd_hh-MM-ss, which will be converted
         # to yyyy-mm-dd hh:MM:ss here
@@ -170,7 +170,7 @@ def get_winner(trials):
         first_metrics = get_trial_info(trials[0])["metrics"]
         if first_metrics and not first_metrics.get("accuracy", None):
             sort_key = "episode_reward"
-        max_metric = float('-Inf')
+        max_metric = float("-Inf")
         for t in trials:
             metrics = get_trial_info(t).get("metrics", None)
             if metrics and metrics.get(sort_key, None):
diff --git a/python/ray/tune/automlboard/models/apps.py b/python/ray/tune/automlboard/models/apps.py
index 7535e3250..d4c16ce41 100644
--- a/python/ray/tune/automlboard/models/apps.py
+++ b/python/ray/tune/automlboard/models/apps.py
@@ -9,4 +9,4 @@ from django.apps import AppConfig
 class ModelConfig(AppConfig):
     """Model Congig for models."""
 
-    name = 'ray.tune.automlboard.models'
+    name = "ray.tune.automlboard.models"
diff --git a/python/ray/tune/automlboard/run.py b/python/ray/tune/automlboard/run.py
index b48ca9af9..5304a56ef 100644
--- a/python/ray/tune/automlboard/run.py
+++ b/python/ray/tune/automlboard/run.py
@@ -37,8 +37,8 @@ def run_board(args):
     # frontend service
     logger.info("Try to start automlboard on port %s\n" % args.port)
     command = [
-        os.path.join(root_path, 'manage.py'), 'runserver',
-        '0.0.0.0:%s' % args.port, '--noreload'
+        os.path.join(root_path, "manage.py"), "runserver",
+        "0.0.0.0:%s" % args.port, "--noreload"
     ]
     execute_from_command_line(command)
 
@@ -76,7 +76,7 @@ def init_config(args):
     os.environ.setdefault("DJANGO_SETTINGS_MODULE",
                           "ray.tune.automlboard.settings")
     django.setup()
-    command = [os.path.join(root_path, 'manage.py'), 'migrate', '--run-syncdb']
+    command = [os.path.join(root_path, "manage.py"), "migrate", "--run-syncdb"]
     execute_from_command_line(command)
 
 
diff --git a/python/ray/tune/automlboard/settings.py b/python/ray/tune/automlboard/settings.py
index 378f9b418..2bd940fc9 100644
--- a/python/ray/tune/automlboard/settings.py
+++ b/python/ray/tune/automlboard/settings.py
@@ -21,54 +21,54 @@ import os
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 
 # You can specify your own secret key, here we just pick one randomly.
-SECRET_KEY = 'tktks103=$7a#5axn)52&b87!#w_qm(%*72^@hsq!nur%dtk4b'
+SECRET_KEY = "tktks103=$7a#5axn)52&b87!#w_qm(%*72^@hsq!nur%dtk4b"
 
 # SECURITY WARNING: don't run with debug turned on in production!
 DEBUG = True
 
-ALLOWED_HOSTS = ['*']
+ALLOWED_HOSTS = ["*"]
 
 # Application definition
 
 INSTALLED_APPS = [
-    'django.contrib.admin',
-    'django.contrib.auth',
-    'django.contrib.contenttypes',
-    'django.contrib.sessions',
-    'django.contrib.messages',
-    'django.contrib.staticfiles',
-    'ray.tune.automlboard.models',
+    "django.contrib.admin",
+    "django.contrib.auth",
+    "django.contrib.contenttypes",
+    "django.contrib.sessions",
+    "django.contrib.messages",
+    "django.contrib.staticfiles",
+    "ray.tune.automlboard.models",
 ]
 
 MIDDLEWARE = [
-    'django.middleware.security.SecurityMiddleware',
-    'django.contrib.sessions.middleware.SessionMiddleware',
-    'django.middleware.common.CommonMiddleware',
-    'django.middleware.csrf.CsrfViewMiddleware',
-    'django.contrib.auth.middleware.AuthenticationMiddleware',
-    'django.contrib.messages.middleware.MessageMiddleware',
-    'django.middleware.clickjacking.XFrameOptionsMiddleware',
+    "django.middleware.security.SecurityMiddleware",
+    "django.contrib.sessions.middleware.SessionMiddleware",
+    "django.middleware.common.CommonMiddleware",
+    "django.middleware.csrf.CsrfViewMiddleware",
+    "django.contrib.auth.middleware.AuthenticationMiddleware",
+    "django.contrib.messages.middleware.MessageMiddleware",
+    "django.middleware.clickjacking.XFrameOptionsMiddleware",
 ]
 
-ROOT_URLCONF = 'ray.tune.automlboard.frontend.urls'
+ROOT_URLCONF = "ray.tune.automlboard.frontend.urls"
 
 TEMPLATES = [
     {
-        'BACKEND': 'django.template.backends.django.DjangoTemplates',
-        'DIRS': [BASE_DIR + "/templates"],
-        'APP_DIRS': True,
-        'OPTIONS': {
-            'context_processors': [
-                'django.template.context_processors.debug',
-                'django.template.context_processors.request',
-                'django.contrib.auth.context_processors.auth',
-                'django.contrib.messages.context_processors.messages',
+        "BACKEND": "django.template.backends.django.DjangoTemplates",
+        "DIRS": [BASE_DIR + "/templates"],
+        "APP_DIRS": True,
+        "OPTIONS": {
+            "context_processors": [
+                "django.template.context_processors.debug",
+                "django.template.context_processors.request",
+                "django.contrib.auth.context_processors.auth",
+                "django.contrib.messages.context_processors.messages",
             ],
         },
     },
 ]
 
-WSGI_APPLICATION = 'ray.tune.automlboard.frontend.wsgi.application'
+WSGI_APPLICATION = "ray.tune.automlboard.frontend.wsgi.application"
 
 DB_ENGINE_NAME_MAP = {
     "mysql": "django.db.backends.mysql",
@@ -85,17 +85,17 @@ def lookup_db_engine(name):
 # https://docs.djangoproject.com/en/1.11/ref/settings/#databases
 if not os.environ.get("AUTOMLBOARD_DB_ENGINE", None):
     DATABASES = {
-        'default': {
-            'ENGINE': 'django.db.backends.sqlite3',
-            'NAME': 'automlboard.db',
+        "default": {
+            "ENGINE": "django.db.backends.sqlite3",
+            "NAME": "automlboard.db",
         }
     }
 else:
     DATABASES = {
-        'default': {
-            'ENGINE': lookup_db_engine(os.environ["AUTOMLBOARD_DB_ENGINE"]),
-            'NAME': os.environ["AUTOMLBOARD_DB_NAME"],
-            'USER': os.environ["AUTOMLBOARD_DB_USER"],
+        "default": {
+            "ENGINE": lookup_db_engine(os.environ["AUTOMLBOARD_DB_ENGINE"]),
+            "NAME": os.environ["AUTOMLBOARD_DB_NAME"],
+            "USER": os.environ["AUTOMLBOARD_DB_USER"],
             "PASSWORD": os.environ["AUTOMLBOARD_DB_PASSWORD"],
             "HOST": os.environ["AUTOMLBOARD_DB_HOST"],
             "PORT": os.environ["AUTOMLBOARD_DB_PORT"]
@@ -109,25 +109,25 @@ VALIDATION_PREFIX = "django.contrib.auth.password_validation."
 
 AUTH_PASSWORD_VALIDATORS = [
     {
-        'NAME': VALIDATION_PREFIX + "UserAttributeSimilarityValidator",
+        "NAME": VALIDATION_PREFIX + "UserAttributeSimilarityValidator",
     },
     {
-        'NAME': VALIDATION_PREFIX + "MinimumLengthValidator",
+        "NAME": VALIDATION_PREFIX + "MinimumLengthValidator",
     },
     {
-        'NAME': VALIDATION_PREFIX + "CommonPasswordValidator",
+        "NAME": VALIDATION_PREFIX + "CommonPasswordValidator",
     },
     {
-        'NAME': VALIDATION_PREFIX + "NumericPasswordValidator",
+        "NAME": VALIDATION_PREFIX + "NumericPasswordValidator",
     },
 ]
 
 # Internationalization
 # https://docs.djangoproject.com/en/1.11/topics/i18n/
 
-LANGUAGE_CODE = 'en-us'
+LANGUAGE_CODE = "en-us"
 
-TIME_ZONE = 'Asia/Shanghai'
+TIME_ZONE = "Asia/Shanghai"
 
 USE_I18N = True
 
@@ -138,8 +138,8 @@ USE_TZ = False
 # Static files (CSS, JavaScript, Images)
 # https://docs.djangoproject.com/en/1.11/howto/static-files/
 
-STATIC_URL = '/static/'
-STATICFILES_DIRS = (os.path.join(BASE_DIR, 'static').replace('\\', '/'), )
+STATIC_URL = "/static/"
+STATICFILES_DIRS = (os.path.join(BASE_DIR, "static").replace("\\", "/"), )
 
 # automlboard settings
 AUTOMLBOARD_LOG_DIR = os.environ.get("AUTOMLBOARD_LOGDIR", None)
diff --git a/python/ray/tune/commands.py b/python/ray/tune/commands.py
index e272eb52e..bf3651c44 100644
--- a/python/ray/tune/commands.py
+++ b/python/ray/tune/commands.py
@@ -53,12 +53,12 @@ except subprocess.CalledProcessError:
     TERM_HEIGHT, TERM_WIDTH = 100, 100
 
 OPERATORS = {
-    '<': operator.lt,
-    '<=': operator.le,
-    '==': operator.eq,
-    '!=': operator.ne,
-    '>=': operator.ge,
-    '>': operator.gt,
+    "<": operator.lt,
+    "<=": operator.le,
+    "==": operator.eq,
+    "!=": operator.ne,
+    ">=": operator.ge,
+    ">": operator.gt,
 }
 
 
@@ -89,7 +89,7 @@ def print_format_output(dataframe):
 
         print_df[col] = dataframe[col]
         test_table = tabulate(print_df, headers="keys", tablefmt="psql")
-        if str(test_table).index('\n') > TERM_WIDTH:
+        if str(test_table).index("\n") > TERM_WIDTH:
             # Drop all columns beyond terminal width
             print_df.drop(col, axis=1, inplace=True)
             dropped_cols += list(dataframe.columns)[i:]
@@ -172,10 +172,10 @@ def list_trials(experiment_path,
     if "logdir" in checkpoints_df:
         # logdir often too verbose to view in table, so drop experiment_path
         checkpoints_df["logdir"] = checkpoints_df["logdir"].str.replace(
-            experiment_path, '')
+            experiment_path, "")
 
     if filter_op:
-        col, op, val = filter_op.split(' ')
+        col, op, val = filter_op.split(" ")
         col_type = checkpoints_df[col].dtype
         if is_numeric_dtype(col_type):
             val = float(val)
@@ -183,7 +183,7 @@ def list_trials(experiment_path,
             val = str(val)
         # TODO(Andrew): add support for datetime and boolean
         else:
-            raise ValueError("Unsupported dtype for '{}': {}".format(
+            raise ValueError("Unsupported dtype for '{}': {}".format(
                 val, col_type))
         op = OPERATORS[op]
         filtered_index = op(checkpoints_df[col], val)
@@ -191,7 +191,7 @@ def list_trials(experiment_path,
 
     if sort:
         if sort not in checkpoints_df:
-            raise KeyError("Sort Index '{}' not in: {}".format(
+            raise KeyError("Sort Index '{}' not in: {}".format(
                 sort, list(checkpoints_df)))
         checkpoints_df = checkpoints_df.sort_values(by=sort)
 
@@ -276,7 +276,7 @@ def list_experiments(project_path,
     info_df = info_df[col_keys]
 
     if filter_op:
-        col, op, val = filter_op.split(' ')
+        col, op, val = filter_op.split(" ")
         col_type = info_df[col].dtype
         if is_numeric_dtype(col_type):
             val = float(val)
@@ -284,7 +284,7 @@ def list_experiments(project_path,
             val = str(val)
         # TODO(Andrew): add support for datetime and boolean
         else:
-            raise ValueError("Unsupported dtype for '{}': {}".format(
+            raise ValueError("Unsupported dtype for '{}': {}".format(
                 val, col_type))
         op = OPERATORS[op]
         filtered_index = op(info_df[col], val)
@@ -292,7 +292,7 @@ def list_experiments(project_path,
 
     if sort:
         if sort not in info_df:
-            raise KeyError("Sort Index '{}' not in: {}".format(
+            raise KeyError("Sort Index '{}' not in: {}".format(
                 sort, list(info_df)))
         info_df = info_df.sort_values(by=sort)
 
diff --git a/python/ray/tune/examples/bayesopt_example.py b/python/ray/tune/examples/bayesopt_example.py
index f5019e804..3b1e8342c 100644
--- a/python/ray/tune/examples/bayesopt_example.py
+++ b/python/ray/tune/examples/bayesopt_example.py
@@ -32,7 +32,7 @@ if __name__ == "__main__":
     args, _ = parser.parse_known_args()
     ray.init()
 
-    space = {'width': (0, 20), 'height': (-100, 100)}
+    space = {"width": (0, 20), "height": (-100, 100)}
 
     config = {
         "num_samples": 10 if args.smoke_test else 1000,
diff --git a/python/ray/tune/examples/genetic_example.py b/python/ray/tune/examples/genetic_example.py
index 09a292be7..12f80ebe7 100644
--- a/python/ray/tune/examples/genetic_example.py
+++ b/python/ray/tune/examples/genetic_example.py
@@ -17,7 +17,7 @@ def michalewicz_function(config, reporter):
     """f(x) = -sum{sin(xi) * [sin(i*xi^2 / pi)]^(2m)}"""
     import numpy as np
     x = np.array(
-        [config['x1'], config['x2'], config['x3'], config['x4'], config['x5']])
+        [config["x1"], config["x2"], config["x3"], config["x4"], config["x5"]])
     sin_x = np.sin(x)
     z = (np.arange(1, 6) / np.pi * (x * x))
     sin_z = np.power(np.sin(z), 20)  # let m = 20
@@ -37,11 +37,11 @@ if __name__ == "__main__":
     ray.init()
 
     space = SearchSpace({
-        ContinuousSpace('x1', 0, 4, 100),
-        ContinuousSpace('x2', -2, 2, 100),
-        ContinuousSpace('x3', 1, 5, 100),
-        ContinuousSpace('x4', -3, 3, 100),
-        DiscreteSpace('x5', [-1, 0, 1, 2, 3]),
+        ContinuousSpace("x1", 0, 4, 100),
+        ContinuousSpace("x2", -2, 2, 100),
+        ContinuousSpace("x3", 1, 5, 100),
+        ContinuousSpace("x4", -3, 3, 100),
+        DiscreteSpace("x5", [-1, 0, 1, 2, 3]),
     })
 
     config = {"stop": {"training_iteration": 100}}
diff --git a/python/ray/tune/examples/hyperopt_example.py b/python/ray/tune/examples/hyperopt_example.py
index 3264b4d84..870b13d59 100644
--- a/python/ray/tune/examples/hyperopt_example.py
+++ b/python/ray/tune/examples/hyperopt_example.py
@@ -36,9 +36,9 @@ if __name__ == "__main__":
     ray.init()
 
     space = {
-        'width': hp.uniform('width', 0, 20),
-        'height': hp.uniform('height', -100, 100),
-        'activation': hp.choice("activation", ["relu", "tanh"])
+        "width": hp.uniform("width", 0, 20),
+        "height": hp.uniform("height", -100, 100),
+        "activation": hp.choice("activation", ["relu", "tanh"])
     }
 
     current_best_params = [
diff --git a/python/ray/tune/examples/mnist_pytorch.py b/python/ray/tune/examples/mnist_pytorch.py
index 8e374c8c2..6a17ffa39 100644
--- a/python/ray/tune/examples/mnist_pytorch.py
+++ b/python/ray/tune/examples/mnist_pytorch.py
@@ -10,50 +10,50 @@ import torch.optim as optim
 from torchvision import datasets, transforms
 
 # Training settings
-parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
+parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
 parser.add_argument(
-    '--batch-size',
+    "--batch-size",
     type=int,
     default=64,
-    metavar='N',
-    help='input batch size for training (default: 64)')
+    metavar="N",
+    help="input batch size for training (default: 64)")
 parser.add_argument(
-    '--test-batch-size',
+    "--test-batch-size",
     type=int,
     default=1000,
-    metavar='N',
-    help='input batch size for testing (default: 1000)')
+    metavar="N",
+    help="input batch size for testing (default: 1000)")
 parser.add_argument(
-    '--epochs',
+    "--epochs",
     type=int,
     default=1,
-    metavar='N',
-    help='number of epochs to train (default: 1)')
+    metavar="N",
+    help="number of epochs to train (default: 1)")
 parser.add_argument(
-    '--lr',
+    "--lr",
     type=float,
     default=0.01,
-    metavar='LR',
-    help='learning rate (default: 0.01)')
+    metavar="LR",
+    help="learning rate (default: 0.01)")
 parser.add_argument(
-    '--momentum',
+    "--momentum",
     type=float,
     default=0.5,
-    metavar='M',
-    help='SGD momentum (default: 0.5)')
+    metavar="M",
+    help="SGD momentum (default: 0.5)")
 parser.add_argument(
-    '--no-cuda',
-    action='store_true',
+    "--no-cuda",
+    action="store_true",
     default=False,
-    help='disables CUDA training')
+    help="disables CUDA training")
 parser.add_argument(
-    '--seed',
+    "--seed",
     type=int,
     default=1,
-    metavar='S',
-    help='random seed (default: 1)')
+    metavar="S",
+    help="random seed (default: 1)")
 parser.add_argument(
-    '--smoke-test', action="store_true", help="Finish quickly for testing")
+    "--smoke-test", action="store_true", help="Finish quickly for testing")
 
 
 def train_mnist(args, config, reporter):
@@ -64,10 +64,10 @@ def train_mnist(args, config, reporter):
     if args.cuda:
         torch.cuda.manual_seed(args.seed)
 
-    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
+    kwargs = {"num_workers": 1, "pin_memory": True} if args.cuda else {}
     train_loader = torch.utils.data.DataLoader(
         datasets.MNIST(
-            '~/data',
+            "~/data",
             train=True,
             download=False,
             transform=transforms.Compose([
@@ -79,7 +79,7 @@ def train_mnist(args, config, reporter):
         **kwargs)
     test_loader = torch.utils.data.DataLoader(
         datasets.MNIST(
-            '~/data',
+            "~/data",
             train=False,
             transform=transforms.Compose([
                 transforms.ToTensor(),
@@ -135,7 +135,7 @@ def train_mnist(args, config, reporter):
                     data, target = data.cuda(), target.cuda()
                 output = model(data)
                 # sum up batch loss
-                test_loss += F.nll_loss(output, target, reduction='sum').item()
+                test_loss += F.nll_loss(output, target, reduction="sum").item()
                 # get the index of the max log-probability
                 pred = output.argmax(dim=1, keepdim=True)
                 correct += pred.eq(
@@ -151,7 +151,7 @@ def train_mnist(args, config, reporter):
 
 
 if __name__ == "__main__":
-    datasets.MNIST('~/data', train=True, download=True)
+    datasets.MNIST("~/data", train=True, download=True)
     args = parser.parse_args()
 
     import numpy as np
diff --git a/python/ray/tune/examples/mnist_pytorch_trainable.py b/python/ray/tune/examples/mnist_pytorch_trainable.py
index f644dfd46..8a8715dd5 100644
--- a/python/ray/tune/examples/mnist_pytorch_trainable.py
+++ b/python/ray/tune/examples/mnist_pytorch_trainable.py
@@ -13,50 +13,50 @@ from torchvision import datasets, transforms
 from ray.tune import Trainable
 
 # Training settings
-parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
+parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
 parser.add_argument(
-    '--batch-size',
+    "--batch-size",
     type=int,
     default=64,
-    metavar='N',
-    help='input batch size for training (default: 64)')
+    metavar="N",
+    help="input batch size for training (default: 64)")
 parser.add_argument(
-    '--test-batch-size',
+    "--test-batch-size",
     type=int,
     default=1000,
-    metavar='N',
-    help='input batch size for testing (default: 1000)')
+    metavar="N",
+    help="input batch size for testing (default: 1000)")
 parser.add_argument(
-    '--epochs',
+    "--epochs",
     type=int,
     default=1,
-    metavar='N',
-    help='number of epochs to train (default: 1)')
+    metavar="N",
+    help="number of epochs to train (default: 1)")
 parser.add_argument(
-    '--lr',
+    "--lr",
     type=float,
     default=0.01,
-    metavar='LR',
-    help='learning rate (default: 0.01)')
+    metavar="LR",
+    help="learning rate (default: 0.01)")
 parser.add_argument(
-    '--momentum',
+    "--momentum",
     type=float,
     default=0.5,
-    metavar='M',
-    help='SGD momentum (default: 0.5)')
+    metavar="M",
+    help="SGD momentum (default: 0.5)")
 parser.add_argument(
-    '--no-cuda',
-    action='store_true',
+    "--no-cuda",
+    action="store_true",
     default=False,
-    help='disables CUDA training')
+    help="disables CUDA training")
 parser.add_argument(
-    '--seed',
+    "--seed",
     type=int,
     default=1,
-    metavar='S',
-    help='random seed (default: 1)')
+    metavar="S",
+    help="random seed (default: 1)")
 parser.add_argument(
-    '--smoke-test', action="store_true", help="Finish quickly for testing")
+    "--smoke-test", action="store_true", help="Finish quickly for testing")
 
 
 class Net(nn.Module):
@@ -88,10 +88,10 @@ class TrainMNIST(Trainable):
         if args.cuda:
             torch.cuda.manual_seed(args.seed)
 
-        kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
+        kwargs = {"num_workers": 1, "pin_memory": True} if args.cuda else {}
         self.train_loader = torch.utils.data.DataLoader(
             datasets.MNIST(
-                '~/data',
+                "~/data",
                 train=True,
                 download=False,
                 transform=transforms.Compose([
@@ -103,7 +103,7 @@ class TrainMNIST(Trainable):
             **kwargs)
         self.test_loader = torch.utils.data.DataLoader(
             datasets.MNIST(
-                '~/data',
+                "~/data",
                 train=False,
                 transform=transforms.Compose([
                     transforms.ToTensor(),
@@ -142,7 +142,7 @@ class TrainMNIST(Trainable):
                     data, target = data.cuda(), target.cuda()
                 output = self.model(data)
                 # sum up batch loss
-                test_loss += F.nll_loss(output, target, reduction='sum').item()
+                test_loss += F.nll_loss(output, target, reduction="sum").item()
                 # get the index of the max log-probability
                 pred = output.argmax(dim=1, keepdim=True)
                 correct += pred.eq(
@@ -166,7 +166,7 @@ class TrainMNIST(Trainable):
 
 
 if __name__ == "__main__":
-    datasets.MNIST('~/data', train=True, download=True)
+    datasets.MNIST("~/data", train=True, download=True)
     args = parser.parse_args()
 
     import numpy as np
diff --git a/python/ray/tune/examples/sigopt_example.py b/python/ray/tune/examples/sigopt_example.py
index c84958875..fe2ef21f2 100644
--- a/python/ray/tune/examples/sigopt_example.py
+++ b/python/ray/tune/examples/sigopt_example.py
@@ -38,19 +38,19 @@ if __name__ == "__main__":
 
     space = [
         {
-            'name': 'width',
-            'type': 'int',
-            'bounds': {
-                'min': 0,
-                'max': 20
+            "name": "width",
+            "type": "int",
+            "bounds": {
+                "min": 0,
+                "max": 20
             },
         },
         {
-            'name': 'height',
-            'type': 'int',
-            'bounds': {
-                'min': -100,
-                'max': 100
+            "name": "height",
+            "type": "int",
+            "bounds": {
+                "min": -100,
+                "max": 100
             },
         },
     ]
diff --git a/python/ray/tune/examples/tune_mnist_async_hyperband.py b/python/ray/tune/examples/tune_mnist_async_hyperband.py
index b99e39f5d..1fc92614e 100755
--- a/python/ray/tune/examples/tune_mnist_async_hyperband.py
+++ b/python/ray/tune/examples/tune_mnist_async_hyperband.py
@@ -60,32 +60,32 @@ def deepnn(x):
     # Reshape to use within a convolutional neural net.
     # Last dimension is for "features" - there is only one here, since images
     # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
-    with tf.name_scope('reshape'):
+    with tf.name_scope("reshape"):
         x_image = tf.reshape(x, [-1, 28, 28, 1])
 
     # First convolutional layer - maps one grayscale image to 32 feature maps.
-    with tf.name_scope('conv1'):
+    with tf.name_scope("conv1"):
         W_conv1 = weight_variable([5, 5, 1, 32])
         b_conv1 = bias_variable([32])
         h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1)
 
     # Pooling layer - downsamples by 2X.
-    with tf.name_scope('pool1'):
+    with tf.name_scope("pool1"):
         h_pool1 = max_pool_2x2(h_conv1)
 
     # Second convolutional layer -- maps 32 feature maps to 64.
-    with tf.name_scope('conv2'):
+    with tf.name_scope("conv2"):
         W_conv2 = weight_variable([5, 5, 32, 64])
         b_conv2 = bias_variable([64])
         h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2)
 
     # Second pooling layer.
-    with tf.name_scope('pool2'):
+    with tf.name_scope("pool2"):
         h_pool2 = max_pool_2x2(h_conv2)
 
     # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
     # is down to 7x7x64 feature maps -- maps this to 1024 features.
-    with tf.name_scope('fc1'):
+    with tf.name_scope("fc1"):
         W_fc1 = weight_variable([7 * 7 * 64, 1024])
         b_fc1 = bias_variable([1024])
 
@@ -94,12 +94,12 @@ def deepnn(x):
 
     # Dropout - controls the complexity of the model, prevents co-adaptation of
     # features.
-    with tf.name_scope('dropout'):
+    with tf.name_scope("dropout"):
         keep_prob = tf.placeholder(tf.float32)
         h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
 
     # Map the 1024 features to 10 classes, one for each digit
-    with tf.name_scope('fc2'):
+    with tf.name_scope("fc2"):
         W_fc2 = weight_variable([1024, 10])
         b_fc2 = bias_variable([10])
 
@@ -109,13 +109,13 @@ def deepnn(x):
 
 def conv2d(x, W):
     """conv2d returns a 2d convolution layer with full stride."""
-    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
+    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")
 
 
 def max_pool_2x2(x):
     """max_pool_2x2 downsamples a feature map by 2X."""
     return tf.nn.max_pool(
-        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
+        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
 
 
 def weight_variable(shape):
@@ -148,21 +148,21 @@ def main(_):
     # Build the graph for the deep net
     y_conv, keep_prob = deepnn(x)
 
-    with tf.name_scope('loss'):
+    with tf.name_scope("loss"):
         cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
             labels=y_, logits=y_conv)
     cross_entropy = tf.reduce_mean(cross_entropy)
 
-    with tf.name_scope('adam_optimizer'):
+    with tf.name_scope("adam_optimizer"):
         train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
 
-    with tf.name_scope('accuracy'):
+    with tf.name_scope("accuracy"):
         correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
         correct_prediction = tf.cast(correct_prediction, tf.float32)
     accuracy = tf.reduce_mean(correct_prediction)
 
     graph_location = tempfile.mkdtemp()
-    print('Saving graph to: %s' % graph_location)
+    print("Saving graph to: %s" % graph_location)
     train_writer = tf.summary.FileWriter(graph_location)
     train_writer.add_graph(tf.get_default_graph())
 
@@ -182,14 +182,14 @@ def main(_):
                     status_reporter(
                         timesteps_total=i, mean_accuracy=train_accuracy)
 
-                print('step %d, training accuracy %g' % (i, train_accuracy))
+                print("step %d, training accuracy %g" % (i, train_accuracy))
             train_step.run(feed_dict={
                 x: batch[0],
                 y_: batch[1],
                 keep_prob: 0.5
             })
 
-        print('test accuracy %g' % accuracy.eval(feed_dict={
+        print("test accuracy %g" % accuracy.eval(feed_dict={
             x: mnist.test.images,
             y_: mnist.test.labels,
             keep_prob: 1.0
@@ -197,16 +197,16 @@ def main(_):
 
 
 # !!! Entrypoint for ray.tune !!!
-def train(config={'activation': 'relu'}, reporter=None):
+def train(config={"activation": "relu"}, reporter=None):
     global FLAGS, status_reporter, activation_fn
     status_reporter = reporter
-    activation_fn = getattr(tf.nn, config['activation'])
+    activation_fn = getattr(tf.nn, config["activation"])
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--data_dir',
+        "--data_dir",
         type=str,
-        default='/tmp/tensorflow/mnist/input_data',
-        help='Directory for storing input data')
+        default="/tmp/tensorflow/mnist/input_data",
+        help="Directory for storing input data")
     FLAGS, unparsed = parser.parse_known_args()
     tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
 
@@ -215,29 +215,29 @@ def train(config={'activation': 'relu'}, reporter=None):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--smoke-test', action='store_true', help='Finish quickly for testing')
+        "--smoke-test", action="store_true", help="Finish quickly for testing")
     args, _ = parser.parse_known_args()
 
     mnist_spec = {
-        'num_samples': 10,
-        'stop': {
-            'mean_accuracy': 0.99,
-            'timesteps_total': 600,
+        "num_samples": 10,
+        "stop": {
+            "mean_accuracy": 0.99,
+            "timesteps_total": 600,
         },
-        'config': {
-            'activation': grid_search(['relu', 'elu', 'tanh']),
+        "config": {
+            "activation": grid_search(["relu", "elu", "tanh"]),
         },
     }
 
     if args.smoke_test:
-        mnist_spec['stop']['training_iteration'] = 2
-        mnist_spec['num_samples'] = 1
+        mnist_spec["stop"]["training_iteration"] = 2
+        mnist_spec["num_samples"] = 1
 
     ray.init()
 
     from ray.tune.schedulers import AsyncHyperBandScheduler
     run(train,
-        name='tune_mnist_test',
+        name="tune_mnist_test",
         scheduler=AsyncHyperBandScheduler(
             time_attr="timesteps_total",
             reward_attr="mean_accuracy",
diff --git a/python/ray/tune/examples/tune_mnist_keras.py b/python/ray/tune/examples/tune_mnist_keras.py
index 44a9b7d63..485dd818d 100644
--- a/python/ray/tune/examples/tune_mnist_keras.py
+++ b/python/ray/tune/examples/tune_mnist_keras.py
@@ -50,7 +50,7 @@ def train_mnist(args, cfg, reporter):
     # the data, split between train and test sets
     (x_train, y_train), (x_test, y_test) = mnist.load_data()
 
-    if K.image_data_format() == 'channels_first':
+    if K.image_data_format() == "channels_first":
         x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
         x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
         input_shape = (1, img_rows, img_cols)
@@ -59,13 +59,13 @@ def train_mnist(args, cfg, reporter):
         x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
         input_shape = (img_rows, img_cols, 1)
 
-    x_train = x_train.astype('float32')
-    x_test = x_test.astype('float32')
+    x_train = x_train.astype("float32")
+    x_test = x_test.astype("float32")
     x_train /= 255
     x_test /= 255
-    print('x_train shape:', x_train.shape)
-    print(x_train.shape[0], 'train samples')
-    print(x_test.shape[0], 'test samples')
+    print("x_train shape:", x_train.shape)
+    print(x_train.shape[0], "train samples")
+    print(x_test.shape[0], "test samples")
 
     # convert class vectors to binary class matrices
     y_train = keras.utils.to_categorical(y_train, num_classes)
@@ -76,20 +76,20 @@ def train_mnist(args, cfg, reporter):
         Conv2D(
             32,
             kernel_size=(args.kernel1, args.kernel1),
-            activation='relu',
+            activation="relu",
             input_shape=input_shape))
-    model.add(Conv2D(64, (args.kernel2, args.kernel2), activation='relu'))
+    model.add(Conv2D(64, (args.kernel2, args.kernel2), activation="relu"))
     model.add(MaxPooling2D(pool_size=(args.poolsize, args.poolsize)))
     model.add(Dropout(args.dropout1))
     model.add(Flatten())
-    model.add(Dense(args.hidden, activation='relu'))
+    model.add(Dense(args.hidden, activation="relu"))
     model.add(Dropout(args.dropout2))
-    model.add(Dense(num_classes, activation='softmax'))
+    model.add(Dense(num_classes, activation="softmax"))
 
     model.compile(
         loss=keras.losses.categorical_crossentropy,
         optimizer=keras.optimizers.SGD(lr=args.lr, momentum=args.momentum),
-        metrics=['accuracy'])
+        metrics=["accuracy"])
 
     model.fit(
         x_train,
@@ -102,66 +102,66 @@ def train_mnist(args, cfg, reporter):
 
 
 def create_parser():
-    parser = argparse.ArgumentParser(description='Keras MNIST Example')
+    parser = argparse.ArgumentParser(description="Keras MNIST Example")
     parser.add_argument(
         "--smoke-test", action="store_true", help="Finish quickly for testing")
     parser.add_argument(
         "--use-gpu", action="store_true", help="Use GPU in training.")
     parser.add_argument(
-        '--jobs',
+        "--jobs",
         type=int,
         default=1,
-        help='number of jobs to run concurrently (default: 1)')
+        help="number of jobs to run concurrently (default: 1)")
     parser.add_argument(
-        '--threads',
+        "--threads",
         type=int,
         default=2,
-        help='threads used in operations (default: 2)')
+        help="threads used in operations (default: 2)")
     parser.add_argument(
-        '--steps',
+        "--steps",
         type=float,
         default=0.01,
-        metavar='LR',
-        help='learning rate (default: 0.01)')
+        metavar="LR",
+        help="learning rate (default: 0.01)")
     parser.add_argument(
-        '--lr',
+        "--lr",
         type=float,
         default=0.01,
-        metavar='LR',
-        help='learning rate (default: 0.01)')
+        metavar="LR",
+        help="learning rate (default: 0.01)")
     parser.add_argument(
-        '--momentum',
+        "--momentum",
         type=float,
         default=0.5,
-        metavar='M',
-        help='SGD momentum (default: 0.5)')
+        metavar="M",
+        help="SGD momentum (default: 0.5)")
     parser.add_argument(
-        '--kernel1',
+        "--kernel1",
         type=int,
         default=3,
-        help='Size of first kernel (default: 3)')
+        help="Size of first kernel (default: 3)")
     parser.add_argument(
-        '--kernel2',
+        "--kernel2",
         type=int,
         default=3,
-        help='Size of second kernel (default: 3)')
+        help="Size of second kernel (default: 3)")
     parser.add_argument(
-        '--poolsize', type=int, default=2, help='Size of Pooling (default: 2)')
+        "--poolsize", type=int, default=2, help="Size of Pooling (default: 2)")
     parser.add_argument(
-        '--dropout1',
+        "--dropout1",
         type=float,
         default=0.25,
-        help='Size of first kernel (default: 0.25)')
+        help="Size of first kernel (default: 0.25)")
     parser.add_argument(
-        '--hidden',
+        "--hidden",
         type=int,
         default=128,
-        help='Size of Hidden Layer (default: 128)')
+        help="Size of Hidden Layer (default: 128)")
     parser.add_argument(
-        '--dropout2',
+        "--dropout2",
         type=float,
         default=0.5,
-        help='Size of first kernel (default: 0.5)')
+        help="Size of first kernel (default: 0.5)")
     return parser
 
 
diff --git a/python/ray/tune/examples/tune_mnist_ray.py b/python/ray/tune/examples/tune_mnist_ray.py
index 74c61df36..3807cc0d2 100755
--- a/python/ray/tune/examples/tune_mnist_ray.py
+++ b/python/ray/tune/examples/tune_mnist_ray.py
@@ -62,32 +62,32 @@ def deepnn(x):
     # Reshape to use within a convolutional neural net.
     # Last dimension is for "features" - there is only one here, since images
     # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
-    with tf.name_scope('reshape'):
+    with tf.name_scope("reshape"):
         x_image = tf.reshape(x, [-1, 28, 28, 1])
 
     # First convolutional layer - maps one grayscale image to 32 feature maps.
-    with tf.name_scope('conv1'):
+    with tf.name_scope("conv1"):
         W_conv1 = weight_variable([5, 5, 1, 32])
         b_conv1 = bias_variable([32])
         h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1)
 
     # Pooling layer - downsamples by 2X.
-    with tf.name_scope('pool1'):
+    with tf.name_scope("pool1"):
         h_pool1 = max_pool_2x2(h_conv1)
 
     # Second convolutional layer -- maps 32 feature maps to 64.
-    with tf.name_scope('conv2'):
+    with tf.name_scope("conv2"):
         W_conv2 = weight_variable([5, 5, 32, 64])
         b_conv2 = bias_variable([64])
         h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2)
 
     # Second pooling layer.
-    with tf.name_scope('pool2'):
+    with tf.name_scope("pool2"):
         h_pool2 = max_pool_2x2(h_conv2)
 
     # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
     # is down to 7x7x64 feature maps -- maps this to 1024 features.
-    with tf.name_scope('fc1'):
+    with tf.name_scope("fc1"):
         W_fc1 = weight_variable([7 * 7 * 64, 1024])
         b_fc1 = bias_variable([1024])
 
@@ -96,12 +96,12 @@ def deepnn(x):
 
     # Dropout - controls the complexity of the model, prevents co-adaptation of
     # features.
-    with tf.name_scope('dropout'):
+    with tf.name_scope("dropout"):
         keep_prob = tf.placeholder(tf.float32)
         h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
 
     # Map the 1024 features to 10 classes, one for each digit
-    with tf.name_scope('fc2'):
+    with tf.name_scope("fc2"):
         W_fc2 = weight_variable([1024, 10])
         b_fc2 = bias_variable([10])
 
@@ -111,13 +111,13 @@ def deepnn(x):
 
 def conv2d(x, W):
     """conv2d returns a 2d convolution layer with full stride."""
-    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
+    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")
 
 
 def max_pool_2x2(x):
     """max_pool_2x2 downsamples a feature map by 2X."""
     return tf.nn.max_pool(
-        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
+        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
 
 
 def weight_variable(shape):
@@ -150,21 +150,21 @@ def main(_):
     # Build the graph for the deep net
     y_conv, keep_prob = deepnn(x)
 
-    with tf.name_scope('loss'):
+    with tf.name_scope("loss"):
         cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
             labels=y_, logits=y_conv)
     cross_entropy = tf.reduce_mean(cross_entropy)
 
-    with tf.name_scope('adam_optimizer'):
+    with tf.name_scope("adam_optimizer"):
         train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
 
-    with tf.name_scope('accuracy'):
+    with tf.name_scope("accuracy"):
         correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
         correct_prediction = tf.cast(correct_prediction, tf.float32)
     accuracy = tf.reduce_mean(correct_prediction)
 
     graph_location = tempfile.mkdtemp()
-    print('Saving graph to: %s' % graph_location)
+    print("Saving graph to: %s" % graph_location)
     train_writer = tf.summary.FileWriter(graph_location)
     train_writer.add_graph(tf.get_default_graph())
 
@@ -184,14 +184,14 @@ def main(_):
                     status_reporter(
                         timesteps_total=i, mean_accuracy=train_accuracy)
 
-                print('step %d, training accuracy %g' % (i, train_accuracy))
+                print("step %d, training accuracy %g" % (i, train_accuracy))
             train_step.run(feed_dict={
                 x: batch[0],
                 y_: batch[1],
                 keep_prob: 0.5
             })
 
-        print('test accuracy %g' % accuracy.eval(feed_dict={
+        print("test accuracy %g" % accuracy.eval(feed_dict={
             x: mnist.test.images,
             y_: mnist.test.labels,
             keep_prob: 1.0
@@ -199,16 +199,16 @@ def main(_):
 
 
 # !!! Entrypoint for ray.tune !!!
-def train(config={'activation': 'relu'}, reporter=None):
+def train(config={"activation": "relu"}, reporter=None):
     global FLAGS, status_reporter, activation_fn
     status_reporter = reporter
-    activation_fn = getattr(tf.nn, config['activation'])
+    activation_fn = getattr(tf.nn, config["activation"])
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--data_dir',
+        "--data_dir",
         type=str,
-        default='/tmp/tensorflow/mnist/input_data',
-        help='Directory for storing input data')
+        default="/tmp/tensorflow/mnist/input_data",
+        help="Directory for storing input data")
     FLAGS, unparsed = parser.parse_known_args()
     tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
 
@@ -217,25 +217,25 @@ def train(config={'activation': 'relu'}, reporter=None):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--smoke-test', action='store_true', help='Finish quickly for testing')
+        "--smoke-test", action="store_true", help="Finish quickly for testing")
     args, _ = parser.parse_known_args()
 
-    register_trainable('train_mnist', train)
+    register_trainable("train_mnist", train)
     mnist_spec = {
-        'stop': {
-            'mean_accuracy': 0.99,
-            'time_total_s': 600,
+        "stop": {
+            "mean_accuracy": 0.99,
+            "time_total_s": 600,
         },
-        'config': {
-            'activation': grid_search(['relu', 'elu', 'tanh']),
+        "config": {
+            "activation": grid_search(["relu", "elu", "tanh"]),
             # You can pass any serializable object as well
-            'foo': grid_search([np.array([1, 2]),
+            "foo": grid_search([np.array([1, 2]),
                                 np.array([2, 3])]),
         },
     }
 
     if args.smoke_test:
-        mnist_spec['stop']['training_iteration'] = 2
+        mnist_spec["stop"]["training_iteration"] = 2
 
     ray.init()
-    tune.run('train_mnist', name='tune_mnist_test', **mnist_spec)
+    tune.run("train_mnist", name="tune_mnist_test", **mnist_spec)
diff --git a/python/ray/tune/examples/tune_mnist_ray_hyperband.py b/python/ray/tune/examples/tune_mnist_ray_hyperband.py
index 69ebcdf87..9401fb38c 100755
--- a/python/ray/tune/examples/tune_mnist_ray_hyperband.py
+++ b/python/ray/tune/examples/tune_mnist_ray_hyperband.py
@@ -55,32 +55,32 @@ def setupCNN(x):
     # Reshape to use within a convolutional neural net.
     # Last dimension is for "features" - there is only one here, since images
     # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc.
-    with tf.name_scope('reshape'):
+    with tf.name_scope("reshape"):
         x_image = tf.reshape(x, [-1, 28, 28, 1])
 
     # First convolutional layer - maps one grayscale image to 32 feature maps.
-    with tf.name_scope('conv1'):
+    with tf.name_scope("conv1"):
         W_conv1 = weight_variable([5, 5, 1, 32])
         b_conv1 = bias_variable([32])
         h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1)
 
     # Pooling layer - downsamples by 2X.
-    with tf.name_scope('pool1'):
+    with tf.name_scope("pool1"):
         h_pool1 = max_pool_2x2(h_conv1)
 
     # Second convolutional layer -- maps 32 feature maps to 64.
-    with tf.name_scope('conv2'):
+    with tf.name_scope("conv2"):
         W_conv2 = weight_variable([5, 5, 32, 64])
         b_conv2 = bias_variable([64])
         h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2)
 
     # Second pooling layer.
-    with tf.name_scope('pool2'):
+    with tf.name_scope("pool2"):
         h_pool2 = max_pool_2x2(h_conv2)
 
     # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
     # is down to 7x7x64 feature maps -- maps this to 1024 features.
-    with tf.name_scope('fc1'):
+    with tf.name_scope("fc1"):
         W_fc1 = weight_variable([7 * 7 * 64, 1024])
         b_fc1 = bias_variable([1024])
 
@@ -89,12 +89,12 @@ def setupCNN(x):
 
     # Dropout - controls the complexity of the model, prevents co-adaptation of
     # features.
-    with tf.name_scope('dropout'):
+    with tf.name_scope("dropout"):
         keep_prob = tf.placeholder(tf.float32)
         h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
 
     # Map the 1024 features to 10 classes, one for each digit
-    with tf.name_scope('fc2'):
+    with tf.name_scope("fc2"):
         W_fc2 = weight_variable([1024, 10])
         b_fc2 = bias_variable([10])
 
@@ -104,13 +104,13 @@ def setupCNN(x):
 
 def conv2d(x, W):
     """conv2d returns a 2d convolution layer with full stride."""
-    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
+    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")
 
 
 def max_pool_2x2(x):
     """max_pool_2x2 downsamples a feature map by 2X."""
     return tf.nn.max_pool(
-        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
+        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
 
 
 def weight_variable(shape):
@@ -148,23 +148,23 @@ class TrainMNIST(Trainable):
         self.x = tf.placeholder(tf.float32, [None, 784])
         self.y_ = tf.placeholder(tf.float32, [None, 10])
 
-        activation_fn = getattr(tf.nn, config['activation'])
+        activation_fn = getattr(tf.nn, config["activation"])
 
         # Build the graph for the deep net
         y_conv, self.keep_prob = setupCNN(self.x)
 
-        with tf.name_scope('loss'):
+        with tf.name_scope("loss"):
             cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                 labels=self.y_, logits=y_conv)
         cross_entropy = tf.reduce_mean(cross_entropy)
 
-        with tf.name_scope('adam_optimizer'):
+        with tf.name_scope("adam_optimizer"):
             train_step = tf.train.AdamOptimizer(
-                config['learning_rate']).minimize(cross_entropy)
+                config["learning_rate"]).minimize(cross_entropy)
 
         self.train_step = train_step
 
-        with tf.name_scope('accuracy'):
+        with tf.name_scope("accuracy"):
             correct_prediction = tf.equal(
                 tf.argmax(y_conv, 1), tf.argmax(self.y_, 1))
             correct_prediction = tf.cast(correct_prediction, tf.float32)
@@ -212,24 +212,24 @@ class TrainMNIST(Trainable):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '--smoke-test', action='store_true', help='Finish quickly for testing')
+        "--smoke-test", action="store_true", help="Finish quickly for testing")
     args, _ = parser.parse_known_args()
     mnist_spec = {
-        'stop': {
-            'mean_accuracy': 0.99,
-            'time_total_s': 600,
+        "stop": {
+            "mean_accuracy": 0.99,
+            "time_total_s": 600,
         },
-        'config': {
-            'learning_rate': sample_from(
+        "config": {
+            "learning_rate": sample_from(
                 lambda spec: 10**np.random.uniform(-5, -3)),
-            'activation': grid_search(['relu', 'elu', 'tanh']),
+            "activation": grid_search(["relu", "elu", "tanh"]),
         },
         "num_samples": 10,
     }
 
     if args.smoke_test:
-        mnist_spec['stop']['training_iteration'] = 20
-        mnist_spec['num_samples'] = 2
+        mnist_spec["stop"]["training_iteration"] = 20
+        mnist_spec["num_samples"] = 2
 
     ray.init()
     hyperband = HyperBandScheduler(
@@ -237,6 +237,6 @@ if __name__ == "__main__":
 
     tune.run(
         TrainMNIST,
-        name='mnist_hyperband_test',
+        name="mnist_hyperband_test",
         scheduler=hyperband,
         **mnist_spec)
diff --git a/python/ray/tune/log_sync.py b/python/ray/tune/log_sync.py
index 9a6e7ea4d..a2662af6d 100644
--- a/python/ray/tune/log_sync.py
+++ b/python/ray/tune/log_sync.py
@@ -148,8 +148,8 @@ class _LogSyncer(object):
         if not distutils.spawn.find_executable("rsync"):
             logger.error("Log sync requires rsync to be installed.")
             return
-        source = '{}/'.format(self.local_dir)
-        target = '{}@{}:{}/'.format(ssh_user, self.worker_ip, self.local_dir)
+        source = "{}/".format(self.local_dir)
+        target = "{}@{}:{}/".format(ssh_user, self.worker_ip, self.local_dir)
         final_cmd = (("""rsync -savz -e "ssh -i {} -o ConnectTimeout=120s """
                       """-o StrictHostKeyChecking=no" {} {}""").format(
                           quote(ssh_key), quote(source), quote(target)))
@@ -180,9 +180,9 @@ class _LogSyncer(object):
             if not distutils.spawn.find_executable("rsync"):
                 logger.error("Log sync requires rsync to be installed.")
                 return
-            source = '{}@{}:{}/'.format(ssh_user, self.worker_ip,
+            source = "{}@{}:{}/".format(ssh_user, self.worker_ip,
                                         self.local_dir)
-            target = '{}/'.format(self.local_dir)
+            target = "{}/".format(self.local_dir)
             worker_to_local_sync_cmd = ((
                 """rsync -savz -e "ssh -i {} -o ConnectTimeout=120s """
                 """-o StrictHostKeyChecking=no" {} {}""").format(
diff --git a/python/ray/tune/ray_trial_executor.py b/python/ray/tune/ray_trial_executor.py
index f0f32e6fb..7f10a9ee1 100644
--- a/python/ray/tune/ray_trial_executor.py
+++ b/python/ray/tune/ray_trial_executor.py
@@ -166,7 +166,7 @@ class RayTrialExecutor(TrialExecutor):
 
         try:
             trial.write_error_log(error_msg)
-            if hasattr(trial, 'runner') and trial.runner:
+            if hasattr(trial, "runner") and trial.runner:
                 if (not error and self._reuse_actors
                         and self._cached_actor is None):
                     logger.debug("Reusing actor for {}".format(trial.runner))
diff --git a/python/ray/tune/schedulers/async_hyperband.py b/python/ray/tune/schedulers/async_hyperband.py
index 156b7d22d..55bf169f0 100644
--- a/python/ray/tune/schedulers/async_hyperband.py
+++ b/python/ray/tune/schedulers/async_hyperband.py
@@ -39,8 +39,8 @@ class AsyncHyperBandScheduler(FIFOScheduler):
     """
 
     def __init__(self,
-                 time_attr='training_iteration',
-                 reward_attr='episode_reward_mean',
+                 time_attr="training_iteration",
+                 reward_attr="episode_reward_mean",
                  max_t=100,
                  grace_period=10,
                  reduction_factor=3,
diff --git a/python/ray/tune/schedulers/hyperband.py b/python/ray/tune/schedulers/hyperband.py
index 71c69b306..c9bdde8ab 100644
--- a/python/ray/tune/schedulers/hyperband.py
+++ b/python/ray/tune/schedulers/hyperband.py
@@ -73,8 +73,8 @@ class HyperBandScheduler(FIFOScheduler):
     """
 
     def __init__(self,
-                 time_attr='training_iteration',
-                 reward_attr='episode_reward_mean',
+                 time_attr="training_iteration",
+                 reward_attr="episode_reward_mean",
                  max_t=81):
         assert max_t > 0, "Max (time_attr) not valid!"
         FIFOScheduler.__init__(self)
diff --git a/python/ray/tune/schedulers/median_stopping_rule.py b/python/ray/tune/schedulers/median_stopping_rule.py
index 33ff0d513..e554a69f0 100644
--- a/python/ray/tune/schedulers/median_stopping_rule.py
+++ b/python/ray/tune/schedulers/median_stopping_rule.py
@@ -104,9 +104,9 @@ class MedianStoppingRule(FIFOScheduler):
         if len(scores) >= self._min_samples_required:
             return np.median(scores)
         else:
-            return float('-inf')
+            return float("-inf")
 
-    def _running_result(self, trial, t_max=float('inf')):
+    def _running_result(self, trial, t_max=float("inf")):
         results = self._results[trial]
         # TODO(ekl) we could do interpolation to be more precise, but for now
         # assume len(results) is large and the time diffs are roughly equal
diff --git a/python/ray/tune/scripts.py b/python/ray/tune/scripts.py
index 64a5b79c6..8eb166c84 100644
--- a/python/ray/tune/scripts.py
+++ b/python/ray/tune/scripts.py
@@ -43,9 +43,9 @@ def list_trials(experiment_path, sort, output, filter_op, columns,
                 result_columns):
     """Lists trials in the directory subtree starting at the given path."""
     if columns:
-        columns = columns.split(',')
+        columns = columns.split(",")
     if result_columns:
-        result_columns = result_columns.split(',')
+        result_columns = result_columns.split(",")
     commands.list_trials(experiment_path, sort, output, filter_op, columns,
                          result_columns)
 
@@ -75,7 +75,7 @@ def list_trials(experiment_path, sort, output, filter_op, columns,
 def list_experiments(project_path, sort, output, filter_op, columns):
     """Lists experiments in the directory subtree."""
     if columns:
-        columns = columns.split(',')
+        columns = columns.split(",")
     commands.list_experiments(project_path, sort, output, filter_op, columns)
 
 
diff --git a/python/ray/tune/suggest/hyperopt.py b/python/ray/tune/suggest/hyperopt.py
index 91e0af7de..533e320c0 100644
--- a/python/ray/tune/suggest/hyperopt.py
+++ b/python/ray/tune/suggest/hyperopt.py
@@ -120,8 +120,8 @@ class HyperOptSearch(SuggestionAlgorithm):
         if ho_trial is None:
             return
         now = hpo.utils.coarse_utcnow()
-        ho_trial['book_time'] = now
-        ho_trial['refresh_time'] = now
+        ho_trial["book_time"] = now
+        ho_trial["refresh_time"] = now
 
     def on_trial_complete(self,
                           trial_id,
@@ -136,17 +136,17 @@ class HyperOptSearch(SuggestionAlgorithm):
         ho_trial = self._get_hyperopt_trial(trial_id)
         if ho_trial is None:
             return
-        ho_trial['refresh_time'] = hpo.utils.coarse_utcnow()
+        ho_trial["refresh_time"] = hpo.utils.coarse_utcnow()
         if error:
-            ho_trial['state'] = hpo.base.JOB_STATE_ERROR
-            ho_trial['misc']['error'] = (str(TuneError), "Tune Error")
+            ho_trial["state"] = hpo.base.JOB_STATE_ERROR
+            ho_trial["misc"]["error"] = (str(TuneError), "Tune Error")
         elif early_terminated:
-            ho_trial['state'] = hpo.base.JOB_STATE_ERROR
-            ho_trial['misc']['error'] = (str(TuneError), "Tune Removed")
+            ho_trial["state"] = hpo.base.JOB_STATE_ERROR
+            ho_trial["misc"]["error"] = (str(TuneError), "Tune Removed")
         else:
-            ho_trial['state'] = hpo.base.JOB_STATE_DONE
+            ho_trial["state"] = hpo.base.JOB_STATE_DONE
             hp_result = self._to_hyperopt_result(result)
-            ho_trial['result'] = hp_result
+            ho_trial["result"] = hp_result
         self._hpopt_trials.refresh()
         del self._live_trial_mapping[trial_id]
 
diff --git a/python/ray/tune/suggest/sigopt.py b/python/ray/tune/suggest/sigopt.py
index 4c1e4c9ee..72d2d0afc 100644
--- a/python/ray/tune/suggest/sigopt.py
+++ b/python/ray/tune/suggest/sigopt.py
@@ -67,7 +67,7 @@ class SigOptSearch(SuggestionAlgorithm):
         self._live_trial_mapping = {}
 
         # Create a connection with SigOpt API, requires API key
-        self.conn = sgo.Connection(client_token=os.environ['SIGOPT_KEY'])
+        self.conn = sgo.Connection(client_token=os.environ["SIGOPT_KEY"])
 
         self.experiment = self.conn.experiments().create(
             name=name,
diff --git a/python/ray/tune/tests/test_automl_searcher.py b/python/ray/tune/tests/test_automl_searcher.py
index 86528834e..1dd153558 100644
--- a/python/ray/tune/tests/test_automl_searcher.py
+++ b/python/ray/tune/tests/test_automl_searcher.py
@@ -17,26 +17,26 @@ class AutoMLSearcherTest(unittest.TestCase):
         register_trainable("f1", dummy_train)
 
     def testExpandSearchSpace(self):
-        exp = {"test-exp": {"run": "f1", "config": {"a": {'d': 'dummy'}}}}
+        exp = {"test-exp": {"run": "f1", "config": {"a": {"d": "dummy"}}}}
         space = SearchSpace([
-            DiscreteSpace('a.b.c', [1, 2]),
-            DiscreteSpace('a.d', ['a', 'b']),
+            DiscreteSpace("a.b.c", [1, 2]),
+            DiscreteSpace("a.d", ["a", "b"]),
         ])
-        searcher = GridSearch(space, 'reward')
+        searcher = GridSearch(space, "reward")
         searcher.add_configurations(exp)
         trials = searcher.next_trials()
 
         self.assertEqual(len(trials), 4)
-        self.assertTrue(trials[0].config['a']['b']['c'] in [1, 2])
-        self.assertTrue(trials[1].config['a']['d'] in ['a', 'b'])
+        self.assertTrue(trials[0].config["a"]["b"]["c"] in [1, 2])
+        self.assertTrue(trials[1].config["a"]["d"] in ["a", "b"])
 
     def testSearchRound(self):
-        exp = {"test-exp": {"run": "f1", "config": {"a": {'d': 'dummy'}}}}
+        exp = {"test-exp": {"run": "f1", "config": {"a": {"d": "dummy"}}}}
         space = SearchSpace([
-            DiscreteSpace('a.b.c', [1, 2]),
-            DiscreteSpace('a.d', ['a', 'b']),
+            DiscreteSpace("a.b.c", [1, 2]),
+            DiscreteSpace("a.d", ["a", "b"]),
         ])
-        searcher = GridSearch(space, 'reward')
+        searcher = GridSearch(space, "reward")
         searcher.add_configurations(exp)
         trials = searcher.next_trials()
 
@@ -48,12 +48,12 @@ class AutoMLSearcherTest(unittest.TestCase):
         self.assertTrue(searcher.is_finished())
 
     def testBestTrial(self):
-        exp = {"test-exp": {"run": "f1", "config": {"a": {'d': 'dummy'}}}}
+        exp = {"test-exp": {"run": "f1", "config": {"a": {"d": "dummy"}}}}
         space = SearchSpace([
-            DiscreteSpace('a.b.c', [1, 2]),
-            DiscreteSpace('a.d', ['a', 'b']),
+            DiscreteSpace("a.b.c", [1, 2]),
+            DiscreteSpace("a.d", ["a", "b"]),
         ])
-        searcher = GridSearch(space, 'reward')
+        searcher = GridSearch(space, "reward")
         searcher.add_configurations(exp)
         trials = searcher.next_trials()
 
@@ -66,4 +66,4 @@ class AutoMLSearcherTest(unittest.TestCase):
 
         best_trial = searcher.get_best_trial()
         self.assertEqual(best_trial, trials[-1])
-        self.assertEqual(best_trial.best_result['reward'], 3 + 10 - 1)
+        self.assertEqual(best_trial.best_result["reward"], 3 + 10 - 1)
diff --git a/python/ray/tune/tests/test_commands.py b/python/ray/tune/tests/test_commands.py
index bd075dba7..2f3e1e214 100644
--- a/python/ray/tune/tests/test_commands.py
+++ b/python/ray/tune/tests/test_commands.py
@@ -90,7 +90,7 @@ def test_ls(start_ray, tmpdir):
     assert sum("TERMINATED" in line for line in lines) == num_samples
     columns = ["status", "episode_reward_mean", "training_iteration"]
     assert all(col in lines[1] for col in columns)
-    assert lines[1].count('|') == 4
+    assert lines[1].count("|") == 4
 
     with Capturing() as output:
         commands.list_trials(
@@ -123,7 +123,7 @@ def test_lsx(start_ray, tmpdir):
     lines = output.captured
     assert sum("1" in line for line in lines) >= num_experiments
     assert "total_trials" in lines[1]
-    assert lines[1].count('|') == 2
+    assert lines[1].count("|") == 2
 
     with Capturing() as output:
         commands.list_experiments(
diff --git a/python/ray/tune/tests/test_dependency.py b/python/ray/tune/tests/test_dependency.py
index 446d9271d..6aa9d066c 100644
--- a/python/ray/tune/tests/test_dependency.py
+++ b/python/ray/tune/tests/test_dependency.py
@@ -25,4 +25,4 @@ if __name__ == "__main__":
             }
         }
     })
-    assert 'ray.rllib' not in sys.modules, "RLlib should not be imported"
+    assert "ray.rllib" not in sys.modules, "RLlib should not be imported"
diff --git a/python/ray/tune/tests/test_trial_runner.py b/python/ray/tune/tests/test_trial_runner.py
index 32e5253f2..19930559c 100644
--- a/python/ray/tune/tests/test_trial_runner.py
+++ b/python/ray/tune/tests/test_trial_runner.py
@@ -331,21 +331,21 @@ class TrainableFunctionApiTest(unittest.TestCase):
         self.assertFalse(trial.upload_dir)
 
     def testLogdirStartingWithTilde(self):
-        local_dir = '~/ray_results/local_dir'
+        local_dir = "~/ray_results/local_dir"
 
         def train(config, reporter):
             cwd = os.getcwd()
             assert cwd.startswith(os.path.expanduser(local_dir)), cwd
-            assert not cwd.startswith('~'), cwd
+            assert not cwd.startswith("~"), cwd
             reporter(timesteps_total=1)
 
-        register_trainable('f1', train)
+        register_trainable("f1", train)
         run_experiments({
-            'foo': {
-                'run': 'f1',
-                'local_dir': local_dir,
-                'config': {
-                    'a': 'b'
+            "foo": {
+                "run": "f1",
+                "local_dir": local_dir,
+                "config": {
+                    "a": "b"
                 },
             }
         })
@@ -501,7 +501,7 @@ class TrainableFunctionApiTest(unittest.TestCase):
     def testReportInfinity(self):
         def train(config, reporter):
             for i in range(100):
-                reporter(mean_accuracy=float('inf'))
+                reporter(mean_accuracy=float("inf"))
 
         register_trainable("f1", train)
         [trial] = run_experiments({
@@ -510,7 +510,7 @@ class TrainableFunctionApiTest(unittest.TestCase):
             }
         })
         self.assertEqual(trial.status, Trial.TERMINATED)
-        self.assertEqual(trial.last_result['mean_accuracy'], float('inf'))
+        self.assertEqual(trial.last_result["mean_accuracy"], float("inf"))
 
     def testReportTimeStep(self):
         # Test that no timestep count are logged if never the Trainable never
@@ -1532,7 +1532,7 @@ class TrialRunnerTest(unittest.TestCase):
         runner.add_trial(Trial("__fake", **kwargs))
         trials = runner.get_trials()
 
-        with patch('ray.global_state.cluster_resources') as resource_mock:
+        with patch("ray.global_state.cluster_resources") as resource_mock:
             resource_mock.return_value = {"CPU": 1, "GPU": 1}
             runner.step()
             self.assertEqual(trials[0].status, Trial.RUNNING)
@@ -1717,12 +1717,12 @@ class TrialRunnerTest(unittest.TestCase):
 
         def on_step_begin(self):
             self._update_avail_resources()
-            cnt = self.pre_step if hasattr(self, 'pre_step') else 0
-            setattr(self, 'pre_step', cnt + 1)
+            cnt = self.pre_step if hasattr(self, "pre_step") else 0
+            setattr(self, "pre_step", cnt + 1)
 
         def on_step_end(self):
-            cnt = self.pre_step if hasattr(self, 'post_step') else 0
-            setattr(self, 'post_step', 1 + cnt)
+            cnt = self.pre_step if hasattr(self, "post_step") else 0
+            setattr(self, "post_step", 1 + cnt)
 
         import types
         runner.trial_executor.on_step_begin = types.MethodType(
diff --git a/python/ray/tune/tests/test_trial_scheduler.py b/python/ray/tune/tests/test_trial_scheduler.py
index 0f68e39f9..aaa0dc49c 100644
--- a/python/ray/tune/tests/test_trial_scheduler.py
+++ b/python/ray/tune/tests/test_trial_scheduler.py
@@ -135,8 +135,8 @@ class EarlyStoppingSuite(unittest.TestCase):
         rule = MedianStoppingRule(
             grace_period=0,
             min_samples_required=1,
-            time_attr='training_iteration',
-            reward_attr='neg_mean_loss')
+            time_attr="training_iteration",
+            reward_attr="neg_mean_loss")
         t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
         t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
         for i in range(10):
@@ -495,7 +495,7 @@ class HyperbandSuite(unittest.TestCase):
             return dict(time_total_s=t, neg_mean_loss=rew)
 
         sched = HyperBandScheduler(
-            time_attr='time_total_s', reward_attr='neg_mean_loss')
+            time_attr="time_total_s", reward_attr="neg_mean_loss")
         stats = self.default_statistics()
 
         for i in range(stats["max_trials"]):
@@ -987,8 +987,8 @@ class AsyncHyperBandSuite(unittest.TestCase):
 
         scheduler = AsyncHyperBandScheduler(
             grace_period=1,
-            time_attr='training_iteration',
-            reward_attr='neg_mean_loss',
+            time_attr="training_iteration",
+            reward_attr="neg_mean_loss",
             brackets=1)
         t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
         t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
diff --git a/python/ray/tune/tests/test_tune_server.py b/python/ray/tune/tests/test_tune_server.py
index 7d9143544..7df7a698b 100644
--- a/python/ray/tune/tests/test_tune_server.py
+++ b/python/ray/tune/tests/test_tune_server.py
@@ -69,8 +69,8 @@ class TuneServerSuite(unittest.TestCase):
                 "training_iteration": 3
             },
             "resources_per_trial": {
-                'cpu': 1,
-                'gpu': 1
+                "cpu": 1,
+                "gpu": 1
             },
         }
         client.add_trial("test", spec)
@@ -134,8 +134,8 @@ class TuneServerSuite(unittest.TestCase):
         for i in range(2):
             runner.step()
         stdout = subprocess.check_output(
-            'curl "http://{}:{}/trials"'.format(client.server_address,
-                                                client.server_port),
+            "curl \"http://{}:{}/trials\"".format(client.server_address,
+                                                  client.server_port),
             shell=True)
         self.assertNotEqual(stdout, None)
         curl_trials = json.loads(stdout.decode())["trials"]
diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py
index 83d7fd05e..ad61e8d4b 100644
--- a/python/ray/tune/trial.py
+++ b/python/ray/tune/trial.py
@@ -437,39 +437,39 @@ class Trial(object):
 
         def location_string(hostname, pid):
             if hostname == os.uname()[1]:
-                return 'pid={}'.format(pid)
+                return "pid={}".format(pid)
             else:
-                return '{} pid={}'.format(hostname, pid)
+                return "{} pid={}".format(hostname, pid)
 
         pieces = [
-            '{}'.format(self._status_string()), '[{}]'.format(
-                self.resources.summary_string()), '[{}]'.format(
+            "{}".format(self._status_string()), "[{}]".format(
+                self.resources.summary_string()), "[{}]".format(
                     location_string(
                         self.last_result.get(HOSTNAME),
-                        self.last_result.get(PID))), '{} s'.format(
+                        self.last_result.get(PID))), "{} s".format(
                             int(self.last_result.get(TIME_TOTAL_S)))
         ]
 
         if self.last_result.get(TRAINING_ITERATION) is not None:
-            pieces.append('{} iter'.format(
+            pieces.append("{} iter".format(
                 self.last_result[TRAINING_ITERATION]))
 
         if self.last_result.get(TIMESTEPS_TOTAL) is not None:
-            pieces.append('{} ts'.format(self.last_result[TIMESTEPS_TOTAL]))
+            pieces.append("{} ts".format(self.last_result[TIMESTEPS_TOTAL]))
 
         if self.last_result.get(EPISODE_REWARD_MEAN) is not None:
-            pieces.append('{} rew'.format(
-                format(self.last_result[EPISODE_REWARD_MEAN], '.3g')))
+            pieces.append("{} rew".format(
+                format(self.last_result[EPISODE_REWARD_MEAN], ".3g")))
 
         if self.last_result.get(MEAN_LOSS) is not None:
-            pieces.append('{} loss'.format(
-                format(self.last_result[MEAN_LOSS], '.3g')))
+            pieces.append("{} loss".format(
+                format(self.last_result[MEAN_LOSS], ".3g")))
 
         if self.last_result.get(MEAN_ACCURACY) is not None:
-            pieces.append('{} acc'.format(
-                format(self.last_result[MEAN_ACCURACY], '.3g')))
+            pieces.append("{} acc".format(
+                format(self.last_result[MEAN_ACCURACY], ".3g")))
 
-        return ', '.join(pieces)
+        return ", ".join(pieces)
 
     def _status_string(self):
         return "{}{}".format(
diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py
index 4348f7e0d..7382065ab 100644
--- a/python/ray/tune/trial_runner.py
+++ b/python/ray/tune/trial_runner.py
@@ -127,7 +127,7 @@ class TrialRunner(object):
         # For debugging, it may be useful to halt trials after some time has
         # elapsed. TODO(ekl) consider exposing this in the API.
         self._global_time_limit = float(
-            os.environ.get("TRIALRUNNER_WALLTIME_LIMIT", float('inf')))
+            os.environ.get("TRIALRUNNER_WALLTIME_LIMIT", float("inf")))
         self._total_time = 0
         self._iteration = 0
         self._verbose = verbose
diff --git a/python/ray/tune/web_server.py b/python/ray/tune/web_server.py
index 4c27f92fd..022ccf12b 100644
--- a/python/ray/tune/web_server.py
+++ b/python/ray/tune/web_server.py
@@ -110,7 +110,7 @@ def RunnerHandler(runner):
                 headers (list[tuples]): Standard HTTP response headers
             """
             if headers is None:
-                headers = [('Content-type', 'application/json')]
+                headers = [("Content-type", "application/json")]
 
             self.send_response(response_code)
             for key, value in headers:
@@ -170,14 +170,14 @@ def RunnerHandler(runner):
             """HTTP POST handler method."""
             response_code = 201
 
-            content_len = int(self.headers.get('Content-Length'), 0)
+            content_len = int(self.headers.get("Content-Length"), 0)
             raw_body = self.rfile.read(content_len)
             parsed_input = json.loads(raw_body.decode())
             resource = self._add_trials(parsed_input["name"],
                                         parsed_input["spec"])
 
-            headers = [('Content-type', 'application/json'), ('Location',
-                                                              '/trials/')]
+            headers = [("Content-type", "application/json"), ("Location",
+                                                              "/trials/")]
             self._do_header(response_code=response_code, headers=headers)
             self.wfile.write(json.dumps(resource).encode())
 
@@ -237,7 +237,7 @@ class TuneServer(threading.Thread):
         """Initialize HTTPServer and serve forever by invoking self.run()"""
         threading.Thread.__init__(self)
         self._port = port if port else self.DEFAULT_PORT
-        address = ('localhost', self._port)
+        address = ("localhost", self._port)
         logger.info("Starting Tune Server...")
         self._server = HTTPServer(address, RunnerHandler(runner))
         self.daemon = True
diff --git a/python/ray/worker.py b/python/ray/worker.py
index 64c8fbda7..f3c17601f 100644
--- a/python/ray/worker.py
+++ b/python/ray/worker.py
@@ -183,7 +183,7 @@ class Worker(object):
         put_index: The number of objects that have been put from the current
             task.
         """
-        if not hasattr(self._task_context, 'initialized'):
+        if not hasattr(self._task_context, "initialized"):
             # Initialize task_context for the current thread.
             if ray.utils.is_main_thread():
                 # If this is running on the main thread, initialize it to
@@ -196,7 +196,7 @@ class Worker(object):
                 # random task ID so that the backend can differentiate
                 # between different threads.
                 self._task_context.current_task_id = TaskID(_random_string())
-                if getattr(self, '_multithreading_warned', False) is not True:
+                if getattr(self, "_multithreading_warned", False) is not True:
                     logger.warning(
                         "Calling ray.get or ray.wait in a separate thread "
                         "may lead to deadlock if the main thread blocks on "
diff --git a/python/ray/workers/default_worker.py b/python/ray/workers/default_worker.py
index a8b7040d1..43ecd0658 100644
--- a/python/ray/workers/default_worker.py
+++ b/python/ray/workers/default_worker.py
@@ -60,7 +60,7 @@ parser.add_argument(
 parser.add_argument(
     "--load-code-from-local",
     default=False,
-    action='store_true',
+    action="store_true",
     help="True if code is loaded from local files, as opposed to the GCS.")
 
 if __name__ == "__main__":