diff --git a/.travis.yml b/.travis.yml
index 1bf9f7dda..cde858639 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -261,6 +261,8 @@ matrix:
     - os: linux
       env:
         - RLLIB_TESTING=1 RLLIB_QUICK_TRAIN_AND_MISC_TESTS=1
+        # TODO (sven): Remove this after fixing rllib tests num_cpus.
+        - RAY_USE_MULTIPROCESSING_CPU_COUNT=1 
         - PYTHON=3.6
         - TF_VERSION=2.1.0
         - TFP_VERSION=0.8
@@ -271,15 +273,17 @@ matrix:
       before_script:
         - . ./ci/travis/ci.sh build
       script:
-        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=quick_train rllib/...
+        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=quick_train --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
         # Test everything that does not have any of the "main" labels:
         # "learning_tests|quick_train|examples|tests_dir".
-        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-learning_tests_tf,-learning_tests_torch,-quick_train,-examples,-tests_dir rllib/...
+        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-learning_tests_tf,-learning_tests_torch,-quick_train,-examples,-tests_dir --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
 
     # RLlib: Everything in rllib/examples/ directory.
     - os: linux
       env:
         - RLLIB_TESTING=1 RLLIB_EXAMPLE_DIR_TESTS=1
+        # TODO (sven): Remove this after fixing rllib tests num_cpus.
+        - RAY_USE_MULTIPROCESSING_CPU_COUNT=1 
         - PYTHON=3.6
         - TF_VERSION=2.1.0
         - TFP_VERSION=0.8
@@ -290,15 +294,17 @@ matrix:
       before_script:
         - . ./ci/travis/ci.sh build
       script:
-        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=examples_A,examples_B rllib/...
-        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=examples_C,examples_D rllib/...
-        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=examples_E,examples_F,examples_G,examples_H,examples_I,examples_J,examples_K,examples_L,examples_M,examples_N,examples_O,examples_P rllib/...
-        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=examples_Q,examples_R,examples_S,examples_T,examples_U,examples_V,examples_W,examples_X,examples_Y,examples_Z rllib/...
+        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=examples_A,examples_B --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
+        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=examples_C,examples_D --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
+        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=examples_E,examples_F,examples_G,examples_H,examples_I,examples_J,examples_K,examples_L,examples_M,examples_N,examples_O,examples_P --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1  rllib/...
+        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=examples_Q,examples_R,examples_S,examples_T,examples_U,examples_V,examples_W,examples_X,examples_Y,examples_Z --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
 
     # RLlib: tests_dir: Everything in rllib/tests/ directory (A-L).
     - os: linux
       env:
         - RLLIB_TESTING=1 RLLIB_TESTS_DIR_TESTS_A_TO_L=1
+        # TODO (sven): Remove this after fixing rllib tests num_cpus.
+        - RAY_USE_MULTIPROCESSING_CPU_COUNT=1 
         - PYTHON=3.6
         - TF_VERSION=2.1.0
         - TFP_VERSION=0.8
@@ -309,12 +315,14 @@ matrix:
       before_script:
         - . ./ci/travis/ci.sh build
       script:
-        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=tests_dir_A,tests_dir_B,tests_dir_C,tests_dir_D,tests_dir_E,tests_dir_F,tests_dir_G,tests_dir_H,tests_dir_I,tests_dir_J,tests_dir_K,tests_dir_L rllib/...
+        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=tests_dir_A,tests_dir_B,tests_dir_C,tests_dir_D,tests_dir_E,tests_dir_F,tests_dir_G,tests_dir_H,tests_dir_I,tests_dir_J,tests_dir_K,tests_dir_L --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
 
     # RLlib: tests_dir: Everything in rllib/tests/ directory (M-Z).
     - os: linux
       env:
         - RLLIB_TESTING=1 RLLIB_TESTS_DIR_TESTS_M_TO_Z=1
+        # TODO (sven): Remove this after fixing rllib tests num_cpus.
+        - RAY_USE_MULTIPROCESSING_CPU_COUNT=1 
         - PYTHON=3.6
         - TF_VERSION=2.1.0
         - TFP_VERSION=0.8
@@ -325,7 +333,7 @@ matrix:
       before_script:
         - . ./ci/travis/ci.sh build
       script:
-        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=tests_dir_M,tests_dir_N,tests_dir_O,tests_dir_P,tests_dir_Q,tests_dir_R,tests_dir_S,tests_dir_T,tests_dir_U,tests_dir_V,tests_dir_W,tests_dir_X,tests_dir_Y,tests_dir_Z rllib/...
+        - ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=tests_dir_M,tests_dir_N,tests_dir_O,tests_dir_P,tests_dir_Q,tests_dir_R,tests_dir_S,tests_dir_T,tests_dir_U,tests_dir_V,tests_dir_W,tests_dir_X,tests_dir_Y,tests_dir_Z --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
 
 
     # Tune: Tests and examples.
diff --git a/doc/source/serve/index.rst b/doc/source/serve/index.rst
index a7b74ced0..b73bf6822 100644
--- a/doc/source/serve/index.rst
+++ b/doc/source/serve/index.rst
@@ -50,11 +50,13 @@ Serve a function by defining a function, an endpoint, and a backend (in this cas
 connecting the two by setting traffic from the endpoint to the backend.
 
 .. literalinclude:: ../../../python/ray/serve/examples/doc/quickstart_function.py
+   :lines: 2-4,6-7,9-
 
 Serve a stateful class by defining a class (``Counter``), creating an endpoint and a backend, then connecting
 the two by setting traffic from the endpoint to the backend.
 
 .. literalinclude:: ../../../python/ray/serve/examples/doc/quickstart_class.py
+   :lines: 2-4,6-7,9-
 
 See :doc:`key-concepts` for more exhaustive coverage about Ray Serve and its core concepts.
 
diff --git a/doc/source/tune/_tutorials/tune-serve-integration-mnist.py b/doc/source/tune/_tutorials/tune-serve-integration-mnist.py
index 04690129a..2c703cf0d 100644
--- a/doc/source/tune/_tutorials/tune-serve-integration-mnist.py
+++ b/doc/source/tune/_tutorials/tune-serve-integration-mnist.py
@@ -97,6 +97,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
+import ray
 from ray import tune, serve
 from ray.serve.exceptions import RayServeException
 from ray.tune import CLIReporter
@@ -613,6 +614,9 @@ if __name__ == "__main__":
 
     args = parser.parse_args()
 
+    if args.smoke_test:
+        ray.init(num_cpus=2)
+
     model_dir = os.path.expanduser(args.model_dir)
 
     if args.query >= 0:
diff --git a/python/ray/resource_spec.py b/python/ray/resource_spec.py
index 92845518f..0bc753c42 100644
--- a/python/ray/resource_spec.py
+++ b/python/ray/resource_spec.py
@@ -1,7 +1,6 @@
 import math
 from collections import namedtuple
 import logging
-import multiprocessing
 import os
 import re
 import subprocess
@@ -105,7 +104,9 @@ class ResourceSpec(
         # Check types.
         for resource_label, resource_quantity in resources.items():
             assert (isinstance(resource_quantity, int)
-                    or isinstance(resource_quantity, float))
+                    or isinstance(resource_quantity, float)), (
+                        f"{resource_label} ({type(resource_quantity)}): "
+                        f"{resource_quantity}")
             if (isinstance(resource_quantity, float)
                     and not resource_quantity.is_integer()):
                 raise ValueError(
@@ -148,7 +149,7 @@ class ResourceSpec(
 
         num_cpus = self.num_cpus
         if num_cpus is None:
-            num_cpus = multiprocessing.cpu_count()
+            num_cpus = ray.utils.get_num_cpus()
 
         num_gpus = self.num_gpus
         gpu_ids = ray.utils.get_cuda_visible_devices()
diff --git a/python/ray/serve/examples/doc/quickstart_class.py b/python/ray/serve/examples/doc/quickstart_class.py
index f8aa26aee..890aed094 100644
--- a/python/ray/serve/examples/doc/quickstart_class.py
+++ b/python/ray/serve/examples/doc/quickstart_class.py
@@ -1,6 +1,8 @@
+import ray
 from ray import serve
 import requests
 
+ray.init(num_cpus=8)
 client = serve.start()
 
 
diff --git a/python/ray/serve/examples/doc/quickstart_function.py b/python/ray/serve/examples/doc/quickstart_function.py
index 61ea3816a..a0e26d3f2 100644
--- a/python/ray/serve/examples/doc/quickstart_function.py
+++ b/python/ray/serve/examples/doc/quickstart_function.py
@@ -1,6 +1,8 @@
+import ray
 from ray import serve
 import requests
 
+ray.init(num_cpus=8)
 client = serve.start()
 
 
diff --git a/python/ray/serve/examples/doc/snippet_model_composition.py b/python/ray/serve/examples/doc/snippet_model_composition.py
index 25705a7f8..67a1890bf 100644
--- a/python/ray/serve/examples/doc/snippet_model_composition.py
+++ b/python/ray/serve/examples/doc/snippet_model_composition.py
@@ -3,7 +3,7 @@ import requests
 import ray
 from ray import serve
 
-ray.init(num_cpus=10)
+ray.init(num_cpus=8)
 client = serve.start()
 
 # Our pipeline will be structured as follows:
diff --git a/python/ray/serve/examples/doc/tutorial_batch.py b/python/ray/serve/examples/doc/tutorial_batch.py
index 271e69afa..3f8129a05 100644
--- a/python/ray/serve/examples/doc/tutorial_batch.py
+++ b/python/ray/serve/examples/doc/tutorial_batch.py
@@ -28,8 +28,8 @@ def batch_adder_v0(flask_requests: List):
 
 # __doc_define_servable_v0_end__
 
+ray.init(num_cpus=8)
 # __doc_deploy_begin__
-ray.init(num_cpus=10)
 client = serve.start()
 client.create_backend("adder:v0", batch_adder_v0, config={"max_batch_size": 4})
 client.create_endpoint(
diff --git a/python/ray/serve/examples/doc/tutorial_pytorch.py b/python/ray/serve/examples/doc/tutorial_pytorch.py
index c7e8593a6..80e3b9d32 100644
--- a/python/ray/serve/examples/doc/tutorial_pytorch.py
+++ b/python/ray/serve/examples/doc/tutorial_pytorch.py
@@ -1,4 +1,5 @@
 # yapf: disable
+import ray
 # __doc_import_begin__
 from ray import serve
 
@@ -45,6 +46,7 @@ class ImageModel:
 
 # __doc_define_servable_end__
 
+ray.init(num_cpus=8)
 # __doc_deploy_begin__
 client = serve.start()
 client.create_backend("resnet18:v0", ImageModel)
diff --git a/python/ray/serve/examples/doc/tutorial_sklearn.py b/python/ray/serve/examples/doc/tutorial_sklearn.py
index 976f883a2..69f17953b 100644
--- a/python/ray/serve/examples/doc/tutorial_sklearn.py
+++ b/python/ray/serve/examples/doc/tutorial_sklearn.py
@@ -1,4 +1,5 @@
 # yapf: disable
+import ray
 # __doc_import_begin__
 from ray import serve
 
@@ -70,6 +71,7 @@ class BoostingModel:
 
 # __doc_define_servable_end__
 
+ray.init(num_cpus=8)
 # __doc_deploy_begin__
 client = serve.start()
 client.create_backend("lr:v1", BoostingModel)
diff --git a/python/ray/serve/examples/doc/tutorial_tensorflow.py b/python/ray/serve/examples/doc/tutorial_tensorflow.py
index 71f974697..07fb36381 100644
--- a/python/ray/serve/examples/doc/tutorial_tensorflow.py
+++ b/python/ray/serve/examples/doc/tutorial_tensorflow.py
@@ -1,4 +1,5 @@
 # yapf: disable
+import ray
 # __doc_import_begin__
 from ray import serve
 
@@ -68,6 +69,7 @@ class TFMnistModel:
 
 # __doc_define_servable_end__
 
+ray.init(num_cpus=8)
 # __doc_deploy_begin__
 client = serve.start()
 client.create_backend("tf:v1", TFMnistModel, TRAINED_MODEL_PATH)
diff --git a/python/ray/tests/test_memory_scheduling.py b/python/ray/tests/test_memory_scheduling.py
index fa642a4b7..8da6026d8 100644
--- a/python/ray/tests/test_memory_scheduling.py
+++ b/python/ray/tests/test_memory_scheduling.py
@@ -70,6 +70,7 @@ class TestMemoryScheduling(unittest.TestCase):
 
     def testTuneDriverHeapLimit(self):
         try:
+            ray.init(num_cpus=4, _memory=100 * MB)
             _register_all()
             result = tune.run(
                 "PG",
@@ -89,6 +90,11 @@ class TestMemoryScheduling(unittest.TestCase):
 
     def testTuneDriverStoreLimit(self):
         try:
+            ray.init(
+                num_cpus=4,
+                _memory=100 * MB,
+                object_store_memory=100 * MB,
+            )
             _register_all()
             self.assertRaisesRegexp(
                 ray.tune.error.TuneError,
@@ -107,6 +113,7 @@ class TestMemoryScheduling(unittest.TestCase):
 
     def testTuneWorkerHeapLimit(self):
         try:
+            ray.init(num_cpus=4, _memory=100 * MB)
             _register_all()
             result = tune.run(
                 "PG",
@@ -127,6 +134,11 @@ class TestMemoryScheduling(unittest.TestCase):
 
     def testTuneWorkerStoreLimit(self):
         try:
+            ray.init(
+                num_cpus=4,
+                _memory=100 * MB,
+                object_store_memory=100 * MB,
+            )
             _register_all()
             self.assertRaisesRegexp(
                 ray.tune.error.TuneError,
@@ -144,6 +156,7 @@ class TestMemoryScheduling(unittest.TestCase):
 
     def testTuneObjectLimitApplied(self):
         try:
+            ray.init(num_cpus=2, object_store_memory=500 * MB)
             result = tune.run(
                 train_oom,
                 resources_per_trial={"object_store_memory": 150 * 1024 * 1024},
diff --git a/python/ray/tune/examples/cifar10_pytorch.py b/python/ray/tune/examples/cifar10_pytorch.py
index 1b9b75afb..2e1b1cbe2 100644
--- a/python/ray/tune/examples/cifar10_pytorch.py
+++ b/python/ray/tune/examples/cifar10_pytorch.py
@@ -12,6 +12,7 @@ import torch.optim as optim
 from torch.utils.data import random_split
 import torchvision
 import torchvision.transforms as transforms
+import ray
 from ray import tune
 from ray.tune import CLIReporter
 from ray.tune.schedulers import ASHAScheduler
@@ -231,6 +232,7 @@ if __name__ == "__main__":
     args, _ = parser.parse_known_args()
 
     if args.smoke_test:
+        ray.init(num_cpus=2)
         main(num_samples=1, max_num_epochs=1, gpus_per_trial=0)
     else:
         # Change this to activate training on GPUs
diff --git a/python/ray/tune/examples/horovod_simple.py b/python/ray/tune/examples/horovod_simple.py
index e566edf0f..f2a141529 100644
--- a/python/ray/tune/examples/horovod_simple.py
+++ b/python/ray/tune/examples/horovod_simple.py
@@ -97,6 +97,9 @@ if __name__ == "__main__":
     parser.add_argument("--hosts-per-trial", type=int, default=1)
     parser.add_argument("--slots-per-host", type=int, default=2)
     args = parser.parse_args()
+    if args.smoke_test:
+        import ray
+        ray.init(num_cpus=2)
 
     # import ray
     # ray.init(address="auto")  # assumes ray is started with ray up
diff --git a/python/ray/tune/examples/pbt_convnet_example.py b/python/ray/tune/examples/pbt_convnet_example.py
index d996d0edd..f54f036b9 100644
--- a/python/ray/tune/examples/pbt_convnet_example.py
+++ b/python/ray/tune/examples/pbt_convnet_example.py
@@ -76,7 +76,7 @@ if __name__ == "__main__":
         "--smoke-test", action="store_true", help="Finish quickly for testing")
     args, _ = parser.parse_known_args()
 
-    ray.init()
+    ray.init(num_cpus=2)
     datasets.MNIST("~/data", train=True, download=True)
 
     # check if PytorchTrainble will save/restore correctly before execution
diff --git a/python/ray/tune/tests/ext_pytorch.py b/python/ray/tune/tests/ext_pytorch.py
index cc9759c07..5b9db0193 100644
--- a/python/ray/tune/tests/ext_pytorch.py
+++ b/python/ray/tune/tests/ext_pytorch.py
@@ -58,6 +58,7 @@ import torch.optim as optim
 from torch.utils.data import random_split
 import torchvision
 import torchvision.transforms as transforms
+import ray
 from ray import tune
 from ray.tune import CLIReporter
 from ray.tune.schedulers import ASHAScheduler
@@ -434,6 +435,7 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
 
 if __name__ == "__main__":
     # You can change the number of GPUs per trial here:
+    ray.init(num_cpus=2)  # for testing purposes only
     main(num_samples=2, max_num_epochs=2, gpus_per_trial=0)
 
 
diff --git a/python/ray/tune/tests/test_dependency.py b/python/ray/tune/tests/test_dependency.py
index ce5d7a28b..c2e2f2c7c 100644
--- a/python/ray/tune/tests/test_dependency.py
+++ b/python/ray/tune/tests/test_dependency.py
@@ -11,7 +11,8 @@ def f(config, reporter):
 
 
 if __name__ == "__main__":
-    ray.init()
+    ray.init(num_cpus=2)
+
     register_trainable("my_class", f)
     run_experiments({
         "test": {
diff --git a/python/ray/tune/tests/test_ray_trial_executor.py b/python/ray/tune/tests/test_ray_trial_executor.py
index fc06b7aeb..e2ee69013 100644
--- a/python/ray/tune/tests/test_ray_trial_executor.py
+++ b/python/ray/tune/tests/test_ray_trial_executor.py
@@ -16,7 +16,7 @@ from ray.cluster_utils import Cluster
 class RayTrialExecutorTest(unittest.TestCase):
     def setUp(self):
         self.trial_executor = RayTrialExecutor(queue_trials=False)
-        ray.init(ignore_reinit_error=True)
+        ray.init(num_cpus=2, ignore_reinit_error=True)
         _register_all()  # Needed for flaky tests
 
     def tearDown(self):
diff --git a/python/ray/tune/tests/test_sync.py b/python/ray/tune/tests/test_sync.py
index 229fb1afc..e51b518bd 100644
--- a/python/ray/tune/tests/test_sync.py
+++ b/python/ray/tune/tests/test_sync.py
@@ -17,7 +17,7 @@ from ray.tune.syncer import CommandBasedClient
 
 class TestSyncFunctionality(unittest.TestCase):
     def setUp(self):
-        ray.init()
+        ray.init(num_cpus=2)
 
     def tearDown(self):
         ray.shutdown()
diff --git a/python/ray/tune/tests/test_track.py b/python/ray/tune/tests/test_track.py
index 7abb8df4e..346892865 100644
--- a/python/ray/tune/tests/test_track.py
+++ b/python/ray/tune/tests/test_track.py
@@ -20,7 +20,7 @@ class TrackApiTest(unittest.TestCase):
     def testSoftDeprecation(self):
         """Checks that tune.track.log code does not break."""
         from ray.tune import track
-        ray.init()
+        ray.init(num_cpus=2)
 
         def testme(config):
             for i in range(config["iters"]):
diff --git a/python/ray/tune/tests/test_trial_runner.py b/python/ray/tune/tests/test_trial_runner.py
index 9a50a88d5..c947a1dae 100644
--- a/python/ray/tune/tests/test_trial_runner.py
+++ b/python/ray/tune/tests/test_trial_runner.py
@@ -23,7 +23,8 @@ class TrialRunnerTest(unittest.TestCase):
         ray.shutdown()
 
     def testTrialStatus(self):
-        ray.init()
+        ray.init(num_cpus=2)
+
         trial = Trial("__fake")
         trial_executor = RayTrialExecutor()
         self.assertEqual(trial.status, Trial.PENDING)
@@ -35,7 +36,7 @@ class TrialRunnerTest(unittest.TestCase):
         self.assertEqual(trial.status, Trial.ERROR)
 
     def testExperimentTagTruncation(self):
-        ray.init()
+        ray.init(num_cpus=2)
 
         def train(config, reporter):
             reporter(timesteps_total=1)
diff --git a/python/ray/tune/tests/test_trial_runner_3.py b/python/ray/tune/tests/test_trial_runner_3.py
index ed07b0504..a3fc80479 100644
--- a/python/ray/tune/tests/test_trial_runner_3.py
+++ b/python/ray/tune/tests/test_trial_runner_3.py
@@ -540,7 +540,8 @@ class TrialRunnerTest3(unittest.TestCase):
         runner2.step()
 
     def testCheckpointWithFunction(self):
-        ray.init()
+        ray.init(num_cpus=2)
+
         trial = Trial(
             "__fake",
             config={"callbacks": {
@@ -565,7 +566,8 @@ class TrialRunnerTest3(unittest.TestCase):
                         and fname.endswith(".json"))
                        for fname in os.listdir(cdir))
 
-        ray.init()
+        ray.init(num_cpus=2)
+
         trial = Trial("__fake", checkpoint_freq=1)
         tmpdir = tempfile.mkdtemp()
         runner = TrialRunner(local_checkpoint_dir=tmpdir, checkpoint_period=0)
diff --git a/python/ray/tune/tests/test_trial_scheduler.py b/python/ray/tune/tests/test_trial_scheduler.py
index 507ae81f0..2588a2e64 100644
--- a/python/ray/tune/tests/test_trial_scheduler.py
+++ b/python/ray/tune/tests/test_trial_scheduler.py
@@ -38,7 +38,7 @@ def mock_trial_runner(trials=None):
 
 class EarlyStoppingSuite(unittest.TestCase):
     def setUp(self):
-        ray.init()
+        ray.init(num_cpus=2)
 
     def tearDown(self):
         ray.shutdown()
@@ -782,7 +782,7 @@ class _MockTrial(Trial):
 
 class PopulationBasedTestingSuite(unittest.TestCase):
     def setUp(self):
-        ray.init()
+        ray.init(num_cpus=2)
 
     def tearDown(self):
         ray.shutdown()
@@ -1802,7 +1802,7 @@ class E2EPopulationBasedTestingSuite(unittest.TestCase):
 
 class AsyncHyperBandSuite(unittest.TestCase):
     def setUp(self):
-        ray.init()
+        ray.init(num_cpus=2)
 
     def tearDown(self):
         ray.shutdown()
diff --git a/python/ray/tune/tests/test_trial_scheduler_pbt.py b/python/ray/tune/tests/test_trial_scheduler_pbt.py
index 5af7cb467..09831fa01 100644
--- a/python/ray/tune/tests/test_trial_scheduler_pbt.py
+++ b/python/ray/tune/tests/test_trial_scheduler_pbt.py
@@ -104,7 +104,7 @@ class PopulationBasedTrainingSynchTest(unittest.TestCase):
 
 class PopulationBasedTrainingConfigTest(unittest.TestCase):
     def setUp(self):
-        ray.init()
+        ray.init(num_cpus=2)
 
     def tearDown(self):
         ray.shutdown()
@@ -145,7 +145,7 @@ class PopulationBasedTrainingConfigTest(unittest.TestCase):
 
 class PopulationBasedTrainingResumeTest(unittest.TestCase):
     def setUp(self):
-        ray.init()
+        ray.init(num_cpus=2)
 
     def tearDown(self):
         ray.shutdown()
diff --git a/python/ray/tune/tests/test_tune_restore.py b/python/ray/tune/tests/test_tune_restore.py
index 89077da90..d94e1a7ae 100644
--- a/python/ray/tune/tests/test_tune_restore.py
+++ b/python/ray/tune/tests/test_tune_restore.py
@@ -85,7 +85,7 @@ class TuneRestoreTest(unittest.TestCase):
 
 class TuneExampleTest(unittest.TestCase):
     def setUp(self):
-        ray.init()
+        ray.init(num_cpus=2)
 
     def tearDown(self):
         ray.shutdown()
diff --git a/python/ray/tune/tests/test_var.py b/python/ray/tune/tests/test_var.py
index 082a9fe03..bf9e0c4f9 100644
--- a/python/ray/tune/tests/test_var.py
+++ b/python/ray/tune/tests/test_var.py
@@ -15,7 +15,7 @@ from ray.tune.suggest.variant_generator import (RecursiveDependencyError,
 
 class VariantGeneratorTest(unittest.TestCase):
     def setUp(self):
-        ray.init()
+        ray.init(num_cpus=2)
 
     def tearDown(self):
         ray.shutdown()
diff --git a/python/ray/util/sgd/BUILD b/python/ray/util/sgd/BUILD
index 784016de9..664ac40a3 100644
--- a/python/ray/util/sgd/BUILD
+++ b/python/ray/util/sgd/BUILD
@@ -67,7 +67,7 @@ py_test(
     srcs = ["tf/examples/tensorflow_train_example.py"],
     tags = ["exclusive", "tf"],
     deps = [":sgd_lib"],
-    args = ["--num-replicas=1"]
+    args = ["--num-replicas=1", "--smoke-test"]
 )
 
 py_test(
@@ -77,7 +77,7 @@ py_test(
     srcs = ["tf/examples/tensorflow_train_example.py"],
     tags = ["exclusive", "tf"],
     deps = [":sgd_lib"],
-    args = ["--num-replicas=2"]
+    args = ["--num-replicas=2", "--smoke-test"]
 )
 
 py_test(
@@ -87,7 +87,7 @@ py_test(
     srcs = ["tf/examples/tensorflow_train_example.py"],
     tags = ["exclusive", "tf"],
     deps = [":sgd_lib"],
-    args = ["--tune"]
+    args = ["--tune", "--smoke-test"]
 )
 
 # --------------------------------------------------------------------
@@ -148,7 +148,7 @@ py_test(
     srcs = ["torch/examples/train_example.py"],
     tags = ["exclusive", "pytorch"],
     deps = [":sgd_lib"],
-    args = ["--num-workers=1"]
+    args = ["--num-workers=1", "--smoke-test"]
 )
 
 py_test(
@@ -158,7 +158,7 @@ py_test(
     srcs = ["torch/examples/train_example.py"],
     tags = ["exclusive", "pytorch"],
     deps = [":sgd_lib"],
-    args = ["--num-workers=2"]
+    args = ["--num-workers=2", "--smoke-test"]
 )
 
 py_test(
@@ -168,7 +168,7 @@ py_test(
     srcs = ["torch/examples/tune_example.py"],
     tags = ["exclusive", "pytorch"],
     deps = [":sgd_lib"],
-    args = ["--num-workers=1"]
+    args = ["--num-workers=1", "--smoke-test"]
 )
 
 py_test(
@@ -178,7 +178,7 @@ py_test(
     srcs = ["torch/examples/tune_example.py"],
     tags = ["exclusive", "pytorch"],
     deps = [":sgd_lib"],
-    args = ["--num-workers=2"]
+    args = ["--num-workers=2", "--smoke-test"]
 )
 
 
diff --git a/python/ray/util/sgd/tf/examples/cifar_tf_example.py b/python/ray/util/sgd/tf/examples/cifar_tf_example.py
index 407ed27a5..a348ed882 100644
--- a/python/ray/util/sgd/tf/examples/cifar_tf_example.py
+++ b/python/ray/util/sgd/tf/examples/cifar_tf_example.py
@@ -177,7 +177,10 @@ if __name__ == "__main__":
         help="Finish quickly for testing. Assume False for users.")
 
     args, _ = parser.parse_known_args()
-    ray.init(address=args.address)
+    if args.smoke_test:
+        ray.init(num_cpus=2)
+    else:
+        ray.init(address=args.address)
     data_size = 60000
     test_size = 10000
     batch_size = args.batch_size
diff --git a/python/ray/util/sgd/tf/examples/tensorflow_train_example.py b/python/ray/util/sgd/tf/examples/tensorflow_train_example.py
index 41f4505ea..b872594a0 100644
--- a/python/ray/util/sgd/tf/examples/tensorflow_train_example.py
+++ b/python/ray/util/sgd/tf/examples/tensorflow_train_example.py
@@ -114,6 +114,8 @@ def tune_example(num_replicas=1, use_gpu=False):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--smoke-test", action="store_true", help="Finish quickly for testing")
     parser.add_argument(
         "--address",
         required=False,
@@ -135,7 +137,10 @@ if __name__ == "__main__":
 
     args, _ = parser.parse_known_args()
 
-    ray.init(address=args.address)
+    if args.smoke_test:
+        ray.init(num_cpus=2)
+    else:
+        ray.init(address=args.address)
 
     if args.tune:
         tune_example(num_replicas=args.num_replicas, use_gpu=args.use_gpu)
diff --git a/python/ray/util/sgd/torch/examples/benchmarks/benchmark.py b/python/ray/util/sgd/torch/examples/benchmarks/benchmark.py
index 2cae22f7d..d40d14e2f 100644
--- a/python/ray/util/sgd/torch/examples/benchmarks/benchmark.py
+++ b/python/ray/util/sgd/torch/examples/benchmarks/benchmark.py
@@ -109,7 +109,10 @@ class Training(TrainingOperator):
 
 
 if __name__ == "__main__":
-    ray.init(address=None if args.local else "auto")
+    if args.local:
+        ray.init(num_cpus=2)
+    else:
+        ray.init(address="auto")
     num_workers = 2 if args.local else int(ray.cluster_resources().get(device))
     from ray.util.sgd.torch.examples.train_example import LinearDataset
 
diff --git a/python/ray/util/sgd/torch/examples/dcgan.py b/python/ray/util/sgd/torch/examples/dcgan.py
index 90f59458f..9bbf34f41 100644
--- a/python/ray/util/sgd/torch/examples/dcgan.py
+++ b/python/ray/util/sgd/torch/examples/dcgan.py
@@ -289,7 +289,10 @@ if __name__ == "__main__":
         default=False,
         help="Enables GPU training")
     args = parser.parse_args()
-    ray.init(address=args.address)
+    if args.smoke_test:
+        ray.init(num_cpus=2)
+    else:
+        ray.init(address=args.address)
 
     trainer = train_example(
         num_workers=args.num_workers,
diff --git a/python/ray/util/sgd/torch/examples/image_models/train.py b/python/ray/util/sgd/torch/examples/image_models/train.py
index e9fe05176..48fe4579c 100644
--- a/python/ray/util/sgd/torch/examples/image_models/train.py
+++ b/python/ray/util/sgd/torch/examples/image_models/train.py
@@ -128,8 +128,10 @@ def main():
     setup_default_logging()
 
     args, args_text = parse_args()
-
-    ray.init(address=args.ray_address)
+    if args.smoke_test:
+        ray.init(num_cpus=int(args.ray_num_workers))
+    else:
+        ray.init(address=args.ray_address)
 
     CustomTrainingOperator = TrainingOperator.from_creators(
         model_creator=model_creator,
diff --git a/python/ray/util/sgd/torch/examples/train_example.py b/python/ray/util/sgd/torch/examples/train_example.py
index 71e8446ec..d0eaf8d6b 100644
--- a/python/ray/util/sgd/torch/examples/train_example.py
+++ b/python/ray/util/sgd/torch/examples/train_example.py
@@ -115,10 +115,17 @@ if __name__ == "__main__":
         help="Enables GPU training")
     parser.add_argument(
         "--tune", action="store_true", default=False, help="Tune training")
+    parser.add_argument(
+        "--smoke-test",
+        action="store_true",
+        default=False,
+        help="Finish quickly for testing.")
 
     args, _ = parser.parse_known_args()
 
     import ray
-
-    ray.init(address=args.address)
+    if args.smoke_test:
+        ray.init(num_cpus=2)
+    else:
+        ray.init(address=args.address)
     train_example(num_workers=args.num_workers, use_gpu=args.use_gpu)
diff --git a/python/ray/util/sgd/torch/examples/tune_example.py b/python/ray/util/sgd/torch/examples/tune_example.py
index ef6c32893..da04bc455 100644
--- a/python/ray/util/sgd/torch/examples/tune_example.py
+++ b/python/ray/util/sgd/torch/examples/tune_example.py
@@ -59,6 +59,8 @@ def tune_example(operator_cls, num_workers=1, use_gpu=False):
 if __name__ == "__main__":
     import argparse
     parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--smoke-test", action="store_true", help="Finish quickly for testing")
     parser.add_argument(
         "--address",
         type=str,
@@ -77,7 +79,10 @@ if __name__ == "__main__":
 
     args, _ = parser.parse_known_args()
 
-    ray.init(address=args.address)
+    if args.smoke_test:
+        ray.init(num_cpus=2)
+    else:
+        ray.init(address=args.address)
     CustomTrainingOperator = TrainingOperator.from_creators(
         model_creator=model_creator, optimizer_creator=optimizer_creator,
         data_creator=data_creator, loss_creator=nn.MSELoss)
diff --git a/python/ray/utils.py b/python/ray/utils.py
index 3dd4379fd..c06352b1a 100644
--- a/python/ray/utils.py
+++ b/python/ray/utils.py
@@ -3,6 +3,7 @@ import errno
 import hashlib
 import inspect
 import logging
+import multiprocessing
 import numpy as np
 import os
 import signal
@@ -498,6 +499,56 @@ def get_system_memory():
     return psutil_memory_in_bytes
 
 
+def _get_docker_cpus():
+    # 1. Try using CFS Quota (https://bugs.openjdk.java.net/browse/JDK-8146115)
+    # 2. Try Nproc (CPU sets)
+    cpu_quota_file_name = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
+    cpu_share_file_name = "/sys/fs/cgroup/cpu/cpu.cfs_period_us"
+    num_cpus = 0
+    if os.path.exists(cpu_quota_file_name) and os.path.exists(
+            cpu_quota_file_name):
+        with open(cpu_quota_file_name, "r") as f:
+            num_cpus = int(f.read())
+        if num_cpus != -1:
+            with open(cpu_share_file_name, "r") as f:
+                num_cpus /= int(f.read())
+            return num_cpus
+
+    return int(subprocess.check_output("nproc"))
+
+
+def get_num_cpus():
+    cpu_count = multiprocessing.cpu_count()
+    if "RAY_USE_MULTIPROCESSING_CPU_COUNT" in os.environ:
+        logger.info(
+            "Using multiprocessing.cpu_count() to detect the number of CPUs. "
+            "This method of CPU detection is buggy when used inside docker. "
+            "To correctly detect CPUs remove the enivronment variable: "
+            "`RAY_USE_MULTIPROCESSING_CPU_COUNT`.")
+        return cpu_count
+    try:
+        # Not easy to get cpu count in docker, see:
+        # https://bugs.python.org/issue36054
+        docker_count = _get_docker_cpus()
+        if docker_count != cpu_count:
+            cpu_count = docker_count
+            if "RAY_DISABLE_DOCKER_CPU_WARNING" not in os.environ:
+                logger.warning(
+                    "Detecting limited number of CPUs due to docker. In "
+                    "previous versions of Ray, CPU detection in containers "
+                    "was buggy. Please ensure that Ray has enough CPUs "
+                    "allocated. This message will be removed in future "
+                    "version of Ray. You can set the environment variable: "
+                    "`RAY_DISABLE_DOCKER_CPU_WARNING` to remove it now.")
+
+    except Exception:
+        # `nproc` and cgroup are linux-only. If docker only works on linux
+        # (will run in a linux VM on other platforms), so this is fine.
+        pass
+
+    return cpu_count
+
+
 def get_used_memory():
     """Return the currently used system memory in bytes