diff --git a/python/ray/autoscaler/_private/constants.py b/python/ray/autoscaler/_private/constants.py
index 3fd3ec65e..2fbf6ec32 100644
--- a/python/ray/autoscaler/_private/constants.py
+++ b/python/ray/autoscaler/_private/constants.py
@@ -15,6 +15,9 @@ def env_integer(key, default):
 # Whether event logging to driver is enabled. Set to 0 to disable.
 AUTOSCALER_EVENTS = env_integer("AUTOSCALER_EVENTS", 1)
 
+# Whether to avoid launching GPU nodes for CPU-only tasks. 0 disables it.
+AUTOSCALER_CONSERVE_GPU_NODES = env_integer("AUTOSCALER_CONSERVE_GPU_NODES", 1)
+
 # How long to wait for a node to start, in seconds
 NODE_START_WAIT_S = env_integer("AUTOSCALER_NODE_START_WAIT_S", 900)
 
diff --git a/python/ray/autoscaler/_private/resource_demand_scheduler.py b/python/ray/autoscaler/_private/resource_demand_scheduler.py
index 523fd7d2f..0a08e0579 100644
--- a/python/ray/autoscaler/_private/resource_demand_scheduler.py
+++ b/python/ray/autoscaler/_private/resource_demand_scheduler.py
@@ -17,6 +17,7 @@ from typing import List, Dict
 from ray.autoscaler.node_provider import NodeProvider
 from ray.gcs_utils import PlacementGroupTableData
 from ray.core.generated.common_pb2 import PlacementStrategy
+from ray.autoscaler._private.constants import AUTOSCALER_CONSERVE_GPU_NODES
 from ray.autoscaler.tags import (
     TAG_RAY_USER_NODE_TYPE, NODE_KIND_UNMANAGED, NODE_TYPE_LEGACY_WORKER,
     NODE_KIND_WORKER, NODE_TYPE_LEGACY_HEAD, TAG_RAY_NODE_KIND, NODE_KIND_HEAD)
@@ -639,7 +640,7 @@ def get_nodes_for(node_types: Dict[NodeType, NodeTypeConfigDict],
             # resources. This will behave properly with the current utilization
             # score heuristic, but it's a little dangerous and misleading.
             logger.warning(
-                f"The autoscaler could not find a node type to satisfy the"
+                f"The autoscaler could not find a node type to satisfy the "
                 f"request: {resources}. If this request is related to "
                 f"placement groups the resource request will resolve itself, "
                 f"otherwise please specify a node type with the necessary "
@@ -664,8 +665,16 @@ def get_nodes_for(node_types: Dict[NodeType, NodeTypeConfigDict],
 
 
 def _utilization_score(node_resources: ResourceDict,
-                       resources: ResourceDict) -> float:
+                       resources: List[ResourceDict]) -> float:
     remaining = copy.deepcopy(node_resources)
+    is_gpu_node = "GPU" in node_resources
+    any_gpu_task = any("GPU" in r for r in resources)
+
+    # Avoid launching GPU nodes if there aren't any GPU tasks at all. Note that
+    # if there *is* a GPU task, then CPU tasks can be scheduled as well.
+    if AUTOSCALER_CONSERVE_GPU_NODES:
+        if is_gpu_node and not any_gpu_task:
+            return None
 
     fittable = []
     for r in resources:
diff --git a/python/ray/tests/test_resource_demand_scheduler.py b/python/ray/tests/test_resource_demand_scheduler.py
index 536cbe18b..977c2f2b8 100644
--- a/python/ray/tests/test_resource_demand_scheduler.py
+++ b/python/ray/tests/test_resource_demand_scheduler.py
@@ -105,6 +105,14 @@ def test_util_score():
         (8, 8)
 
 
+def test_gpu_node_util_score():
+    # A GPU node gets no score (None) when no demand requests a GPU.
+    assert _utilization_score({"GPU": 1, "CPU": 1}, [{"CPU": 1}]) is None
+    assert _utilization_score({"GPU": 1, "CPU": 1}, [{"CPU": 1, "GPU": 1}]) \
+        == (1.0, 1.0)
+    assert _utilization_score({"GPU": 1, "CPU": 1}, [{"GPU": 1}]) == (0.0, 0.5)
+
+
 def test_bin_pack():
     assert get_bin_pack_residual([], [{"GPU": 2}, {"GPU": 2}])[0] == \
         [{"GPU": 2}, {"GPU": 2}]
@@ -247,6 +255,32 @@ def test_get_nodes_packing_heuristic():
         }
 
 
+def test_gpu_node_avoid_cpu_task():
+    types = {
+        "cpu": {
+            "resources": {
+                "CPU": 1
+            },
+            "max_workers": 10,
+        },
+        "gpu": {
+            "resources": {
+                "GPU": 1,
+                "CPU": 100,
+            },
+            "max_workers": 10,
+        },
+    }
+    r1 = [{"CPU": 1}] * 100  # CPU-only demand
+    assert get_nodes_for(types, {}, "empty_node", 100, r1) == {"cpu": 10}
+    r2 = [{"GPU": 1}] + [{"CPU": 1}] * 100  # one GPU task, 100 CPU tasks
+    assert get_nodes_for(types, {}, "empty_node", 100, r2) == \
+        {"gpu": 1}
+    r3 = [{"GPU": 1}] * 4 + [{"CPU": 1}] * 404  # 4 GPU tasks, 404 CPU tasks
+    assert get_nodes_for(types, {}, "empty_node", 100, r3) == \
+        {"gpu": 4, "cpu": 4}
+
+
 def test_get_nodes_respects_max_limit():
     types = {
         "m4.large": {
@@ -2029,7 +2063,6 @@ class AutoscalingTest(unittest.TestCase):
                 "node_config": {},
                 "resources": {
                     "CPU": 2,
-                    "GPU": 1,
                     "WORKER": 1
                 },
                 "max_workers": 3
@@ -2146,7 +2179,6 @@ class AutoscalingTest(unittest.TestCase):
                 "node_config": {},
                 "resources": {
                     "CPU": 2,
-                    "GPU": 1,
                     "WORKER": 1
                 },
                 "max_workers": 3,
@@ -2260,7 +2292,6 @@ class AutoscalingTest(unittest.TestCase):
                 "node_config": {},
                 "resources": {
                     "CPU": 2,
-                    "GPU": 1,
                     "WORKER": 1
                 },
                 "max_workers": 3,