Pipe num_cpus and num_gpus through from start_ray.py. (#275)

* Pipe num_cpus and num_gpus through from start_ray.py.

* Improve load balancing tests.

* Fix bug.

* Factor out some testing code.
This commit is contained in:
Robert Nishihara
2017-02-13 17:43:23 -08:00
committed by Philipp Moritz
parent 3934d5f6eb
commit 072eadd57f
6 changed files with 63 additions and 48 deletions
+1 -1
View File
@@ -90,7 +90,7 @@ class TestGlobalScheduler(unittest.TestCase):
plasma_manager_name=plasma_manager_name,
plasma_address=plasma_address,
redis_address=redis_address,
static_resource_list=[None, 0])
static_resource_list=[10, 0])
# Connect to the scheduler.
photon_client = photon.PhotonClient(local_scheduler_name)
self.photon_clients.append(photon_client)
+3 -18
View File
@@ -2,7 +2,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
import os
import random
import subprocess
@@ -88,23 +87,9 @@ def start_local_scheduler(plasma_store_name,
command += ["-r", redis_address]
if plasma_address is not None:
command += ["-a", plasma_address]
# We want to be able to support independently setting capacity for each of the
# supported resource types. Thus, the list can be None or contain any number
# of None values.
if static_resource_list is None:
static_resource_list = [None, None]
if static_resource_list[0] is None:
# By default, use the number of hardware execution threads for the number of
# cores.
static_resource_list[0] = multiprocessing.cpu_count()
if static_resource_list[1] is None:
# By default, do not configure any GPUs on this node.
static_resource_list[1] = 0
# Pass the resource capacity string to the photon scheduler in all cases.
# Sanity check to make sure all resource capacities in the list are numeric
# (int or float).
assert(all([x == int or x == float for x in map(type, static_resource_list)]))
command += ["-c", ",".join(map(str, static_resource_list))]
if static_resource_list is not None:
assert all([isinstance(resource, int) or isinstance(resource, float) for resource in static_resource_list])
command += ["-c", ",".join([str(resource) for resource in static_resource_list])]
with open(os.devnull, "w") as FNULL:
stdout = FNULL if redirect_output else None
+17 -7
View File
@@ -2,8 +2,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import psutil
from collections import namedtuple, OrderedDict
import multiprocessing
import os
import psutil
import random
import redis
import signal
@@ -12,7 +14,6 @@ import string
import subprocess
import sys
import time
from collections import namedtuple, OrderedDict
import threading
# Ray modules
@@ -360,7 +361,8 @@ def start_local_scheduler(redis_address,
plasma_address=None,
cleanup=True,
redirect_output=False,
static_resource_list=None,
num_cpus=None,
num_gpus=None,
num_workers=0):
"""Start a local scheduler process.
@@ -378,14 +380,21 @@ def start_local_scheduler(redis_address,
that imported services exits.
redirect_output (bool): True if stdout and stderr should be redirected to
/dev/null.
static_resource_list (list): An ordered list of the configured resource
capacities for this local scheduler.
num_cpus: The number of CPUs the local scheduler should be configured with.
num_gpus: The number of GPUs the local scheduler should be configured with.
num_workers (int): The number of workers that the local scheduler should
start.
Return:
The name of the local scheduler socket.
"""
if num_cpus is None:
# By default, use the number of hardware execution threads for the number of
# cores.
num_cpus = multiprocessing.cpu_count()
if num_gpus is None:
# By default, assume this node has no GPUs.
num_gpus = 0
local_scheduler_name, p = photon.start_local_scheduler(plasma_store_name,
plasma_manager_name,
worker_path=worker_path,
@@ -394,7 +403,7 @@ def start_local_scheduler(redis_address,
plasma_address=plasma_address,
use_profiler=RUN_PHOTON_PROFILER,
redirect_output=redirect_output,
static_resource_list=static_resource_list,
static_resource_list=[num_cpus, num_gpus],
num_workers=num_workers)
if cleanup:
all_processes[PROCESS_TYPE_LOCAL_SCHEDULER].append(p)
@@ -637,7 +646,8 @@ def start_ray_processes(address_info=None,
plasma_address=plasma_address,
cleanup=cleanup,
redirect_output=redirect_output,
static_resource_list=[num_cpus[i], num_gpus[i]],
num_cpus=num_cpus[i],
num_gpus=num_gpus[i],
num_workers=num_local_scheduler_workers)
local_scheduler_socket_names.append(local_scheduler_name)
time.sleep(0.1)