diff --git a/build.sh b/build.sh index e3e81c72a..b84763733 100755 --- a/build.sh +++ b/build.sh @@ -25,7 +25,7 @@ function usage() # Determine how many parallel jobs to use for make based on the number of cores unamestr="$(uname)" if [[ "$unamestr" == "Linux" ]]; then - PARALLEL=$(nproc) + PARALLEL=$(nproc --all) elif [[ "$unamestr" == "Darwin" ]]; then PARALLEL=$(sysctl -n hw.ncpu) else diff --git a/python/ray/rllib/train.py b/python/ray/rllib/train.py index b9e7b72ef..a1d8e13a1 100755 --- a/python/ray/rllib/train.py +++ b/python/ray/rllib/train.py @@ -8,6 +8,7 @@ import argparse import yaml import ray +from ray.test.cluster_utils import Cluster from ray.tune.config_parser import make_parser, resources_to_json from ray.tune.tune import _make_scheduler, run_experiments @@ -50,6 +51,17 @@ def create_parser(parser_creator=None): type=int, help="--num-gpus to pass to Ray." " This only has an affect in local mode.") + parser.add_argument( + "--ray-num-local-schedulers", + default=None, + type=int, + help="Emulate multiple cluster nodes for debugging.") + parser.add_argument( + "--ray-object-store-memory", + default=None, + type=int, + help="--object-store-memory to pass to Ray." 
+ " This only has an effect in local mode.") parser.add_argument( "--experiment-name", default="default", @@ -102,10 +114,22 @@ def run(args, parser): if not exp.get("env") and not exp.get("config", {}).get("env"): parser.error("the following arguments are required: --env") - ray.init( - redis_address=args.redis_address, - num_cpus=args.ray_num_cpus, - num_gpus=args.ray_num_gpus) + if args.ray_num_local_schedulers: + cluster = Cluster() + for _ in range(args.ray_num_local_schedulers): + cluster.add_node( + resources={ + "num_cpus": args.ray_num_cpus or 1, + "num_gpus": args.ray_num_gpus or 0, + }, + object_store_memory=args.ray_object_store_memory) + ray.init(redis_address=cluster.redis_address) + else: + ray.init( + redis_address=args.redis_address, + object_store_memory=args.ray_object_store_memory, + num_cpus=args.ray_num_cpus, + num_gpus=args.ray_num_gpus) run_experiments( experiments, scheduler=_make_scheduler(args), diff --git a/src/ray/object_manager/object_manager.cc b/src/ray/object_manager/object_manager.cc index d19e81e0b..033a40b01 100644 --- a/src/ray/object_manager/object_manager.cc +++ b/src/ray/object_manager/object_manager.cc @@ -820,7 +820,7 @@ ray::Status ObjectManager::ExecuteReceiveObject( // TODO(hme): This chunk failed, so create a pull request for this chunk. } } else { - RAY_LOG(ERROR) << "Create Chunk Failed index = " << chunk_index << ": " + RAY_LOG(DEBUG) << "Create Chunk Failed index = " << chunk_index << ": " << chunk_status.second.message(); // Read object into empty buffer. uint64_t buffer_length = buffer_pool_.GetBufferLength(chunk_index, data_size);