mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 19:00:36 +08:00
Shard Redis. (#539)
* Implement sharding in the Ray core * Single node Python modifications to do sharding * Do the sharding in redis.cc * Pipe num_redis_shards through start_ray.py and worker.py. * Use multiple redis shards in multinode tests. * first steps for sharding ray.global_state * Fix problem in multinode docker test. * fix runtest.py * fix some tests * fix redis shard startup * fix redis sharding * fix * fix bug introduced by the map-iterator being consumed * fix sharding bug * shard event table * update number of Redis clients to be 64K * Fix object table tests by flushing shards in between unit tests * Fix local scheduler tests * Documentation * Register shard locations in the primary shard * Add plasma unit tests back to build * lint * lint and fix build * Fix * Address Robert's comments * Refactor start_ray_processes to start Redis shard * lint * Fix global scheduler python tests * Fix redis module test * Fix plasma test * Fix component failure test * Fix local scheduler test * Fix runtest.py * Fix global scheduler test for python3 * Fix task_table_test_and_update bug, from actor task table submission race * Fix jenkins tests. * Retry Redis shard connections * Fix test cases * Convert database clients to DBClient struct * Fix race condition when subscribing to db client table * Remove unused lines, add APITest for sharded Ray * Fix * Fix memory leak * Suppress ReconstructionTests output * Suppress output for APITestSharded * Reissue task table add/update commands if initial command does not publish to any subscribers. * fix * Fix linting. * fix tests * fix linting * fix python test * fix linting
This commit is contained in:
committed by
Philipp Moritz
parent
0a4304725f
commit
ee08c8274b
+16
-11
@@ -15,6 +15,9 @@ parser.add_argument("--redis-address", required=False, type=str,
|
||||
help="the address to use for connecting to Redis")
|
||||
parser.add_argument("--redis-port", required=False, type=str,
|
||||
help="the port to use for starting Redis")
|
||||
parser.add_argument("--num-redis-shards", required=False, type=int,
|
||||
help=("the number of additional Redis shards to use in "
|
||||
"addition to the primary Redis shard"))
|
||||
parser.add_argument("--object-manager-port", required=False, type=int,
|
||||
help="the port to use for starting the object manager")
|
||||
parser.add_argument("--num-workers", required=False, type=int,
|
||||
@@ -75,23 +78,22 @@ if __name__ == "__main__":
|
||||
print("Using IP address {} for this node.".format(node_ip_address))
|
||||
|
||||
address_info = {}
|
||||
# Use the provided Redis port if there is one.
|
||||
if args.redis_port is not None:
|
||||
address_info["redis_address"] = "{}:{}".format(node_ip_address,
|
||||
args.redis_port)
|
||||
# Use the provided object manager port if there is one.
|
||||
if args.object_manager_port is not None:
|
||||
address_info["object_manager_ports"] = [args.object_manager_port]
|
||||
if address_info == {}:
|
||||
address_info = None
|
||||
|
||||
address_info = services.start_ray_head(address_info=address_info,
|
||||
node_ip_address=node_ip_address,
|
||||
num_workers=args.num_workers,
|
||||
cleanup=False,
|
||||
redirect_output=True,
|
||||
num_cpus=args.num_cpus,
|
||||
num_gpus=args.num_gpus)
|
||||
address_info = services.start_ray_head(
|
||||
address_info=address_info,
|
||||
node_ip_address=node_ip_address,
|
||||
redis_port=args.redis_port,
|
||||
num_workers=args.num_workers,
|
||||
cleanup=False,
|
||||
redirect_output=True,
|
||||
num_cpus=args.num_cpus,
|
||||
num_gpus=args.num_gpus,
|
||||
num_redis_shards=args.num_redis_shards)
|
||||
print(address_info)
|
||||
print("\nStarted Ray on this node. You can add additional nodes to the "
|
||||
"cluster by calling\n\n"
|
||||
@@ -113,6 +115,9 @@ if __name__ == "__main__":
|
||||
if args.redis_address is None:
|
||||
raise Exception("If --head is not passed in, --redis-address must be "
|
||||
"provided.")
|
||||
if args.num_redis_shards is not None:
|
||||
raise Exception("If --head is not passed in, --num-redis-shards must "
|
||||
"not be provided.")
|
||||
redis_ip_address, redis_port = args.redis_address.split(":")
|
||||
# Wait for the Redis server to be started. And throw an exception if we
|
||||
# can't connect to it.
|
||||
|
||||
Reference in New Issue
Block a user