mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 21:46:57 +08:00
Experimental Actor Pool (#6055)
* mod_table * Example fix for gallery * lint * nit * nit * fix * gallery * remove table for now * training, object store, tune, actors, advanced * start tf code * first cut tf * yapf * pytorch * add torch example * torch * parallel * tune * tuning * reviewsready * finetune * fix * move_code * update conf * compile * init hyperparameter * Start images * overview * extra * fix * works * update-ps-example * param_actor * fix * examples * simple * simplify_pong * flake8 and run hyperopt * add comments * add comments * add suggestion * add suggestion * suggestions * add suggestion * add suggestions * fixed in wrong area * last edit * finish changes * add line * format * reset * tests and docs * fix tests * bazelify Co-authored-by: Richard Liaw <rliaw@berkeley.edu>
This commit is contained in:
@@ -6,6 +6,7 @@ from .gcs_flush_policy import (set_flushing_policy, GcsFlushPolicy,
|
||||
SimpleGcsFlushPolicy)
|
||||
from .named_actors import get_actor, register_actor
|
||||
from .api import get, wait
|
||||
from .actor_pool import ActorPool
|
||||
from .dynamic_resources import set_resource
|
||||
|
||||
|
||||
@@ -18,5 +19,5 @@ def TensorFlowVariables(*args, **kwargs):
|
||||
__all__ = [
|
||||
"TensorFlowVariables", "get_actor", "register_actor", "get", "wait",
|
||||
"set_flushing_policy", "GcsFlushPolicy", "SimpleGcsFlushPolicy",
|
||||
"set_resource"
|
||||
"set_resource", "ActorPool"
|
||||
]
|
||||
|
||||
@@ -0,0 +1,216 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import ray
|
||||
|
||||
|
||||
class ActorPool(object):
|
||||
"""Utility class to operate on a fixed pool of actors.
|
||||
|
||||
Arguments:
|
||||
actors (list): List of Ray actor handles to use in this pool.
|
||||
|
||||
Examples:
|
||||
>>> a1, a2 = Actor.remote(), Actor.remote()
|
||||
>>> pool = ActorPool([a1, a2])
|
||||
>>> print(pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4]))
|
||||
[2, 4, 6, 8]
|
||||
"""
|
||||
|
||||
def __init__(self, actors):
|
||||
# actors to be used
|
||||
self._idle_actors = list(actors)
|
||||
|
||||
# get actor from future
|
||||
self._future_to_actor = {}
|
||||
|
||||
# get future from index
|
||||
self._index_to_future = {}
|
||||
|
||||
# next task to do
|
||||
self._next_task_index = 0
|
||||
|
||||
# next task to return
|
||||
self._next_return_index = 0
|
||||
|
||||
# next work depending when actors free
|
||||
self._pending_submits = []
|
||||
|
||||
def map(self, fn, values):
|
||||
"""Apply the given function in parallel over the actors and values.
|
||||
|
||||
This returns an ordered iterator that will return results of the map
|
||||
as they finish. Note that you must iterate over the iterator to force
|
||||
the computation to finish.
|
||||
|
||||
Arguments:
|
||||
fn (func): Function that takes (actor, value) as argument and
|
||||
returns an ObjectID computing the result over the value. The
|
||||
actor will be considered busy until the ObjectID completes.
|
||||
values (list): List of values that fn(actor, value) should be
|
||||
applied to.
|
||||
|
||||
Returns:
|
||||
Iterator over results from applying fn to the actors and values.
|
||||
|
||||
Examples:
|
||||
>>> pool = ActorPool(...)
|
||||
>>> print(pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4]))
|
||||
[2, 4, 6, 8]
|
||||
"""
|
||||
for v in values:
|
||||
self.submit(fn, v)
|
||||
while self.has_next():
|
||||
yield self.get_next()
|
||||
|
||||
def map_unordered(self, fn, values):
|
||||
"""Similar to map(), but returning an unordered iterator.
|
||||
|
||||
This returns an unordered iterator that will return results of the map
|
||||
as they finish. This can be more efficient that map() if some results
|
||||
take longer to compute than others.
|
||||
|
||||
Arguments:
|
||||
fn (func): Function that takes (actor, value) as argument and
|
||||
returns an ObjectID computing the result over the value. The
|
||||
actor will be considered busy until the ObjectID completes.
|
||||
values (list): List of values that fn(actor, value) should be
|
||||
applied to.
|
||||
|
||||
Returns:
|
||||
Iterator over results from applying fn to the actors and values.
|
||||
|
||||
Examples:
|
||||
>>> pool = ActorPool(...)
|
||||
>>> print(pool.map(lambda a, v: a.double.remote(v), [1, 2, 3, 4]))
|
||||
[6, 2, 4, 8]
|
||||
"""
|
||||
for v in values:
|
||||
self.submit(fn, v)
|
||||
while self.has_next():
|
||||
yield self.get_next_unordered()
|
||||
|
||||
def submit(self, fn, value):
|
||||
"""Schedule a single task to run in the pool.
|
||||
|
||||
This has the same argument semantics as map(), but takes on a single
|
||||
value instead of a list of values. The result can be retrieved using
|
||||
get_next() / get_next_unordered().
|
||||
|
||||
Arguments:
|
||||
fn (func): Function that takes (actor, value) as argument and
|
||||
returns an ObjectID computing the result over the value. The
|
||||
actor will be considered busy until the ObjectID completes.
|
||||
value (object): Value to compute a result for.
|
||||
|
||||
Examples:
|
||||
>>> pool = ActorPool(...)
|
||||
>>> pool.submit(lambda a, v: a.double.remote(v), 1)
|
||||
>>> pool.submit(lambda a, v: a.double.remote(v), 2)
|
||||
>>> print(pool.get_next(), pool.get_next())
|
||||
2, 4
|
||||
"""
|
||||
if self._idle_actors:
|
||||
actor = self._idle_actors.pop()
|
||||
future = fn(actor, value)
|
||||
self._future_to_actor[future] = (self._next_task_index, actor)
|
||||
self._index_to_future[self._next_task_index] = future
|
||||
self._next_task_index += 1
|
||||
else:
|
||||
self._pending_submits.append((fn, value))
|
||||
|
||||
def has_next(self):
|
||||
"""Returns whether there are any pending results to return.
|
||||
|
||||
Returns:
|
||||
True if there are any pending results not yet returned.
|
||||
|
||||
Examples:
|
||||
>>> pool = ActorPool(...)
|
||||
>>> pool.submit(lambda a, v: a.double.remote(v), 1)
|
||||
>>> print(pool.has_next())
|
||||
True
|
||||
>>> print(pool.get_next())
|
||||
2
|
||||
>>> print(pool.has_next())
|
||||
False
|
||||
"""
|
||||
return bool(self._future_to_actor)
|
||||
|
||||
def get_next(self, timeout=None):
|
||||
"""Returns the next pending result in order.
|
||||
|
||||
This returns the next result produced by submit(), blocking for up to
|
||||
the specified timeout until it is available.
|
||||
|
||||
Returns:
|
||||
The next result.
|
||||
|
||||
Raises:
|
||||
TimeoutError if the timeout is reached.
|
||||
|
||||
Examples:
|
||||
>>> pool = ActorPool(...)
|
||||
>>> pool.submit(lambda a, v: a.double.remote(v), 1)
|
||||
>>> print(pool.get_next())
|
||||
2
|
||||
"""
|
||||
if not self.has_next():
|
||||
raise StopIteration("No more results to get")
|
||||
if self._next_return_index >= self._next_task_index:
|
||||
raise ValueError("It is not allowed to call get_next() after "
|
||||
"get_next_unordered().")
|
||||
future = self._index_to_future[self._next_return_index]
|
||||
if timeout is not None:
|
||||
res, _ = ray.wait([future], timeout=timeout)
|
||||
if not res:
|
||||
raise TimeoutError("Timed out waiting for result")
|
||||
del self._index_to_future[self._next_return_index]
|
||||
self._next_return_index += 1
|
||||
i, a = self._future_to_actor.pop(future)
|
||||
self._return_actor(a)
|
||||
return ray.get(future)
|
||||
|
||||
def get_next_unordered(self, timeout=None):
|
||||
"""Returns any of the next pending results.
|
||||
|
||||
This returns some result produced by submit(), blocking for up to
|
||||
the specified timeout until it is available. Unlike get_next(), the
|
||||
results are not always returned in same order as submitted, which can
|
||||
improve performance.
|
||||
|
||||
Returns:
|
||||
The next result.
|
||||
|
||||
Raises:
|
||||
TimeoutError if the timeout is reached.
|
||||
|
||||
Examples:
|
||||
>>> pool = ActorPool(...)
|
||||
>>> pool.submit(lambda a, v: a.double.remote(v), 1)
|
||||
>>> pool.submit(lambda a, v: a.double.remote(v), 2)
|
||||
>>> print(pool.get_next_unordered())
|
||||
4
|
||||
>>> print(pool.get_next_unordered())
|
||||
2
|
||||
"""
|
||||
if not self.has_next():
|
||||
raise StopIteration("No more results to get")
|
||||
# TODO(ekl) bulk wait for performance
|
||||
res, _ = ray.wait(
|
||||
list(self._future_to_actor), num_returns=1, timeout=timeout)
|
||||
if res:
|
||||
[future] = res
|
||||
else:
|
||||
raise TimeoutError("Timed out waiting for result")
|
||||
i, a = self._future_to_actor.pop(future)
|
||||
self._return_actor(a)
|
||||
del self._index_to_future[i]
|
||||
self._next_return_index = max(self._next_return_index, i + 1)
|
||||
return ray.get(future)
|
||||
|
||||
def _return_actor(self, actor):
|
||||
self._idle_actors.append(actor)
|
||||
if self._pending_submits:
|
||||
self.submit(*self._pending_submits.pop(0))
|
||||
@@ -32,18 +32,20 @@ class SimpleGcsFlushPolicy(GcsFlushPolicy):
|
||||
"""A simple policy with constant flush rate, after a warmup period.
|
||||
|
||||
Example policy values:
|
||||
|
||||
flush_when_at_least_bytes 2GB
|
||||
|
||||
flush_period_secs 10s
|
||||
|
||||
flush_num_entries_each_time 10k
|
||||
|
||||
This means
|
||||
(1) If the GCS shard uses less than 2GB of memory, no flushing would take
|
||||
place. This should cover most Ray runs.
|
||||
(2) The GCS shard will only honor a flush request, if it's issued after 10
|
||||
seconds since the last processed flush. In particular this means it's
|
||||
okay for the Monitor to issue requests more frequently than this param.
|
||||
This means: (1) If the GCS shard uses less than 2GB of memory,
|
||||
no flushing would take place. This should cover most Ray runs. (2) The
|
||||
GCS shard will only honor a flush request, if it's issued after 10
|
||||
seconds since the last processed flush. In particular this means it's
|
||||
okay for the Monitor to issue requests more frequently than this param.
|
||||
(3) When processing a flush, the shard will flush at most 10k entries.
|
||||
This is to control the latency of each request.
|
||||
This is to control the latency of each request.
|
||||
|
||||
Note, flush rate == (flush period) * (num entries each time). So
|
||||
applications that have a heavier GCS load can tune these params.
|
||||
|
||||
@@ -6,6 +6,14 @@ py_test(
|
||||
deps = ["//:ray_lib"],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "test_actor_pool",
|
||||
size = "small",
|
||||
srcs = ["test_actor_pool.py"],
|
||||
tags = ["exclusive"],
|
||||
deps = ["//:ray_lib"],
|
||||
)
|
||||
|
||||
py_test(
|
||||
name = "test_actor_resources",
|
||||
size = "medium",
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import time
|
||||
import pytest
|
||||
|
||||
import ray
|
||||
from ray.experimental import ActorPool
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def init():
|
||||
ray.init(num_cpus=4)
|
||||
yield
|
||||
ray.shutdown()
|
||||
|
||||
|
||||
def test_get_next(init):
|
||||
@ray.remote
|
||||
class MyActor(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def f(self, x):
|
||||
return x + 1
|
||||
|
||||
def double(self, x):
|
||||
return 2 * x
|
||||
|
||||
actors = [MyActor.remote() for _ in range(4)]
|
||||
pool = ActorPool(actors)
|
||||
for i in range(5):
|
||||
pool.submit(lambda a, v: a.f.remote(v), i)
|
||||
assert pool.get_next() == i + 1
|
||||
|
||||
|
||||
def test_get_next_unordered(init):
|
||||
@ray.remote
|
||||
class MyActor(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def f(self, x):
|
||||
return x + 1
|
||||
|
||||
def double(self, x):
|
||||
return 2 * x
|
||||
|
||||
actors = [MyActor.remote() for _ in range(4)]
|
||||
pool = ActorPool(actors)
|
||||
|
||||
total = []
|
||||
|
||||
for i in range(5):
|
||||
pool.submit(lambda a, v: a.f.remote(v), i)
|
||||
while pool.has_next():
|
||||
total += [pool.get_next_unordered()]
|
||||
|
||||
assert all(elem in [1, 2, 3, 4, 5] for elem in total)
|
||||
|
||||
|
||||
def test_map(init):
|
||||
@ray.remote
|
||||
class MyActor(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def f(self, x):
|
||||
return x + 1
|
||||
|
||||
def double(self, x):
|
||||
return 2 * x
|
||||
|
||||
actors = [MyActor.remote() for _ in range(4)]
|
||||
pool = ActorPool(actors)
|
||||
|
||||
index = 0
|
||||
for v in pool.map(lambda a, v: a.double.remote(v), range(5)):
|
||||
assert v == 2 * index
|
||||
index += 1
|
||||
|
||||
|
||||
def test_map_unordered(init):
|
||||
@ray.remote
|
||||
class MyActor(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def f(self, x):
|
||||
return x + 1
|
||||
|
||||
def double(self, x):
|
||||
return 2 * x
|
||||
|
||||
actors = [MyActor.remote() for _ in range(4)]
|
||||
pool = ActorPool(actors)
|
||||
|
||||
total = []
|
||||
for v in pool.map(lambda a, v: a.double.remote(v), range(5)):
|
||||
total += [v]
|
||||
|
||||
assert all(elem in [0, 2, 4, 6, 8] for elem in total)
|
||||
|
||||
|
||||
def test_get_next_timeout(init):
|
||||
@ray.remote
|
||||
class MyActor(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def f(self, x):
|
||||
while (True):
|
||||
x = x + 1
|
||||
time.sleep(1)
|
||||
return None
|
||||
|
||||
def double(self, x):
|
||||
return 2 * x
|
||||
|
||||
actors = [MyActor.remote() for _ in range(4)]
|
||||
pool = ActorPool(actors)
|
||||
pool.submit(lambda a, v: a.f.remote(v), 0)
|
||||
with pytest.raises(TimeoutError):
|
||||
pool.get_next_unordered(5)
|
||||
|
||||
|
||||
def test_get_next_unordered_timeout(init):
|
||||
@ray.remote
|
||||
class MyActor(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def f(self, x):
|
||||
while (True):
|
||||
x + 1
|
||||
time.sleep(1)
|
||||
return
|
||||
|
||||
def double(self, x):
|
||||
return 2 * x
|
||||
|
||||
actors = [MyActor.remote() for _ in range(4)]
|
||||
pool = ActorPool(actors)
|
||||
|
||||
pool.submit(lambda a, v: a.f.remote(v), 0)
|
||||
with pytest.raises(TimeoutError):
|
||||
pool.get_next_unordered(5)
|
||||
Reference in New Issue
Block a user