Allow remote functions to specify a maximum number of executions and kill the worker once the limit is reached. (#660)

* Implement restarting workers after a certain number of task executions.

* Clean up python code.

* Don't start new worker when an actor disconnects.

* Move wait_for_pid_to_exit to test_utils.py.

* Add test.

* Fix linting errors.

* Fix linting.

* Fix typo.
This commit is contained in:
Philipp Moritz
2017-06-13 07:34:58 +00:00
committed by Robert Nishihara
parent 4374ad1453
commit 54925996ca
9 changed files with 214 additions and 116 deletions
+13 -2
View File
@@ -13,7 +13,8 @@ import traceback
import ray.local_scheduler
import ray.signature as signature
import ray.worker
from ray.utils import random_string, binary_to_hex, hex_to_binary
from ray.utils import (FunctionProperties, binary_to_hex, hex_to_binary,
random_string)
def random_actor_id():
@@ -70,6 +71,12 @@ def fetch_and_register_actor(actor_class_key, worker):
function_id = get_actor_method_function_id(actor_method_name).id()
worker.functions[driver_id][function_id] = (actor_method_name,
temporary_actor_method)
worker.function_properties[driver_id][function_id] = FunctionProperties(
num_return_vals=1,
num_cpus=1,
num_gpus=0,
max_calls=0)
worker.num_task_executions[driver_id][function_id] = 0
try:
unpickled_class = pickle.loads(pickled_class)
@@ -236,7 +243,11 @@ def export_actor(actor_id, class_id, actor_method_names, num_cpus, num_gpus,
# TODO(rkn): When we create a second actor, we are probably overwriting
# the values from the first actor here. This may or may not be a problem.
function_id = get_actor_method_function_id(actor_method_name).id()
worker.function_properties[driver_id][function_id] = (1, num_cpus, 0)
worker.function_properties[driver_id][function_id] = FunctionProperties(
num_return_vals=1,
num_cpus=1,
num_gpus=0,
max_calls=0)
# Get a list of the local schedulers from the client table.
client_table = ray.global_state.client_table()