mirror of
https://github.com/wassname/ray.git
synced 2026-07-04 15:05:49 +08:00
Actor fault tolerance using object lineage reconstruction (#902)
* Revert Python actor reconstruction
* Actor reconstruction using object lineage
* Add dummy arguments and return values for actor tasks
* Pin dummy outputs for actor tasks
* Skip checkpointing test for now
* TODOs
* minor edits
* Generate dummy object dependencies in Python, not C
* Fix linting.
* Move actor counter and dummy objects inside of the actor handle
* Refactor Worker._process_task, suppress exception propagation for sequential actor tasks
This commit is contained in:
committed by
Robert Nishihara
parent
d8aa826e63
commit
99c8b1f38c
@@ -79,17 +79,6 @@ if __name__ == "__main__":
|
||||
|
||||
ray.worker.connect(info, mode=ray.WORKER_MODE, actor_id=actor_id)
|
||||
|
||||
# If this is an actor started in reconstruct mode, rerun tasks to
|
||||
# reconstruct its state.
|
||||
if args.reconstruct:
|
||||
try:
|
||||
ray.actor.reconstruct_actor_state(actor_id,
|
||||
ray.worker.global_worker)
|
||||
except Exception as e:
|
||||
redis_client = create_redis_client(args.redis_address)
|
||||
push_error_to_all_drivers(redis_client, traceback.format_exc())
|
||||
raise e
|
||||
|
||||
error_explanation = """
|
||||
This error is unexpected and should not have happened. Somehow a worker
|
||||
crashed in an unanticipated way causing the main_loop to throw an exception,
|
||||
|
||||
Reference in New Issue
Block a user