mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 14:39:44 +08:00
[tune] fix for sync_on_checkpoint bug (#9057)
* #9056 fix for sync_on_checkpoint bug
* fix for failing checks
* update help string
This commit is contained in:
@@ -73,13 +73,13 @@ def make_parser(parser_creator=None, **kwargs):
|
||||
help="Whether to checkpoint at the end of the experiment. "
|
||||
"Default is False.")
|
||||
parser.add_argument(
|
||||
"--no-sync-on-checkpoint",
|
||||
"--sync-on-checkpoint",
|
||||
action="store_true",
|
||||
help="Disable sync-down of trial checkpoint, which is enabled by "
|
||||
"default to guarantee recoverability. If set, checkpoint syncing from "
|
||||
"worker to driver is asynchronous. Set this only if synchronous "
|
||||
"checkpointing is too slow and trial restoration failures can be "
|
||||
"tolerated")
|
||||
help="Enable sync-down of trial checkpoint to guarantee "
|
||||
"recoverability. If unset, checkpoint syncing from worker "
|
||||
"to driver is asynchronous, so unset this only if synchronous "
|
||||
"checkpointing is too slow and trial restoration failures "
|
||||
"can be tolerated.")
|
||||
parser.add_argument(
|
||||
"--keep-checkpoints-num",
|
||||
default=None,
|
||||
@@ -182,7 +182,7 @@ def create_trial_from_spec(spec, output_path, parser, **trial_kwargs):
|
||||
remote_checkpoint_dir=spec.get("remote_checkpoint_dir"),
|
||||
checkpoint_freq=args.checkpoint_freq,
|
||||
checkpoint_at_end=args.checkpoint_at_end,
|
||||
sync_on_checkpoint=not args.no_sync_on_checkpoint,
|
||||
sync_on_checkpoint=args.sync_on_checkpoint,
|
||||
keep_checkpoints_num=args.keep_checkpoints_num,
|
||||
checkpoint_score_attr=args.checkpoint_score_attr,
|
||||
export_formats=spec.get("export_formats", []),
|
||||
|
||||
Reference in New Issue
Block a user