[tune] fix for sync_on_checkpoint bug (#9057)

* #9056 fix for sync_on_checkpoint bug

* fix for failing checks

* update help string
This commit is contained in:
Vishnu Deva
2020-06-21 13:37:11 +05:30
committed by GitHub
parent e6ee39a6a3
commit 432ce1be50
+7 -7
View File
@@ -73,13 +73,13 @@ def make_parser(parser_creator=None, **kwargs):
help="Whether to checkpoint at the end of the experiment. "
"Default is False.")
parser.add_argument(
-"--no-sync-on-checkpoint",
+"--sync-on-checkpoint",
action="store_true",
-help="Disable sync-down of trial checkpoint, which is enabled by "
-"default to guarantee recoverability. If set, checkpoint syncing from "
-"worker to driver is asynchronous. Set this only if synchronous "
-"checkpointing is too slow and trial restoration failures can be "
-"tolerated")
+help="Enable sync-down of trial checkpoint to guarantee "
+"recoverability. If unset, checkpoint syncing from worker "
+"to driver is asynchronous, so unset this only if synchronous "
+"checkpointing is too slow and trial restoration failures "
+"can be tolerated.")
parser.add_argument(
"--keep-checkpoints-num",
default=None,
@@ -182,7 +182,7 @@ def create_trial_from_spec(spec, output_path, parser, **trial_kwargs):
remote_checkpoint_dir=spec.get("remote_checkpoint_dir"),
checkpoint_freq=args.checkpoint_freq,
checkpoint_at_end=args.checkpoint_at_end,
-sync_on_checkpoint=not args.no_sync_on_checkpoint,
+sync_on_checkpoint=args.sync_on_checkpoint,
keep_checkpoints_num=args.keep_checkpoints_num,
checkpoint_score_attr=args.checkpoint_score_attr,
export_formats=spec.get("export_formats", []),