From 2b2eb4debb381f9261a615125399c66c9413bbfb Mon Sep 17 00:00:00 2001
From: Richard Liaw
Date: Sun, 15 Sep 2019 15:58:58 -0700
Subject: [PATCH] [tune] Checkpoint and Sync at end (#5699)

---
 python/ray/tune/logger.py | 2 +-
 python/ray/tune/tests/test_trial_runner.py | 6 +++---
 python/ray/tune/trial_runner.py | 5 ++++-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/python/ray/tune/logger.py b/python/ray/tune/logger.py
index 059769995..023218e45 100644
--- a/python/ray/tune/logger.py
+++ b/python/ray/tune/logger.py
@@ -224,7 +224,7 @@ class TFLogger(Logger):
     """
 
     def _init(self):
-        logger.info("Initializing TFLogger instead of TF2Logger.")
+        logger.debug("Initializing TFLogger instead of TF2Logger.")
         self._file_writer = tf.compat.v1.summary.FileWriter(self.logdir)
 
     def on_result(self, result):
diff --git a/python/ray/tune/tests/test_trial_runner.py b/python/ray/tune/tests/test_trial_runner.py
index d6164bd13..995ba42c3 100644
--- a/python/ray/tune/tests/test_trial_runner.py
+++ b/python/ray/tune/tests/test_trial_runner.py
@@ -1153,6 +1153,8 @@ class TestSyncFunctionality(unittest.TestCase):
         os.remove(test_file_path)
 
     def testNoSync(self):
+        """Sync should not run on a single node."""
+
         def sync_func(source, target):
             pass
 
@@ -1165,9 +1167,7 @@ class TestSyncFunctionality(unittest.TestCase):
                     "stop": {
                         "training_iteration": 1
                     },
-                    "upload_dir": "test",
-                    "sync_to_driver": sync_func,
-                    "sync_to_cloud": sync_func
+                    "sync_to_driver": sync_func
                 }).trials
             self.assertEqual(mock_sync.call_count, 0)
 
diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py
index 0b03957a0..21bd11191 100644
--- a/python/ray/tune/trial_runner.py
+++ b/python/ray/tune/trial_runner.py
@@ -271,7 +271,10 @@ class TrialRunner(object):
             json.dump(runner_state, f, indent=2, cls=_TuneFunctionEncoder)
 
         os.rename(tmp_file_name, self.checkpoint_file)
-        self._syncer.sync_up_if_needed()
+        if force:
+            self._syncer.sync_up()
+        else:
+            self._syncer.sync_up_if_needed()
         return self._local_checkpoint_dir
 
     def resume(self):