[wingman -> tune] Add system performance tracking (#4924)

This commit is contained in:
Dušan Josipović
2019-07-06 09:57:35 +02:00
committed by Richard Liaw
parent c3e9d94b18
commit e9b88dcbed
4 changed files with 97 additions and 6 deletions
+8 -1
View File
@@ -22,6 +22,8 @@ from ray.tune.result import (DEFAULT_RESULTS_DIR, TIME_THIS_ITER_S,
EPISODES_THIS_ITER, EPISODES_TOTAL,
TRAINING_ITERATION, RESULT_DUPLICATE)
from ray.tune.util import UtilMonitor
logger = logging.getLogger(__name__)
@@ -64,6 +66,7 @@ class Trainable(object):
self._experiment_id = uuid.uuid4().hex
self.config = config or {}
log_sys_usage = self.config.get("log_sys_usage", False)
if logger_creator:
self._result_logger = logger_creator(self.config)
@@ -86,6 +89,7 @@ class Trainable(object):
self._restored = False
self._setup(copy.deepcopy(self.config))
self._local_ip = ray.services.get_node_ip_address()
self._monitor = UtilMonitor(start=log_sys_usage)
@classmethod
def default_resource_request(cls, config):
@@ -205,6 +209,10 @@ class Trainable(object):
timesteps_since_restore=self._timesteps_since_restore,
iterations_since_restore=self._iterations_since_restore)
monitor_data = self._monitor.get_data()
if monitor_data:
result.update(monitor_data)
self._log_result(result)
return result
@@ -442,7 +450,6 @@ class Trainable(object):
Args:
result (dict): Training result returned by _train().
"""
self._result_logger.on_result(result)
def _stop(self):