mirror of
https://github.com/wassname/ray.git
synced 2026-07-03 18:23:25 +08:00
Separate python logger module-wise (#2703)
## What do these changes do? 1. Separate the log related code to logger.py from services.py. 2. Allow users to modify logging formatter in `ray start`. ## Related issue number https://github.com/ray-project/ray/pull/2664
This commit is contained in:
committed by
Robert Nishihara
parent
26d3c0655c
commit
0b6e08ebee
+46
-30
@@ -54,9 +54,7 @@ LOCAL_SCHEDULER_CLIENT_TYPE = b"local_scheduler"
|
||||
PLASMA_MANAGER_CLIENT_TYPE = b"plasma_manager"
|
||||
|
||||
# Set up logging.
|
||||
logging.basicConfig()
|
||||
log = logging.getLogger()
|
||||
log.setLevel(logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Monitor(object):
|
||||
@@ -125,8 +123,8 @@ class Monitor(object):
|
||||
# that redis server.
|
||||
addr_port = self.redis.lrange("RedisShards", 0, -1)
|
||||
if len(addr_port) > 1:
|
||||
log.warning("TODO: if launching > 1 redis shard, flushing "
|
||||
"needs to touch shards in parallel.")
|
||||
logger.warning("TODO: if launching > 1 redis shard, flushing "
|
||||
"needs to touch shards in parallel.")
|
||||
self.issue_gcs_flushes = False
|
||||
else:
|
||||
addr_port = addr_port[0].split(b":")
|
||||
@@ -135,7 +133,7 @@ class Monitor(object):
|
||||
try:
|
||||
self.redis_shard.execute_command("HEAD.FLUSH 0")
|
||||
except redis.exceptions.ResponseError as e:
|
||||
log.info(
|
||||
logger.info(
|
||||
"Turning off flushing due to exception: {}".format(
|
||||
str(e)))
|
||||
self.issue_gcs_flushes = False
|
||||
@@ -194,8 +192,8 @@ class Monitor(object):
|
||||
dummy_object_id, "RAY.OBJECT_TABLE_REMOVE",
|
||||
dummy_object_id.id(), hex_to_binary(manager))
|
||||
if ok != b"OK":
|
||||
log.warn("Failed to remove object location for "
|
||||
"dead plasma manager.")
|
||||
logger.warn("Failed to remove object location for "
|
||||
"dead plasma manager.")
|
||||
|
||||
# If the task is scheduled on a dead local scheduler, mark the
|
||||
# task as lost.
|
||||
@@ -205,11 +203,11 @@ class Monitor(object):
|
||||
ray.experimental.state.TASK_STATUS_LOST, NIL_ID,
|
||||
task["ExecutionDependenciesString"], task["SpillbackCount"])
|
||||
if ok != b"OK":
|
||||
log.warn("Failed to update lost task for dead scheduler.")
|
||||
logger.warn("Failed to update lost task for dead scheduler.")
|
||||
num_tasks_updated += 1
|
||||
|
||||
if num_tasks_updated > 0:
|
||||
log.warn("Marked {} tasks as lost.".format(num_tasks_updated))
|
||||
logger.warn("Marked {} tasks as lost.".format(num_tasks_updated))
|
||||
|
||||
def cleanup_object_table(self):
|
||||
"""Clean up global state for failed plasma managers.
|
||||
@@ -234,11 +232,12 @@ class Monitor(object):
|
||||
object_id, "RAY.OBJECT_TABLE_REMOVE", object_id.id(),
|
||||
hex_to_binary(manager))
|
||||
if ok != b"OK":
|
||||
log.warn("Failed to remove object location for dead "
|
||||
"plasma manager.")
|
||||
logger.warn("Failed to remove object location for "
|
||||
"dead plasma manager.")
|
||||
num_objects_removed += 1
|
||||
if num_objects_removed > 0:
|
||||
log.warn("Marked {} objects as lost.".format(num_objects_removed))
|
||||
logger.warn("Marked {} objects as lost."
|
||||
.format(num_objects_removed))
|
||||
|
||||
def scan_db_client_table(self):
|
||||
"""Scan the database client table for dead clients.
|
||||
@@ -285,7 +284,8 @@ class Monitor(object):
|
||||
|
||||
# If the update was a deletion, add them to our accounting for dead
|
||||
# local schedulers and plasma managers.
|
||||
log.warn("Removed {}, client ID {}".format(client_type, db_client_id))
|
||||
logger.warn("Removed {}, client ID {}".format(client_type,
|
||||
db_client_id))
|
||||
if client_type == LOCAL_SCHEDULER_CLIENT_TYPE:
|
||||
if db_client_id not in self.dead_local_schedulers:
|
||||
self.dead_local_schedulers.add(db_client_id)
|
||||
@@ -429,11 +429,11 @@ class Monitor(object):
|
||||
return
|
||||
# Remove with best effort.
|
||||
num_deleted = redis.delete(*keys)
|
||||
log.info(
|
||||
logger.info(
|
||||
"Removed {} dead redis entries of the driver from redis shard {}.".
|
||||
format(num_deleted, shard_index))
|
||||
if num_deleted != len(keys):
|
||||
log.warning(
|
||||
logger.warning(
|
||||
"Failed to remove {} relevant redis entries"
|
||||
" from redis shard {}.".format(len(keys) - num_deleted))
|
||||
|
||||
@@ -494,7 +494,7 @@ class Monitor(object):
|
||||
message = ray.gcs_utils.DriverTableMessage.GetRootAsDriverTableMessage(
|
||||
data, 0)
|
||||
driver_id = message.DriverId()
|
||||
log.info("Driver {} has been removed.".format(
|
||||
logger.info("Driver {} has been removed.".format(
|
||||
binary_to_hex(driver_id)))
|
||||
|
||||
self._clean_up_entries_for_driver(driver_id)
|
||||
@@ -556,12 +556,12 @@ class Monitor(object):
|
||||
continue
|
||||
redis = self.state.redis_clients[shard_index]
|
||||
num_deleted = redis.delete(*keys)
|
||||
log.info("Removed {} dead redis entries of the driver"
|
||||
" from redis shard {}.".format(num_deleted, shard_index))
|
||||
logger.info("Removed {} dead redis entries of the driver from"
|
||||
" redis shard {}.".format(num_deleted, shard_index))
|
||||
if num_deleted != len(keys):
|
||||
log.warning("Failed to remove {} relevant redis entries"
|
||||
" from redis shard {}.".format(
|
||||
len(keys) - num_deleted, shard_index))
|
||||
logger.warning("Failed to remove {} relevant redis entries"
|
||||
" from redis shard {}.".format(
|
||||
len(keys) - num_deleted, shard_index))
|
||||
|
||||
def xray_driver_removed_handler(self, unused_channel, data):
|
||||
"""Handle a notification that a driver has been removed.
|
||||
@@ -576,7 +576,7 @@ class Monitor(object):
|
||||
message = ray.gcs_utils.DriverTableData.GetRootAsDriverTableData(
|
||||
driver_data, 0)
|
||||
driver_id = message.DriverId()
|
||||
log.info("XRay Driver {} has been removed.".format(
|
||||
logger.info("XRay Driver {} has been removed.".format(
|
||||
binary_to_hex(driver_id)))
|
||||
self._xray_clean_up_entries_for_driver(driver_id)
|
||||
|
||||
@@ -616,7 +616,7 @@ class Monitor(object):
|
||||
message_handler = self.db_client_notification_handler
|
||||
elif channel == DRIVER_DEATH_CHANNEL:
|
||||
# The message was a notification that a driver was removed.
|
||||
log.info("message-handler: driver_removed_handler")
|
||||
logger.info("message-handler: driver_removed_handler")
|
||||
message_handler = self.driver_removed_handler
|
||||
elif channel == XRAY_HEARTBEAT_CHANNEL:
|
||||
# Similar functionality as local scheduler info channel
|
||||
@@ -669,7 +669,7 @@ class Monitor(object):
|
||||
max_entries_to_flush = self.gcs_flush_policy.num_entries_to_flush()
|
||||
num_flushed = self.redis_shard.execute_command(
|
||||
"HEAD.FLUSH {}".format(max_entries_to_flush))
|
||||
log.info("num_flushed {}".format(num_flushed))
|
||||
logger.info("num_flushed {}".format(num_flushed))
|
||||
|
||||
# This flushes event log and log files.
|
||||
ray.experimental.flush_redis_unsafe(self.redis)
|
||||
@@ -706,10 +706,10 @@ class Monitor(object):
|
||||
num_plasma_managers = len(self.live_plasma_managers) + len(
|
||||
self.dead_plasma_managers)
|
||||
|
||||
log.debug("{} dead local schedulers, {} plasma managers total, {} "
|
||||
"dead plasma managers".format(
|
||||
len(self.dead_local_schedulers), num_plasma_managers,
|
||||
len(self.dead_plasma_managers)))
|
||||
logger.debug("{} dead local schedulers, {} plasma managers total, {} "
|
||||
"dead plasma managers".format(
|
||||
len(self.dead_local_schedulers), num_plasma_managers,
|
||||
len(self.dead_plasma_managers)))
|
||||
|
||||
# Handle messages from the subscription channels.
|
||||
while True:
|
||||
@@ -743,7 +743,8 @@ class Monitor(object):
|
||||
for plasma_manager_id in plasma_manager_ids:
|
||||
if ((self.live_plasma_managers[plasma_manager_id]) >=
|
||||
ray._config.num_heartbeats_timeout()):
|
||||
log.warn("Timed out {}".format(PLASMA_MANAGER_CLIENT_TYPE))
|
||||
logger.warn("Timed out {}"
|
||||
.format(PLASMA_MANAGER_CLIENT_TYPE))
|
||||
# Remove the plasma manager from the managers whose
|
||||
# heartbeats we're tracking.
|
||||
del self.live_plasma_managers[plasma_manager_id]
|
||||
@@ -782,7 +783,22 @@ if __name__ == "__main__":
|
||||
required=False,
|
||||
type=str,
|
||||
help="the path to the autoscaling config file")
|
||||
parser.add_argument(
|
||||
"--logging-level",
|
||||
required=False,
|
||||
type=str,
|
||||
default=ray_constants.LOGGER_LEVEL,
|
||||
choices=ray_constants.LOGGER_LEVEL_CHOICES,
|
||||
help=ray_constants.LOGGER_LEVEL_HELP)
|
||||
parser.add_argument(
|
||||
"--logging-format",
|
||||
required=False,
|
||||
type=str,
|
||||
default=ray_constants.LOGGER_FORMAT,
|
||||
help=ray_constants.LOGGER_FORMAT_HELP)
|
||||
args = parser.parse_args()
|
||||
level = logging.getLevelName(args.logging_level.upper())
|
||||
logging.basicConfig(level=level, format=args.logging_format)
|
||||
|
||||
redis_ip_address = get_ip_address(args.redis_address)
|
||||
redis_port = get_port(args.redis_address)
|
||||
|
||||
Reference in New Issue
Block a user