ray/python/ray/profiling.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import time
import threading
import traceback

import ray

# Values for the "kind" argument of RayLogSpanNonRaylet._log.
# The event happens at a single point in time.
LOG_POINT = 0
# The event marks the beginning of a span of time.
LOG_SPAN_START = 1
# The event marks the end of a span of time.
LOG_SPAN_END = 2


class _NullLogSpan(object):
    """A log span context manager that records nothing.

    It mirrors the interface of RayLogSpanRaylet (entering yields the span
    itself and "set_attribute" is available) so that code written as
    "with ray.profile(...) as prof: prof.set_attribute(...)" keeps working
    when profiling is disabled and this object is handed out instead.
    """

    def __enter__(self):
        """Enter the span.

        Returns:
            The span itself, so that it can be bound with "as".
        """
        return self

    def __exit__(self, type, value, tb):
        """Leave the span without recording anything.

        Nothing is returned, so an exception raised inside the "with" block
        is not suppressed.
        """
        pass

    def set_attribute(self, key, value):
        """Accept and discard an attribute.

        Args:
            key: The attribute name (ignored).
            value: The attribute value (ignored).
        """
        pass


# Shared instance; the class keeps no state, so one object is enough.
NULL_LOG_SPAN = _NullLogSpan()


def profile(event_type, extra_data=None, worker=None):
    """Build a context manager that records a span of time for the timeline.

    The returned object is intended for use in a "with" statement, either on
    the driver or within a task:

    .. code-block:: python

        with ray.profile("custom event", extra_data={'key': 'value'}):
            # Do some computation here.

    The "extra_data" dictionary may contain the keys "name" and "cname" to
    override the default display text and box color in the timeline. Any
    other entries show up at the bottom of the chrome tracing GUI when the
    box for this span is clicked.

    The kind of object returned depends on the worker:

    * raylet in use: a RayLogSpanRaylet.
    * raylet not in use, worker mode: a RayLogSpanNonRaylet.
    * raylet not in use, any other mode: NULL_LOG_SPAN, which records
      nothing.

    Args:
        event_type: A string describing the type of the event.
        extra_data: A dictionary mapping strings to strings that is stored
            alongside the event. Set "cname" to choose a color and "name" to
            choose the text displayed on the box in the timeline.
        worker: The worker to record the event for. When omitted, the global
            worker is used.

    Returns:
        An object that can profile a span of time via a "with" statement.
    """
    active_worker = ray.worker.global_worker if worker is None else worker

    if active_worker.use_raylet:
        return RayLogSpanRaylet(
            active_worker.profiler, event_type, extra_data=extra_data)

    # Without the raylet, only workers record events: the driver's event log
    # never gets flushed, so the driver receives the do-nothing span.
    if active_worker.mode == ray.WORKER_MODE:
        return RayLogSpanNonRaylet(
            active_worker.profiler, event_type, contents=extra_data)
    return NULL_LOG_SPAN


class Profiler(object):
    """Collects profile events for a worker and hands them off on demand.

    Attributes:
        worker: The worker whose events are being collected.
        events: The list of events recorded since the last flush.
        lock: The lock guarding every read and write of the events list.
    """

    def __init__(self, worker):
        self.worker = worker
        self.events = []
        self.lock = threading.Lock()

    def start_flush_thread(self):
        """Start a background thread that flushes the buffer in a loop."""
        flusher = threading.Thread(
            target=self._periodically_flush_profile_events)
        # As a daemon, the thread goes away when the main thread exits
        # instead of keeping the process alive.
        flusher.daemon = True
        flusher.start()

    def _periodically_flush_profile_events(self):
        """Flush the buffered events about once a second, forever.

        Drivers run this on a background thread.
        """
        # Note(rkn): This runs on a background thread in the driver and uses
        # the local scheduler client. That should be fine because nothing is
        # read from the local scheduler client here and we hold the GIL. If
        # either of those things changes, we could run into issues.
        try:
            while True:
                time.sleep(1)
                self.flush_profile_data()
        except AttributeError:
            # Errors of this type occur at shutdown; swallow them and let the
            # thread end.
            pass

    def flush_profile_data(self):
        """Push the buffered profiling data to the global control store.

        By default, profiling information for a task does not appear in the
        timeline until the task has completed. For very long-running tasks
        it can be useful to see it sooner, and this method can be called for
        that purpose. As an alternative, a background thread that calls this
        automatically could be started on workers.
        """
        # Detach the current buffer while holding the lock; everything after
        # this point works on the detached list only.
        with self.lock:
            pending, self.events = self.events, []

        worker = self.worker
        if not worker.use_raylet:
            log_key = b"event_log:" + worker.worker_id
            log_value = json.dumps(pending)
            worker.local_scheduler_client.log_event(
                log_key, log_value, time.time())
            return

        component_type = (
            "worker" if worker.mode == ray.WORKER_MODE else "driver")
        worker.local_scheduler_client.push_profile_events(
            component_type, ray.ObjectID(worker.worker_id),
            worker.node_ip_address, pending)

    def add_event(self, event):
        """Append a single event to the buffer.

        Args:
            event: The event to store until the next flush.
        """
        with self.lock:
            self.events.append(event)


class RayLogSpanNonRaylet(object):
    """Context manager that logs the start and the end of a span of time.

    Attributes:
        profiler: The object whose "add_event" method receives the events.
        event_type (str): The type of the event being logged.
        contents: Additional information logged with the start of the span.
    """

    def __init__(self, profiler, event_type, contents=None):
        """Initialize a RayLogSpanNonRaylet object."""
        self.profiler = profiler
        self.event_type = event_type
        self.contents = contents

    def _log(self, event_type, kind, contents=None):
        """Add one event to the profiler's local buffer.

        The event is only buffered here; the buffer is written to the global
        state store when flush_profile_data() is called.

        Args:
            event_type (str): The type of the event.
            kind (int): One of LOG_POINT, LOG_SPAN_START or LOG_SPAN_END.
                LOG_POINT is for an event at a single point in time,
                LOG_SPAN_START for the beginning of a span of time and
                LOG_SPAN_END for the end of one.
            contents: A dictionary with more general data to store with the
                event, or None for no data.
        """
        # TODO(rkn): This code currently takes around half a microsecond.
        # Since we call it tens of times per task, this adds up. We will need
        # to redo the logging code, perhaps in C.
        payload = contents if contents is not None else {}
        assert isinstance(payload, dict)
        # Every key and every value is stored as a string.
        stringified = {str(key): str(val) for key, val in payload.items()}
        record = (time.time(), event_type, kind, stringified)
        self.profiler.add_event(record)

    def __enter__(self):
        """Log the beginning of a span event."""
        self._log(self.event_type, LOG_SPAN_START, self.contents)

    def __exit__(self, type, value, tb):
        """Log the end of a span event. Log any exception that occurred."""
        if type is None:
            self._log(self.event_type, LOG_SPAN_END)
            return

        # The block raised: store a description of the failure with the
        # end-of-span event.
        failure = {
            "type": str(type),
            "value": value,
            "traceback": traceback.format_exc()
        }
        self._log(self.event_type, LOG_SPAN_END, failure)


class RayLogSpanRaylet(object):
    """An object used to enable logging a span of events with a with statement.

    Attributes:
        profiler: The object whose "add_event" method receives the finished
            event.
        event_type (str): The type of the event being logged.
        extra_data: A dictionary mapping strings to strings that is stored
            with the event.
        start_time: The time at which the span was entered. Only set once
            "__enter__" has run.
    """

    def __init__(self, profiler, event_type, extra_data=None):
        """Initialize a RayLogSpanRaylet object."""
        self.profiler = profiler
        self.event_type = event_type
        self.extra_data = extra_data if extra_data is not None else {}

    def set_attribute(self, key, value):
        """Add a key-value pair to the extra_data dict.

        This can be used to add attributes that are not available when
        ray.profile was called.

        Args:
            key: The attribute name.
            value: The attribute value.

        Raises:
            ValueError: If the key or the value is not a string.
        """
        if not isinstance(key, str) or not isinstance(value, str):
            raise ValueError("The extra_data argument must be a "
                             "dictionary mapping strings to strings.")
        self.extra_data[key] = value

    def __enter__(self):
        """Log the beginning of a span event.

        Returns:
            The object itself is returned so that if the block is opened using
                "with ray.profile(...) as prof:", we can call
                "prof.set_attribute" inside the block.
        """
        self.start_time = time.time()
        return self

    def __exit__(self, type, value, tb):
        """Log the end of a span event. Log any exception that occurred.

        If the block raised, the stored extra data is a description of the
        exception (its type, value and traceback) instead of "extra_data".

        Args:
            type: The type of the exception raised in the block, or None.
            value: The exception raised in the block, or None.
            tb: The traceback of the exception, or None.

        Raises:
            ValueError: If extra_data contains a key or a value that is not a
                string.
        """
        # The loop variables must not be called "value": that would rebind
        # the exception argument, and the exception report below would then
        # record the last extra_data value instead of the exception.
        for attr_key, attr_value in self.extra_data.items():
            if (not isinstance(attr_key, str)
                    or not isinstance(attr_value, str)):
                raise ValueError("The extra_data argument must be a "
                                 "dictionary mapping strings to strings.")

        if type is not None:
            extra_data = json.dumps({
                "type": str(type),
                "value": str(value),
                "traceback": str(traceback.format_exc()),
            })
        else:
            extra_data = json.dumps(self.extra_data)

        event = {
            "event_type": self.event_type,
            "start_time": self.start_time,
            "end_time": time.time(),
            "extra_data": extra_data,
        }

        self.profiler.add_event(event)