[Streaming] Implement streaming job-worker. (#8780)

This commit is contained in:
Tianyi Chen
2020-06-10 14:13:55 +08:00
committed by GitHub
parent 04cffb7e65
commit ec5ecb661f
56 changed files with 1078 additions and 1213 deletions
+75 -59
View File
@@ -2,7 +2,6 @@ import enum
import ray
import ray.streaming.generated.remote_call_pb2 as remote_call_pb
import ray.streaming.generated.streaming_pb2 as streaming_pb
import ray.streaming.operator as operator
import ray.streaming.partition as partition
from ray.streaming.generated.streaming_pb2 import Language
@@ -24,32 +23,10 @@ class NodeType(enum.Enum):
SINK = 2
class ExecutionNode:
def __init__(self, node_pb):
self.node_id = node_pb.node_id
self.node_type = NodeType[streaming_pb.NodeType.Name(
node_pb.node_type)]
self.parallelism = node_pb.parallelism
if node_pb.language == Language.PYTHON:
operator_bytes = node_pb.operator # python operator descriptor
self.stream_operator = operator.load_operator(operator_bytes)
self.execution_tasks = [
ExecutionTask(task) for task in node_pb.execution_tasks
]
self.input_edges = [
ExecutionEdge(edge, node_pb.language)
for edge in node_pb.input_edges
]
self.output_edges = [
ExecutionEdge(edge, node_pb.language)
for edge in node_pb.output_edges
]
class ExecutionEdge:
def __init__(self, edge_pb, language):
self.src_node_id = edge_pb.src_node_id
self.target_node_id = edge_pb.target_node_id
self.source_execution_vertex_id = edge_pb.source_execution_vertex_id
self.target_execution_vertex_id = edge_pb.target_execution_vertex_id
partition_bytes = edge_pb.partition
# Sink node doesn't have partition function,
# so we only deserialize partition_bytes when it's not None or empty
@@ -57,46 +34,85 @@ class ExecutionEdge:
self.partition = partition.load_partition(partition_bytes)
class ExecutionTask:
def __init__(self, task_pb):
self.task_id = task_pb.task_id
self.task_index = task_pb.task_index
self.worker_actor = ray.actor.ActorHandle.\
_deserialization_helper(task_pb.worker_actor)
class ExecutionVertex:
def __init__(self, vertex_pb):
self.execution_vertex_id = vertex_pb.execution_vertex_id
self.execution_job_vertex_Id = vertex_pb.execution_job_vertex_Id
self.execution_job_vertex_name = vertex_pb.execution_job_vertex_name
self.execution_vertex_index = vertex_pb.execution_vertex_index
self.parallelism = vertex_pb.parallelism
if vertex_pb.language == Language.PYTHON:
operator_bytes = vertex_pb.operator # python operator descriptor
self.stream_operator = operator.load_operator(operator_bytes)
self.worker_actor = ray.actor.ActorHandle. \
_deserialization_helper(vertex_pb.worker_actor)
self.container_id = vertex_pb.container_id
self.build_time = vertex_pb.build_time
self.language = vertex_pb.language
self.config = vertex_pb.config
self.resource = vertex_pb.resource
class ExecutionGraph:
def __init__(self, graph_pb: remote_call_pb.ExecutionGraph):
self._graph_pb = graph_pb
self.execution_nodes = [
ExecutionNode(node) for node in graph_pb.execution_nodes
class ExecutionVertexContext:
def __init__(self,
vertex_context_pb: remote_call_pb.ExecutionVertexContext):
self.execution_vertex = \
ExecutionVertex(vertex_context_pb.current_execution_vertex)
self.upstream_execution_vertices = [
ExecutionVertex(vertex)
for vertex in vertex_context_pb.upstream_execution_vertices
]
self.downstream_execution_vertices = [
ExecutionVertex(vertex)
for vertex in vertex_context_pb.downstream_execution_vertices
]
self.input_execution_edges = [
ExecutionEdge(edge, self.execution_vertex.language)
for edge in vertex_context_pb.input_execution_edges
]
self.output_execution_edges = [
ExecutionEdge(edge, self.execution_vertex.language)
for edge in vertex_context_pb.output_execution_edges
]
def get_parallelism(self):
return self.execution_vertex.parallelism
def get_upstream_parallelism(self):
if self.upstream_execution_vertices:
return self.upstream_execution_vertices[0].parallelism
return 0
def get_downstream_parallelism(self):
if self.downstream_execution_vertices:
return self.downstream_execution_vertices[0].parallelism
return 0
@property
def build_time(self):
return self._graph_pb.build_time
return self.execution_vertex.build_time
def execution_nodes(self):
return self.execution_nodes
@property
def stream_operator(self):
return self.execution_vertex.stream_operator
def get_execution_task_by_task_id(self, task_id):
for execution_node in self.execution_nodes:
for task in execution_node.execution_tasks:
if task.task_id == task_id:
return task
raise Exception("Task %s does not exist!".format(task_id))
@property
def config(self):
return self.execution_vertex.config
def get_execution_node_by_task_id(self, task_id):
for execution_node in self.execution_nodes:
for task in execution_node.execution_tasks:
if task.task_id == task_id:
return execution_node
raise Exception("Task %s does not exist!".format(task_id))
def get_task_id(self):
return self.execution_vertex.execution_vertex_id
def get_task_id2_worker_by_node_id(self, node_id):
for execution_node in self.execution_nodes:
if execution_node.node_id == node_id:
task_id2_worker = {}
for task in execution_node.execution_tasks:
task_id2_worker[task.task_id] = task.worker_actor
return task_id2_worker
raise Exception("Node %s does not exist!".format(node_id))
def get_source_actor_by_vertex_id(self, execution_vertex_id):
for vertex in self.upstream_execution_vertices:
if vertex.execution_vertex_id == execution_vertex_id:
return vertex.worker_actor
raise Exception("ExecutionVertex %s does not exist!"
.format(execution_vertex_id))
def get_target_actor_by_vertex_id(self, execution_vertex_id):
for vertex in self.downstream_execution_vertices:
if vertex.execution_vertex_id == execution_vertex_id:
return vertex.worker_actor
raise Exception("ExecutionVertex %s does not exist!"
.format(execution_vertex_id))
+24 -19
View File
@@ -20,6 +20,7 @@ class StreamTask(ABC):
self.task_id = task_id
self.processor = processor
self.worker = worker
self.config = worker.config
self.reader = None # DataReader
self.writers = {} # ExecutionEdge -> DataWriter
self.thread = None
@@ -35,18 +36,20 @@ class StreamTask(ABC):
channel_conf[Config.CHANNEL_TYPE] = self.worker.config \
.get(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL)
execution_graph = self.worker.execution_graph
execution_node = self.worker.execution_node
execution_vertex_context = self.worker.execution_vertex_context
build_time = execution_vertex_context.build_time
# writers
collectors = []
for edge in execution_node.output_edges:
output_actors_map = {}
task_id2_worker = execution_graph.get_task_id2_worker_by_node_id(
edge.target_node_id)
for target_task_id, target_actor in task_id2_worker.items():
channel_name = ChannelID.gen_id(self.task_id, target_task_id,
execution_graph.build_time())
output_actors_map[channel_name] = target_actor
output_actors_map = {}
for edge in execution_vertex_context.output_execution_edges:
target_task_id = edge.target_execution_vertex_id
target_actor = execution_vertex_context\
.get_target_actor_by_vertex_id(target_task_id)
channel_name = ChannelID.gen_id(self.task_id, target_task_id,
build_time)
output_actors_map[channel_name] = target_actor
if len(output_actors_map) > 0:
channel_ids = list(output_actors_map.keys())
target_actors = list(output_actors_map.values())
@@ -61,13 +64,14 @@ class StreamTask(ABC):
# readers
input_actor_map = {}
for edge in execution_node.input_edges:
task_id2_worker = execution_graph.get_task_id2_worker_by_node_id(
edge.src_node_id)
for src_task_id, src_actor in task_id2_worker.items():
channel_name = ChannelID.gen_id(src_task_id, self.task_id,
execution_graph.build_time())
input_actor_map[channel_name] = src_actor
for edge in execution_vertex_context.input_execution_edges:
source_task_id = edge.source_execution_vertex_id
source_actor = execution_vertex_context\
.get_source_actor_by_vertex_id(source_task_id)
channel_name = ChannelID.gen_id(source_task_id, self.task_id,
build_time)
input_actor_map[channel_name] = source_actor
if len(input_actor_map) > 0:
channel_ids = list(input_actor_map.keys())
from_actors = list(input_actor_map.values())
@@ -85,8 +89,9 @@ class StreamTask(ABC):
# TODO(chaokunyang) add task/job config
runtime_context = RuntimeContextImpl(
self.worker.execution_task.task_id,
self.worker.execution_task.task_index, execution_node.parallelism)
self.worker.task_id,
execution_vertex_context.execution_vertex.execution_vertex_index,
execution_vertex_context.get_parallelism())
logger.info("open Processor {}".format(self.processor))
self.processor.open(collectors, runtime_context)
+26 -19
View File
@@ -5,7 +5,7 @@ import ray.streaming._streaming as _streaming
import ray.streaming.generated.remote_call_pb2 as remote_call_pb
import ray.streaming.runtime.processor as processor
from ray.streaming.config import Config
from ray.streaming.runtime.graph import ExecutionGraph
from ray.streaming.runtime.graph import ExecutionVertexContext
from ray.streaming.runtime.task import SourceStreamTask, OneInputStreamTask
logger = logging.getLogger(__name__)
@@ -21,43 +21,50 @@ class JobWorker(object):
def __init__(self):
self.worker_context = None
self.task_id = None
self.execution_vertex_context = None
self.config = None
self.execution_graph = None
self.execution_task = None
self.execution_node = None
self.stream_processor = None
self.task_id = None
self.task = None
self.stream_processor = None
self.reader_client = None
self.writer_client = None
logger.info("Creating job worker succeeded.")
def init(self, worker_context_bytes):
worker_context = remote_call_pb.WorkerContext()
worker_context = remote_call_pb.PythonJobWorkerContext()
worker_context.ParseFromString(worker_context_bytes)
self.worker_context = worker_context
self.task_id = worker_context.task_id
self.config = worker_context.conf
execution_graph = ExecutionGraph(worker_context.graph)
self.execution_graph = execution_graph
self.execution_task = self.execution_graph. \
get_execution_task_by_task_id(self.task_id)
self.execution_node = self.execution_graph. \
get_execution_node_by_task_id(self.task_id)
operator = self.execution_node.stream_operator
# build vertex context from pb
self.execution_vertex_context = ExecutionVertexContext(
worker_context.execution_vertex_context)
# use vertex id as task id
self.task_id = self.execution_vertex_context.get_task_id()
# build and get processor from operator
operator = self.execution_vertex_context.stream_operator
self.stream_processor = processor.build_processor(operator)
logger.info(
"Initializing JobWorker, task_id: {}, operator: {}.".format(
"Initializing job worker, task_id: {}, operator: {}.".format(
self.task_id, self.stream_processor))
# get config from vertex
self.config = self.execution_vertex_context.config
if self.config.get(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL):
self.reader_client = _streaming.ReaderClient()
self.writer_client = _streaming.WriterClient()
self.task = self.create_stream_task()
self.task.start()
logger.info("JobWorker init succeed")
logger.info("Job worker init succeeded.")
return True
def start(self):
self.task.start()
logger.info("Job worker start succeeded.")
def create_stream_task(self):
if isinstance(self.stream_processor, processor.SourceProcessor):
return SourceStreamTask(self.task_id, self.stream_processor, self)