[Streaming] Fault Tolerance Implementation (#10595)

This commit is contained in:
Lixin Wei
2020-09-05 16:40:47 +08:00
committed by GitHub
parent 31f8ce4768
commit f31ee84bfd
161 changed files with 7071 additions and 1239 deletions
+32 -12
View File
@@ -3,10 +3,11 @@ import importlib
import logging
from abc import ABC, abstractmethod
from ray import streaming
from ray.streaming import function
from ray.streaming import message
from ray.streaming.collector import Collector
from ray.streaming.collector import CollectionCollector
from ray.streaming.function import SourceFunction
from ray.streaming.runtime import gateway_client
logger = logging.getLogger(__name__)
@@ -40,6 +41,14 @@ class Operator(ABC):
def operator_type(self) -> OperatorType:
pass
@abstractmethod
def save_checkpoint(self):
pass
@abstractmethod
def load_checkpoint(self, checkpoint_obj):
pass
class OneInputOperator(Operator, ABC):
"""Interface for stream operators with one input."""
@@ -90,8 +99,20 @@ class StreamOperator(Operator, ABC):
for collector in self.collectors:
collector.collect(record)
def save_checkpoint(self):
self.func.save_checkpoint()
class SourceOperator(StreamOperator):
def load_checkpoint(self, checkpoint_obj):
self.func.load_checkpoint(checkpoint_obj)
class SourceOperator(Operator, ABC):
@abstractmethod
def fetch(self):
pass
class SourceOperatorImpl(SourceOperator, StreamOperator):
"""
Operator to run a :class:`function.SourceFunction`
"""
@@ -104,19 +125,19 @@ class SourceOperator(StreamOperator):
for collector in self.collectors:
collector.collect(message.Record(value))
def __init__(self, func):
def __init__(self, func: SourceFunction):
assert isinstance(func, function.SourceFunction)
super().__init__(func)
self.source_context = None
def open(self, collectors, runtime_context):
super().open(collectors, runtime_context)
self.source_context = SourceOperator.SourceContextImpl(collectors)
self.source_context = SourceOperatorImpl.SourceContextImpl(collectors)
self.func.init(runtime_context.get_parallelism(),
runtime_context.get_task_index())
def run(self):
self.func.run(self.source_context)
def fetch(self):
self.func.fetch(self.source_context)
def operator_type(self):
return OperatorType.SOURCE
@@ -147,8 +168,7 @@ class FlatMapOperator(StreamOperator, OneInputOperator):
def open(self, collectors, runtime_context):
super().open(collectors, runtime_context)
self.collection_collector = streaming.collector.CollectionCollector(
collectors)
self.collection_collector = CollectionCollector(collectors)
def process_element(self, record):
self.func.flat_map(record.value, self.collection_collector)
@@ -286,12 +306,12 @@ class ChainedOperator(StreamOperator, ABC):
raise Exception("Current operator type is not supported")
class ChainedSourceOperator(ChainedOperator):
class ChainedSourceOperator(SourceOperator, ChainedOperator):
def __init__(self, operators, configs):
super().__init__(operators, configs)
def run(self):
self.operators[0].run()
def fetch(self):
self.operators[0].fetch()
class ChainedOneInputOperator(ChainedOperator):
@@ -350,7 +370,7 @@ def load_operator(descriptor_operator_bytes: bytes):
_function_to_operator = {
function.SourceFunction: SourceOperator,
function.SourceFunction: SourceOperatorImpl,
function.MapFunction: MapOperator,
function.FlatMapFunction: FlatMapOperator,
function.FilterFunction: FilterOperator,