[Streaming] Streaming data transfer supports cross language. (#7961)

* add init parameters for java

* fix bug

* cython

* fix compile

* fix test_direct_transfer

* comment

* ChannelCreationParameter

* fix comment

* builder

* lint and fix tests

* fix single process test

* fix checkstyle and lint

* checkstyle

* lint python

Co-authored-by: wanxing <wanxing@B-458DMD6M-1753.local>
This commit is contained in:
wanxing
2020-04-16 15:16:48 +08:00
committed by GitHub
parent 5a7882bb44
commit 9345d03ffb
36 changed files with 618 additions and 333 deletions
+90 -9
View File
@@ -6,9 +6,11 @@ from typing import List
import ray
import ray.streaming._streaming as _streaming
import ray.streaming.generated.streaming_pb2 as streaming_pb
from ray import ActorID
from ray.actor import ActorHandle
from ray.streaming.config import Config
from ray._raylet import JavaFunctionDescriptor
from ray._raylet import PythonFunctionDescriptor
from ray._raylet import Language
CHANNEL_ID_LEN = 20
@@ -140,6 +142,85 @@ class DataMessage:
return self.__message_id
class ChannelCreationParametersBuilder:
    """
    Collect the creation parameters needed by streaming queues.

    For every peer actor handle one ``_streaming.ChannelCreationParameter``
    is appended, holding the actor id together with the async/sync function
    descriptors that match the actor's language. The accumulated list is
    read back with ``get_parameters()``.
    """

    # Descriptors used when the peer actor is not a Python actor.
    # NOTE(review): the arguments look like (class name, method name,
    # JVM method signature) -- confirm against JavaFunctionDescriptor.
    _java_reader_async_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker", "onReaderMessage", "([B)V")
    _java_reader_sync_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker", "onReaderMessageSync", "([B)[B")
    _java_writer_async_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker", "onWriterMessage", "([B)V")
    _java_writer_sync_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker", "onWriterMessageSync", "([B)[B")

    # Descriptors used when the peer actor is a Python actor. These are
    # class-level defaults and can be swapped with the static setters below.
    _python_reader_async_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker", "on_reader_message",
        "JobWorker")
    _python_reader_sync_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker", "on_reader_message_sync",
        "JobWorker")
    _python_writer_async_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker", "on_writer_message",
        "JobWorker")
    _python_writer_sync_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker", "on_writer_message_sync",
        "JobWorker")

    def __init__(self):
        # Parameters accumulate here across successive build_* calls.
        self._parameters = []

    def get_parameters(self):
        """Return the list of parameters built so far (not a copy)."""
        return self._parameters

    def build_input_queue_parameters(self, queue_ids_dict):
        """
        Append one parameter per entry of ``queue_ids_dict``.

        Despite its name, the argument is iterated as a sequence of actor
        handles. The *writer* descriptors are attached
        (presumably because the peer of an input queue is a writer --
        TODO confirm).

        Returns this builder so calls can be chained.
        """
        self.build_parameters(
            queue_ids_dict,
            self._java_writer_async_function_descriptor,
            self._java_writer_sync_function_descriptor,
            self._python_writer_async_function_descriptor,
            self._python_writer_sync_function_descriptor)
        return self

    def build_output_queue_parameters(self, to_actors):
        """
        Append one parameter per actor handle in ``to_actors``.

        The *reader* descriptors are attached (presumably because the peer
        of an output queue is a reader -- TODO confirm).

        Returns this builder so calls can be chained.
        """
        self.build_parameters(
            to_actors,
            self._java_reader_async_function_descriptor,
            self._java_reader_sync_function_descriptor,
            self._python_reader_async_function_descriptor,
            self._python_reader_sync_function_descriptor)
        return self

    def build_parameters(self, actors, java_async_func,
                         java_sync_func, py_async_func, py_sync_func):
        """
        Append a ``ChannelCreationParameter`` for each handle in ``actors``.

        Handles whose ``_ray_actor_language`` equals ``Language.PYTHON`` get
        the Python descriptor pair; every other handle gets the Java pair.

        Returns this builder.
        """
        for actor in actors:
            # Pick the (async, sync) callback pair for the peer's language.
            if actor._ray_actor_language == Language.PYTHON:
                async_func, sync_func = py_async_func, py_sync_func
            else:
                async_func, sync_func = java_async_func, java_sync_func
            created = _streaming.ChannelCreationParameter(
                actor._ray_actor_id, async_func, sync_func)
            self._parameters.append(created)
        return self

    @staticmethod
    def set_python_writer_function_descriptor(async_function, sync_function):
        """
        Replace the class-level Python writer descriptors.

        The assignment targets the class itself, so it affects every
        builder that reads these attributes afterwards.
        """
        builder_cls = ChannelCreationParametersBuilder
        builder_cls._python_writer_async_function_descriptor = async_function
        builder_cls._python_writer_sync_function_descriptor = sync_function

    @staticmethod
    def set_python_reader_function_descriptor(async_function, sync_function):
        """
        Replace the class-level Python reader descriptors.

        The assignment targets the class itself, so it affects every
        builder that reads these attributes afterwards.
        """
        builder_cls = ChannelCreationParametersBuilder
        builder_cls._python_reader_async_function_descriptor = async_function
        builder_cls._python_reader_sync_function_descriptor = sync_function
logger = logging.getLogger(__name__)
@@ -161,16 +242,16 @@ class DataWriter:
py_output_channels = [
channel_id_str_to_bytes(qid_str) for qid_str in output_channels
]
output_actor_ids: List[ActorID] = [
handle._ray_actor_id for handle in to_actors
]
creation_parameters = ChannelCreationParametersBuilder()
creation_parameters.build_output_queue_parameters(to_actors)
channel_size = conf.get(Config.CHANNEL_SIZE,
Config.CHANNEL_SIZE_DEFAULT)
py_msg_ids = [0 for _ in range(len(output_channels))]
config_bytes = _to_native_conf(conf)
is_mock = conf[Config.CHANNEL_TYPE] == Config.MEMORY_CHANNEL
self.writer = _streaming.DataWriter.create(
py_output_channels, output_actor_ids, channel_size, py_msg_ids,
py_output_channels, creation_parameters.get_parameters(),
channel_size, py_msg_ids,
config_bytes, is_mock)
logger.info("create DataWriter succeed")
@@ -215,9 +296,8 @@ class DataReader:
py_input_channels = [
channel_id_str_to_bytes(qid_str) for qid_str in input_channels
]
input_actor_ids: List[ActorID] = [
handle._ray_actor_id for handle in from_actors
]
creation_parameters = ChannelCreationParametersBuilder()
creation_parameters.build_input_queue_parameters(from_actors)
py_seq_ids = [0 for _ in range(len(input_channels))]
py_msg_ids = [0 for _ in range(len(input_channels))]
timer_interval = int(conf.get(Config.TIMER_INTERVAL_MS, -1))
@@ -226,7 +306,8 @@ class DataReader:
self.__queue = Queue(10000)
is_mock = conf[Config.CHANNEL_TYPE] == Config.MEMORY_CHANNEL
self.reader = _streaming.DataReader.create(
py_input_channels, input_actor_ids, py_seq_ids, py_msg_ids,
py_input_channels, creation_parameters.get_parameters(),
py_seq_ids, py_msg_ids,
timer_interval, is_recreate, config_bytes, is_mock)
logger.info("create DataReader succeed")
+2 -17
View File
@@ -4,7 +4,6 @@ import ray
import ray.streaming._streaming as _streaming
import ray.streaming.generated.remote_call_pb2 as remote_call_pb
import ray.streaming.runtime.processor as processor
from ray._raylet import PythonFunctionDescriptor
from ray.streaming.config import Config
from ray.streaming.runtime.graph import ExecutionGraph
from ray.streaming.runtime.task import SourceStreamTask, OneInputStreamTask
@@ -48,22 +47,8 @@ class JobWorker(object):
self.task_id, self.stream_processor))
if self.config.get(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL):
reader_async_func = PythonFunctionDescriptor(
__name__, self.on_reader_message.__name__,
self.__class__.__name__)
reader_sync_func = PythonFunctionDescriptor(
__name__, self.on_reader_message_sync.__name__,
self.__class__.__name__)
self.reader_client = _streaming.ReaderClient(
reader_async_func, reader_sync_func)
writer_async_func = PythonFunctionDescriptor(
__name__, self.on_writer_message.__name__,
self.__class__.__name__)
writer_sync_func = PythonFunctionDescriptor(
__name__, self.on_writer_message_sync.__name__,
self.__class__.__name__)
self.writer_client = _streaming.WriterClient(
writer_async_func, writer_sync_func)
self.reader_client = _streaming.ReaderClient()
self.writer_client = _streaming.WriterClient()
self.task = self.create_stream_task()
self.task.start()