[Streaming] Streaming data transfer supports cross-language communication. (#7961)

* add init parameters for java

* fix bug

* cython

* fix compile

* fix test_direct_transfer

* comment

* ChannelCreationParameter

* fix comment

* builder

* lint and fix tests

* fix single process test

* fix checkstyle and lint

* checkstyle

* lint python

Co-authored-by: wanxing <wanxing@B-458DMD6M-1753.local>
This commit is contained in:
wanxing
2020-04-16 15:16:48 +08:00
committed by GitHub
parent 5a7882bb44
commit 9345d03ffb
36 changed files with 618 additions and 333 deletions
+11 -6
View File
@@ -97,16 +97,21 @@ cdef extern from "message/message_bundle.h" namespace "ray::streaming" nogil:
void GetMessageListFromRawData(const uint8_t *data, uint32_t size, uint32_t msg_nums,
c_list[shared_ptr[CStreamingMessage]] &msg_list);
# Cython declarations for the types exposed by "channel.h" in the
# ray::streaming C++ namespace.
cdef extern from "channel.h" namespace "ray::streaming" nogil:
# Mirrors ray::streaming::ChannelCreationParameter. A vector of these is
# what DataReader.Init and DataWriter.Init take as their second argument.
cdef struct CChannelCreationParameter "ray::streaming::ChannelCreationParameter":
CChannelCreationParameter()
# Actor id stored with the parameter.
# NOTE(review): presumably the peer actor of the channel -- confirm in channel.h.
CActorID actor_id;
# Function used for asynchronous messages.
shared_ptr[CRayFunction] async_function;
# Function used for synchronous messages.
shared_ptr[CRayFunction] sync_function;
cdef extern from "queue/queue_client.h" namespace "ray::streaming" nogil:
cdef cppclass CReaderClient "ray::streaming::ReaderClient":
CReaderClient(CRayFunction &async_func,
CRayFunction &sync_func)
CReaderClient()
void OnReaderMessage(shared_ptr[CLocalMemoryBuffer] buffer);
shared_ptr[CLocalMemoryBuffer] OnReaderMessageSync(shared_ptr[CLocalMemoryBuffer] buffer);
cdef cppclass CWriterClient "ray::streaming::WriterClient":
CWriterClient(CRayFunction &async_func,
CRayFunction &sync_func)
CWriterClient()
void OnWriterMessage(shared_ptr[CLocalMemoryBuffer] buffer);
shared_ptr[CLocalMemoryBuffer] OnWriterMessageSync(shared_ptr[CLocalMemoryBuffer] buffer);
@@ -122,7 +127,7 @@ cdef extern from "data_reader.h" namespace "ray::streaming" nogil:
cdef cppclass CDataReader "ray::streaming::DataReader"(CStreamingCommon):
CDataReader(shared_ptr[CRuntimeContext] &runtime_context)
void Init(const c_vector[CObjectID] &input_ids,
const c_vector[CActorID] &actor_ids,
const c_vector[CChannelCreationParameter] &params,
const c_vector[uint64_t] &seq_ids,
const c_vector[uint64_t] &msg_ids,
int64_t timer_interval);
@@ -135,7 +140,7 @@ cdef extern from "data_writer.h" namespace "ray::streaming" nogil:
cdef cppclass CDataWriter "ray::streaming::DataWriter"(CStreamingCommon):
CDataWriter(shared_ptr[CRuntimeContext] &runtime_context)
CStreamingStatus Init(const c_vector[CObjectID] &channel_ids,
const c_vector[CActorID] &actor_ids,
const c_vector[CChannelCreationParameter] &params,
const c_vector[uint64_t] &message_ids,
const c_vector[uint64_t] &queue_size_vec);
long WriteMessageToBufferRing(
+42 -28
View File
@@ -10,6 +10,7 @@ from libcpp.list cimport list as c_list
from ray.includes.common cimport (
CRayFunction,
LANGUAGE_PYTHON,
LANGUAGE_JAVA,
CBuffer
)
@@ -36,27 +37,43 @@ from ray.streaming.includes.libstreaming cimport (
CReaderClient,
CWriterClient,
CLocalMemoryBuffer,
CChannelCreationParameter,
)
from ray._raylet import JavaFunctionDescriptor
import logging
channel_logger = logging.getLogger(__name__)
cdef class ChannelCreationParameter:
    """Python-visible wrapper around the native CChannelCreationParameter.

    Stores an actor id together with an async and a sync CRayFunction built
    from the given function descriptors.
    """
    cdef:
        CChannelCreationParameter parameter

    def __cinit__(self, ActorID actor_id, FunctionDescriptor async_func, FunctionDescriptor sync_func):
        # Args:
        #   actor_id: actor id copied into the native struct.
        #   async_func: descriptor of the function for asynchronous messages.
        #   sync_func: descriptor of the function for synchronous messages.
        #
        # A JavaFunctionDescriptor is wrapped as a LANGUAGE_JAVA function;
        # any other descriptor is wrapped as a LANGUAGE_PYTHON function.
        self.parameter = CChannelCreationParameter()
        self.parameter.actor_id = (<ActorID>actor_id).data
        if isinstance(async_func, JavaFunctionDescriptor):
            self.parameter.async_function = make_shared[CRayFunction](LANGUAGE_JAVA, async_func.descriptor)
        else:
            self.parameter.async_function = make_shared[CRayFunction](LANGUAGE_PYTHON, async_func.descriptor)
        if isinstance(sync_func, JavaFunctionDescriptor):
            self.parameter.sync_function = make_shared[CRayFunction](LANGUAGE_JAVA, sync_func.descriptor)
        else:
            self.parameter.sync_function = make_shared[CRayFunction](LANGUAGE_PYTHON, sync_func.descriptor)

    cdef CChannelCreationParameter get_parameter(self):
        # Hands the wrapped native struct back to Cython callers (by value).
        return self.parameter
cdef class ReaderClient:
cdef:
CReaderClient *client
def __cinit__(self,
FunctionDescriptor async_func,
FunctionDescriptor sync_func):
cdef:
CRayFunction async_native_func
CRayFunction sync_native_func
async_native_func = CRayFunction(LANGUAGE_PYTHON, async_func.descriptor)
sync_native_func = CRayFunction(LANGUAGE_PYTHON, sync_func.descriptor)
self.client = new CReaderClient(async_native_func, sync_native_func)
def __cinit__(self):
self.client = new CReaderClient()
def __dealloc__(self):
del self.client
@@ -85,15 +102,8 @@ cdef class WriterClient:
cdef:
CWriterClient * client
def __cinit__(self,
FunctionDescriptor async_func,
FunctionDescriptor sync_func):
cdef:
CRayFunction async_native_func
CRayFunction sync_native_func
async_native_func = CRayFunction(LANGUAGE_PYTHON, async_func.descriptor)
sync_native_func = CRayFunction(LANGUAGE_PYTHON, sync_func.descriptor)
self.client = new CWriterClient(async_native_func, sync_native_func)
def __cinit__(self):
self.client = new CWriterClient()
def __dealloc__(self):
del self.client
@@ -127,19 +137,21 @@ cdef class DataWriter:
@staticmethod
def create(list py_output_channels,
list output_actor_ids: list[ActorID],
list output_creation_parameters: list[ChannelCreationParameter],
uint64_t queue_size,
list py_msg_ids,
bytes config_bytes,
c_bool is_mock):
cdef:
c_vector[CObjectID] channel_ids = bytes_list_to_qid_vec(py_output_channels)
c_vector[CActorID] actor_ids
c_vector[CChannelCreationParameter] initial_parameters
c_vector[uint64_t] msg_ids
CDataWriter *c_writer
ChannelCreationParameter parameter
cdef const unsigned char[:] config_data
for actor_id in output_actor_ids:
actor_ids.push_back((<ActorID>actor_id).data)
for param in output_creation_parameters:
parameter = param
initial_parameters.push_back(parameter.get_parameter())
for py_msg_id in py_msg_ids:
msg_ids.push_back(<uint64_t>py_msg_id)
@@ -156,7 +168,7 @@ cdef class DataWriter:
c_vector[uint64_t] queue_size_vec
for i in range(channel_ids.size()):
queue_size_vec.push_back(queue_size)
cdef CStreamingStatus status = c_writer.Init(channel_ids, actor_ids, msg_ids, queue_size_vec)
cdef CStreamingStatus status = c_writer.Init(channel_ids, initial_parameters, msg_ids, queue_size_vec)
if remain_id_vec.size() != 0:
channel_logger.warning("failed queue amounts => %s", remain_id_vec.size())
if <uint32_t>status != <uint32_t> libstreaming.StatusOK:
@@ -205,7 +217,7 @@ cdef class DataReader:
@staticmethod
def create(list py_input_queues,
list input_actor_ids: list[ActorID],
list input_creation_parameters: list[ChannelCreationParameter],
list py_seq_ids,
list py_msg_ids,
int64_t timer_interval,
@@ -214,13 +226,15 @@ cdef class DataReader:
c_bool is_mock):
cdef:
c_vector[CObjectID] queue_id_vec = bytes_list_to_qid_vec(py_input_queues)
c_vector[CActorID] actor_ids
c_vector[CChannelCreationParameter] initial_parameters
c_vector[uint64_t] seq_ids
c_vector[uint64_t] msg_ids
CDataReader *c_reader
ChannelCreationParameter parameter
cdef const unsigned char[:] config_data
for actor_id in input_actor_ids:
actor_ids.push_back((<ActorID>actor_id).data)
for param in input_creation_parameters:
parameter = param
initial_parameters.push_back(parameter.get_parameter())
for py_seq_id in py_seq_ids:
seq_ids.push_back(<uint64_t>py_seq_id)
for py_msg_id in py_msg_ids:
@@ -233,7 +247,7 @@ cdef class DataReader:
if is_mock:
ctx.get().MarkMockTest()
c_reader = new CDataReader(ctx)
c_reader.Init(queue_id_vec, actor_ids, seq_ids, msg_ids, timer_interval)
c_reader.Init(queue_id_vec, initial_parameters, seq_ids, msg_ids, timer_interval)
channel_logger.info("create native reader succeed")
cdef DataReader reader = DataReader.__new__(DataReader)
reader.reader = c_reader
+90 -9
View File
@@ -6,9 +6,11 @@ from typing import List
import ray
import ray.streaming._streaming as _streaming
import ray.streaming.generated.streaming_pb2 as streaming_pb
from ray import ActorID
from ray.actor import ActorHandle
from ray.streaming.config import Config
from ray._raylet import JavaFunctionDescriptor
from ray._raylet import PythonFunctionDescriptor
from ray._raylet import Language
CHANNEL_ID_LEN = 20
@@ -140,6 +142,85 @@ class DataMessage:
return self.__message_id
class ChannelCreationParametersBuilder:
    """Collect the ChannelCreationParameter objects for a set of actors.

    The class-level descriptors name the Java and Python callbacks used when
    a parameter is built. The Python ones can be replaced for the whole
    process through the two ``set_python_*_function_descriptor`` helpers.
    """

    # Java callbacks.
    _java_reader_async_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker",
        "onReaderMessage", "([B)V")
    _java_reader_sync_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker",
        "onReaderMessageSync", "([B)[B")
    _java_writer_async_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker",
        "onWriterMessage", "([B)V")
    _java_writer_sync_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker",
        "onWriterMessageSync", "([B)[B")

    # Python callbacks (defaults; see the static setters below).
    _python_reader_async_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker",
        "on_reader_message", "JobWorker")
    _python_reader_sync_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker",
        "on_reader_message_sync", "JobWorker")
    _python_writer_async_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker",
        "on_writer_message", "JobWorker")
    _python_writer_sync_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker",
        "on_writer_message_sync", "JobWorker")

    def __init__(self):
        self._parameters = []

    def get_parameters(self):
        """Return the list of parameters built so far."""
        return self._parameters

    def build_input_queue_parameters(self, queue_ids_dict):
        """Append one parameter per actor handle in ``queue_ids_dict``.

        Input queues are built with the *writer* descriptors. Returns
        ``self`` so calls can be chained.
        """
        self.build_parameters(
            queue_ids_dict,
            self._java_writer_async_function_descriptor,
            self._java_writer_sync_function_descriptor,
            self._python_writer_async_function_descriptor,
            self._python_writer_sync_function_descriptor)
        return self

    def build_output_queue_parameters(self, to_actors):
        """Append one parameter per actor handle in ``to_actors``.

        Output queues are built with the *reader* descriptors. Returns
        ``self`` so calls can be chained.
        """
        self.build_parameters(
            to_actors,
            self._java_reader_async_function_descriptor,
            self._java_reader_sync_function_descriptor,
            self._python_reader_async_function_descriptor,
            self._python_reader_sync_function_descriptor)
        return self

    def build_parameters(self, actors, java_async_func,
                         java_sync_func, py_async_func, py_sync_func):
        """Append a ChannelCreationParameter for every handle in ``actors``.

        Handles whose language is ``Language.PYTHON`` get the Python
        descriptor pair; every other handle gets the Java pair.
        Returns ``self``.
        """
        for actor_handle in actors:
            if actor_handle._ray_actor_language == Language.PYTHON:
                async_func, sync_func = py_async_func, py_sync_func
            else:
                async_func, sync_func = java_async_func, java_sync_func
            self._parameters.append(
                _streaming.ChannelCreationParameter(
                    actor_handle._ray_actor_id, async_func, sync_func))
        return self

    @staticmethod
    def set_python_writer_function_descriptor(async_function, sync_function):
        """Replace the class-wide Python writer descriptors."""
        builder_cls = ChannelCreationParametersBuilder
        builder_cls._python_writer_async_function_descriptor = async_function
        builder_cls._python_writer_sync_function_descriptor = sync_function

    @staticmethod
    def set_python_reader_function_descriptor(async_function, sync_function):
        """Replace the class-wide Python reader descriptors."""
        builder_cls = ChannelCreationParametersBuilder
        builder_cls._python_reader_async_function_descriptor = async_function
        builder_cls._python_reader_sync_function_descriptor = sync_function
logger = logging.getLogger(__name__)
@@ -161,16 +242,16 @@ class DataWriter:
py_output_channels = [
channel_id_str_to_bytes(qid_str) for qid_str in output_channels
]
output_actor_ids: List[ActorID] = [
handle._ray_actor_id for handle in to_actors
]
creation_parameters = ChannelCreationParametersBuilder()
creation_parameters.build_output_queue_parameters(to_actors)
channel_size = conf.get(Config.CHANNEL_SIZE,
Config.CHANNEL_SIZE_DEFAULT)
py_msg_ids = [0 for _ in range(len(output_channels))]
config_bytes = _to_native_conf(conf)
is_mock = conf[Config.CHANNEL_TYPE] == Config.MEMORY_CHANNEL
self.writer = _streaming.DataWriter.create(
py_output_channels, output_actor_ids, channel_size, py_msg_ids,
py_output_channels, creation_parameters.get_parameters(),
channel_size, py_msg_ids,
config_bytes, is_mock)
logger.info("create DataWriter succeed")
@@ -215,9 +296,8 @@ class DataReader:
py_input_channels = [
channel_id_str_to_bytes(qid_str) for qid_str in input_channels
]
input_actor_ids: List[ActorID] = [
handle._ray_actor_id for handle in from_actors
]
creation_parameters = ChannelCreationParametersBuilder()
creation_parameters.build_input_queue_parameters(from_actors)
py_seq_ids = [0 for _ in range(len(input_channels))]
py_msg_ids = [0 for _ in range(len(input_channels))]
timer_interval = int(conf.get(Config.TIMER_INTERVAL_MS, -1))
@@ -226,7 +306,8 @@ class DataReader:
self.__queue = Queue(10000)
is_mock = conf[Config.CHANNEL_TYPE] == Config.MEMORY_CHANNEL
self.reader = _streaming.DataReader.create(
py_input_channels, input_actor_ids, py_seq_ids, py_msg_ids,
py_input_channels, creation_parameters.get_parameters(),
py_seq_ids, py_msg_ids,
timer_interval, is_recreate, config_bytes, is_mock)
logger.info("create DataReader succeed")
+2 -17
View File
@@ -4,7 +4,6 @@ import ray
import ray.streaming._streaming as _streaming
import ray.streaming.generated.remote_call_pb2 as remote_call_pb
import ray.streaming.runtime.processor as processor
from ray._raylet import PythonFunctionDescriptor
from ray.streaming.config import Config
from ray.streaming.runtime.graph import ExecutionGraph
from ray.streaming.runtime.task import SourceStreamTask, OneInputStreamTask
@@ -48,22 +47,8 @@ class JobWorker(object):
self.task_id, self.stream_processor))
if self.config.get(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL):
reader_async_func = PythonFunctionDescriptor(
__name__, self.on_reader_message.__name__,
self.__class__.__name__)
reader_sync_func = PythonFunctionDescriptor(
__name__, self.on_reader_message_sync.__name__,
self.__class__.__name__)
self.reader_client = _streaming.ReaderClient(
reader_async_func, reader_sync_func)
writer_async_func = PythonFunctionDescriptor(
__name__, self.on_writer_message.__name__,
self.__class__.__name__)
writer_sync_func = PythonFunctionDescriptor(
__name__, self.on_writer_message_sync.__name__,
self.__class__.__name__)
self.writer_client = _streaming.WriterClient(
writer_async_func, writer_sync_func)
self.reader_client = _streaming.ReaderClient()
self.writer_client = _streaming.WriterClient()
self.task = self.create_stream_task()
self.task.start()
+18 -14
View File
@@ -12,20 +12,8 @@ from ray.streaming.config import Config
@ray.remote
class Worker:
def __init__(self):
writer_async_func = PythonFunctionDescriptor(
__name__, self.on_writer_message.__name__, self.__class__.__name__)
writer_sync_func = PythonFunctionDescriptor(
__name__, self.on_writer_message_sync.__name__,
self.__class__.__name__)
self.writer_client = _streaming.WriterClient(writer_async_func,
writer_sync_func)
reader_async_func = PythonFunctionDescriptor(
__name__, self.on_reader_message.__name__, self.__class__.__name__)
reader_sync_func = PythonFunctionDescriptor(
__name__, self.on_reader_message_sync.__name__,
self.__class__.__name__)
self.reader_client = _streaming.ReaderClient(reader_async_func,
reader_sync_func)
self.writer_client = _streaming.WriterClient()
self.reader_client = _streaming.ReaderClient()
self.writer = None
self.output_channel_id = None
self.reader = None
@@ -35,6 +23,14 @@ class Worker:
Config.TASK_JOB_ID: ray.worker.global_worker.current_job_id,
Config.CHANNEL_TYPE: Config.NATIVE_CHANNEL
}
reader_async_func = PythonFunctionDescriptor(
__name__, self.on_reader_message.__name__, self.__class__.__name__)
reader_sync_func = PythonFunctionDescriptor(
__name__, self.on_reader_message_sync.__name__,
self.__class__.__name__)
transfer.ChannelCreationParametersBuilder.\
set_python_reader_function_descriptor(
reader_async_func, reader_sync_func)
self.writer = transfer.DataWriter([output_channel],
[pickle.loads(reader_actor)], conf)
self.output_channel_id = transfer.ChannelID(output_channel)
@@ -44,6 +40,14 @@ class Worker:
Config.TASK_JOB_ID: ray.worker.global_worker.current_job_id,
Config.CHANNEL_TYPE: Config.NATIVE_CHANNEL
}
writer_async_func = PythonFunctionDescriptor(
__name__, self.on_writer_message.__name__, self.__class__.__name__)
writer_sync_func = PythonFunctionDescriptor(
__name__, self.on_writer_message_sync.__name__,
self.__class__.__name__)
transfer.ChannelCreationParametersBuilder.\
set_python_writer_function_descriptor(
writer_async_func, writer_sync_func)
self.reader = transfer.DataReader([input_channel],
[pickle.loads(writer_actor)], conf)