mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 14:31:15 +08:00
[Streaming] Streaming data transfer supports cross language. (#7961)
* add init parameters for java * fix bug * cython * fix compile * fix test_direct_transfer * comment * ChannelCreationParameter * fix comment * builder * lint and fix tests * fix single process test * fix checkstyle and lint * checkstyle * lint python Co-authored-by: wanxing <wanxing@B-458DMD6M-1753.local>
This commit is contained in:
@@ -97,16 +97,21 @@ cdef extern from "message/message_bundle.h" namespace "ray::streaming" nogil:
|
||||
void GetMessageListFromRawData(const uint8_t *data, uint32_t size, uint32_t msg_nums,
|
||||
c_list[shared_ptr[CStreamingMessage]] &msg_list);
|
||||
|
||||
cdef extern from "channel.h" namespace "ray::streaming" nogil:
|
||||
cdef struct CChannelCreationParameter "ray::streaming::ChannelCreationParameter":
|
||||
CChannelCreationParameter()
|
||||
CActorID actor_id;
|
||||
shared_ptr[CRayFunction] async_function;
|
||||
shared_ptr[CRayFunction] sync_function;
|
||||
|
||||
cdef extern from "queue/queue_client.h" namespace "ray::streaming" nogil:
|
||||
cdef cppclass CReaderClient "ray::streaming::ReaderClient":
|
||||
CReaderClient(CRayFunction &async_func,
|
||||
CRayFunction &sync_func)
|
||||
CReaderClient()
|
||||
void OnReaderMessage(shared_ptr[CLocalMemoryBuffer] buffer);
|
||||
shared_ptr[CLocalMemoryBuffer] OnReaderMessageSync(shared_ptr[CLocalMemoryBuffer] buffer);
|
||||
|
||||
cdef cppclass CWriterClient "ray::streaming::WriterClient":
|
||||
CWriterClient(CRayFunction &async_func,
|
||||
CRayFunction &sync_func)
|
||||
CWriterClient()
|
||||
void OnWriterMessage(shared_ptr[CLocalMemoryBuffer] buffer);
|
||||
shared_ptr[CLocalMemoryBuffer] OnWriterMessageSync(shared_ptr[CLocalMemoryBuffer] buffer);
|
||||
|
||||
@@ -122,7 +127,7 @@ cdef extern from "data_reader.h" namespace "ray::streaming" nogil:
|
||||
cdef cppclass CDataReader "ray::streaming::DataReader"(CStreamingCommon):
|
||||
CDataReader(shared_ptr[CRuntimeContext] &runtime_context)
|
||||
void Init(const c_vector[CObjectID] &input_ids,
|
||||
const c_vector[CActorID] &actor_ids,
|
||||
const c_vector[CChannelCreationParameter] &params,
|
||||
const c_vector[uint64_t] &seq_ids,
|
||||
const c_vector[uint64_t] &msg_ids,
|
||||
int64_t timer_interval);
|
||||
@@ -135,7 +140,7 @@ cdef extern from "data_writer.h" namespace "ray::streaming" nogil:
|
||||
cdef cppclass CDataWriter "ray::streaming::DataWriter"(CStreamingCommon):
|
||||
CDataWriter(shared_ptr[CRuntimeContext] &runtime_context)
|
||||
CStreamingStatus Init(const c_vector[CObjectID] &channel_ids,
|
||||
const c_vector[CActorID] &actor_ids,
|
||||
const c_vector[CChannelCreationParameter] &params,
|
||||
const c_vector[uint64_t] &message_ids,
|
||||
const c_vector[uint64_t] &queue_size_vec);
|
||||
long WriteMessageToBufferRing(
|
||||
|
||||
@@ -10,6 +10,7 @@ from libcpp.list cimport list as c_list
|
||||
from ray.includes.common cimport (
|
||||
CRayFunction,
|
||||
LANGUAGE_PYTHON,
|
||||
LANGUAGE_JAVA,
|
||||
CBuffer
|
||||
)
|
||||
|
||||
@@ -36,27 +37,43 @@ from ray.streaming.includes.libstreaming cimport (
|
||||
CReaderClient,
|
||||
CWriterClient,
|
||||
CLocalMemoryBuffer,
|
||||
CChannelCreationParameter,
|
||||
)
|
||||
from ray._raylet import JavaFunctionDescriptor
|
||||
|
||||
import logging
|
||||
|
||||
|
||||
channel_logger = logging.getLogger(__name__)
|
||||
|
||||
cdef class ChannelCreationParameter:
    """Python-visible wrapper around the native ``CChannelCreationParameter``.

    Holds the actor id of the channel peer together with the async and sync
    callback functions, each stored as a ``shared_ptr[CRayFunction]`` tagged
    with the language of the descriptor it was built from.
    """
    cdef:
        # Native struct that is copied into the vectors passed to Init().
        CChannelCreationParameter parameter

    def __cinit__(self, ActorID actor_id, FunctionDescriptor async_func, FunctionDescriptor sync_func):
        """Fill the native struct from the given actor id and descriptors.

        A descriptor that is a ``JavaFunctionDescriptor`` is tagged
        ``LANGUAGE_JAVA``; any other descriptor is tagged ``LANGUAGE_PYTHON``.
        """
        self.parameter = CChannelCreationParameter()
        self.parameter.actor_id = (<ActorID>actor_id).data

        # The language tag is chosen per descriptor, so the async and sync
        # callbacks are classified independently of each other.
        if isinstance(async_func, JavaFunctionDescriptor):
            self.parameter.async_function = make_shared[CRayFunction](LANGUAGE_JAVA, async_func.descriptor)
        else:
            self.parameter.async_function = make_shared[CRayFunction](LANGUAGE_PYTHON, async_func.descriptor)

        if isinstance(sync_func, JavaFunctionDescriptor):
            self.parameter.sync_function = make_shared[CRayFunction](LANGUAGE_JAVA, sync_func.descriptor)
        else:
            self.parameter.sync_function = make_shared[CRayFunction](LANGUAGE_PYTHON, sync_func.descriptor)

    cdef CChannelCreationParameter get_parameter(self):
        """Return the wrapped native struct (returned by value)."""
        return self.parameter
|
||||
|
||||
cdef class ReaderClient:
|
||||
cdef:
|
||||
CReaderClient *client
|
||||
|
||||
def __cinit__(self,
|
||||
FunctionDescriptor async_func,
|
||||
FunctionDescriptor sync_func):
|
||||
cdef:
|
||||
CRayFunction async_native_func
|
||||
CRayFunction sync_native_func
|
||||
async_native_func = CRayFunction(LANGUAGE_PYTHON, async_func.descriptor)
|
||||
sync_native_func = CRayFunction(LANGUAGE_PYTHON, sync_func.descriptor)
|
||||
self.client = new CReaderClient(async_native_func, sync_native_func)
|
||||
def __cinit__(self):
|
||||
self.client = new CReaderClient()
|
||||
|
||||
def __dealloc__(self):
|
||||
del self.client
|
||||
@@ -85,15 +102,8 @@ cdef class WriterClient:
|
||||
cdef:
|
||||
CWriterClient * client
|
||||
|
||||
def __cinit__(self,
|
||||
FunctionDescriptor async_func,
|
||||
FunctionDescriptor sync_func):
|
||||
cdef:
|
||||
CRayFunction async_native_func
|
||||
CRayFunction sync_native_func
|
||||
async_native_func = CRayFunction(LANGUAGE_PYTHON, async_func.descriptor)
|
||||
sync_native_func = CRayFunction(LANGUAGE_PYTHON, sync_func.descriptor)
|
||||
self.client = new CWriterClient(async_native_func, sync_native_func)
|
||||
def __cinit__(self):
|
||||
self.client = new CWriterClient()
|
||||
|
||||
def __dealloc__(self):
|
||||
del self.client
|
||||
@@ -127,19 +137,21 @@ cdef class DataWriter:
|
||||
|
||||
@staticmethod
|
||||
def create(list py_output_channels,
|
||||
list output_actor_ids: list[ActorID],
|
||||
list output_creation_parameters: list[ChannelCreationParameter],
|
||||
uint64_t queue_size,
|
||||
list py_msg_ids,
|
||||
bytes config_bytes,
|
||||
c_bool is_mock):
|
||||
cdef:
|
||||
c_vector[CObjectID] channel_ids = bytes_list_to_qid_vec(py_output_channels)
|
||||
c_vector[CActorID] actor_ids
|
||||
c_vector[CChannelCreationParameter] initial_parameters
|
||||
c_vector[uint64_t] msg_ids
|
||||
CDataWriter *c_writer
|
||||
ChannelCreationParameter parameter
|
||||
cdef const unsigned char[:] config_data
|
||||
for actor_id in output_actor_ids:
|
||||
actor_ids.push_back((<ActorID>actor_id).data)
|
||||
for param in output_creation_parameters:
|
||||
parameter = param
|
||||
initial_parameters.push_back(parameter.get_parameter())
|
||||
for py_msg_id in py_msg_ids:
|
||||
msg_ids.push_back(<uint64_t>py_msg_id)
|
||||
|
||||
@@ -156,7 +168,7 @@ cdef class DataWriter:
|
||||
c_vector[uint64_t] queue_size_vec
|
||||
for i in range(channel_ids.size()):
|
||||
queue_size_vec.push_back(queue_size)
|
||||
cdef CStreamingStatus status = c_writer.Init(channel_ids, actor_ids, msg_ids, queue_size_vec)
|
||||
cdef CStreamingStatus status = c_writer.Init(channel_ids, initial_parameters, msg_ids, queue_size_vec)
|
||||
if remain_id_vec.size() != 0:
|
||||
channel_logger.warning("failed queue amounts => %s", remain_id_vec.size())
|
||||
if <uint32_t>status != <uint32_t> libstreaming.StatusOK:
|
||||
@@ -205,7 +217,7 @@ cdef class DataReader:
|
||||
|
||||
@staticmethod
|
||||
def create(list py_input_queues,
|
||||
list input_actor_ids: list[ActorID],
|
||||
list input_creation_parameters: list[ChannelCreationParameter],
|
||||
list py_seq_ids,
|
||||
list py_msg_ids,
|
||||
int64_t timer_interval,
|
||||
@@ -214,13 +226,15 @@ cdef class DataReader:
|
||||
c_bool is_mock):
|
||||
cdef:
|
||||
c_vector[CObjectID] queue_id_vec = bytes_list_to_qid_vec(py_input_queues)
|
||||
c_vector[CActorID] actor_ids
|
||||
c_vector[CChannelCreationParameter] initial_parameters
|
||||
c_vector[uint64_t] seq_ids
|
||||
c_vector[uint64_t] msg_ids
|
||||
CDataReader *c_reader
|
||||
ChannelCreationParameter parameter
|
||||
cdef const unsigned char[:] config_data
|
||||
for actor_id in input_actor_ids:
|
||||
actor_ids.push_back((<ActorID>actor_id).data)
|
||||
for param in input_creation_parameters:
|
||||
parameter = param
|
||||
initial_parameters.push_back(parameter.get_parameter())
|
||||
for py_seq_id in py_seq_ids:
|
||||
seq_ids.push_back(<uint64_t>py_seq_id)
|
||||
for py_msg_id in py_msg_ids:
|
||||
@@ -233,7 +247,7 @@ cdef class DataReader:
|
||||
if is_mock:
|
||||
ctx.get().MarkMockTest()
|
||||
c_reader = new CDataReader(ctx)
|
||||
c_reader.Init(queue_id_vec, actor_ids, seq_ids, msg_ids, timer_interval)
|
||||
c_reader.Init(queue_id_vec, initial_parameters, seq_ids, msg_ids, timer_interval)
|
||||
channel_logger.info("create native reader succeed")
|
||||
cdef DataReader reader = DataReader.__new__(DataReader)
|
||||
reader.reader = c_reader
|
||||
|
||||
@@ -6,9 +6,11 @@ from typing import List
|
||||
import ray
|
||||
import ray.streaming._streaming as _streaming
|
||||
import ray.streaming.generated.streaming_pb2 as streaming_pb
|
||||
from ray import ActorID
|
||||
from ray.actor import ActorHandle
|
||||
from ray.streaming.config import Config
|
||||
from ray._raylet import JavaFunctionDescriptor
|
||||
from ray._raylet import PythonFunctionDescriptor
|
||||
from ray._raylet import Language
|
||||
|
||||
CHANNEL_ID_LEN = 20
|
||||
|
||||
@@ -140,6 +142,85 @@ class DataMessage:
|
||||
return self.__message_id
|
||||
|
||||
|
||||
class ChannelCreationParametersBuilder:
    """Collect the creation parameters needed by streaming queues.

    For every actor handle it is given, the builder appends one
    ``_streaming.ChannelCreationParameter`` holding the handle's actor id
    and a pair of (async, sync) callback descriptors. The Python pair is
    used when the handle's ``_ray_actor_language`` equals
    ``Language.PYTHON``; otherwise the Java pair is used.

    The descriptors are class attributes, so the static setters below
    change them for every builder instance.
    """

    # Callback descriptors used for peers that are not Python actors.
    _java_reader_async_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker", "onReaderMessage", "([B)V")
    _java_reader_sync_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker", "onReaderMessageSync", "([B)[B")
    _java_writer_async_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker", "onWriterMessage", "([B)V")
    _java_writer_sync_function_descriptor = JavaFunctionDescriptor(
        "org.ray.streaming.runtime.worker", "onWriterMessageSync", "([B)[B")

    # Callback descriptors used for Python peers.
    _python_reader_async_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker", "on_reader_message",
        "JobWorker")
    _python_reader_sync_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker", "on_reader_message_sync",
        "JobWorker")
    _python_writer_async_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker", "on_writer_message",
        "JobWorker")
    _python_writer_sync_function_descriptor = PythonFunctionDescriptor(
        "ray.streaming.runtime.core.worker", "on_writer_message_sync",
        "JobWorker")

    def __init__(self):
        # Parameters accumulated by the build_* methods, in call order.
        self._parameters = []

    def get_parameters(self):
        """Return the list of parameters built so far."""
        return self._parameters

    def build_input_queue_parameters(self, queue_ids_dict):
        """Append parameters for input queues, using the writer callbacks.

        Args:
            queue_ids_dict: iterated directly; each item must expose
                ``_ray_actor_language`` and ``_ray_actor_id`` (i.e. it is
                treated as a collection of actor handles).

        Returns:
            This builder.
        """
        java_async = self._java_writer_async_function_descriptor
        java_sync = self._java_writer_sync_function_descriptor
        py_async = self._python_writer_async_function_descriptor
        py_sync = self._python_writer_sync_function_descriptor
        self.build_parameters(queue_ids_dict, java_async, java_sync,
                              py_async, py_sync)
        return self

    def build_output_queue_parameters(self, to_actors):
        """Append parameters for output queues, using the reader callbacks.

        Args:
            to_actors: iterable of actor handles.

        Returns:
            This builder.
        """
        java_async = self._java_reader_async_function_descriptor
        java_sync = self._java_reader_sync_function_descriptor
        py_async = self._python_reader_async_function_descriptor
        py_sync = self._python_reader_sync_function_descriptor
        self.build_parameters(to_actors, java_async, java_sync,
                              py_async, py_sync)
        return self

    def build_parameters(self, actors, java_async_func,
                         java_sync_func, py_async_func, py_sync_func):
        """Append one creation parameter per actor handle.

        Args:
            actors: iterable of actor handles.
            java_async_func: async descriptor for non-Python actors.
            java_sync_func: sync descriptor for non-Python actors.
            py_async_func: async descriptor for Python actors.
            py_sync_func: sync descriptor for Python actors.

        Returns:
            This builder.
        """
        for actor in actors:
            # Every language other than Python falls through to the Java
            # descriptors.
            if actor._ray_actor_language == Language.PYTHON:
                callbacks = (py_async_func, py_sync_func)
            else:
                callbacks = (java_async_func, java_sync_func)
            self._parameters.append(
                _streaming.ChannelCreationParameter(
                    actor._ray_actor_id, *callbacks))
        return self

    @staticmethod
    def set_python_writer_function_descriptor(async_function, sync_function):
        """Replace the class-wide Python writer callback descriptors."""
        builder_cls = ChannelCreationParametersBuilder
        builder_cls._python_writer_async_function_descriptor = async_function
        builder_cls._python_writer_sync_function_descriptor = sync_function

    @staticmethod
    def set_python_reader_function_descriptor(async_function, sync_function):
        """Replace the class-wide Python reader callback descriptors."""
        builder_cls = ChannelCreationParametersBuilder
        builder_cls._python_reader_async_function_descriptor = async_function
        builder_cls._python_reader_sync_function_descriptor = sync_function
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -161,16 +242,16 @@ class DataWriter:
|
||||
py_output_channels = [
|
||||
channel_id_str_to_bytes(qid_str) for qid_str in output_channels
|
||||
]
|
||||
output_actor_ids: List[ActorID] = [
|
||||
handle._ray_actor_id for handle in to_actors
|
||||
]
|
||||
creation_parameters = ChannelCreationParametersBuilder()
|
||||
creation_parameters.build_output_queue_parameters(to_actors)
|
||||
channel_size = conf.get(Config.CHANNEL_SIZE,
|
||||
Config.CHANNEL_SIZE_DEFAULT)
|
||||
py_msg_ids = [0 for _ in range(len(output_channels))]
|
||||
config_bytes = _to_native_conf(conf)
|
||||
is_mock = conf[Config.CHANNEL_TYPE] == Config.MEMORY_CHANNEL
|
||||
self.writer = _streaming.DataWriter.create(
|
||||
py_output_channels, output_actor_ids, channel_size, py_msg_ids,
|
||||
py_output_channels, creation_parameters.get_parameters(),
|
||||
channel_size, py_msg_ids,
|
||||
config_bytes, is_mock)
|
||||
|
||||
logger.info("create DataWriter succeed")
|
||||
@@ -215,9 +296,8 @@ class DataReader:
|
||||
py_input_channels = [
|
||||
channel_id_str_to_bytes(qid_str) for qid_str in input_channels
|
||||
]
|
||||
input_actor_ids: List[ActorID] = [
|
||||
handle._ray_actor_id for handle in from_actors
|
||||
]
|
||||
creation_parameters = ChannelCreationParametersBuilder()
|
||||
creation_parameters.build_input_queue_parameters(from_actors)
|
||||
py_seq_ids = [0 for _ in range(len(input_channels))]
|
||||
py_msg_ids = [0 for _ in range(len(input_channels))]
|
||||
timer_interval = int(conf.get(Config.TIMER_INTERVAL_MS, -1))
|
||||
@@ -226,7 +306,8 @@ class DataReader:
|
||||
self.__queue = Queue(10000)
|
||||
is_mock = conf[Config.CHANNEL_TYPE] == Config.MEMORY_CHANNEL
|
||||
self.reader = _streaming.DataReader.create(
|
||||
py_input_channels, input_actor_ids, py_seq_ids, py_msg_ids,
|
||||
py_input_channels, creation_parameters.get_parameters(),
|
||||
py_seq_ids, py_msg_ids,
|
||||
timer_interval, is_recreate, config_bytes, is_mock)
|
||||
logger.info("create DataReader succeed")
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ import ray
|
||||
import ray.streaming._streaming as _streaming
|
||||
import ray.streaming.generated.remote_call_pb2 as remote_call_pb
|
||||
import ray.streaming.runtime.processor as processor
|
||||
from ray._raylet import PythonFunctionDescriptor
|
||||
from ray.streaming.config import Config
|
||||
from ray.streaming.runtime.graph import ExecutionGraph
|
||||
from ray.streaming.runtime.task import SourceStreamTask, OneInputStreamTask
|
||||
@@ -48,22 +47,8 @@ class JobWorker(object):
|
||||
self.task_id, self.stream_processor))
|
||||
|
||||
if self.config.get(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL):
|
||||
reader_async_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_reader_message.__name__,
|
||||
self.__class__.__name__)
|
||||
reader_sync_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_reader_message_sync.__name__,
|
||||
self.__class__.__name__)
|
||||
self.reader_client = _streaming.ReaderClient(
|
||||
reader_async_func, reader_sync_func)
|
||||
writer_async_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_writer_message.__name__,
|
||||
self.__class__.__name__)
|
||||
writer_sync_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_writer_message_sync.__name__,
|
||||
self.__class__.__name__)
|
||||
self.writer_client = _streaming.WriterClient(
|
||||
writer_async_func, writer_sync_func)
|
||||
self.reader_client = _streaming.ReaderClient()
|
||||
self.writer_client = _streaming.WriterClient()
|
||||
|
||||
self.task = self.create_stream_task()
|
||||
self.task.start()
|
||||
|
||||
@@ -12,20 +12,8 @@ from ray.streaming.config import Config
|
||||
@ray.remote
|
||||
class Worker:
|
||||
def __init__(self):
|
||||
writer_async_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_writer_message.__name__, self.__class__.__name__)
|
||||
writer_sync_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_writer_message_sync.__name__,
|
||||
self.__class__.__name__)
|
||||
self.writer_client = _streaming.WriterClient(writer_async_func,
|
||||
writer_sync_func)
|
||||
reader_async_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_reader_message.__name__, self.__class__.__name__)
|
||||
reader_sync_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_reader_message_sync.__name__,
|
||||
self.__class__.__name__)
|
||||
self.reader_client = _streaming.ReaderClient(reader_async_func,
|
||||
reader_sync_func)
|
||||
self.writer_client = _streaming.WriterClient()
|
||||
self.reader_client = _streaming.ReaderClient()
|
||||
self.writer = None
|
||||
self.output_channel_id = None
|
||||
self.reader = None
|
||||
@@ -35,6 +23,14 @@ class Worker:
|
||||
Config.TASK_JOB_ID: ray.worker.global_worker.current_job_id,
|
||||
Config.CHANNEL_TYPE: Config.NATIVE_CHANNEL
|
||||
}
|
||||
reader_async_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_reader_message.__name__, self.__class__.__name__)
|
||||
reader_sync_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_reader_message_sync.__name__,
|
||||
self.__class__.__name__)
|
||||
transfer.ChannelCreationParametersBuilder.\
|
||||
set_python_reader_function_descriptor(
|
||||
reader_async_func, reader_sync_func)
|
||||
self.writer = transfer.DataWriter([output_channel],
|
||||
[pickle.loads(reader_actor)], conf)
|
||||
self.output_channel_id = transfer.ChannelID(output_channel)
|
||||
@@ -44,6 +40,14 @@ class Worker:
|
||||
Config.TASK_JOB_ID: ray.worker.global_worker.current_job_id,
|
||||
Config.CHANNEL_TYPE: Config.NATIVE_CHANNEL
|
||||
}
|
||||
writer_async_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_writer_message.__name__, self.__class__.__name__)
|
||||
writer_sync_func = PythonFunctionDescriptor(
|
||||
__name__, self.on_writer_message_sync.__name__,
|
||||
self.__class__.__name__)
|
||||
transfer.ChannelCreationParametersBuilder.\
|
||||
set_python_writer_function_descriptor(
|
||||
writer_async_func, writer_sync_func)
|
||||
self.reader = transfer.DataReader([input_channel],
|
||||
[pickle.loads(writer_actor)], conf)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user