mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 17:02:43 +08:00
[Streaming] Streaming data transfer and python integration (#6185)
This commit is contained in:
@@ -0,0 +1,226 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
import types
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel("INFO")
|
||||
|
||||
|
||||
def _identity(element):
|
||||
return element
|
||||
|
||||
|
||||
class ReadTextFile:
|
||||
"""A source operator instance that reads a text file line by line.
|
||||
|
||||
Attributes:
|
||||
filepath (string): The path to the input file.
|
||||
"""
|
||||
|
||||
def __init__(self, operator):
|
||||
self.filepath = operator.other_args
|
||||
# TODO (john): Handle possible exception here
|
||||
self.reader = open(self.filepath, "r")
|
||||
|
||||
# Read input file line by line
|
||||
def run(self, input_gate, output_gate):
|
||||
while True:
|
||||
record = self.reader.readline()
|
||||
# Reader returns empty string ('') on EOF
|
||||
if not record:
|
||||
self.reader.close()
|
||||
return
|
||||
output_gate.push(
|
||||
record[:-1]) # Push after removing newline characters
|
||||
|
||||
|
||||
class Map:
|
||||
"""A map operator instance that applies a user-defined
|
||||
stream transformation.
|
||||
|
||||
A map produces exactly one output record for each record in
|
||||
the input stream.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, operator):
|
||||
self.map_fn = operator.logic
|
||||
|
||||
# Applies the mapper each record of the input stream(s)
|
||||
# and pushes resulting records to the output stream(s)
|
||||
def run(self, input_gate, output_gate):
|
||||
elements = 0
|
||||
while True:
|
||||
record = input_gate.pull()
|
||||
if record is None:
|
||||
return
|
||||
output_gate.push(self.map_fn(record))
|
||||
elements += 1
|
||||
|
||||
|
||||
class FlatMap:
|
||||
"""A map operator instance that applies a user-defined
|
||||
stream transformation.
|
||||
|
||||
A flatmap produces one or more output records for each record in
|
||||
the input stream.
|
||||
|
||||
Attributes:
|
||||
flatmap_fn (function): The user-defined function.
|
||||
"""
|
||||
|
||||
def __init__(self, operator):
|
||||
self.flatmap_fn = operator.logic
|
||||
|
||||
# Applies the splitter to the records of the input stream(s)
|
||||
# and pushes resulting records to the output stream(s)
|
||||
def run(self, input_gate, output_gate):
|
||||
while True:
|
||||
record = input_gate.pull()
|
||||
if record is None:
|
||||
return
|
||||
output_gate.push_all(self.flatmap_fn(record))
|
||||
|
||||
|
||||
class Filter:
|
||||
"""A filter operator instance that applies a user-defined filter to
|
||||
each record of the stream.
|
||||
|
||||
Output records are those that pass the filter, i.e. those for which
|
||||
the filter function returns True.
|
||||
|
||||
Attributes:
|
||||
filter_fn (function): The user-defined boolean function.
|
||||
"""
|
||||
|
||||
def __init__(self, operator):
|
||||
self.filter_fn = operator.logic
|
||||
|
||||
# Applies the filter to the records of the input stream(s)
|
||||
# and pushes resulting records to the output stream(s)
|
||||
def run(self, input_gate, output_gate):
|
||||
while True:
|
||||
record = input_gate.pull()
|
||||
if record is None:
|
||||
return
|
||||
if self.filter_fn(record):
|
||||
output_gate.push(record)
|
||||
|
||||
|
||||
class Inspect:
|
||||
"""A inspect operator instance that inspects the content of the stream.
|
||||
Inspect is useful for printing the records in the stream.
|
||||
"""
|
||||
|
||||
def __init__(self, operator):
|
||||
self.inspect_fn = operator.logic
|
||||
|
||||
def run(self, input_gate, output_gate):
|
||||
# Applies the inspect logic (e.g. print) to the records of
|
||||
# the input stream(s)
|
||||
# and leaves stream unaffected by simply pushing the records to
|
||||
# the output stream(s)
|
||||
while True:
|
||||
record = input_gate.pull()
|
||||
if record is None:
|
||||
return
|
||||
if output_gate:
|
||||
output_gate.push(record)
|
||||
self.inspect_fn(record)
|
||||
|
||||
|
||||
class Reduce:
|
||||
"""A reduce operator instance that combines a new value for a key
|
||||
with the last reduced one according to a user-defined logic.
|
||||
"""
|
||||
|
||||
def __init__(self, operator):
|
||||
self.reduce_fn = operator.logic
|
||||
# Set the attribute selector
|
||||
self.attribute_selector = operator.other_args
|
||||
if self.attribute_selector is None:
|
||||
self.attribute_selector = _identity
|
||||
elif isinstance(self.attribute_selector, int):
|
||||
self.key_index = self.attribute_selector
|
||||
self.attribute_selector =\
|
||||
lambda record: record[self.attribute_selector]
|
||||
elif isinstance(self.attribute_selector, str):
|
||||
self.attribute_selector =\
|
||||
lambda record: vars(record)[self.attribute_selector]
|
||||
elif not isinstance(self.attribute_selector, types.FunctionType):
|
||||
sys.exit("Unrecognized or unsupported key selector.")
|
||||
self.state = {} # key -> value
|
||||
|
||||
# Combines the input value for a key with the last reduced
|
||||
# value for that key to produce a new value.
|
||||
# Outputs the result as (key,new value)
|
||||
def run(self, input_gate, output_gate):
|
||||
while True:
|
||||
record = input_gate.pull()
|
||||
if record is None:
|
||||
return
|
||||
key, rest = record
|
||||
new_value = self.attribute_selector(rest)
|
||||
# TODO (john): Is there a way to update state with
|
||||
# a single dictionary lookup?
|
||||
try:
|
||||
old_value = self.state[key]
|
||||
new_value = self.reduce_fn(old_value, new_value)
|
||||
self.state[key] = new_value
|
||||
except KeyError: # Key does not exist in state
|
||||
self.state.setdefault(key, new_value)
|
||||
output_gate.push((key, new_value))
|
||||
|
||||
# Returns the state of the actor
|
||||
def get_state(self):
|
||||
return self.state
|
||||
|
||||
|
||||
class KeyBy:
|
||||
"""A key_by operator instance that physically partitions the
|
||||
stream based on a key.
|
||||
"""
|
||||
|
||||
def __init__(self, operator):
|
||||
# Set the key selector
|
||||
self.key_selector = operator.other_args
|
||||
if isinstance(self.key_selector, int):
|
||||
self.key_selector = lambda r: r[self.key_selector]
|
||||
elif isinstance(self.key_selector, str):
|
||||
self.key_selector = lambda record: vars(record)[self.key_selector]
|
||||
elif not isinstance(self.key_selector, types.FunctionType):
|
||||
sys.exit("Unrecognized or unsupported key selector.")
|
||||
|
||||
# The actual partitioning is done by the output gate
|
||||
def run(self, input_gate, output_gate):
|
||||
while True:
|
||||
record = input_gate.pull()
|
||||
if record is None:
|
||||
return
|
||||
key = self.key_selector(record)
|
||||
output_gate.push((key, record))
|
||||
|
||||
|
||||
# A custom source actor
|
||||
class Source:
|
||||
def __init__(self, operator):
|
||||
# The user-defined source with a get_next() method
|
||||
self.source = operator.logic
|
||||
|
||||
# Starts the source by calling get_next() repeatedly
|
||||
def run(self, input_gate, output_gate):
|
||||
start = time.time()
|
||||
elements = 0
|
||||
while True:
|
||||
record = self.source.get_next()
|
||||
if not record:
|
||||
logger.debug("[writer] puts per second: {}".format(
|
||||
elements / (time.time() - start)))
|
||||
return
|
||||
output_gate.push(record)
|
||||
elements += 1
|
||||
Reference in New Issue
Block a user