ENH: Add zipline.utils.preprocess.

Implements tools for preprocessing the arguments to user-facing
functions.
This commit is contained in:
Scott Sanderson
2015-09-27 21:03:55 -04:00
parent 75138343ba
commit 00c413e9d4
3 changed files with 528 additions and 1 deletions
+14 -1
View File
@@ -11,6 +11,7 @@ from zipline.modelling import (
from zipline.utils import (
memoize,
test_utils,
preprocess,
)
@@ -35,9 +36,18 @@ class DoctestTestCase(TestCase):
"pdbpp is installed." % module.__name__, file=sys.__stdout__)
return
try:
doctest.testmod(module, verbose=True, raise_on_error=True)
doctest.testmod(
module,
verbose=True,
raise_on_error=True,
optionflags=self.flags,
)
except doctest.UnexpectedException as e:
raise e.exc_info[1]
except doctest.DocTestFailure as e:
print("Got:")
print(e.got)
raise
def test_adjustment_docs(self):
    """Check the `adjustment` module's docs via `_check_docs`."""
    self._check_docs(adjustment)
@@ -53,3 +63,6 @@ class DoctestTestCase(TestCase):
def test_test_utils_docs(self):
    """Check the `test_utils` module's docs via `_check_docs`."""
    self._check_docs(test_utils)
def test_preprocess_docs(self):
    """Check the `preprocess` module's docs via `_check_docs`."""
    self._check_docs(preprocess)
+225
View File
@@ -0,0 +1,225 @@
"""
Tests for zipline.utils.preprocess.
"""
from types import FunctionType
from unittest import TestCase
from nose_parameterized import parameterized
from zipline.utils.preprocess import call, expect_types, preprocess, optional
def noop(func, argname, argvalue):
    """
    Identity processor used by the tests.

    Checks that `preprocess` hands processors a function object and a
    string argument name, then returns the argument value untouched.
    """
    for received, expected_type in ((func, FunctionType), (argname, str)):
        assert isinstance(received, expected_type)
    return argvalue
class PreprocessTestCase(TestCase):
    """
    Tests for the `preprocess` and `expect_types` decorators and for the
    `call` and `optional` helpers.
    """

    @parameterized.expand([
        ('too_many', (1, 2, 3), {}),
        ('too_few', (1,), {}),
        # 'x' is also supplied positionally, so this is a genuine
        # positional/keyword collision (an unknown name would just repeat
        # the 'unexpected' case below).
        ('collision', (1,), {'x': 1}),
        ('unexpected', (1,), {'q': 1}),
    ])
    def test_preprocess_doesnt_change_TypeErrors(self, name, args, kwargs):
        """
        Verify that the preprocess decorator doesn't swallow TypeErrors that
        would be raised when calling a function with invalid arguments.
        """
        def undecorated(x, y):
            return x, y

        decorated = preprocess(x=noop, y=noop)(undecorated)

        with self.assertRaises(TypeError) as e:
            undecorated(*args, **kwargs)
        undecorated_errargs = e.exception.args

        with self.assertRaises(TypeError) as e:
            decorated(*args, **kwargs)
        decorated_errargs = e.exception.args

        self.assertEqual(len(decorated_errargs), 1)
        self.assertEqual(len(undecorated_errargs), 1)

        # The decorated function must fail with exactly the same message.
        self.assertEqual(decorated_errargs[0], undecorated_errargs[0])

    def test_preprocess_co_filename(self):
        """The generated wrapper reports the wrapped function's filename."""
        def undecorated():
            pass

        decorated = preprocess()(undecorated)

        self.assertEqual(
            undecorated.__code__.co_filename,
            decorated.__code__.co_filename,
        )

    def test_preprocess_preserves_docstring(self):
        """The wrapper keeps the wrapped function's docstring."""
        @preprocess()
        def func():
            "My awesome docstring"

        self.assertEqual(func.__doc__, "My awesome docstring")

    def test_preprocess_preserves_function_name(self):
        """The wrapper keeps the wrapped function's `__name__`."""
        @preprocess()
        def arglebargle():
            pass

        self.assertEqual(arglebargle.__name__, 'arglebargle')

    @parameterized.expand([
        ((1, 2), {}),
        ((1, 2), {'c': 3}),
        ((1,), {'b': 2}),
        ((), {'a': 1, 'b': 2}),
        ((), {'a': 1, 'b': 2, 'c': 3}),
    ])
    def test_preprocess_no_processors(self, args, kwargs):
        """With no processors, arguments and defaults pass through as-is."""
        @preprocess()
        def func(a, b, c=3):
            return a, b, c

        self.assertEqual(func(*args, **kwargs), (1, 2, 3))

    def test_preprocess_bad_processor_name(self):
        """Processors naming a non-existent argument are rejected."""
        a_processor = preprocess(a=int)

        # Should work fine.
        @a_processor
        def func_with_arg_named_a(a):
            pass

        @a_processor
        def func_with_default_arg_named_a(a=1):
            pass

        message = "Got processors for unknown arguments: %s." % {'a'}
        with self.assertRaises(TypeError) as e:
            @a_processor
            def func_with_no_args():
                pass
        self.assertEqual(e.exception.args[0], message)

        with self.assertRaises(TypeError) as e:
            @a_processor
            def func_with_arg_named_b(b):
                pass
        self.assertEqual(e.exception.args[0], message)

    @parameterized.expand([
        ((1, 2), {}),
        ((1, 2), {'c': 3}),
        ((1,), {'b': 2}),
        ((), {'a': 1, 'b': 2}),
        ((), {'a': 1, 'b': 2, 'c': 3}),
    ])
    def test_preprocess_on_function(self, args, kwargs):
        """Processors apply to positional, keyword and defaulted arguments."""
        decorators = [
            preprocess(a=call(str), b=call(float), c=call(lambda x: x + 1)),
        ]

        for decorator in decorators:
            @decorator
            def func(a, b, c=3):
                return a, b, c

            self.assertEqual(func(*args, **kwargs), ('1', 2.0, 4))

    @parameterized.expand([
        ((1, 2), {}),
        ((1, 2), {'c': 3}),
        ((1,), {'b': 2}),
        ((), {'a': 1, 'b': 2}),
        ((), {'a': 1, 'b': 2, 'c': 3}),
    ])
    def test_preprocess_on_method(self, args, kwargs):
        """Processors work on instance methods and classmethods."""
        decorators = [
            preprocess(a=call(str), b=call(float), c=call(lambda x: x + 1)),
        ]

        for decorator in decorators:
            class Foo(object):

                @decorator
                def method(self, a, b, c=3):
                    return a, b, c

                @classmethod
                @decorator
                def clsmeth(cls, a, b, c=3):
                    return a, b, c

            self.assertEqual(Foo.clsmeth(*args, **kwargs), ('1', 2.0, 4))
            self.assertEqual(Foo().method(*args, **kwargs), ('1', 2.0, 4))

    def test_expect_types(self):
        """`expect_types` accepts matching values and rejects others."""
        @expect_types(a=int, b=int)
        def foo(a, b, c):
            return a, b, c

        self.assertEqual(foo(1, 2, 3), (1, 2, 3))
        self.assertEqual(foo(1, 2, c=3), (1, 2, 3))
        self.assertEqual(foo(1, b=2, c=3), (1, 2, 3))
        # `c` has no declared type, so any value is accepted for it.
        self.assertEqual(foo(1, 2, c='3'), (1, 2, '3'))

        for not_int in (str, float):
            with self.assertRaises(TypeError) as e:
                foo(not_int(1), 2, 3)
            self.assertEqual(
                e.exception.args[0],
                "tests.utils.test_preprocess.foo() expected a value of type "
                "int for argument 'a', but got {t} instead.".format(
                    t=not_int.__name__,
                )
            )
            with self.assertRaises(TypeError):
                foo(1, not_int(2), 3)
            with self.assertRaises(TypeError):
                foo(not_int(1), not_int(2), 3)

    def test_expect_types_with_tuple(self):
        """A tuple of types accepts an instance of any member type."""
        @expect_types(a=(int, float))
        def foo(a):
            return a

        self.assertEqual(foo(1), 1)
        self.assertEqual(foo(1.0), 1.0)

        with self.assertRaises(TypeError) as e:
            foo('1')

        expected_message = (
            "tests.utils.test_preprocess.foo() expected a value of "
            "type int or float for argument 'a', but got str instead."
        )
        self.assertEqual(e.exception.args[0], expected_message)

    def test_expect_optional_types(self):
        """`optional(int)` accepts ints and None, and rejects other types."""
        @expect_types(a=optional(int))
        def foo(a=None):
            return a

        self.assertIs(foo(), None)
        self.assertIs(foo(None), None)
        self.assertIs(foo(a=None), None)

        self.assertEqual(foo(1), 1)
        self.assertEqual(foo(a=1), 1)

        with self.assertRaises(TypeError) as e:
            foo('1')

        expected_message = (
            "tests.utils.test_preprocess.foo() expected a value of "
            "type int or NoneType for argument 'a', but got str instead."
        )
        self.assertEqual(e.exception.args[0], expected_message)
+289
View File
@@ -0,0 +1,289 @@
"""
Utilities for validating inputs to user-facing API functions.
"""
from textwrap import dedent
from functools import wraps
from inspect import getargspec
from uuid import uuid4
from six import iteritems, viewkeys, exec_
from toolz import valmap
# Sentinel paired with signature arguments that have no default value.  A
# unique object is used so that any real default, including None, stays
# distinguishable from "no default".
NO_DEFAULT = object()
def expect_types(*_pos, **named):
    """
    Preprocessing decorator that verifies inputs have expected types.

    Parameters
    ----------
    **named : dict
        Map from argument name -> expected type, or tuple of accepted types.

    Returns
    -------
    decorator : function
        A `preprocess` decorator that type-checks the named arguments.

    Raises
    ------
    TypeError
        If any positional argument is passed, or if an expected type is
        neither a type nor a tuple.

    Usage
    -----
    >>> @expect_types(x=int, y=str)
    ... def foo(x, y):
    ...    return x, y
    ...
    >>> foo(2, '3')
    (2, '3')
    >>> foo(2.0, '3')  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
       ...
    TypeError: ...foo() expected a value of type int for argument 'x',
    but got float instead.
    """
    if _pos:
        raise TypeError("expect_types() only takes keyword arguments.")

    # Reject bad specifications at decoration time, before any call is made.
    for name, type_ in iteritems(named):
        if not isinstance(type_, (type, tuple)):
            raise TypeError(
                "expect_types() expected a type or tuple of types for "
                "argument '{name}', but got {type_} instead.".format(
                    name=name, type_=type_,
                )
            )

    return preprocess(**valmap(_expect_type, named))
def preprocess(*_unused, **processors):
    """
    Decorator that applies pre-processors to the arguments of a function before
    calling the function.

    Parameters
    ----------
    **processors : dict
        Map from argument name -> processor function.

        A processor function takes three arguments: (func, argname, argvalue).

        `func` is the function for which we're processing args.
        `argname` is the name of the argument we're processing.
        `argvalue` is the value of the argument we're processing.

    Returns
    -------
    decorator : function
        Decorator that replaces a function with a same-signature wrapper
        which runs each processor on its argument before calling through.

    Raises
    ------
    TypeError
        If positional arguments are passed to `preprocess`, or (at
        decoration time) if the decorated function takes ``*args`` or
        ``**kwargs``, uses tuple-unpacking parameters, or lacks an argument
        that a processor was supplied for.

    Usage
    -----
    >>> def _ensure_tuple(func, argname, arg):
    ...     if isinstance(arg, tuple):
    ...         return arg
    ...     try:
    ...         return tuple(arg)
    ...     except TypeError:
    ...         raise TypeError(
    ...             "%s() expected argument '%s' to"
    ...             " be iterable, but got %s instead." % (
    ...                 func.__name__, argname, arg,
    ...             )
    ...         )
    ...
    >>> @preprocess(arg=_ensure_tuple)
    ... def foo(arg):
    ...     return arg
    ...
    >>> foo([1, 2, 3])
    (1, 2, 3)
    >>> foo("a")
    ('a',)
    >>> foo(2)
    Traceback (most recent call last):
        ...
    TypeError: foo() expected argument 'arg' to be iterable, but got 2 instead.
    """
    if _unused:
        raise TypeError("preprocess() doesn't accept positional arguments")

    def _decorator(f):
        args, varargs, varkw, defaults = argspec = getargspec(f)
        if defaults is None:
            defaults = ()

        # These assumptions simplify the implementation significantly.  If you
        # really want to validate a *args/**kwargs function, you'll have to
        # implement this here or do it yourself.
        #
        # `argspec` is a named tuple, so it is wrapped in a 1-tuple below;
        # otherwise `%` would treat its fields as separate format arguments.
        if varargs:
            raise TypeError(
                "Can't validate functions that take *args: %s" % (argspec,)
            )
        if varkw:
            raise TypeError(
                "Can't validate functions that take **kwargs: %s" % (argspec,)
            )

        # Arguments can be declared as tuples in Python 2.  This check runs
        # before anything hashes `args`, because such arguments are reported
        # as (unhashable) nested lists.
        if not all(isinstance(arg, str) for arg in args):
            raise TypeError(
                "Can't validate functions using tuple unpacking: %s"
                % (argspec,)
            )

        # Ensure that all processors map to valid names.
        bad_names = viewkeys(processors) - set(args)
        if bad_names:
            raise TypeError(
                "Got processors for unknown arguments: %s." % bad_names
            )

        # Pair every argument with its default, padding the leading
        # arguments (which have none) with the NO_DEFAULT sentinel.
        no_defaults = (NO_DEFAULT,) * (len(args) - len(defaults))
        args_defaults = list(zip(args, no_defaults + defaults))

        return _build_preprocessed_function(f, processors, args_defaults)
    return _decorator
def call(f):
    """
    Adapt a one-argument function into a `@preprocess` processor.

    The returned processor ignores the function and argument name it is
    handed and simply replaces the argument value with ``f(value)``.

    Parameters
    ----------
    f : function
        Function accepting a single argument and returning a replacement.

    Returns
    -------
    processor : function
        Callable with the (func, argname, argvalue) processor signature,
        carrying `f`'s metadata via `functools.wraps`.

    Usage
    -----
    >>> @preprocess(x=call(lambda x: x + 1))
    ... def foo(x):
    ...     return x
    ...
    >>> foo(1)
    2
    """
    def _apply(func, argname, arg):
        return f(arg)

    return wraps(f)(_apply)
def _qualified_name(obj):
"""
Return the fully-qualified name (ignoring inner classes) of a type.
"""
module = obj.__module__
if module in ('__builtin__', '__main__', 'builtins'):
return obj.__name__
return '.'.join([module, obj.__name__])
def _expect_type(type_):
    """
    Factory for type-checking functions that work with the @preprocess
    decorator.

    Parameters
    ----------
    type_ : type or tuple of types
        The type(s) an argument value must be an instance of.

    Returns
    -------
    processor : function
        A (func, argname, argvalue) processor that returns `argvalue`
        unchanged when it passes the check and raises TypeError otherwise.
    """
    # Treat a lone type as a one-element tuple so a single code path renders
    # the "A or B" list of acceptable type names.
    accepted = type_ if isinstance(type_, tuple) else (type_,)

    # The doubled braces survive this first format() call as single-brace
    # placeholders, which are filled in when a check actually fails.
    message = (
        "{{funcname}}() expected a value of type {type_or_types} "
        "for argument '{{argname}}', but got {{actual}} instead."
    ).format(type_or_types=' or '.join(map(_qualified_name, accepted)))

    def _check_type(func, argname, argvalue):
        if isinstance(argvalue, type_):
            return argvalue
        raise TypeError(
            message.format(
                funcname=_qualified_name(func),
                argname=argname,
                actual=_qualified_name(type(argvalue)),
            )
        )
    return _check_type
def optional(type_):
    """
    Helper for use with `expect_types` when an input can be `type_` or `None`.

    Returns an object such that both `None` and instances of `type_` pass
    checks of the form `isinstance(obj, optional(type_))`.

    Parameters
    ----------
    type_ : type or tuple of types
        Type(s) for which to produce an option.

    Returns
    -------
    types : tuple
        Flat tuple of the given type(s) followed by ``type(None)``.

    Examples
    --------
    >>> isinstance({}, optional(dict))
    True
    >>> isinstance(None, optional(dict))
    True
    >>> isinstance(1, optional(dict))
    False
    >>> isinstance(1.5, optional((int, float)))
    True
    """
    none_type = type(None)
    # Extend a tuple of types rather than nesting it, so the result stays a
    # flat tuple of types that `expect_types` can render in its messages.
    if isinstance(type_, tuple):
        return type_ + (none_type,)
    return (type_, none_type)
def _build_preprocessed_function(func, processors, args_defaults):
    """
    Build a preprocessed function with the same signature as `func`.

    Uses `exec` internally to build a function that actually has the same
    signature as `func`.

    Parameters
    ----------
    func : function
        The function being wrapped.
    processors : dict
        Map from argument name -> processor, called as
        ``processor(func, argname, argvalue)``.
    args_defaults : iterable of (str, object)
        (argument name, default) pairs in signature order.  Arguments with
        no default are paired with the ``NO_DEFAULT`` sentinel.

    Returns
    -------
    wrapper : function
        Function with `func`'s signature and metadata that runs the
        processors on its arguments and then calls `func`.
    """
    # The generated `def` statement needs a legal identifier, which a lambda's
    # '<lambda>' name is not.  `wraps` restores the real `__name__` afterwards.
    def_name = func.__name__
    if def_name == '<lambda>':
        def_name = '_lambda'

    def mangle(name):
        # Random prefix so injected globals can't collide with argument names.
        return 'a' + uuid4().hex + name

    mangled_funcname = mangle(def_name)
    default_name_template = 'a' + uuid4().hex + '_%d'

    # Objects that can't be written as source text (the wrapped function,
    # default values, processors) reach the generated code via its globals.
    exec_globals = {mangled_funcname: func, 'wraps': wraps}

    signature = []
    call_args = []
    assignments = []
    defaults_seen = 0
    for arg, default in args_defaults:
        if default is NO_DEFAULT:
            signature.append(arg)
        else:
            default_name = default_name_template % defaults_seen
            exec_globals[default_name] = default
            signature.append('='.join([arg, default_name]))
            defaults_seen += 1

        if arg in processors:
            procname = mangle('_processor_' + arg)
            exec_globals[procname] = processors[arg]
            assignments.append(
                "{arg} = {processor}({func}, '{arg}', {arg})".format(
                    arg=arg,
                    processor=procname,
                    func=mangled_funcname,
                )
            )

        call_args.append(arg + '=' + arg)

    # Assemble the source one line at a time so that the indentation of the
    # generated body is explicit.
    body = assignments + [
        'return {func}({call_args})'.format(
            func=mangled_funcname,
            call_args=', '.join(call_args),
        )
    ]
    lines = [
        '@wraps({func})'.format(func=mangled_funcname),
        'def {name}({signature}):'.format(
            name=def_name,
            signature=', '.join(signature),
        ),
    ]
    lines.extend('    ' + statement for statement in body)
    exec_str = '\n'.join(lines) + '\n'

    # Compile against the wrapped function's filename so that the wrapper's
    # code object reports the same `co_filename` as `func`.
    compiled = compile(
        exec_str,
        func.__code__.co_filename,
        mode='exec',
    )

    exec_locals = {}
    exec_(compiled, exec_globals, exec_locals)
    return exec_locals[def_name]