mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-30 18:31:39 +08:00
59c8e371a2
Adds the data bundle concept which makes it easy for users to register loading functions to build out minute and daily data along with an assets db and adjustments db. By default we have provided a `quandl` bundle which pulls from the public domain WIKI dataset. Users may register new bundles by decorating an ingest function with `zipline.data.bundles.register(<name>)`. This also provides a `yahoo_equities` function for creating an ingestion function that will load a static set of assets from yahoo. The cli is now structured as a couple of subcommands and has been changed to `python -m zipline`. The old behavior of `run_algo.py` has been moved to the `run` subcommand. This is almost entirely the same except that it now takes the name of the data bundle to use, defaulting to `quandl`. The next subcommand is `ingest` which takes the name of a data bundle to ingest. This will run the loading machinery and write the data to a specified location that `run` can find. There is also a `clean` subcommand which deletes the data that was written with `ingest`. Extensions have also been added to zipline. This is an experimental feature where users can provide an extra set of python files to run at the start of the process. These can be used to configure aspects of zipline. Right now the only thing that is supported in an extension file is the registration of a new data bundle.
224 lines
4.8 KiB
Python
224 lines
4.8 KiB
Python
"""
|
|
Canonical path locations for zipline data.
|
|
|
|
Paths are rooted at $ZIPLINE_ROOT if that environment variable is set.
|
|
Otherwise default to expanduser(~/.zipline)
|
|
"""
|
|
from errno import EEXIST
|
|
import os
|
|
from os.path import exists, expanduser, join
|
|
|
|
import pandas as pd
|
|
|
|
|
|
def hidden(path):
    """Check if a path is hidden.

    A path counts as hidden when its final component starts with a dot,
    so both ``'.cache'`` and ``'some/dir/.cache'`` are hidden, while
    ``'./visible'`` is not.

    Parameters
    ----------
    path : str
        A filepath.

    Returns
    -------
    is_hidden : bool
        True if the last component of ``path`` begins with ``'.'``.
    """
    # Only the final component decides hiddenness. Testing the whole string
    # would misclassify relative paths such as './data' as hidden and would
    # miss hidden entries reached through a directory, e.g. 'data/.tmp'.
    return os.path.basename(path).startswith('.')
|
|
|
|
|
|
def ensure_directory(path):
    """
    Create the directory "path" (and any missing parents) if needed.

    An ``OSError`` raised by ``os.makedirs`` is swallowed only when it
    reports that the target already exists and the target is a directory;
    any other failure is re-raised unchanged.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        # An existing directory is exactly the state we want to reach.
        already_a_directory = err.errno == EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise
|
|
|
|
|
|
def ensure_directory_containing(path):
    """
    Ensure that the directory containing `path` exists.

    This is a convenience wrapper for doing::

        ensure_directory(os.path.dirname(path))

    with one exception: when `path` has no directory component (for
    example ``'file.txt'``), nothing is created and the function returns
    immediately.
    """
    parent = os.path.dirname(path)
    if not parent:
        # A bare filename has an empty dirname; passing '' on to
        # os.makedirs would raise, so there is nothing to create here.
        return
    ensure_directory(parent)
|
|
|
|
|
|
def last_modified_time(path):
    """
    Return the modification time of `path` as a UTC-localized Timestamp.

    The value comes from ``os.path.getmtime`` and is interpreted as seconds
    since the epoch.
    """
    mtime_seconds = os.path.getmtime(path)
    return pd.Timestamp(mtime_seconds, unit='s', tz='UTC')
|
|
|
|
|
|
def modified_since(path, dt):
    """
    Report whether `path` has been modified after `dt`.

    A missing path is never considered modified.

    Parameters
    ----------
    path : str
        Path to the file to be checked.
    dt : pd.Timestamp
        The date against which to compare last_modified_time(path).

    Returns
    -------
    was_modified : bool
        ``False`` if path doesn't exist, or if its last modified date is
        earlier than or equal to `dt`.
    """
    if not exists(path):
        # Nothing on disk, so there is no modification time to compare.
        return False
    return last_modified_time(path) > dt
|
|
|
|
|
|
def zipline_root(environ=None):
    """
    Return the root directory under which zipline keeps its files.

    The ``ZIPLINE_ROOT`` entry of the environment is used when present;
    otherwise the result is ``expanduser('~/.zipline')``. A mapping may be
    supplied in place of the real process environment, which is mainly
    useful for testing.

    Parameters
    ----------
    environ : dict, optional
        A dict to interpret as the os environment.

    Returns
    -------
    root : string
        Path to the zipline root dir.
    """
    env = os.environ if environ is None else environ
    configured = env.get('ZIPLINE_ROOT')
    return expanduser('~/.zipline') if configured is None else configured
|
|
|
|
|
|
def zipline_path(paths, environ=None):
    """
    Build a path located underneath the zipline root.

    Parameters
    ----------
    paths : list[str]
        List of requested path pieces.
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    newpath : str
        The requested path joined with the zipline root.
    """
    root = zipline_root(environ=environ)
    return join(root, *paths)
|
|
|
|
|
|
def default_extension(environ=None):
    """
    Return the location of the default zipline extension file.

    Parameters
    ----------
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    default_extension_path : str
        The file path to the default zipline extension file.
    """
    extension_pieces = ['extension.py']
    return zipline_path(extension_pieces, environ=environ)
|
|
|
|
|
|
def data_root(environ=None):
    """
    Return the directory that holds zipline data files.

    Parameters
    ----------
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    data_root : str
        The zipline data root.
    """
    data_pieces = ['data']
    return zipline_path(data_pieces, environ=environ)
|
|
|
|
|
|
def ensure_data_root(environ=None):
    """
    Create the data root directory if it does not already exist.
    """
    target = data_root(environ=environ)
    ensure_directory(target)
|
|
|
|
|
|
def data_path(paths, environ=None):
    """
    Build a path located underneath the zipline data directory.

    Parameters
    ----------
    paths : iterable[str]
        List of requested path pieces.
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    newpath : str
        The requested path joined with the zipline data root.
    """
    pieces = ['data']
    pieces.extend(paths)
    return zipline_path(pieces, environ=environ)
|
|
|
|
|
|
def cache_root(environ=None):
    """
    Return the directory that holds zipline cache files.

    Parameters
    ----------
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    cache_root : str
        The zipline cache root.
    """
    cache_pieces = ['cache']
    return zipline_path(cache_pieces, environ=environ)
|
|
|
|
|
|
def ensure_cache_root(environ=None):
    """
    Create the cache root directory if it does not already exist.
    """
    target = cache_root(environ=environ)
    ensure_directory(target)
|
|
|
|
|
|
def cache_path(paths, environ=None):
    """
    Build a path located underneath the zipline cache directory.

    Parameters
    ----------
    paths : iterable[str]
        List of requested path pieces.
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    newpath : str
        The requested path joined with the zipline cache root.
    """
    pieces = ['cache']
    pieces.extend(paths)
    return zipline_path(pieces, environ=environ)
|