Files
catalyst/zipline/utils/paths.py
T
Joe Jevnik 59c8e371a2 ENH: Updates the cli, data bundles and extensions.
Adds the data bundle concept which makes it easy for users to register
loading functions to build out minute and daily data along with an
assets db and adjustments db. By default we have provided a `quandl`
bundle which pulls from the public domain WIKI dataset. Users may
register new bundles by decorating an ingest function with
`zipline.data.bundles.register(<name>)`. This also provides a
`yahoo_equities` function for creating an ingestion function that will
load a static set of assets from yahoo.

The cli is now structured as a couple of subcommands and has been
changed to `python -m zipline`. The old behavior of `run_algo.py` has
been moved to the `run` subcommand. This is almost entirely the same
except that it now takes the name of the data bundle to use, defaulting
to `quandl`.

The next subcommand is `ingest` which takes the name of
a data bundle to ingest. This will run the loading machinery and write
the data to a specified location that `run` can find.

There is also a `clean` subcommand which deletes the data that was
written with `ingest`.

Extensions have also been added to zipline. This is an experimental
feature where users can provide an extra set of python files to run at
the start of the process. These can be used to configure aspects of
zipline. Right now the only thing that is supported in an extension file
is the registration of a new data bundle.
2016-05-03 18:38:24 -04:00

224 lines
4.8 KiB
Python

"""
Canonical path locations for zipline data.
Paths are rooted at $ZIPLINE_ROOT if that environment variable is set.
Otherwise they default to expanduser('~/.zipline').
"""
from errno import EEXIST
import os
from os.path import exists, expanduser, join
import pandas as pd
def hidden(path):
    """Check if a path is hidden.

    A path counts as hidden when its final component begins with a dot, so
    ``/data/.cache`` is hidden while ``.cache/file`` is not.

    Parameters
    ----------
    path : str
        A filepath.

    Returns
    -------
    is_hidden : bool
        True if the last component of ``path`` starts with ``'.'``.
    """
    # Only the final component decides hiddenness: a leading directory must
    # not mask a hidden name, and a hidden parent must not hide its children.
    return os.path.split(path)[1].startswith('.')
def ensure_directory(path):
    """
    Create the directory ``path`` (and any missing parents) if needed.

    An already existing directory is accepted silently; any other failure,
    including ``path`` existing as a non-directory, propagates as OSError.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        already_a_directory = exc.errno == EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise
def ensure_directory_containing(path):
    """
    Ensure that the directory containing `path` exists.

    This is a convenience wrapper for doing::

        ensure_directory(os.path.dirname(path))

    except that a `path` with no directory component (a bare file name) is
    treated as living in the current working directory, so nothing needs to
    be created.

    Parameters
    ----------
    path : str
        Path to a file whose parent directory should exist.
    """
    parent = os.path.dirname(path)
    if not parent:
        # os.makedirs('') raises, but a bare file name is relative to the
        # current working directory, which already exists.
        return
    ensure_directory(parent)
def last_modified_time(path):
    """
    Return the modification time of ``path`` as a UTC ``pd.Timestamp``.
    """
    # getmtime reports seconds since the epoch.
    mtime_seconds = os.path.getmtime(path)
    return pd.Timestamp(mtime_seconds, unit='s', tz='UTC')
def modified_since(path, dt):
    """
    Report whether `path` has been modified after `dt`.

    Parameters
    ----------
    path : str
        Path to the file to be checked.
    dt : pd.Timestamp
        The date against which to compare last_modified_time(path).

    Returns
    -------
    was_modified : bool
        ``False`` when `path` does not exist or when its last modified time
        is earlier than or equal to `dt`; ``True`` otherwise.
    """
    if not exists(path):
        return False
    return last_modified_time(path) > dt
def zipline_root(environ=None):
    """
    Return the root directory under which zipline keeps its files.

    The ``ZIPLINE_ROOT`` entry of the environment is used when present;
    otherwise the root is ``~/.zipline`` with the user's home expanded.

    Parameters
    ----------
    environ : dict, optional
        A dict to interpret as the os environment, mainly for testing.
        When omitted, ``os.environ`` is consulted.

    Returns
    -------
    root : string
        Path to the zipline root dir.
    """
    env = os.environ if environ is None else environ
    configured = env.get('ZIPLINE_ROOT', None)
    return expanduser('~/.zipline') if configured is None else configured
def zipline_path(paths, environ=None):
    """
    Build a path located underneath the zipline root.

    Parameters
    ----------
    paths : list[str]
        List of requested path pieces.
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    newpath : str
        The zipline root joined with each of the requested pieces, in order.
    """
    root = zipline_root(environ=environ)
    return join(root, *paths)
def default_extension(environ=None):
    """
    Return the location of the default zipline extension file.

    Parameters
    ----------
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    default_extension_path : str
        The path of ``extension.py`` directly under the zipline root.
    """
    extension_pieces = ['extension.py']
    return zipline_path(extension_pieces, environ=environ)
def data_root(environ=None):
    """
    Return the directory that holds zipline data files.

    Parameters
    ----------
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    data_root : str
        The ``data`` directory under the zipline root.
    """
    data_pieces = ['data']
    return zipline_path(data_pieces, environ=environ)
def ensure_data_root(environ=None):
    """
    Create the zipline data root directory if it does not already exist.

    Parameters
    ----------
    environ : dict, optional
        An environment dict to forward to data_root.
    """
    root = data_root(environ=environ)
    ensure_directory(root)
def data_path(paths, environ=None):
    """
    Build a path located underneath the zipline data directory.

    Parameters
    ----------
    paths : iterable[str]
        List of requested path pieces.
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    newpath : str
        The zipline data root joined with the requested pieces, in order.
    """
    pieces = ['data']
    pieces.extend(paths)
    return zipline_path(pieces, environ=environ)
def cache_root(environ=None):
    """
    Return the directory that holds zipline cache files.

    Parameters
    ----------
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    cache_root : str
        The ``cache`` directory under the zipline root.
    """
    cache_pieces = ['cache']
    return zipline_path(cache_pieces, environ=environ)
def ensure_cache_root(environ=None):
    """
    Create the zipline cache root directory if it does not already exist.

    Parameters
    ----------
    environ : dict, optional
        An environment dict to forward to cache_root.
    """
    root = cache_root(environ=environ)
    ensure_directory(root)
def cache_path(paths, environ=None):
    """
    Build a path located underneath the zipline cache directory.

    Parameters
    ----------
    paths : iterable[str]
        List of requested path pieces.
    environ : dict, optional
        An environment dict to forward to zipline_root.

    Returns
    -------
    newpath : str
        The zipline cache root joined with the requested pieces, in order.
    """
    pieces = ['cache']
    pieces.extend(paths)
    return zipline_path(pieces, environ=environ)