mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-28 17:00:51 +08:00
fce97176d6
* Updated cython build scripts * Updated setup.py to to install catalyst package * Updated momentum example to use catalyst package * catalyst executable now supports loading pipelines from multiple bundles
43 lines
1.4 KiB
Python
43 lines
1.4 KiB
Python
from toolz import partition_all
|
|
|
|
|
|
def compute_date_range_chunks(sessions, start_date, end_date, chunksize):
|
|
"""Compute the start and end dates to run a pipeline for.
|
|
|
|
Parameters
|
|
----------
|
|
sessions : DatetimeIndex
|
|
The available dates.
|
|
start_date : pd.Timestamp
|
|
The first date in the pipeline.
|
|
end_date : pd.Timestamp
|
|
The last date in the pipeline.
|
|
chunksize : int or None
|
|
The size of the chunks to run. Setting this to None returns one chunk.
|
|
|
|
Returns
|
|
-------
|
|
ranges : iterable[(np.datetime64, np.datetime64)]
|
|
A sequence of start and end dates to run the pipeline for.
|
|
"""
|
|
if start_date not in sessions:
|
|
raise KeyError("Start date %s is not found in calendar." %
|
|
(start_date.strftime("%Y-%m-%d"),))
|
|
if end_date not in sessions:
|
|
raise KeyError("End date %s is not found in calendar." %
|
|
(end_date.strftime("%Y-%m-%d"),))
|
|
if end_date < start_date:
|
|
raise ValueError("End date %s cannot precede start date %s." %
|
|
(end_date.strftime("%Y-%m-%d"),
|
|
start_date.strftime("%Y-%m-%d")))
|
|
|
|
if chunksize is None:
|
|
return [(start_date, end_date)]
|
|
|
|
start_ix, end_ix = sessions.slice_locs(start_date, end_date)
|
|
return (
|
|
(r[0], r[-1]) for r in partition_all(
|
|
chunksize, sessions[start_ix:end_ix]
|
|
)
|
|
)
|