mirror of
https://github.com/wassname/catalyst.git
synced 2026-06-29 11:49:18 +08:00
a4205a0500
The minute to session sampling reading was creating two DataFrame objects, the first to hold the minute data, and then a second returned by the `DataFrame.groupby` to sample down to sessions. Instead use the arrays returned by the minute readers `load_raw_arrays` and implement sampling logic which takes advantage that the minutes being passed start with the first minute of the first session and end with the last minute of the last session. On my machine this takes the tests in `test/test_continuous_futures` from ~4.0 to about ~0.1 seconds.
109 lines
3.3 KiB
Cython
109 lines
3.3 KiB
Cython
# Copyright 2016 Quantopian, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
from cython cimport boundscheck, wraparound
|
|
from numpy import finfo, float64, nan, isnan
|
|
from numpy cimport intp_t, float64_t, uint32_t
|
|
|
|
@boundscheck(False)
|
|
@wraparound(False)
|
|
cpdef void _minute_to_session_open(intp_t[:] close_locs,
|
|
float64_t[:] data,
|
|
float64_t[:] out):
|
|
cdef intp_t i, close_loc, loc = 0
|
|
cdef float64_t val
|
|
for i, close_loc in enumerate(close_locs):
|
|
val = data[loc]
|
|
while isnan(val) and loc <= close_loc:
|
|
loc += 1
|
|
val = data[loc]
|
|
out[i] = val
|
|
loc = close_loc + 1
|
|
|
|
|
|
@boundscheck(False)
|
|
@wraparound(False)
|
|
cpdef void _minute_to_session_high(intp_t[:] close_locs,
|
|
float64_t[:] data,
|
|
float64_t[:] out):
|
|
cdef intp_t i, close_loc, loc = 0
|
|
cdef float64_t val
|
|
for i, close_loc in enumerate(close_locs):
|
|
val = -1
|
|
while loc <= close_loc:
|
|
val = max(val, data[loc])
|
|
loc += 1
|
|
if val == -1:
|
|
val = nan
|
|
out[i] = val
|
|
loc = close_loc + 1
|
|
|
|
|
|
@boundscheck(False)
|
|
@wraparound(False)
|
|
cpdef void _minute_to_session_low(intp_t[:] close_locs,
|
|
float64_t[:] data,
|
|
float64_t[:] out):
|
|
cdef intp_t i, close_loc, loc = 0
|
|
cdef float64_t val
|
|
cdef float64_t max_float = finfo(float64).max
|
|
for i, close_loc in enumerate(close_locs):
|
|
val = max_float
|
|
while loc <= close_loc:
|
|
val = min(val, data[loc])
|
|
loc += 1
|
|
if val == max_float:
|
|
val = nan
|
|
out[i] = val
|
|
loc = close_loc + 1
|
|
|
|
|
|
@boundscheck(False)
|
|
@wraparound(False)
|
|
cpdef void _minute_to_session_close(intp_t[:] close_locs,
|
|
float64_t[:] data,
|
|
float64_t[:] out):
|
|
cdef intp_t i, close_loc, loc = 0
|
|
cdef float64_t val
|
|
loc = len(data) - 1
|
|
num_out = len(close_locs)
|
|
for j in range(num_out, 0, -1):
|
|
i = j - 1
|
|
if i > 0:
|
|
close_loc = close_locs[i - 1]
|
|
else:
|
|
close_loc = -1
|
|
val = data[loc]
|
|
while isnan(val) and loc > close_loc:
|
|
loc -= 1
|
|
val = data[loc]
|
|
out[i] = val
|
|
loc = close_loc
|
|
|
|
|
|
@boundscheck(False)
|
|
@wraparound(False)
|
|
cpdef void _minute_to_session_volume(intp_t[:] close_locs,
|
|
uint32_t[:] data,
|
|
uint32_t[:] out):
|
|
cdef intp_t i, close_loc, loc = 0
|
|
cdef uint32_t val
|
|
loc = 0
|
|
for i, close_loc in enumerate(close_locs):
|
|
val = 0
|
|
while loc <= close_loc:
|
|
val += data[loc]
|
|
loc += 1
|
|
out[i] = val
|
|
loc = close_loc + 1
|