diff --git a/etc/requirements_dev.txt b/etc/requirements_dev.txt
index c131f4a5..d9015774 100644
--- a/etc/requirements_dev.txt
+++ b/etc/requirements_dev.txt
@@ -33,3 +33,6 @@ wsgiref==0.1.2
 # misc
 pycleaner==1.1.1
 -e git://github.com/pydata/vbench.git#egg=vbench
+
+# profilers
+yappi==0.62
diff --git a/zipline/optimize/factory.py b/zipline/optimize/factory.py
index 153f1d57..cb0de069 100644
--- a/zipline/optimize/factory.py
+++ b/zipline/optimize/factory.py
@@ -4,6 +4,7 @@ Factory functions to prepare useful data for optimize tests.
 Author: Thomas V. Wiecki (thomas.wiecki@gmail.com), 2012
 """
 from datetime import timedelta
+import pandas as pd
 
 import zipline.protocol as zp
 
@@ -43,7 +44,7 @@ def create_updown_trade_source(sid, trade_count, trading_environment, base_price
     price = base_price-amplitude/2.
 
     cur = trading_environment.first_open
-    one_day = timedelta(days = 1)
+    one_day = timedelta(minutes = 1)#days = 1)
 
     #create iterator to cycle through up and down phases
     change = cycle([1,-1])
diff --git a/zipline/profile/prof_yappi.py b/zipline/profile/prof_yappi.py
new file mode 100644
index 00000000..58878fe8
--- /dev/null
+++ b/zipline/profile/prof_yappi.py
@@ -0,0 +1,123 @@
+"""Profile a zipline simulation with yappi and compare saved results."""
+from __future__ import division
+
+import logging
+import os.path
+
+import numpy as np
+import pandas as pd
+import yappi
+
+from zipline.core.devsimulator import AddressAllocator
+import zipline.finance
+from zipline.optimize.factory import create_predictable_zipline
+
+
+def convert_ystats(ystats):
+    """Convert yappi.get_stats().func_stats object to pandas
+    DataFrame.
+
+    Each entry of ystats is read positionally: item[0] is the function
+    name and item[1] to item[4] are ncall, ttot, tsub and tavg.  Only
+    the last path component of the name is kept as the row label.
+    """
+    columns = ('ncall', 'ttot', 'tsub', 'tavg')
+    func_names = []
+    data = dict((column, []) for column in columns)
+    # Walk ystats once instead of once per column.
+    for item in ystats:
+        func_names.append(os.path.split(item[0])[-1])
+        for pos, column in enumerate(columns, 1):
+            data[column].append(float(item[pos]))
+
+    return pd.DataFrame(data, index=func_names)
+
+
+allocator = AddressAllocator(1000)
+
+config = {
+    'allocator': allocator,
+    'sid': 133,
+    'trade_count': 5000,
+    'amplitude': 30,
+    'base_price': 50,
+}
+
+LOGGER = logging.getLogger('ZiplineLogger')
+
+
+def gen_single_stats(func, *args, **kwargs):
+    """Profile func(*args, **kwargs) with yappi.
+
+    Statistics left over from earlier profiling are cleared first so
+    that the result describes this call only.  The profiler is stopped
+    even if func raises.
+
+    Returns DataFrame of statistics.
+    """
+    yappi.clear_stats()
+    yappi.start()
+    try:
+        func(*args, **kwargs)
+    finally:
+        yappi.stop()
+    return convert_ystats(yappi.get_stats().func_stats)
+
+
+def gen_avg_stats(func, runs=1, *args, **kwargs):
+    """Profile func(*args, **kwargs) with yappi. Runs multiple times
+    and computes the average.
+
+    Raises ValueError if runs is smaller than 1.
+
+    Returns DataFrame of average statistics.
+    """
+    if runs < 1:
+        raise ValueError('runs must be at least 1, got %r' % (runs,))
+
+    run_ids = range(runs)
+    all_stats = pd.concat(
+        [gen_single_stats(func, *args, **kwargs) for _ in run_ids],
+        keys=run_ids)
+    # Level 0 of the index is the run number, level 1 the function name.
+    grouped = all_stats.groupby(level=1)
+
+    return grouped.aggregate(np.mean)
+
+
+def run_updown(fname='before_stats.csv'):
+    """Profile a zipline with the UpDown tradesource (does not require
+    DB access) and the buy/sell algorithm (requires no
+    computation).
+
+    Saves output statistics under fname.
+
+    Returns DataFrame of statistics.
+    """
+    zp, _ = create_predictable_zipline(config, simulate=False)
+    stats = gen_single_stats(zp.simulate, blocking=True)
+    stats.to_csv(fname)
+
+    return stats
+
+
+def calc_speedup(before='before_stats.csv', after='after_stats.csv'):
+    """Calculate speed-up between two previously run and saved
+    statistics under filename before and after.
+
+    Prints DataFrame of top 30 speed-ups and top 30 slow-downs.
+    """
+    old = pd.DataFrame.from_csv(before)
+    new = pd.DataFrame.from_csv(after)
+    # Ratios above 1 mean the "after" run was faster.  NaN ratios (e.g.
+    # a function missing from one of the two files) count as unchanged.
+    speed_up = (old / new).fillna(1)
+    speed_up = speed_up.sort(column='ttot', ascending=False)
+    slow_down = speed_up.sort(column='ttot', ascending=True)
+    print(speed_up[:30])
+    print(slow_down[:30])
+
+
+if __name__ == '__main__':
+    run_updown()
+    yappi.print_stats(sort_type=yappi.SORTTYPE_TTOT)