#
# Copyright 2015 Quantopian, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Tools to generate data sources.
"""
import numpy as np
import pandas as pd

from zipline.gens.utils import hash_args

from zipline.sources.data_source import DataSource


class DataFrameSource(DataSource):
    """
    Data source that yields from a pandas DataFrame.

    :Axis layout:
        * columns : sids
        * index : datetime

    :Note:
        Bars where the price is nan are filtered out.
    """

    def __init__(self, data, **kwargs):
        assert isinstance(data.index, pd.tseries.index.DatetimeIndex)
        # Only accept integer SIDs as the items of the DataFrame
        assert isinstance(data.columns, pd.Int64Index)
        # TODO is ffilling correct/necessary?
        # Forward fill prices
        self.data = data.fillna(method='ffill')
        # Unpack config dictionary with default values.
        self.start = kwargs.get('start', self.data.index[0])
        self.end = kwargs.get('end', self.data.index[-1])
        self.sids = self.data.columns

        # Hash_value for downstream sorting.
        self.arg_string = hash_args(data, **kwargs)

        self._raw_data = None

        self.started_sids = set()

    @property
    def mapping(self):
        return {
            'dt': (lambda x: x, 'dt'),
            'sid': (lambda x: x, 'sid'),
            'price': (float, 'price'),
            'volume': (int, 'volume'),
        }

    @property
    def instance_hash(self):
        return self.arg_string

    def raw_data_gen(self):
        for dt, series in self.data.iterrows():
            for sid, price in series.iteritems():
                # Skip SIDs that can not be forward filled
                if np.isnan(price) and \
                   sid not in self.started_sids:
                    continue
                self.started_sids.add(sid)

                event = {
                    'dt': dt,
                    'sid': sid,
                    'price': price,
                    # Just chose something large
                    # if no volume available.
                    'volume': 1e9,
                }
                yield event

    @property
    def raw_data(self):
        if not self._raw_data:
            self._raw_data = self.raw_data_gen()
        return self._raw_data


class DataPanelSource(DataSource):
    """
    Data source that yields from a pandas Panel.

    :Axis layout:
        * items : sids
        * major_axis : datetime
        * minor_axis : price, volume, ...

    :Note:
        Bars where the price is nan are filtered out.
    """

    def __init__(self, data, **kwargs):
        assert isinstance(data.major_axis, pd.tseries.index.DatetimeIndex)
        # Only accept integer SIDs as the items of the Panel
        assert isinstance(data.items, pd.Int64Index)
        # TODO is ffilling correct/necessary?
        # forward fill with volumes of 0
        self.data = data.fillna(value={'volume': 0})
        self.data = self.data.fillna(method='ffill')
        # Unpack config dictionary with default values.
        self.start = kwargs.get('start', self.data.major_axis[0])
        self.end = kwargs.get('end', self.data.major_axis[-1])
        self.sids = self.data.items

        # Hash_value for downstream sorting.
        self.arg_string = hash_args(data, **kwargs)

        self._raw_data = None

        self.started_sids = set()

    @property
    def mapping(self):
        mapping = {
            'dt': (lambda x: x, 'dt'),
            'sid': (lambda x: x, 'sid'),
            'price': (float, 'price'),
            'volume': (int, 'volume'),
        }

        # Add additional fields.
        for field_name in self.data.minor_axis:
            if field_name in ['price', 'volume', 'dt', 'sid']:
                continue
            mapping[field_name] = (lambda x: x, field_name)

        return mapping

    @property
    def instance_hash(self):
        return self.arg_string

    def raw_data_gen(self):
        for dt in self.data.major_axis:
            df = self.data.major_xs(dt)
            for sid, series in df.iteritems():
                # Skip SIDs that can not be forward filled
                if np.isnan(series['price']) and \
                   sid not in self.started_sids:
                    continue
                self.started_sids.add(sid)

                event = {
                    'dt': dt,
                    'sid': sid,
                }
                for field_name, value in series.iteritems():
                    event[field_name] = value

                yield event

    @property
    def raw_data(self):
        if not self._raw_data:
            self._raw_data = self.raw_data_gen()
        return self._raw_data