import Utils, numpy as np, scipy.sparse as sp
from Utils.SolverUtils import *
norm = np.linalg.norm


__all__ = ['Minimize', 'Remember', 'SteepestDescent', 'BFGS', 'GaussNewton', 'InexactGaussNewton', 'ProjectedGradient', 'NewtonRoot', 'StoppingCriteria', 'IterationPrinters']

SolverICG = SolverWrapI(sp.linalg.cg, checkAccuracy=False)

class StoppingCriteria(object):
    """docstring for StoppingCriteria"""

    iteration   = { "str": "%d : maxIter   =     %3d    <= iter          =    %3d",
                    "left": lambda M: M.maxIter, "right": lambda M: M.iter,
                    "stopType": "critical"}

    iterationLS = { "str": "%d : maxIterLS =     %3d    <= iterLS          =    %3d",
                    "left": lambda M: M.maxIterLS, "right": lambda M: M.iterLS,
                    "stopType": "critical"}

    armijoGoldstein = { "str": "%d :    ft     = %1.4e <= alp*descent     = %1.4e",
                        "left": lambda M: M._LS_ft, "right": lambda M: M.f + M.LSreduction * M._LS_descent,
                        "stopType": "optimal"}

    tolerance_f = { "str": "%d : |fc-fOld| = %1.4e <= tolF*(1+|f0|) = %1.4e",
                    "left":     lambda M: 1 if M.iter==0 else abs(M.f-M.f_last), "right":    lambda M: 0 if M.iter==0 else M.tolF*(1+abs(M.f0)),
                    "stopType": "optimal"}

    moving_x = { "str": "%d : |xc-x_last| = %1.4e <= tolX*(1+|x0|) = %1.4e",
                 "left": lambda M: 1 if M.iter==0 else norm(M.xc-M.x_last), "right": lambda M: 0 if M.iter==0 else M.tolX*(1+norm(M.x0)),
                 "stopType": "optimal"}

    tolerance_g = { "str": "%d : |proj(x-g)-x|    = %1.4e <= tolG          = %1.4e",
                   "left": lambda M: norm(M.projection(M.xc - M.g) - M.xc), "right": lambda M: M.tolG,
                   "stopType": "optimal"}

    norm_g = { "str": "%d : |proj(x-g)-x|    = %1.4e <= 1e3*eps       = %1.4e",
               "left": lambda M: norm(M.projection(M.xc - M.g) - M.xc), "right": lambda M: 1e3*M.eps,
               "stopType": "critical"}

    bindingSet = { "str": "%d : probSize  =    %3d   <= bindingSet      =    %3d",
                   "left": lambda M: M.xc.size, "right": lambda M: np.sum(M.bindingSet(M.xc)),
                   "stopType": "critical"}

    bindingSet_LS = { "str": "%d : probSize  =    %3d   <= bindingSet      =    %3d",
                      "left": lambda M: M._LS_xt.size, "right": lambda M: np.sum(M.bindingSet(M._LS_xt)),
                      "stopType": "critical"}

    phi_d_target_Minimize = { "str": "%d : phi_d  = %1.4e <= phi_d_target  = %1.4e ",
                              "left": lambda M: M.parent.phi_d, "right": lambda M: M.parent.phi_d_target,
                              "stopType": "critical"}

    phi_d_target_Inversion = { "str": "%d : phi_d  = %1.4e <= phi_d_target  = %1.4e ",
                               "left": lambda I: I.phi_d, "right": lambda I: I.phi_d_target,
                               "stopType": "critical"}


class IterationPrinters(object):
    """docstring for IterationPrinters"""

    iteration = {"title": "#", "value": lambda M: M.iter, "width": 5, "format": "%3d"}
    f = {"title": "f", "value": lambda M: M.f, "width": 10, "format": "%1.2e"}
    norm_g = {"title": "|proj(x-g)-x|", "value": lambda M: norm(M.projection(M.xc - M.g) - M.xc), "width": 15, "format": "%1.2e"}
    totalLS = {"title": "LS", "value": lambda M: M.iterLS, "width": 5, "format": "%d"}

    iterationLS = {"title": "#", "value": lambda M: (M.iter, M.iterLS), "width": 5, "format": "%3d.%d"}
    LS_ft = {"title": "ft", "value": lambda M: M._LS_ft, "width": 10, "format": "%1.2e"}
    LS_t = {"title": "t", "value": lambda M: M._LS_t, "width": 10, "format": "%0.5f"}
    LS_armijoGoldstein = {"title": "f + alp*g.T*p", "value": lambda M: M.f + M.LSreduction*M._LS_descent, "width": 16, "format": "%1.2e"}

    itType = {"title": "itType", "value": lambda M: M._itType, "width": 8, "format": "%s"}
    aSet = {"title": "aSet", "value": lambda M: np.sum(M.activeSet(M.xc)), "width": 8, "format": "%d"}
    bSet = {"title": "bSet", "value": lambda M: np.sum(M.bindingSet(M.xc)), "width": 8, "format": "%d"}
    comment = {"title": "Comment", "value": lambda M: M.comment, "width": 12, "format": "%s"}

    beta = {"title": "beta", "value": lambda M: M.parent.beta, "width": 10, "format":   "%1.2e"}
    phi_d = {"title": "phi_d", "value": lambda M: M.parent.phi_d, "width": 10, "format":   "%1.2e"}
    phi_m = {"title": "phi_m", "value": lambda M: M.parent.phi_m, "width": 10, "format":   "%1.2e"}


class Minimize(object):
    """
        Minimize is a general class for derivative based optimization.
    """

    __metaclass__ = Utils.SimPEGMetaClass

    name = "General Optimization Algorithm"  #: The name of the optimization algorithm

    maxIter = 20       #: Maximum number of iterations
    maxIterLS = 10     #: Maximum number of iterations for the line-search
    maxStep = np.inf   #: Maximum step possible, used in scaling before the line-search.
    LSreduction = 1e-4 #: Expected decrease in the line-search
    LSshorten = 0.5    #: Line-search step is shortened by this amount each time.
    tolF = 1e-1        #: Tolerance on function value decrease
    tolX = 1e-1        #: Tolerance on norm(x) movement
    tolG = 1e-1        #: Tolerance on gradient norm
    eps = 1e-5         #: Small value

    stopNextIteration = False #: Stops the optimization program nicely.

    debug   = False    #: Print debugging information
    debugLS = False    #: Print debugging information for the line-search

    comment = ''       #: Used by some functions to indicate what is going on in the algorithm
    counter = None     #: Set this to a SimPEG.Utils.Counter() if you want to count things
    parent = None      #: This is the parent of the optimization routine.

    def __init__(self, **kwargs):
        self.stoppers = [StoppingCriteria.tolerance_f, StoppingCriteria.moving_x, StoppingCriteria.tolerance_g, StoppingCriteria.norm_g, StoppingCriteria.iteration]
        self.stoppersLS = [StoppingCriteria.armijoGoldstein, StoppingCriteria.iterationLS]

        self.printers = [IterationPrinters.iteration, IterationPrinters.f, IterationPrinters.norm_g, IterationPrinters.totalLS]
        self.printersLS = [IterationPrinters.iterationLS, IterationPrinters.LS_ft, IterationPrinters.LS_t, IterationPrinters.LS_armijoGoldstein]

        Utils.setKwargs(self, **kwargs)

    @property
    def callback(self):
        return getattr(self, '_callback', None)
    @callback.setter
    def callback(self, value):
        if self.callback is not None:
            print 'The callback on the %s Optimization was replaced.' % self.__name__
        self._callback = value


    @Utils.timeIt
    def minimize(self, evalFunction, x0):
        """minimize(evalFunction, x0)

        Minimizes the function (evalFunction) starting at the location x0.

        :param def evalFunction: function handle that evaluates: f, g, H = F(x)
        :param numpy.ndarray x0: starting location
        :rtype: numpy.ndarray
        :return: x, the last iterate of the optimization algorithm

        evalFunction is a function handle::

            (f[, g][, H]) = evalFunction(x, return_g=False, return_H=False )

            def evalFunction(x, return_g=False, return_H=False):
                out = (f,)
                if return_g:
                    out += (g,)
                if return_H:
                    out += (H,)
                return out if len(out) > 1 else out[0]


        The algorithm for general minimization is as follows::

            startup(x0)
            printInit()

            while True:
                doStartIteration()
                f, g, H = evalFunction(xc)
                printIter()
                if stoppingCriteria(): break
                p = findSearchDirection()
                p = scaleSearchDirection(p)
                xt, passLS = modifySearchDirection(p)
                if not passLS:
                    xt, caught = modifySearchDirectionBreak(p)
                    if not caught: return xc
                doEndIteration(xt)

            printDone()
            finish()
            return xc
        """
        self.evalFunction = evalFunction
        self.startup(x0)
        self.printInit()

        while True:
            self.doStartIteration()
            self.f, self.g, self.H = evalFunction(self.xc, return_g=True, return_H=True)
            self.printIter()
            if self.stoppingCriteria(): break
            self.searchDirection = self.findSearchDirection()
            del self.H #: Doing this saves memory, as it is not needed in the rest of the computations.
            p = self.scaleSearchDirection(self.searchDirection)
            xt, passLS = self.modifySearchDirection(p)
            if not passLS:
                xt, caught = self.modifySearchDirectionBreak(p)
                if not caught: return self.xc
            self.doEndIteration(xt)
            if self.stopNextIteration: break

        self.printDone()
        self.finish()

        return self.xc

    @Utils.callHooks('startup')
    def startup(self, x0):
        """
            **startup** is called at the start of any new minimize call.

            This will set::

                x0 = x0
                xc = x0
                iter = iterLS = 0

            :param numpy.ndarray x0: initial x
            :rtype: None
            :return: None
        """

        self.iter = 0
        self.iterLS = 0
        self.stopNextIteration = False

        x0 = self.projection(x0)  # ensure that we start of feasible.
        self.x0 = x0
        self.xc = x0
        self.f_last = np.nan
        self.x_last = x0

    @Utils.count
    @Utils.callHooks('doStartIteration')
    def doStartIteration(self):
        """doStartIteration()

            **doStartIteration** is called at the start of each minimize iteration.

            :rtype: None
            :return: None
        """
        pass


    def printInit(self, inLS=False):
        """
            **printInit** is called at the beginning of the optimization routine.

            If there is a parent object, printInit will check for a
            parent.printInit function and call that.

        """
        pad = ' '*10 if inLS else ''
        name = self.name if not inLS else self.nameLS
        Utils.printTitles(self, self.printers if not inLS else self.printersLS, name, pad)

    @Utils.callHooks('printIter')
    def printIter(self, inLS=False):
        """
            **printIter** is called directly after function evaluations.

            If there is a parent object, printIter will check for a
            parent.printIter function and call that.

        """
        pad = ' '*10 if inLS else ''
        Utils.printLine(self, self.printers if not inLS else self.printersLS, pad=pad)

    def printDone(self, inLS=False):
        """
            **printDone** is called at the end of the optimization routine.

            If there is a parent object, printDone will check for a
            parent.printDone function and call that.

        """
        pad = ' '*10 if inLS else ''
        stop, done = (' STOP! ', ' DONE! ') if not inLS else ('----------------', ' End Linesearch ')
        stoppers = self.stoppers if not inLS else self.stoppersLS
        Utils.printStoppers(self, stoppers, pad='', stop=stop, done=done)


    @Utils.callHooks('finish')
    def finish(self):
        """finish()

            **finish** is called at the end of the optimization.

            :rtype: None
            :return: None

        """
        pass

    def stoppingCriteria(self, inLS=False):
        if self.iter == 0:
            self.f0 = self.f
            self.g0 = self.g
        return Utils.checkStoppers(self, self.stoppers if not inLS else self.stoppersLS)

    @Utils.timeIt
    @Utils.callHooks('projection')
    def projection(self, p):
        """projection(p)

            projects the search direction.

            by default, no projection is applied.

            :param numpy.ndarray p: searchDirection
            :rtype: numpy.ndarray
            :return: p, projected search direction
        """
        return p

    @Utils.timeIt
    def findSearchDirection(self):
        """findSearchDirection()

            **findSearchDirection** should return an approximation of:

            .. math::

                H p = - g

            Where you are solving for the search direction, p

            The default is:

            .. math::

                H = I

                p = - g

            And corresponds to SteepestDescent.

            The latest function evaluations are present in::

                self.f, self.g, self.H

            :rtype: numpy.ndarray
            :return: p, Search Direction
        """
        return -self.g

    @Utils.count
    def scaleSearchDirection(self, p):
        """scaleSearchDirection(p)

            **scaleSearchDirection** should scale the search direction if appropriate.

            Set the parameter **maxStep** in the minimize object, to scale back the gradient to a maximum size.

            :param numpy.ndarray p: searchDirection
            :rtype: numpy.ndarray
            :return: p, Scaled Search Direction
        """

        if self.maxStep < np.abs(p.max()):
            p = self.maxStep*p/np.abs(p.max())
        return p

    nameLS = "Armijo linesearch" #: The line-search name

    @Utils.timeIt
    def modifySearchDirection(self, p):
        """modifySearchDirection(p)

            **modifySearchDirection** changes the search direction based on some sort of linesearch or trust-region criteria.

            By default, an Armijo backtracking linesearch is preformed with the following parameters:

                * maxIterLS, the maximum number of linesearch iterations
                * LSreduction, the expected reduction expected, default: 1e-4
                * LSshorten, how much the step is reduced, default: 0.5

            If the linesearch is completed, and a descent direction is found, passLS is returned as True.

            Else, a modifySearchDirectionBreak call is preformed.

            :param numpy.ndarray p: searchDirection
            :rtype: numpy.ndarray,bool
            :return: (xt, passLS)
        """
        # Projected Armijo linesearch
        self._LS_t = 1
        self.iterLS = 0
        while self.iterLS < self.maxIterLS:
            self._LS_xt      = self.projection(self.xc + self._LS_t*p)
            self._LS_ft      = self.evalFunction(self._LS_xt, return_g=False, return_H=False)
            self._LS_descent = np.inner(self.g, self._LS_xt - self.xc)  # this takes into account multiplying by t, but is important for projection.
            if self.stoppingCriteria(inLS=True): break
            self.iterLS += 1
            self._LS_t = self.LSshorten*self._LS_t
            if self.debugLS:
                if self.iterLS == 1: self.printInit(inLS=True)
                self.printIter(inLS=True)

        if self.debugLS and self.iterLS > 0: self.printDone(inLS=True)

        return self._LS_xt, self.iterLS < self.maxIterLS

    @Utils.count
    def modifySearchDirectionBreak(self, p):
        """modifySearchDirectionBreak(p)

            Code is called if modifySearchDirection fails
            to find a descent direction.

            The search direction is passed as input and
            this function must pass back both a new searchDirection,
            and if the searchDirection break has been caught.

            By default, no additional work is done, and the
            evalFunction returns a False indicating the break was not caught.

            :param numpy.ndarray p: searchDirection
            :rtype: numpy.ndarray,bool
            :return: (xt, breakCaught)
        """
        self.printDone(inLS=True)
        print 'The linesearch got broken. Boo.'
        return p, False

    @Utils.count
    @Utils.callHooks('doEndIteration')
    def doEndIteration(self, xt):
        """doEndIteration(xt)

            **doEndIteration** is called at the end of each minimize iteration.

            By default, function values and x locations are shuffled to store 1 past iteration in memory.

            self.xc must be updated in this code.

            :param numpy.ndarray xt: tested new iterate that ensures a descent direction.
            :rtype: None
            :return: None
        """
        # store old values
        self.f_last = self.f
        self.x_last, self.xc = self.xc, xt
        self.iter += 1
        if self.debug: self.printDone()

        if self.callback is not None:
            self.callback(xt)


    def save(self, group):
        group.setArray('searchDirection', self.searchDirection)

        if getattr(self,'parent',None) is None:
            group.setArray('x', self.xc)
        else: # Assume inversion is the parent
            group.attrs['phi_d'] = self.parent.phi_d
            group.attrs['phi_m'] = self.parent.phi_m
            group.attrs['beta'] = self.parent.beta
            group.setArray('m', self.xc)
            group.setArray('dpred', self.parent.dpred)


class Remember(object):
    """
        This mixin remembers all the things you tend to forget.

        You can remember parameters directly, naming the str in Minimize,
        or pass a tuple with the name and the function that takes Minimize.

        For Example::

            opt.remember('f',('norm_g', lambda M: np.linalg.norm(M.g)))

            opt.minimize(evalFunction, x0)

            opt.recall('f')

        The param name (str) can also be located in the parent (if no conflicts),
        and it will be looked up by default.
    """

    _rememberThese = []

    def remember(self, *args):
        self._rememberThese = args

    def recall(self, param):
        assert param in self._rememberList, "You didn't tell me to remember "+param+", you gotta tell me what to remember!"
        return self._rememberList[param]

    def _startupRemember(self, x0):
        self._rememberList = {}
        for param in self._rememberThese:
            if type(param) is str:
                self._rememberList[param] = []
            elif type(param) is tuple:
                self._rememberList[param[0]] = []

    def _doEndIterationRemember(self, *args):
        for param in self._rememberThese:
            if type(param) is str:
                if self.debug: print 'Remember is remembering: ' + param
                val = getattr(self, param, None)
                if val is None and getattr(self, 'parent', None) is not None:
                    # Look to the parent for the param if not found here.
                    val = getattr(self.parent, param, None)
                self._rememberList[param].append( val )
            elif type(param) is tuple:
                if self.debug: print 'Remember is remembering: ' + param[0]
                self._rememberList[param[0]].append( param[1](self) )


class ProjectedGradient(Minimize, Remember):
    name = 'Projected Gradient'

    maxIterCG = 5
    tolCG = 1e-1

    lower = -np.inf
    upper = np.inf

    def __init__(self,**kwargs):
        super(ProjectedGradient, self).__init__(**kwargs)

        self.stoppers.append(StoppingCriteria.bindingSet)
        self.stoppersLS.append(StoppingCriteria.bindingSet_LS)

        self.printers.extend([ IterationPrinters.itType, IterationPrinters.aSet, IterationPrinters.bSet, IterationPrinters.comment ])


    def _startup(self, x0):
        # ensure bound vectors are the same size as the model
        if type(self.lower) is not np.ndarray:
            self.lower = np.ones_like(x0)*self.lower
        if type(self.upper) is not np.ndarray:
            self.upper = np.ones_like(x0)*self.upper

        self.explorePG = True
        self.exploreCG = False
        self.stopDoingPG = False

        self._itType = 'SD'
        self.comment = ''

        self.aSet_prev = self.activeSet(x0)

    @Utils.count
    def projection(self, x):
        """projection(x)

            Make sure we are feasible.

        """
        return np.median(np.c_[self.lower,x,self.upper],axis=1)

    @Utils.count
    def activeSet(self, x):
        """activeSet(x)

            If we are on a bound

        """
        return np.logical_or(x == self.lower, x == self.upper)

    @Utils.count
    def inactiveSet(self, x):
        """inactiveSet(x)

            The free variables.

        """
        return np.logical_not(self.activeSet(x))

    @Utils.count
    def bindingSet(self, x):
        """bindingSet(x)

            If we are on a bound and the negative gradient points away from the feasible set.

            Optimality condition. (Satisfies Kuhn-Tucker) MoreToraldo91

        """
        bind_up  = np.logical_and(x == self.lower, self.g >= 0)
        bind_low = np.logical_and(x == self.upper, self.g <= 0)
        return np.logical_or(bind_up, bind_low)

    @Utils.timeIt
    def findSearchDirection(self):
        """findSearchDirection()

            Finds the search direction based on either CG or steepest descent.
        """
        self.aSet_prev = self.activeSet(self.xc)
        allBoundsAreActive = sum(self.aSet_prev) == self.xc.size

        if self.debug: print 'findSearchDirection: stopDoingPG: ', self.stopDoingPG
        if self.debug: print 'findSearchDirection: explorePG: ', self.explorePG
        if self.debug: print 'findSearchDirection: exploreCG: ', self.exploreCG
        if self.debug: print 'findSearchDirection: aSet', np.sum(self.activeSet(self.xc))
        if self.debug: print 'findSearchDirection: bSet', np.sum(self.bindingSet(self.xc))
        if self.debug: print 'findSearchDirection: allBoundsAreActive: ', allBoundsAreActive

        if self.explorePG or not self.exploreCG or allBoundsAreActive:
            if self.debug: print 'findSearchDirection.PG: doingPG'
            self._itType = 'SD'
            p = -self.g
        else:
            if self.debug: print 'findSearchDirection.CG: doingCG'
            # Reset the max decrease each time you do a CG iteration
            self.f_decrease_max = -np.inf

            self._itType = '.CG.'

            iSet  = self.inactiveSet(self.xc)  # The inactive set (free variables)
            bSet = self.bindingSet(self.xc)
            shape = (self.xc.size, np.sum(iSet))
            v = np.ones(shape[1])
            i = np.where(iSet)[0]
            j = np.arange(shape[1])
            if self.debug: print 'findSearchDirection.CG: Z.shape', shape
            Z = sp.csr_matrix((v, (i, j)), shape=shape)

            def reduceHess(v):
                # Z is tall and skinny
                return Z.T*(self.H*(Z*v))
            operator = sp.linalg.LinearOperator( (shape[1], shape[1]), reduceHess, dtype=self.xc.dtype )
            p, info  = sp.linalg.cg(operator, -Z.T*self.g, tol=self.tolCG, maxiter=self.maxIterCG)
            p = Z*p  #  bring up to full size
            # aSet_after = self.activeSet(self.xc+p)
        return p

    @Utils.timeIt
    def _doEndIteration_ProjectedGradient(self, xt):
        """_doEndIteration_ProjectedGradient(xt)"""
        aSet = self.activeSet(xt)
        bSet = self.bindingSet(xt)

        self.explorePG = not np.all(aSet == self.aSet_prev) # explore proximal gradient
        self.exploreCG = np.all(aSet == bSet) # explore conjugate gradient

        f_current_decrease = self.f_last - self.f
        self.comment = ''
        if self.iter < 1:
            # Note that this is reset on every CG iteration.
            self.f_decrease_max = -np.inf
        else:
            self.f_decrease_max = max(self.f_decrease_max, f_current_decrease)
            self.stopDoingPG = f_current_decrease < 0.25 * self.f_decrease_max
            if self.stopDoingPG:
                self.comment = 'Stop SD'
                self.explorePG = False
                self.exploreCG = True
        # implement 3.8, MoreToraldo91
        #self.eta_2 * max_decrease where max decrease
        # if true go to CG
        # don't do too many steps of PG in a row.

        if self.debug: print 'doEndIteration.ProjGrad, f_current_decrease: ', f_current_decrease
        if self.debug: print 'doEndIteration.ProjGrad, f_decrease_max: ', self.f_decrease_max
        if self.debug: print 'doEndIteration.ProjGrad, stopDoingSD: ', self.stopDoingPG


class BFGS(Minimize, Remember):
    name  = 'BFGS'
    nbfgs = 10

    def __init__(self, **kwargs):
        Minimize.__init__(self, **kwargs)

    @property
    def bfgsH0(self):
        """
            Approximate Hessian used in preconditioning the problem.

            Must be a SimPEG.Solver
        """
        if getattr(self,'_bfgsH0',None) is None:
            self._bfgsH0 = SolverDiag(sp.identity(self.xc.size))
        return self._bfgsH0

    @bfgsH0.setter
    def bfgsH0(self, value):
        self._bfgsH0 = value

    def _startup_BFGS(self,x0):
        self._bfgscnt = -1
        self._bfgsY   = np.zeros((x0.size, self.nbfgs))
        self._bfgsS   = np.zeros((x0.size, self.nbfgs))
        if not np.any([p is IterationPrinters.comment for p in self.printers]):
            self.printers.append(IterationPrinters.comment)

    def bfgs(self, d):
        n  = self._bfgscnt
        nn = ktop = min(self._bfgsS.shape[1],n)
        return self.bfgsrec(ktop,n,nn,self._bfgsS,self._bfgsY,d)

    def bfgsrec(self,k,n,nn,S,Y,d):
        """BFGS recursion"""
        if k < 0:
            d = self.bfgsH0 * d   #Assume that bfgsH0 is a SimPEG.Solver
        else:
            khat    = 0 if nn is 0 else np.mod(n-nn+k,nn)
            gamma   = np.vdot(S[:,khat],d)/np.vdot(Y[:,khat],S[:,khat])
            d       = d - gamma*Y[:,khat]
            d       = self.bfgsrec(k-1,n,nn,S,Y,d)
            d       = d + (gamma - np.vdot(Y[:,khat],d)/np.vdot(Y[:,khat],S[:,khat]))*S[:,khat]
        return d

    def findSearchDirection(self):
        return self.bfgs(-self.g)

    def _doEndIteration_BFGS(self, xt):
        if self.iter is 0:
            self.g_last = self.g
            return

        yy = self.g - self.g_last;
        ss = self.xc - xt;
        self.g_last = self.g

        if yy.dot(ss) > 0:
            self._bfgscnt += 1
            ktop = np.mod(self._bfgscnt,self.nbfgs)
            self._bfgsY[:,ktop] = yy
            self._bfgsS[:,ktop] = ss
            self.comment = ''
        else:
            self.comment = 'Skip BFGS'


class GaussNewton(Minimize, Remember):
    name = 'Gauss Newton'

    def __init__(self, **kwargs):
        Minimize.__init__(self, **kwargs)

    @Utils.timeIt
    def findSearchDirection(self):
        return Solver(self.H) * (-self.g)


class InexactGaussNewton(BFGS, Minimize, Remember):
    """
        Minimizes using CG as the inexact solver of

        .. math::

            \mathbf{H p = -g}

        By default BFGS is used as the preconditioner.

        Use *nbfgs* to set the memory limitation of BFGS.

        To set the initial H0 to be used in BFGS, set *bfgsH0* to be a SimPEG.Solver

    """

    def __init__(self, **kwargs):
        Minimize.__init__(self, **kwargs)

    name = 'Inexact Gauss Newton'

    maxIterCG = 5
    tolCG = 1e-1

    @property
    def approxHinv(self):
        """
            The approximate Hessian inverse is used to precondition CG.

            Default uses BFGS, with an initial H0 of *bfgsH0*.

            Must be a scipy.sparse.linalg.LinearOperator
        """
        _approxHinv = getattr(self,'_approxHinv',None)
        if _approxHinv is None:
            M = sp.linalg.LinearOperator( (self.xc.size, self.xc.size), self.bfgs, dtype=self.xc.dtype )
            return M
        return _approxHinv
    @approxHinv.setter
    def approxHinv(self, value):
        self._approxHinv = value

    @Utils.timeIt
    def findSearchDirection(self):
        Hinv = SolverICG(self.H, M=self.approxHinv, tol=self.tolCG, maxiter=self.maxIterCG)
        p = Hinv * (-self.g)
        return p


class SteepestDescent(Minimize, Remember):
    name = 'Steepest Descent'

    def __init__(self, **kwargs):
        Minimize.__init__(self, **kwargs)

    @Utils.timeIt
    def findSearchDirection(self):
        return -self.g


class NewtonRoot(object):
    """
        Newton Method - Root Finding

        root = newtonRoot(fun,x);

        Where fun is the function that returns the function value as well as the
        gradient.

        For iterative solving of dh = -J\\r, use O.solveTol = TOL. For direct
        solves, use SOLVETOL = 0 (default)

        Rowan Cockett
        16-May-2013 16:29:51
        University of British Columbia
        rcockett@eos.ubc.ca

    """

    tol      = 1.000e-06
    maxIter  = 20
    stepDcr  = 0.5
    maxLS    = 30
    comments = False
    doLS     = True

    Solver     = Solver
    solverOpts = {}

    def __init__(self, **kwargs):
        Utils.setKwargs(self, **kwargs)

    def root(self, fun, x):
        """root(fun, x)

        Function Should have the form::

            def evalFunction(x, return_g=False):
                    out = (f,)
                    if return_g:
                        out += (g,)
                    return out if len(out) > 1 else out[0]

        """
        if self.comments: print 'Newton Method:\n'

        self.iter = 0
        while True:

            r, J = fun(x, return_g=True)

            Jinv = self.Solver(J, **self.solverOpts)
            dh   = - (Jinv * r)

            muLS = 1.
            LScnt  = 1
            xt = x + dh
            rt = fun(xt, return_g=False)

            if self.comments and self.doLS: print '\tLinesearch:\n'
            # Enter Linesearch
            while True and self.doLS:
                if self.comments: print '\t\tResid: %e\n'%norm(rt)
                if norm(rt) <= norm(r) or norm(rt) < self.tol:
                    break

                muLS = muLS*self.stepDcr
                LScnt = LScnt + 1
                print '.'
                if LScnt > self.maxLS:
                    print 'Newton Method: Line search break.'
                    return None
                xt = x + muLS*dh
                rt = fun(xt, return_g=False)

            x = xt
            self.iter += 1
            if norm(rt) < self.tol:
                break
            if self.iter > self.maxIter:
                print 'NewtonRoot stopped by maxIters (%d). norm: %4.4e' % (self.maxIter, norm(rt))
                break

        return x

class ProjectedGNCG(BFGS, Minimize, Remember):

    def __init__(self, **kwargs):
        Minimize.__init__(self, **kwargs)

    name = 'Projected GNCG'

    maxIterCG = 5
    tolCG = 1e-1

    stepOffBoundsFact = 0.1 # perturbation of the inactive set off the bounds

    lower = -np.inf
    upper = np.inf

    def _startup(self, x0):
        # ensure bound vectors are the same size as the model
        if type(self.lower) is not np.ndarray:
            self.lower = np.ones_like(x0)*self.lower
        if type(self.upper) is not np.ndarray:
            self.upper = np.ones_like(x0)*self.upper


    @Utils.count
    def projection(self, x):
        """projection(x)

            Make sure we are feasible.

        """
        return np.median(np.c_[self.lower,x,self.upper],axis=1)

    @Utils.count
    def activeSet(self, x):
        """activeSet(x)

            If we are on a bound

        """
        return np.logical_or(x <= self.lower, x >= self.upper)

    @property
    def approxHinv(self):
        """
            The approximate Hessian inverse is used to precondition CG.

            Default uses BFGS, with an initial H0 of *bfgsH0*.

            Must be a scipy.sparse.linalg.LinearOperator
        """
        _approxHinv = getattr(self,'_approxHinv',None)
        if _approxHinv is None:
            M = sp.linalg.LinearOperator( (self.xc.size, self.xc.size), self.bfgs, dtype=self.xc.dtype )
            return M
        return _approxHinv

    @approxHinv.setter
    def approxHinv(self, value):
        self._approxHinv = value

    @Utils.timeIt
    def findSearchDirection(self):

        """
            findSearchDirection()
            Finds the search direction based on either CG or steepest descent.
        """
        Active = self.activeSet(self.xc)
        temp = sum((np.ones_like(self.xc.size)-Active))
        allBoundsAreActive =  temp == self.xc.size


        if allBoundsAreActive:
            Hinv = SolverICG(self.H, M=self.approxHinv, tol=self.tolCG, maxiter=self.maxIterCG)
            p = Hinv * (-self.g)
            return p
        else:


            delx = np.zeros(self.g.size)
            resid = -(1-Active) * self.g

            # Begin CG iterations.
            cgiter = 0
            cgFlag = 0
            normResid0 = norm(resid)

            while cgFlag == 0:

                cgiter = cgiter + 1
                dc = (1-Active)*(self.approxHinv*resid)
                rd = np.dot(resid, dc)

                #  Compute conjugate direction pc.
                if cgiter == 1:
                    pc = dc
                else:
                    betak = rd / rdlast
                    pc = dc + betak * pc

                #  Form product Hessian*pc.
                Hp = self.H*pc
                Hp = (1-Active)*Hp

                #  Update delx and residual.
                alphak = rd / np.dot(pc, Hp)
                delx = delx + alphak*pc
                resid = resid - alphak*Hp
                rdlast = rd

                if np.logical_or(norm(resid)/normResid0 <= self.tolCG, cgiter == self.maxIterCG):
                    cgFlag = 1
                # End CG Iterations

            # Take a gradient step on the active cells if exist
            if temp != self.xc.size:

                rhs_a = (Active) * -self.g

                dm_i = max( abs( delx ) )
                dm_a = max( abs(rhs_a) )

                # perturb inactive set off of bounds so that they are included in the step
                delx = delx + self.stepOffBoundsFact * (rhs_a * dm_i / dm_a)


            # Only keep gradients going in the right direction on the active set
            indx = ((self.xc<=self.lower) & (delx < 0)) | ((self.xc>=self.upper) & (delx > 0))
            delx[indx] = 0.

            return delx