simpeg/SimPEG/Inverse/Inversion.py

import SimPEG
from SimPEG import Utils, sp, np
from Optimize import Remember
from BetaSchedule import Cooling
from SimPEG.Inverse import IterationPrinters, StoppingCriteria

class BaseInversion(object):
    """BaseInversion(prob, reg, opt, data, **kwargs)
    """

    __metaclass__ = Utils.Save.Savable

    maxIter = 1        #: Maximum number of iterations
    name = 'BaseInversion'

    debug   = False    #: Print debugging information

    comment = ''       #: Used by some functions to indicate what is going on in the algorithm
    counter = None     #: Set this to a SimPEG.Utils.Counter() if you want to count things

    beta0  = None      #: The initial Beta (regularization parameter)
    beta0_ratio = 0.1  #: When beta0 is set to None, estimateBeta0 is used with this ratio

    def __init__(self, prob, reg, opt, data, **kwargs):
        Utils.setKwargs(self, **kwargs)
        self.prob = prob
        self.reg = reg
        self.opt = opt
        self.data = data
        self.opt.parent = self

        self.stoppers = [StoppingCriteria.iteration]

        # Check if we have inserted printers into the optimization
        if IterationPrinters.phi_d not in self.opt.printers:
            self.opt.printers.insert(1,IterationPrinters.beta)
            self.opt.printers.insert(2,IterationPrinters.phi_d)
            self.opt.printers.insert(3,IterationPrinters.phi_m)

        if not hasattr(opt, '_bfgsH0') and hasattr(opt, 'bfgsH0'): # Check if it has been set by the user and the default is not being used.
            print 'Setting bfgsH0 to the inverse of the modelObj2Deriv. Done using direct methods.'
            opt.bfgsH0 = SimPEG.Solver(reg.modelObj2Deriv())


    @property
    def phi_d_target(self):
        """
        target for phi_d

        By default this is the number of data.

        Note that we do not set the target if it is None, but we return the default value.
        """
        if getattr(self, '_phi_d_target', None) is None:
            return self.data.dobs.size #
        return self._phi_d_target

    @phi_d_target.setter
    def phi_d_target(self, value):
        self._phi_d_target = value

    @Utils.timeIt
    def run(self, m0):
        """run(m0)

            Runs the inversion!

        """
        self.startup(m0)
        while True:
            self.doStartIteration()
            self.m = self.opt.minimize(self.evalFunction, self.m)
            self.doEndIteration()
            if self.stoppingCriteria(): break

        self.printDone()
        self.finish()

        return self.m

    @Utils.callHooks('startup')
    def startup(self, m0):
        """
            **startup** is called at the start of any new run call.

            :param numpy.ndarray x0: initial x
            :rtype: None
            :return: None
        """

        if not hasattr(self.reg, '_mref'):
            print 'Regularization has not set mref. SimPEG will set it to m0.'
            self.reg.mref = m0

        self.m = m0
        self._iter = 0
        self._beta = None
        self.phi_d_last = np.nan
        self.phi_m_last = np.nan

    @Utils.callHooks('doStartIteration')
    def doStartIteration(self):
        """
            **doStartIteration** is called at the end of each run iteration.

            :rtype: None
            :return: None
        """
        self._beta = self.getBeta()


    @Utils.callHooks('doEndIteration')
    def doEndIteration(self):
        """
            **doEndIteration** is called at the end of each run iteration.

            :rtype: None
            :return: None
        """
        # store old values
        self.phi_d_last = self.phi_d
        self.phi_m_last = self.phi_m
        self._iter += 1

    def getBeta(self):
        return self.beta0

    def estimateBeta0(self, u=None, ratio=0.1):
        """estimateBeta0(u=None, ratio=0.1)

            The initial beta is calculated by comparing the estimated
            eigenvalues of JtJ and WtW.

            To estimate the eigenvector of **A**, we will use one iteration
            of the *Power Method*:

            .. math::

                \mathbf{x_1 = A x_0}

            Given this (very course) approximation of the eigenvector,
            we can use the *Rayleigh quotient* to approximate the largest eigenvalue.

            .. math::

                \lambda_0 = \\frac{\mathbf{x^\\top A x}}{\mathbf{x^\\top x}}

            We will approximate the largest eigenvalue for both JtJ and WtW, and
            use some ratio of the quotient to estimate beta0.

            .. math::

                \\beta_0 = \gamma \\frac{\mathbf{x^\\top J^\\top J x}}{\mathbf{x^\\top W^\\top W x}}


            :param numpy.array u: fields
            :param float ratio: desired ratio of the eigenvalues, default is 0.1
            :rtype: float
            :return: beta0
        """
        if u is None:
            u = self.prob.field(self.m)

        x0 = np.random.rand(*self.m.shape)
        t = x0.dot(self.dataObj2Deriv(self.m,x0,u=u))
        b = x0.dot(self.reg.modelObj2Deriv()*x0)
        return ratio*(t/b)

    def stoppingCriteria(self):
        if self.debug: print 'checking stoppingCriteria'
        return Utils.checkStoppers(self, self.stoppers)


    def printDone(self):
        """
            **printDone** is called at the end of the inversion routine.

        """
        Utils.printStoppers(self, self.stoppers)

    @Utils.callHooks('finish')
    def finish(self):
        """finish()

            **finish** is called at the end of the optimization.
        """
        pass

    @Utils.timeIt
    def evalFunction(self, m, return_g=True, return_H=True):
        """evalFunction(m, return_g=True, return_H=True)


        """

        u = self.prob.field(m)

        if self._iter is 0 and self._beta is None:
            self._beta = self.beta0 = self.estimateBeta0(u=u,ratio=self.beta0_ratio)

        phi_d = self.dataObj(m, u)
        phi_m = self.reg.modelObj(m)

        self.dpred = self.prob.dpred(m, u=u)  # This is a cheap matrix vector calculation.
        self.phi_d = phi_d
        self.phi_m = phi_m

        f = phi_d + self._beta * phi_m

        out = (f,)
        if return_g:
            phi_dDeriv = self.dataObjDeriv(m, u=u)
            phi_mDeriv = self.reg.modelObjDeriv(m)

            g = phi_dDeriv + self._beta * phi_mDeriv
            out += (g,)

        if return_H:
            def H_fun(v):
                phi_d2Deriv = self.dataObj2Deriv(m, v, u=u)
                phi_m2Deriv = self.reg.modelObj2Deriv()*v

                return phi_d2Deriv + self._beta * phi_m2Deriv

            operator = sp.linalg.LinearOperator( (m.size, m.size), H_fun, dtype=m.dtype )
            out += (operator,)
        return out if len(out) > 1 else out[0]

    @Utils.timeIt
    def dataObj(self, m, u=None):
        """dataObj(m, u=None)

            :param numpy.array m: geophysical model
            :param numpy.array u: fields
            :rtype: float
            :return: data misfit

            The data misfit using an l_2 norm is:

            .. math::

                \mu_\\text{data} = {1\over 2}\left| \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs}) \\right|_2^2

            Where P is a projection matrix that brings the field on the full domain to the data measurement locations;
            u is the field of interest; d_obs is the observed data; and W is the weighting matrix.
        """
        # TODO: ensure that this is a data is vector and Wd is a matrix.
        R = self.Wd*self.prob.dataResidual(m, self.data, u=u)
        R = Utils.mkvc(R)
        return 0.5*np.vdot(R, R)

    @Utils.timeIt
    def dataObjDeriv(self, m, u=None):
        """dataObjDeriv(m, u=None)

            :param numpy.array m: geophysical model
            :param numpy.array u: fields
            :rtype: numpy.array
            :return: data misfit derivative

            The data misfit using an l_2 norm is:

            .. math::

                \mu_\\text{data} = {1\over 2}\left| \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs}) \\right|_2^2

            If the field, u, is provided, the calculation of the data is fast:

            .. math::

                \mathbf{d}_\\text{pred} = \mathbf{Pu(m)}

                \mathbf{R} = \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs})

            Where P is a projection matrix that brings the field on the full domain to the data measurement locations;
            u is the field of interest; d_obs is the observed data; and W is the weighting matrix.

            The derivative of this, with respect to the model, is:

            .. math::

                \\frac{\partial \mu_\\text{data}}{\partial \mathbf{m}} = \mathbf{J}^\\top \mathbf{W \circ R}

        """
        if u is None:
            u = self.prob.field(m)

        R = self.Wd*self.prob.dataResidual(m, self.data, u=u)

        dmisfit = self.prob.Jt(m, self.Wd * R, u=u)

        return dmisfit

    @Utils.timeIt
    def dataObj2Deriv(self, m, v, u=None):
        """dataObj2Deriv(m, v, u=None)

            :param numpy.array m: geophysical model
            :param numpy.array v: vector to multiply
            :param numpy.array u: fields
            :rtype: numpy.array
            :return: data misfit derivative

            The data misfit using an l_2 norm is:

            .. math::

                \mu_\\text{data} = {1\over 2}\left| \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs}) \\right|_2^2

            If the field, u, is provided, the calculation of the data is fast:

            .. math::

                \mathbf{d}_\\text{pred} = \mathbf{Pu(m)}

                \mathbf{R} = \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs})

            Where P is a projection matrix that brings the field on the full domain to the data measurement locations;
            u is the field of interest; d_obs is the observed data; and W is the weighting matrix.

            The derivative of this, with respect to the model, is:

            .. math::

                \\frac{\partial \mu_\\text{data}}{\partial \mathbf{m}} = \mathbf{J}^\\top \mathbf{W \circ R}

                \\frac{\partial^2 \mu_\\text{data}}{\partial^2 \mathbf{m}} = \mathbf{J}^\\top \mathbf{W \circ W J}

        """
        if u is None:
            u = self.prob.field(m)

        R = self.Wd*self.prob.dataResidual(m, self.data, u=u)

        # TODO: abstract to different norms a little cleaner.
        #                                        \/ it goes here. in l2 it is the identity.
        dmisfit = self.prob.Jt_approx(m, self.Wd * self.Wd * self.prob.J_approx(m, v, u=u), u=u)

        return dmisfit

    def save(self, group):
        group.attrs['phi_d'] = self.phi_d
        group.attrs['phi_m'] = self.phi_m
        group.setArray('m', self.m)
        group.setArray('dpred', self.dpred)

class Inversion(Cooling, Remember, BaseInversion):

    maxIter = 10
    name = "SimPEG Inversion"

    def __init__(self, prob, reg, opt, data, **kwargs):
        BaseInversion.__init__(self, prob, reg, opt, data, **kwargs)

        self.stoppers.append(StoppingCriteria.phi_d_target_Inversion)

        if StoppingCriteria.phi_d_target_Minimize not in self.opt.stoppers:
            self.opt.stoppers.append(StoppingCriteria.phi_d_target_Minimize)

class TimeSteppingInversion(Remember, BaseInversion):
    """
        A slightly different view on regularization parameters,
        let Beta be viewed as 1/dt, and timestep by updating the
        reference model every optimization iteration.
    """
    maxIter = 1
    name = "Time-Stepping SimPEG Inversion"

    def __init__(self, prob, reg, opt, data, **kwargs):
        BaseInversion.__init__(self, prob, reg, opt, data, **kwargs)

        self.stoppers.append(StoppingCriteria.phi_d_target_Inversion)

        if StoppingCriteria.phi_d_target_Minimize not in self.opt.stoppers:
            self.opt.stoppers.append(StoppingCriteria.phi_d_target_Minimize)

    def _startup_TimeSteppingInversion(self, m0):

        def _doEndIteration_updateMref(self, xt):
            if self.debug: 'Updating the reference model.'
            self.parent.reg.mref = self.xc

        self.opt.hook(_doEndIteration_updateMref, overwrite=True)