from SimPEG import Utils, np, sp class BaseObjFunction(object): """docstring for BaseObjFunction""" __metaclass__ = Utils.Save.Savable beta = Utils.ParameterProperty('beta', default=None, doc='Regularization trade-off parameter') debug = False #: Print debugging information counter = None #: Set this to a SimPEG.Utils.Counter() if you want to count things name = 'BaseObjFunction' #: Name of the objective function u_current = None #: The most current evaluated field m_current = None #: The most current model @property def parent(self): """This is the parent of the objective function.""" return getattr(self,'_parent',None) @parent.setter def parent(self, p): if getattr(self,'_parent',None) is not None: print 'Objective function has switched to a new parent!' self._parent = p def __init__(self, data, reg, **kwargs): Utils.setKwargs(self, **kwargs) self.data = data self.reg = reg @Utils.callHooks('startup') def startup(self, m0): """startup(m0) Called when inversion is first starting. """ if self.debug: print 'Calling ObjFunction.startup' if not hasattr(self.reg, '_mref'): print 'Regularization has not set mref. SimPEG will set it to m0.' self.reg.mref = m0 self.phi_d = np.nan self.phi_m = np.nan self.m_current = m0 @Utils.timeIt def evalFunction(self, m, return_g=True, return_H=True): """evalFunction(m, return_g=True, return_H=True) """ self.u_current = None self.m_current = m u = self.data.prob.field(m) self.u_current = u phi_d = self.dataObj(m, u=u) phi_m = self.reg.modelObj(m) self.dpred = self.data.dpred(m, u=u) # This is a cheap matrix vector calculation. self.phi_d, self.phi_d_last = phi_d, self.phi_d self.phi_m, self.phi_m_last = phi_m, self.phi_m f = phi_d + self.beta * phi_m out = (f,) if return_g: phi_dDeriv = self.dataObjDeriv(m, u=u) phi_mDeriv = self.reg.modelObjDeriv(m) g = phi_dDeriv + self.beta * phi_mDeriv out += (g,) if return_H: def H_fun(v): phi_d2Deriv = self.dataObj2Deriv(m, v, u=u) phi_m2Deriv = self.reg.modelObj2Deriv()*v return phi_d2Deriv + self.beta * phi_m2Deriv operator = sp.linalg.LinearOperator( (m.size, m.size), H_fun, dtype=m.dtype ) out += (operator,) return out if len(out) > 1 else out[0] @Utils.timeIt def dataObj(self, m, u=None): """dataObj(m, u=None) :param numpy.array m: geophysical model :param numpy.array u: fields :rtype: float :return: data misfit The data misfit using an l_2 norm is: .. math:: \mu_\\text{data} = {1\over 2}\left| \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs}) \\right|_2^2 Where P is a projection matrix that brings the field on the full domain to the data measurement locations; u is the field of interest; d_obs is the observed data; and W is the weighting matrix. """ # TODO: ensure that this is a data is vector and Wd is a matrix. R = self.data.residualWeighted(m, u=u) return 0.5*np.vdot(R, R) @Utils.timeIt def dataObjDeriv(self, m, u=None): """dataObjDeriv(m, u=None) :param numpy.array m: geophysical model :param numpy.array u: fields :rtype: numpy.array :return: data misfit derivative The data misfit using an l_2 norm is: .. math:: \mu_\\text{data} = {1\over 2}\left| \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs}) \\right|_2^2 If the field, u, is provided, the calculation of the data is fast: .. math:: \mathbf{d}_\\text{pred} = \mathbf{Pu(m)} \mathbf{R} = \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs}) Where P is a projection matrix that brings the field on the full domain to the data measurement locations; u is the field of interest; d_obs is the observed data; and W is the weighting matrix. The derivative of this, with respect to the model, is: .. math:: \\frac{\partial \mu_\\text{data}}{\partial \mathbf{m}} = \mathbf{J}^\\top \mathbf{W \circ R} """ if u is None: u = self.data.prob.field(m) R = self.data.residualWeighted(m, u=u) dmisfit = self.data.prob.Jt(m, self.data.Wd * R, u=u) return dmisfit @Utils.timeIt def dataObj2Deriv(self, m, v, u=None): """dataObj2Deriv(m, v, u=None) :param numpy.array m: geophysical model :param numpy.array v: vector to multiply :param numpy.array u: fields :rtype: numpy.array :return: data misfit derivative The data misfit using an l_2 norm is: .. math:: \mu_\\text{data} = {1\over 2}\left| \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs}) \\right|_2^2 If the field, u, is provided, the calculation of the data is fast: .. math:: \mathbf{d}_\\text{pred} = \mathbf{Pu(m)} \mathbf{R} = \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs}) Where P is a projection matrix that brings the field on the full domain to the data measurement locations; u is the field of interest; d_obs is the observed data; and W is the weighting matrix. The derivative of this, with respect to the model, is: .. math:: \\frac{\partial \mu_\\text{data}}{\partial \mathbf{m}} = \mathbf{J}^\\top \mathbf{W \circ R} \\frac{\partial^2 \mu_\\text{data}}{\partial^2 \mathbf{m}} = \mathbf{J}^\\top \mathbf{W \circ W J} """ if u is None: u = self.data.prob.field(m) R = self.data.residualWeighted(m, u=u) # TODO: abstract to different norms a little cleaner. # \/ it goes here. in l2 it is the identity. dmisfit = self.data.prob.Jt_approx(m, self.data.Wd * self.data.Wd * self.data.prob.J_approx(m, v, u=u), u=u) return dmisfit class BetaSchedule(Utils.Parameter): """BetaSchedule""" beta0 = 'guess' #: The initial Beta (regularization parameter) beta0_ratio = 0.1 #: When beta0 is set to 'guess', estimateBeta0 is used with this ratio coolingFactor = 2. coolingRate = 3 beta = None #: Beta parameter def __init__(self, *args, **kwargs): Utils.Parameter.__init__(self, *args, **kwargs) Utils.setKwargs(self, **kwargs) def initialize(self): self.beta = self.beta0 @Utils.requires('parent') def nextIter(self): if self.beta is 'guess': if self.debug: print 'BetaSchedule is estimating Beta0.' self.beta = self.estimateBeta0() opt = self.parent.parent.opt if opt._iter > 0 and opt._iter % self.coolingRate == 0: if self.debug: print 'BetaSchedule is cooling Beta. Iteration: %d' % opt._iter self.beta /= self.coolingFactor return self.beta @Utils.requires('parent') def estimateBeta0(self): """estimateBeta0(u=None) The initial beta is calculated by comparing the estimated eigenvalues of JtJ and WtW. To estimate the eigenvector of **A**, we will use one iteration of the *Power Method*: .. math:: \mathbf{x_1 = A x_0} Given this (very course) approximation of the eigenvector, we can use the *Rayleigh quotient* to approximate the largest eigenvalue. .. math:: \lambda_0 = \\frac{\mathbf{x^\\top A x}}{\mathbf{x^\\top x}} We will approximate the largest eigenvalue for both JtJ and WtW, and use some ratio of the quotient to estimate beta0. .. math:: \\beta_0 = \gamma \\frac{\mathbf{x^\\top J^\\top J x}}{\mathbf{x^\\top W^\\top W x}} :param numpy.array u: fields :param float ratio: desired ratio of the eigenvalues, default is 0.1 :rtype: float :return: beta0 """ objFunc = self.parent data = objFunc.data m = objFunc.m_current u = objFunc.u_current if u is None: u = data.prob.field(m) x0 = np.random.rand(*m.shape) t = x0.dot(objFunc.dataObj2Deriv(m,x0,u=u)) b = x0.dot(objFunc.reg.modelObj2Deriv()*x0) return self.beta0_ratio*(t/b)