Files
simpeg/SimPEG/ObjFunction.py
T

282 lines
8.8 KiB
Python

from SimPEG import Utils, np, sp
class BaseObjFunction(object):
"""docstring for BaseObjFunction"""
__metaclass__ = Utils.Save.Savable
beta = Utils.ParameterProperty('beta', default=None, doc='Regularization trade-off parameter')
debug = False #: Print debugging information
counter = None #: Set this to a SimPEG.Utils.Counter() if you want to count things
name = 'BaseObjFunction' #: Name of the objective function
u_current = None #: The most current evaluated field
m_current = None #: The most current model
@property
def parent(self):
"""This is the parent of the objective function."""
return getattr(self,'_parent',None)
@parent.setter
def parent(self, p):
if getattr(self,'_parent',None) is not None:
print 'Objective function has switched to a new parent!'
self._parent = p
def __init__(self, data, reg, **kwargs):
Utils.setKwargs(self, **kwargs)
self.data = data
self.reg = reg
@Utils.callHooks('startup')
def startup(self, m0):
"""startup(m0)
Called when inversion is first starting.
"""
if self.debug: print 'Calling ObjFunction.startup'
if not hasattr(self.reg, '_mref'):
print 'Regularization has not set mref. SimPEG will set it to m0.'
self.reg.mref = m0
self.phi_d = np.nan
self.phi_m = np.nan
self.m_current = m0
@Utils.timeIt
def evalFunction(self, m, return_g=True, return_H=True):
"""evalFunction(m, return_g=True, return_H=True)
"""
self.u_current = None
self.m_current = m
u = self.data.prob.field(m)
self.u_current = u
phi_d = self.dataObj(m, u=u)
phi_m = self.reg.modelObj(m)
self.dpred = self.data.dpred(m, u=u) # This is a cheap matrix vector calculation.
self.phi_d, self.phi_d_last = phi_d, self.phi_d
self.phi_m, self.phi_m_last = phi_m, self.phi_m
f = phi_d + self.beta * phi_m
out = (f,)
if return_g:
phi_dDeriv = self.dataObjDeriv(m, u=u)
phi_mDeriv = self.reg.modelObjDeriv(m)
g = phi_dDeriv + self.beta * phi_mDeriv
out += (g,)
if return_H:
def H_fun(v):
phi_d2Deriv = self.dataObj2Deriv(m, v, u=u)
phi_m2Deriv = self.reg.modelObj2Deriv()*v
return phi_d2Deriv + self.beta * phi_m2Deriv
operator = sp.linalg.LinearOperator( (m.size, m.size), H_fun, dtype=m.dtype )
out += (operator,)
return out if len(out) > 1 else out[0]
@Utils.timeIt
def dataObj(self, m, u=None):
"""dataObj(m, u=None)
:param numpy.array m: geophysical model
:param numpy.array u: fields
:rtype: float
:return: data misfit
The data misfit using an l_2 norm is:
.. math::
\mu_\\text{data} = {1\over 2}\left| \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs}) \\right|_2^2
Where P is a projection matrix that brings the field on the full domain to the data measurement locations;
u is the field of interest; d_obs is the observed data; and W is the weighting matrix.
"""
# TODO: ensure that this is a data is vector and Wd is a matrix.
R = self.data.residualWeighted(m, u=u)
return 0.5*np.vdot(R, R)
@Utils.timeIt
def dataObjDeriv(self, m, u=None):
"""dataObjDeriv(m, u=None)
:param numpy.array m: geophysical model
:param numpy.array u: fields
:rtype: numpy.array
:return: data misfit derivative
The data misfit using an l_2 norm is:
.. math::
\mu_\\text{data} = {1\over 2}\left| \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs}) \\right|_2^2
If the field, u, is provided, the calculation of the data is fast:
.. math::
\mathbf{d}_\\text{pred} = \mathbf{Pu(m)}
\mathbf{R} = \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs})
Where P is a projection matrix that brings the field on the full domain to the data measurement locations;
u is the field of interest; d_obs is the observed data; and W is the weighting matrix.
The derivative of this, with respect to the model, is:
.. math::
\\frac{\partial \mu_\\text{data}}{\partial \mathbf{m}} = \mathbf{J}^\\top \mathbf{W \circ R}
"""
if u is None: u = self.data.prob.field(m)
R = self.data.residualWeighted(m, u=u)
dmisfit = self.data.prob.Jt(m, self.data.Wd * R, u=u)
return dmisfit
@Utils.timeIt
def dataObj2Deriv(self, m, v, u=None):
"""dataObj2Deriv(m, v, u=None)
:param numpy.array m: geophysical model
:param numpy.array v: vector to multiply
:param numpy.array u: fields
:rtype: numpy.array
:return: data misfit derivative
The data misfit using an l_2 norm is:
.. math::
\mu_\\text{data} = {1\over 2}\left| \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs}) \\right|_2^2
If the field, u, is provided, the calculation of the data is fast:
.. math::
\mathbf{d}_\\text{pred} = \mathbf{Pu(m)}
\mathbf{R} = \mathbf{W} \circ (\mathbf{d}_\\text{pred} - \mathbf{d}_\\text{obs})
Where P is a projection matrix that brings the field on the full domain to the data measurement locations;
u is the field of interest; d_obs is the observed data; and W is the weighting matrix.
The derivative of this, with respect to the model, is:
.. math::
\\frac{\partial \mu_\\text{data}}{\partial \mathbf{m}} = \mathbf{J}^\\top \mathbf{W \circ R}
\\frac{\partial^2 \mu_\\text{data}}{\partial^2 \mathbf{m}} = \mathbf{J}^\\top \mathbf{W \circ W J}
"""
if u is None: u = self.data.prob.field(m)
R = self.data.residualWeighted(m, u=u)
# TODO: abstract to different norms a little cleaner.
# \/ it goes here. in l2 it is the identity.
dmisfit = self.data.prob.Jt_approx(m, self.data.Wd * self.data.Wd * self.data.prob.J_approx(m, v, u=u), u=u)
return dmisfit
class BetaSchedule(Utils.Parameter):
"""BetaSchedule"""
beta0 = 'guess' #: The initial Beta (regularization parameter)
beta0_ratio = 0.1 #: When beta0 is set to 'guess', estimateBeta0 is used with this ratio
coolingFactor = 2.
coolingRate = 3
beta = None #: Beta parameter
def __init__(self, *args, **kwargs):
Utils.Parameter.__init__(self, *args, **kwargs)
Utils.setKwargs(self, **kwargs)
def initialize(self):
self.beta = self.beta0
@Utils.requires('parent')
def nextIter(self):
if self.beta is 'guess':
if self.debug: print 'BetaSchedule is estimating Beta0.'
self.beta = self.estimateBeta0()
opt = self.parent.parent.opt
if opt._iter > 0 and opt._iter % self.coolingRate == 0:
if self.debug: print 'BetaSchedule is cooling Beta. Iteration: %d' % opt._iter
self.beta /= self.coolingFactor
return self.beta
@Utils.requires('parent')
def estimateBeta0(self):
"""estimateBeta0(u=None)
The initial beta is calculated by comparing the estimated
eigenvalues of JtJ and WtW.
To estimate the eigenvector of **A**, we will use one iteration
of the *Power Method*:
.. math::
\mathbf{x_1 = A x_0}
Given this (very course) approximation of the eigenvector,
we can use the *Rayleigh quotient* to approximate the largest eigenvalue.
.. math::
\lambda_0 = \\frac{\mathbf{x^\\top A x}}{\mathbf{x^\\top x}}
We will approximate the largest eigenvalue for both JtJ and WtW, and
use some ratio of the quotient to estimate beta0.
.. math::
\\beta_0 = \gamma \\frac{\mathbf{x^\\top J^\\top J x}}{\mathbf{x^\\top W^\\top W x}}
:param numpy.array u: fields
:param float ratio: desired ratio of the eigenvalues, default is 0.1
:rtype: float
:return: beta0
"""
objFunc = self.parent
data = objFunc.data
m = objFunc.m_current
u = objFunc.u_current
if u is None:
u = data.prob.field(m)
x0 = np.random.rand(*m.shape)
t = x0.dot(objFunc.dataObj2Deriv(m,x0,u=u))
b = x0.dot(objFunc.reg.modelObj2Deriv()*x0)
return self.beta0_ratio*(t/b)