Files
simpeg/SimPEG/Optimization.py
T
Rowan Cockett 8aa23c31de Allow moving off bounds in projected gradient.
The current implementation does not allow you to move off the
bounds (lower/upper) once you have gotten on them, this allows you
to move off of the bound.

Please note that more testing should be done to ensure that this does
not introduce oscillations into the optimization routine.
2016-02-16 21:53:31 -08:00

1008 lines
33 KiB
Python

import Utils, numpy as np, scipy.sparse as sp
from Utils.SolverUtils import *
norm = np.linalg.norm
__all__ = ['Minimize', 'Remember', 'SteepestDescent', 'BFGS', 'GaussNewton', 'InexactGaussNewton', 'ProjectedGradient', 'NewtonRoot', 'StoppingCriteria', 'IterationPrinters']
SolverICG = SolverWrapI(sp.linalg.cg, checkAccuracy=False)
class StoppingCriteria(object):
"""docstring for StoppingCriteria"""
iteration = { "str": "%d : maxIter = %3d <= iter = %3d",
"left": lambda M: M.maxIter, "right": lambda M: M.iter,
"stopType": "critical"}
iterationLS = { "str": "%d : maxIterLS = %3d <= iterLS = %3d",
"left": lambda M: M.maxIterLS, "right": lambda M: M.iterLS,
"stopType": "critical"}
armijoGoldstein = { "str": "%d : ft = %1.4e <= alp*descent = %1.4e",
"left": lambda M: M._LS_ft, "right": lambda M: M.f + M.LSreduction * M._LS_descent,
"stopType": "optimal"}
tolerance_f = { "str": "%d : |fc-fOld| = %1.4e <= tolF*(1+|f0|) = %1.4e",
"left": lambda M: 1 if M.iter==0 else abs(M.f-M.f_last), "right": lambda M: 0 if M.iter==0 else M.tolF*(1+abs(M.f0)),
"stopType": "optimal"}
moving_x = { "str": "%d : |xc-x_last| = %1.4e <= tolX*(1+|x0|) = %1.4e",
"left": lambda M: 1 if M.iter==0 else norm(M.xc-M.x_last), "right": lambda M: 0 if M.iter==0 else M.tolX*(1+norm(M.x0)),
"stopType": "optimal"}
tolerance_g = { "str": "%d : |proj(x-g)-x| = %1.4e <= tolG = %1.4e",
"left": lambda M: norm(M.projection(M.xc - M.g) - M.xc), "right": lambda M: M.tolG,
"stopType": "optimal"}
norm_g = { "str": "%d : |proj(x-g)-x| = %1.4e <= 1e3*eps = %1.4e",
"left": lambda M: norm(M.projection(M.xc - M.g) - M.xc), "right": lambda M: 1e3*M.eps,
"stopType": "critical"}
bindingSet = { "str": "%d : probSize = %3d <= bindingSet = %3d",
"left": lambda M: M.xc.size, "right": lambda M: np.sum(M.bindingSet(M.xc)),
"stopType": "critical"}
bindingSet_LS = { "str": "%d : probSize = %3d <= bindingSet = %3d",
"left": lambda M: M._LS_xt.size, "right": lambda M: np.sum(M.bindingSet(M._LS_xt)),
"stopType": "critical"}
phi_d_target_Minimize = { "str": "%d : phi_d = %1.4e <= phi_d_target = %1.4e ",
"left": lambda M: M.parent.phi_d, "right": lambda M: M.parent.phi_d_target,
"stopType": "critical"}
phi_d_target_Inversion = { "str": "%d : phi_d = %1.4e <= phi_d_target = %1.4e ",
"left": lambda I: I.phi_d, "right": lambda I: I.phi_d_target,
"stopType": "critical"}
class IterationPrinters(object):
"""docstring for IterationPrinters"""
iteration = {"title": "#", "value": lambda M: M.iter, "width": 5, "format": "%3d"}
f = {"title": "f", "value": lambda M: M.f, "width": 10, "format": "%1.2e"}
norm_g = {"title": "|proj(x-g)-x|", "value": lambda M: norm(M.projection(M.xc - M.g) - M.xc), "width": 15, "format": "%1.2e"}
totalLS = {"title": "LS", "value": lambda M: M.iterLS, "width": 5, "format": "%d"}
iterationLS = {"title": "#", "value": lambda M: (M.iter, M.iterLS), "width": 5, "format": "%3d.%d"}
LS_ft = {"title": "ft", "value": lambda M: M._LS_ft, "width": 10, "format": "%1.2e"}
LS_t = {"title": "t", "value": lambda M: M._LS_t, "width": 10, "format": "%0.5f"}
LS_armijoGoldstein = {"title": "f + alp*g.T*p", "value": lambda M: M.f + M.LSreduction*M._LS_descent, "width": 16, "format": "%1.2e"}
itType = {"title": "itType", "value": lambda M: M._itType, "width": 8, "format": "%s"}
aSet = {"title": "aSet", "value": lambda M: np.sum(M.activeSet(M.xc)), "width": 8, "format": "%d"}
bSet = {"title": "bSet", "value": lambda M: np.sum(M.bindingSet(M.xc)), "width": 8, "format": "%d"}
comment = {"title": "Comment", "value": lambda M: M.comment, "width": 12, "format": "%s"}
beta = {"title": "beta", "value": lambda M: M.parent.beta, "width": 10, "format": "%1.2e"}
phi_d = {"title": "phi_d", "value": lambda M: M.parent.phi_d, "width": 10, "format": "%1.2e"}
phi_m = {"title": "phi_m", "value": lambda M: M.parent.phi_m, "width": 10, "format": "%1.2e"}
class Minimize(object):
"""
Minimize is a general class for derivative based optimization.
"""
__metaclass__ = Utils.SimPEGMetaClass
name = "General Optimization Algorithm" #: The name of the optimization algorithm
maxIter = 20 #: Maximum number of iterations
maxIterLS = 10 #: Maximum number of iterations for the line-search
maxStep = np.inf #: Maximum step possible, used in scaling before the line-search.
LSreduction = 1e-4 #: Expected decrease in the line-search
LSshorten = 0.5 #: Line-search step is shortened by this amount each time.
tolF = 1e-1 #: Tolerance on function value decrease
tolX = 1e-1 #: Tolerance on norm(x) movement
tolG = 1e-1 #: Tolerance on gradient norm
eps = 1e-5 #: Small value
stopNextIteration = False #: Stops the optimization program nicely.
debug = False #: Print debugging information
debugLS = False #: Print debugging information for the line-search
comment = '' #: Used by some functions to indicate what is going on in the algorithm
counter = None #: Set this to a SimPEG.Utils.Counter() if you want to count things
parent = None #: This is the parent of the optimization routine.
def __init__(self, **kwargs):
self.stoppers = [StoppingCriteria.tolerance_f, StoppingCriteria.moving_x, StoppingCriteria.tolerance_g, StoppingCriteria.norm_g, StoppingCriteria.iteration]
self.stoppersLS = [StoppingCriteria.armijoGoldstein, StoppingCriteria.iterationLS]
self.printers = [IterationPrinters.iteration, IterationPrinters.f, IterationPrinters.norm_g, IterationPrinters.totalLS]
self.printersLS = [IterationPrinters.iterationLS, IterationPrinters.LS_ft, IterationPrinters.LS_t, IterationPrinters.LS_armijoGoldstein]
Utils.setKwargs(self, **kwargs)
@property
def callback(self):
return getattr(self, '_callback', None)
@callback.setter
def callback(self, value):
if self.callback is not None:
print 'The callback on the %s Optimization was replaced.' % self.__name__
self._callback = value
@Utils.timeIt
def minimize(self, evalFunction, x0):
"""minimize(evalFunction, x0)
Minimizes the function (evalFunction) starting at the location x0.
:param def evalFunction: function handle that evaluates: f, g, H = F(x)
:param numpy.ndarray x0: starting location
:rtype: numpy.ndarray
:return: x, the last iterate of the optimization algorithm
evalFunction is a function handle::
(f[, g][, H]) = evalFunction(x, return_g=False, return_H=False )
def evalFunction(x, return_g=False, return_H=False):
out = (f,)
if return_g:
out += (g,)
if return_H:
out += (H,)
return out if len(out) > 1 else out[0]
The algorithm for general minimization is as follows::
startup(x0)
printInit()
while True:
doStartIteration()
f, g, H = evalFunction(xc)
printIter()
if stoppingCriteria(): break
p = findSearchDirection()
p = scaleSearchDirection(p)
xt, passLS = modifySearchDirection(p)
if not passLS:
xt, caught = modifySearchDirectionBreak(p)
if not caught: return xc
doEndIteration(xt)
printDone()
finish()
return xc
"""
self.evalFunction = evalFunction
self.startup(x0)
self.printInit()
while True:
self.doStartIteration()
self.f, self.g, self.H = evalFunction(self.xc, return_g=True, return_H=True)
self.printIter()
if self.stoppingCriteria(): break
self.searchDirection = self.findSearchDirection()
del self.H #: Doing this saves memory, as it is not needed in the rest of the computations.
p = self.scaleSearchDirection(self.searchDirection)
xt, passLS = self.modifySearchDirection(p)
if not passLS:
xt, caught = self.modifySearchDirectionBreak(p)
if not caught: return self.xc
self.doEndIteration(xt)
if self.stopNextIteration: break
self.printDone()
self.finish()
return self.xc
@Utils.callHooks('startup')
def startup(self, x0):
"""
**startup** is called at the start of any new minimize call.
This will set::
x0 = x0
xc = x0
iter = iterLS = 0
:param numpy.ndarray x0: initial x
:rtype: None
:return: None
"""
self.iter = 0
self.iterLS = 0
self.stopNextIteration = False
x0 = self.projection(x0) # ensure that we start of feasible.
self.x0 = x0
self.xc = x0
self.f_last = np.nan
self.x_last = x0
@Utils.count
@Utils.callHooks('doStartIteration')
def doStartIteration(self):
"""doStartIteration()
**doStartIteration** is called at the start of each minimize iteration.
:rtype: None
:return: None
"""
pass
def printInit(self, inLS=False):
"""
**printInit** is called at the beginning of the optimization routine.
If there is a parent object, printInit will check for a
parent.printInit function and call that.
"""
pad = ' '*10 if inLS else ''
name = self.name if not inLS else self.nameLS
Utils.printTitles(self, self.printers if not inLS else self.printersLS, name, pad)
@Utils.callHooks('printIter')
def printIter(self, inLS=False):
"""
**printIter** is called directly after function evaluations.
If there is a parent object, printIter will check for a
parent.printIter function and call that.
"""
pad = ' '*10 if inLS else ''
Utils.printLine(self, self.printers if not inLS else self.printersLS, pad=pad)
def printDone(self, inLS=False):
"""
**printDone** is called at the end of the optimization routine.
If there is a parent object, printDone will check for a
parent.printDone function and call that.
"""
pad = ' '*10 if inLS else ''
stop, done = (' STOP! ', ' DONE! ') if not inLS else ('----------------', ' End Linesearch ')
stoppers = self.stoppers if not inLS else self.stoppersLS
Utils.printStoppers(self, stoppers, pad='', stop=stop, done=done)
@Utils.callHooks('finish')
def finish(self):
"""finish()
**finish** is called at the end of the optimization.
:rtype: None
:return: None
"""
pass
def stoppingCriteria(self, inLS=False):
if self.iter == 0:
self.f0 = self.f
self.g0 = self.g
return Utils.checkStoppers(self, self.stoppers if not inLS else self.stoppersLS)
@Utils.timeIt
@Utils.callHooks('projection')
def projection(self, p):
"""projection(p)
projects the search direction.
by default, no projection is applied.
:param numpy.ndarray p: searchDirection
:rtype: numpy.ndarray
:return: p, projected search direction
"""
return p
@Utils.timeIt
def findSearchDirection(self):
"""findSearchDirection()
**findSearchDirection** should return an approximation of:
.. math::
H p = - g
Where you are solving for the search direction, p
The default is:
.. math::
H = I
p = - g
And corresponds to SteepestDescent.
The latest function evaluations are present in::
self.f, self.g, self.H
:rtype: numpy.ndarray
:return: p, Search Direction
"""
return -self.g
@Utils.count
def scaleSearchDirection(self, p):
"""scaleSearchDirection(p)
**scaleSearchDirection** should scale the search direction if appropriate.
Set the parameter **maxStep** in the minimize object, to scale back the gradient to a maximum size.
:param numpy.ndarray p: searchDirection
:rtype: numpy.ndarray
:return: p, Scaled Search Direction
"""
if self.maxStep < np.abs(p.max()):
p = self.maxStep*p/np.abs(p.max())
return p
nameLS = "Armijo linesearch" #: The line-search name
@Utils.timeIt
def modifySearchDirection(self, p):
"""modifySearchDirection(p)
**modifySearchDirection** changes the search direction based on some sort of linesearch or trust-region criteria.
By default, an Armijo backtracking linesearch is preformed with the following parameters:
* maxIterLS, the maximum number of linesearch iterations
* LSreduction, the expected reduction expected, default: 1e-4
* LSshorten, how much the step is reduced, default: 0.5
If the linesearch is completed, and a descent direction is found, passLS is returned as True.
Else, a modifySearchDirectionBreak call is preformed.
:param numpy.ndarray p: searchDirection
:rtype: numpy.ndarray,bool
:return: (xt, passLS)
"""
# Projected Armijo linesearch
self._LS_t = 1
self.iterLS = 0
while self.iterLS < self.maxIterLS:
self._LS_xt = self.projection(self.xc + self._LS_t*p)
self._LS_ft = self.evalFunction(self._LS_xt, return_g=False, return_H=False)
self._LS_descent = np.inner(self.g, self._LS_xt - self.xc) # this takes into account multiplying by t, but is important for projection.
if self.stoppingCriteria(inLS=True): break
self.iterLS += 1
self._LS_t = self.LSshorten*self._LS_t
if self.debugLS:
if self.iterLS == 1: self.printInit(inLS=True)
self.printIter(inLS=True)
if self.debugLS and self.iterLS > 0: self.printDone(inLS=True)
return self._LS_xt, self.iterLS < self.maxIterLS
@Utils.count
def modifySearchDirectionBreak(self, p):
"""modifySearchDirectionBreak(p)
Code is called if modifySearchDirection fails
to find a descent direction.
The search direction is passed as input and
this function must pass back both a new searchDirection,
and if the searchDirection break has been caught.
By default, no additional work is done, and the
evalFunction returns a False indicating the break was not caught.
:param numpy.ndarray p: searchDirection
:rtype: numpy.ndarray,bool
:return: (xt, breakCaught)
"""
self.printDone(inLS=True)
print 'The linesearch got broken. Boo.'
return p, False
@Utils.count
@Utils.callHooks('doEndIteration')
def doEndIteration(self, xt):
"""doEndIteration(xt)
**doEndIteration** is called at the end of each minimize iteration.
By default, function values and x locations are shuffled to store 1 past iteration in memory.
self.xc must be updated in this code.
:param numpy.ndarray xt: tested new iterate that ensures a descent direction.
:rtype: None
:return: None
"""
# store old values
self.f_last = self.f
self.x_last, self.xc = self.xc, xt
self.iter += 1
if self.debug: self.printDone()
if self.callback is not None:
self.callback(xt)
def save(self, group):
group.setArray('searchDirection', self.searchDirection)
if getattr(self,'parent',None) is None:
group.setArray('x', self.xc)
else: # Assume inversion is the parent
group.attrs['phi_d'] = self.parent.phi_d
group.attrs['phi_m'] = self.parent.phi_m
group.attrs['beta'] = self.parent.beta
group.setArray('m', self.xc)
group.setArray('dpred', self.parent.dpred)
class Remember(object):
"""
This mixin remembers all the things you tend to forget.
You can remember parameters directly, naming the str in Minimize,
or pass a tuple with the name and the function that takes Minimize.
For Example::
opt.remember('f',('norm_g', lambda M: np.linalg.norm(M.g)))
opt.minimize(evalFunction, x0)
opt.recall('f')
The param name (str) can also be located in the parent (if no conflicts),
and it will be looked up by default.
"""
_rememberThese = []
def remember(self, *args):
self._rememberThese = args
def recall(self, param):
assert param in self._rememberList, "You didn't tell me to remember "+param+", you gotta tell me what to remember!"
return self._rememberList[param]
def _startupRemember(self, x0):
self._rememberList = {}
for param in self._rememberThese:
if type(param) is str:
self._rememberList[param] = []
elif type(param) is tuple:
self._rememberList[param[0]] = []
def _doEndIterationRemember(self, *args):
for param in self._rememberThese:
if type(param) is str:
if self.debug: print 'Remember is remembering: ' + param
val = getattr(self, param, None)
if val is None and getattr(self, 'parent', None) is not None:
# Look to the parent for the param if not found here.
val = getattr(self.parent, param, None)
self._rememberList[param].append( val )
elif type(param) is tuple:
if self.debug: print 'Remember is remembering: ' + param[0]
self._rememberList[param[0]].append( param[1](self) )
class ProjectedGradient(Minimize, Remember):
name = 'Projected Gradient'
maxIterCG = 5
tolCG = 1e-1
lower = -np.inf
upper = np.inf
def __init__(self,**kwargs):
super(ProjectedGradient, self).__init__(**kwargs)
self.stoppers.append(StoppingCriteria.bindingSet)
self.stoppersLS.append(StoppingCriteria.bindingSet_LS)
self.printers.extend([ IterationPrinters.itType, IterationPrinters.aSet, IterationPrinters.bSet, IterationPrinters.comment ])
def _startup(self, x0):
# ensure bound vectors are the same size as the model
if type(self.lower) is not np.ndarray:
self.lower = np.ones_like(x0)*self.lower
if type(self.upper) is not np.ndarray:
self.upper = np.ones_like(x0)*self.upper
self.explorePG = True
self.exploreCG = False
self.stopDoingPG = False
self._itType = 'SD'
self.comment = ''
self.aSet_prev = self.activeSet(x0)
@Utils.count
def projection(self, x):
"""projection(x)
Make sure we are feasible.
"""
return np.median(np.c_[self.lower,x,self.upper],axis=1)
@Utils.count
def activeSet(self, x):
"""activeSet(x)
If we are on a bound
"""
return np.logical_or(x == self.lower, x == self.upper)
@Utils.count
def inactiveSet(self, x):
"""inactiveSet(x)
The free variables.
"""
return np.logical_not(self.activeSet(x))
@Utils.count
def bindingSet(self, x):
"""bindingSet(x)
If we are on a bound and the negative gradient points away from the feasible set.
Optimality condition. (Satisfies Kuhn-Tucker) MoreToraldo91
"""
bind_up = np.logical_and(x == self.lower, self.g >= 0)
bind_low = np.logical_and(x == self.upper, self.g <= 0)
return np.logical_or(bind_up, bind_low)
@Utils.timeIt
def findSearchDirection(self):
"""findSearchDirection()
Finds the search direction based on either CG or steepest descent.
"""
self.aSet_prev = self.activeSet(self.xc)
allBoundsAreActive = sum(self.aSet_prev) == self.xc.size
if self.debug: print 'findSearchDirection: stopDoingPG: ', self.stopDoingPG
if self.debug: print 'findSearchDirection: explorePG: ', self.explorePG
if self.debug: print 'findSearchDirection: exploreCG: ', self.exploreCG
if self.debug: print 'findSearchDirection: aSet', np.sum(self.activeSet(self.xc))
if self.debug: print 'findSearchDirection: bSet', np.sum(self.bindingSet(self.xc))
if self.debug: print 'findSearchDirection: allBoundsAreActive: ', allBoundsAreActive
if self.explorePG or not self.exploreCG or allBoundsAreActive:
if self.debug: print 'findSearchDirection.PG: doingPG'
self._itType = 'SD'
p = -self.g
else:
if self.debug: print 'findSearchDirection.CG: doingCG'
# Reset the max decrease each time you do a CG iteration
self.f_decrease_max = -np.inf
self._itType = '.CG.'
iSet = self.inactiveSet(self.xc) # The inactive set (free variables)
bSet = self.bindingSet(self.xc)
shape = (self.xc.size, np.sum(iSet))
v = np.ones(shape[1])
i = np.where(iSet)[0]
j = np.arange(shape[1])
if self.debug: print 'findSearchDirection.CG: Z.shape', shape
Z = sp.csr_matrix((v, (i, j)), shape=shape)
def reduceHess(v):
# Z is tall and skinny
return Z.T*(self.H*(Z*v))
operator = sp.linalg.LinearOperator( (shape[1], shape[1]), reduceHess, dtype=self.xc.dtype )
p, info = sp.linalg.cg(operator, -Z.T*self.g, tol=self.tolCG, maxiter=self.maxIterCG)
p = Z*p # bring up to full size
# aSet_after = self.activeSet(self.xc+p)
return p
@Utils.timeIt
def _doEndIteration_ProjectedGradient(self, xt):
"""_doEndIteration_ProjectedGradient(xt)"""
aSet = self.activeSet(xt)
bSet = self.bindingSet(xt)
self.explorePG = not np.all(aSet == self.aSet_prev) # explore proximal gradient
self.exploreCG = np.all(aSet == bSet) # explore conjugate gradient
f_current_decrease = self.f_last - self.f
self.comment = ''
if self.iter < 1:
# Note that this is reset on every CG iteration.
self.f_decrease_max = -np.inf
else:
self.f_decrease_max = max(self.f_decrease_max, f_current_decrease)
self.stopDoingPG = f_current_decrease < 0.25 * self.f_decrease_max
if self.stopDoingPG:
self.comment = 'Stop SD'
self.explorePG = False
self.exploreCG = True
# implement 3.8, MoreToraldo91
#self.eta_2 * max_decrease where max decrease
# if true go to CG
# don't do too many steps of PG in a row.
if self.debug: print 'doEndIteration.ProjGrad, f_current_decrease: ', f_current_decrease
if self.debug: print 'doEndIteration.ProjGrad, f_decrease_max: ', self.f_decrease_max
if self.debug: print 'doEndIteration.ProjGrad, stopDoingSD: ', self.stopDoingPG
class BFGS(Minimize, Remember):
name = 'BFGS'
nbfgs = 10
def __init__(self, **kwargs):
Minimize.__init__(self, **kwargs)
@property
def bfgsH0(self):
"""
Approximate Hessian used in preconditioning the problem.
Must be a SimPEG.Solver
"""
if getattr(self,'_bfgsH0',None) is None:
self._bfgsH0 = SolverDiag(sp.identity(self.xc.size))
return self._bfgsH0
@bfgsH0.setter
def bfgsH0(self, value):
self._bfgsH0 = value
def _startup_BFGS(self,x0):
self._bfgscnt = -1
self._bfgsY = np.zeros((x0.size, self.nbfgs))
self._bfgsS = np.zeros((x0.size, self.nbfgs))
if not np.any([p is IterationPrinters.comment for p in self.printers]):
self.printers.append(IterationPrinters.comment)
def bfgs(self, d):
n = self._bfgscnt
nn = ktop = min(self._bfgsS.shape[1],n)
return self.bfgsrec(ktop,n,nn,self._bfgsS,self._bfgsY,d)
def bfgsrec(self,k,n,nn,S,Y,d):
"""BFGS recursion"""
if k < 0:
d = self.bfgsH0 * d #Assume that bfgsH0 is a SimPEG.Solver
else:
khat = 0 if nn is 0 else np.mod(n-nn+k,nn)
gamma = np.vdot(S[:,khat],d)/np.vdot(Y[:,khat],S[:,khat])
d = d - gamma*Y[:,khat]
d = self.bfgsrec(k-1,n,nn,S,Y,d)
d = d + (gamma - np.vdot(Y[:,khat],d)/np.vdot(Y[:,khat],S[:,khat]))*S[:,khat]
return d
def findSearchDirection(self):
return self.bfgs(-self.g)
def _doEndIteration_BFGS(self, xt):
if self.iter is 0:
self.g_last = self.g
return
yy = self.g - self.g_last;
ss = self.xc - xt;
self.g_last = self.g
if yy.dot(ss) > 0:
self._bfgscnt += 1
ktop = np.mod(self._bfgscnt,self.nbfgs)
self._bfgsY[:,ktop] = yy
self._bfgsS[:,ktop] = ss
self.comment = ''
else:
self.comment = 'Skip BFGS'
class GaussNewton(Minimize, Remember):
name = 'Gauss Newton'
def __init__(self, **kwargs):
Minimize.__init__(self, **kwargs)
@Utils.timeIt
def findSearchDirection(self):
return Solver(self.H) * (-self.g)
class InexactGaussNewton(BFGS, Minimize, Remember):
"""
Minimizes using CG as the inexact solver of
.. math::
\mathbf{H p = -g}
By default BFGS is used as the preconditioner.
Use *nbfgs* to set the memory limitation of BFGS.
To set the initial H0 to be used in BFGS, set *bfgsH0* to be a SimPEG.Solver
"""
def __init__(self, **kwargs):
Minimize.__init__(self, **kwargs)
name = 'Inexact Gauss Newton'
maxIterCG = 5
tolCG = 1e-1
@property
def approxHinv(self):
"""
The approximate Hessian inverse is used to precondition CG.
Default uses BFGS, with an initial H0 of *bfgsH0*.
Must be a scipy.sparse.linalg.LinearOperator
"""
_approxHinv = getattr(self,'_approxHinv',None)
if _approxHinv is None:
M = sp.linalg.LinearOperator( (self.xc.size, self.xc.size), self.bfgs, dtype=self.xc.dtype )
return M
return _approxHinv
@approxHinv.setter
def approxHinv(self, value):
self._approxHinv = value
@Utils.timeIt
def findSearchDirection(self):
Hinv = SolverICG(self.H, M=self.approxHinv, tol=self.tolCG, maxiter=self.maxIterCG)
p = Hinv * (-self.g)
return p
class SteepestDescent(Minimize, Remember):
name = 'Steepest Descent'
def __init__(self, **kwargs):
Minimize.__init__(self, **kwargs)
@Utils.timeIt
def findSearchDirection(self):
return -self.g
class NewtonRoot(object):
"""
Newton Method - Root Finding
root = newtonRoot(fun,x);
Where fun is the function that returns the function value as well as the
gradient.
For iterative solving of dh = -J\\r, use O.solveTol = TOL. For direct
solves, use SOLVETOL = 0 (default)
Rowan Cockett
16-May-2013 16:29:51
University of British Columbia
rcockett@eos.ubc.ca
"""
tol = 1.000e-06
maxIter = 20
stepDcr = 0.5
maxLS = 30
comments = False
doLS = True
Solver = Solver
solverOpts = {}
def __init__(self, **kwargs):
Utils.setKwargs(self, **kwargs)
def root(self, fun, x):
"""root(fun, x)
Function Should have the form::
def evalFunction(x, return_g=False):
out = (f,)
if return_g:
out += (g,)
return out if len(out) > 1 else out[0]
"""
if self.comments: print 'Newton Method:\n'
self.iter = 0
while True:
r, J = fun(x, return_g=True)
Jinv = self.Solver(J, **self.solverOpts)
dh = - (Jinv * r)
muLS = 1.
LScnt = 1
xt = x + dh
rt = fun(xt, return_g=False)
if self.comments and self.doLS: print '\tLinesearch:\n'
# Enter Linesearch
while True and self.doLS:
if self.comments: print '\t\tResid: %e\n'%norm(rt)
if norm(rt) <= norm(r) or norm(rt) < self.tol:
break
muLS = muLS*self.stepDcr
LScnt = LScnt + 1
print '.'
if LScnt > self.maxLS:
print 'Newton Method: Line search break.'
return None
xt = x + muLS*dh
rt = fun(xt, return_g=False)
x = xt
self.iter += 1
if norm(rt) < self.tol:
break
if self.iter > self.maxIter:
print 'NewtonRoot stopped by maxIters (%d). norm: %4.4e' % (self.maxIter, norm(rt))
break
return x
class ProjectedGNCG(BFGS, Minimize, Remember):
def __init__(self, **kwargs):
Minimize.__init__(self, **kwargs)
name = 'Projected GNCG'
maxIterCG = 5
tolCG = 1e-1
lower = -np.inf
upper = np.inf
def _startup(self, x0):
# ensure bound vectors are the same size as the model
if type(self.lower) is not np.ndarray:
self.lower = np.ones_like(x0)*self.lower
if type(self.upper) is not np.ndarray:
self.upper = np.ones_like(x0)*self.upper
@Utils.count
def projection(self, x):
"""projection(x)
Make sure we are feasible.
"""
return np.median(np.c_[self.lower,x,self.upper],axis=1)
@Utils.count
def activeSet(self, x):
"""activeSet(x)
If we are on a bound
"""
return np.logical_or(x <= self.lower, x >= self.upper)
@property
def approxHinv(self):
"""
The approximate Hessian inverse is used to precondition CG.
Default uses BFGS, with an initial H0 of *bfgsH0*.
Must be a scipy.sparse.linalg.LinearOperator
"""
_approxHinv = getattr(self,'_approxHinv',None)
if _approxHinv is None:
M = sp.linalg.LinearOperator( (self.xc.size, self.xc.size), self.bfgs, dtype=self.xc.dtype )
return M
return _approxHinv
@approxHinv.setter
def approxHinv(self, value):
self._approxHinv = value
@Utils.timeIt
def findSearchDirection(self):
"""
findSearchDirection()
Finds the search direction based on either CG or steepest descent.
"""
Active = self.activeSet(self.xc)
temp = sum((np.ones_like(self.xc.size)-Active))
allBoundsAreActive = temp == self.xc.size
if allBoundsAreActive:
Hinv = SolverICG(self.H, M=self.approxHinv, tol=self.tolCG, maxiter=self.maxIterCG)
p = Hinv * (-self.g)
return p
else:
delx = np.zeros(self.g.size)
resid = -(1-Active) * self.g
# Begin CG iterations.
cgiter = 0
cgFlag = 0
normResid0 = norm(resid)
while cgFlag == 0:
cgiter = cgiter + 1
dc = (1-Active)*(self.approxHinv*resid)
rd = np.dot(resid, dc)
# Compute conjugate direction pc.
if cgiter == 1:
pc = dc
else:
betak = rd / rdlast
pc = dc + betak * pc
# Form product Hessian*pc.
Hp = self.H*pc
Hp = (1-Active)*Hp
# Update delx and residual.
alphak = rd / np.dot(pc, Hp)
delx = delx + alphak*pc
resid = resid - alphak*Hp
rdlast = rd
if np.logical_or(norm(resid)/normResid0 <= self.tolCG, cgiter == self.maxIterCG):
cgFlag = 1
# End CG Iterations
# Take a gradient step on the active cells if exist
if temp != self.xc.size:
rhs_a = (Active) * -self.g
dm_i = max( abs( delx ) )
dm_a = max( abs(rhs_a) )
delx = delx + rhs_a * dm_i / dm_a /10.
# Only keep gradients going in the right direction on the active set
indx = ((self.xc<=self.lower) & (delx < 0)) | ((self.xc>=self.upper) & (delx > 0))
delx[indx] = 0.
return delx