diff --git a/SimPEG/GaussNewton.py b/SimPEG/GaussNewton.py
index e0304a8d..bd6d37c3 100644
--- a/SimPEG/GaussNewton.py
+++ b/SimPEG/GaussNewton.py
@@ -2,25 +2,25 @@ import numpy as np
 import matplotlib.pyplot as plt
 from pylab import norm
 
-def GaussNewton(fctn, x0,maxIter=20, maxIterLS=10, LSreduction=1e-4, tolJ=1e-3, tolX=1e-3, 
+def GaussNewton(fctn, x0,maxIter=20, maxIterLS=10, LSreduction=1e-4, tolJ=1e-3, tolX=1e-3,
                                                     tolG=1e-3, eps=1e-16, xStop=[]):
     """
         GaussNewton Optimization
-        
+
         Input:
         ------
             fctn - objective Function (lambda function)
             x0   - starting guess
-            
+
         Output:
         -------
             xOpt - numerical optimizer
     """
     # initial output
     print "%s GaussNewton %s" % ('='*22,'='*22)
-    print "iter\tJc\t\tnorm(dJ)\tLS" 
+    print "iter\tJc\t\tnorm(dJ)\tLS"
     print "%s" % '-'*57
-    
+
     # evaluate stopping criteria
     if xStop==[]:
         xStop=x0
@@ -31,26 +31,26 @@ def GaussNewton(fctn, x0,maxIter=20, maxIterLS=10, LSreduction=1e-4, tolJ=1e-3,
     xc = x0
     STOP = np.zeros((5,1),dtype=bool)
     iterLS=0; iter=0
-    
+
     Jold = Jstop
     xOld=xc
     while 1:
         # evaluate objective function
         Jc,dJ,H = fctn(xc)
         print "%3d\t%1.2e\t%1.2e\t%d" % (iter, Jc[0],norm(dJ),iterLS)
-        
+
         # check stopping rules
         STOP[0] = (iter>0) & (abs(Jc[0]-Jold[0]) <= tolJ*(1+abs(Jstop[0])))
         STOP[1] = (iter>0) & (norm(xc-xOld)      <= tolX*(1+norm(x0)))
         STOP[2] = norm(dJ)                       <= tolG*(1+abs(Jstop[0]))
         STOP[3] = norm(dJ)                       <= 1e3*eps
         STOP[4] = (iter >= maxIter)
-        if all(STOP[0:3]) | any(STOP[3:]): 
+        if all(STOP[0:3]) | any(STOP[3:]):
             break
-        
+
         # get search direction
         dx = np.linalg.solve(H,-dJ)
-        
+
         # Armijo linesearch
         descent = np.dot(dJ.T,dx)
         LS =0; t = 1; iterLS=1
@@ -62,13 +62,13 @@ def GaussNewton(fctn, x0,maxIter=20, maxIterLS=10, LSreduction=1e-4, tolJ=1e-3,
                 break
             iterLS = iterLS+1
             t = .5*t
-            
+
         # store old values
         Jold = Jc; xOld = xc
-        # update 
+        # update
         xc = xt
         iter = iter +1
-        
+
     print "%s STOP! %s" % ('-'*25,'-'*25)
     print "%d : |Jc-Jold| = %1.4e <= tolJ*(1+|Jstop|) = %1.4e"  % (STOP[0],abs(Jc[0]-Jold[0]),tolJ*(1+abs(Jstop[0])))
     print "%d : |xc-xOld| = %1.4e <= tolX*(1+|x0|)    = %1.4e"  % (STOP[1],norm(xc-xOld),tolX*(1+norm(x0)))
@@ -76,50 +76,50 @@ def GaussNewton(fctn, x0,maxIter=20, maxIterLS=10, LSreduction=1e-4, tolJ=1e-3,
     print "%d : |dJ|      = %1.4e <= 1e3*eps          = %1.4e"  % (STOP[3],norm(dJ),1e3*eps)
     print "%d : iter      = %3d\t <= maxIter\t       = %3d"     % (STOP[4],iter,maxIter)
     print "%s DONE! %s\n" % ('='*25,'='*25)
-    
+
     return xc
-      
+
 def Rosenbrock(x):
     """
         Rosenbrock function for testing GaussNewton scheme
     """
-    J   = 100*(x[1]-x[0]**2)**2+(1-x[0])**2 
+    J   = 100*(x[1]-x[0]**2)**2+(1-x[0])**2
     dJ  = np.array([2*(200*x[0]**3-200*x[0]*x[1]+x[0]-1),200*(x[1]-x[0]**2)])
     H = np.array([[-400*x[1]+1200*x[0]**2+2, -400*x[0]],[ -400*x[0], 200]],dtype=float);
-    
+
     return J,dJ,H
-    
+
 def checkDerivative(fctn,x0):
     """
         Basic derivative check
-        
-        Compares error decay of 0th and 1st order Taylor approximation at point 
+
+        Compares error decay of 0th and 1st order Taylor approximation at point
         x0 for a randomized search direction.
-        
+
        Input:
        ------
          fctn  -  function handle
-         x0    -  point at which to check derivative   
+         x0    -  point at which to check derivative
     """
-    
+
     print "%s checkDerivative %s" % ('='*20,'='*20)
     print "iter\th\t\t|J0-Jt|\t\t|J0+h*dJ'*dx-Jt|"
 
     Jc,dJ,H = fctn(x0)
-    
+
     dx = np.random.randn(len(x0),1)
-    
+
     t  = np.logspace(-1,-10,10)
     E0 = np.zeros(t.shape)
     E1 = np.zeros(t.shape)
-    
+
     for i in range(0,10):
         Jt = fctn(x0+t[i]*dx)
-        E0[i] = norm(Jt[0]-Jc[0])                           # 0th order Taylor 
-        E1[i] = norm(Jt[0]-Jc[0]-t[i]*np.dot(dJ.T,dx))      # 1st order Taylor 
-        
+        E0[i] = norm(Jt[0]-Jc[0])                           # 0th order Taylor
+        E1[i] = norm(Jt[0]-Jc[0]-t[i]*np.dot(dJ.T,dx))      # 1st order Taylor
+
         print "%d\t%1.2e\t%1.3e\t%1.3e" % (i,t[i],E0[i],E1[i])
-        
+
     print "%s DONE! %s\n" % ('='*25,'='*25)
     plt.figure()
     plt.clf()
@@ -131,11 +131,11 @@ def checkDerivative(fctn,x0):
     plt.legend(['0th order', '1st order'],loc='upper left')
     plt.show()
     return
-    
+
 if __name__ == '__main__':
     x0 = np.array([[2.6],[3.7]])
     fctn = lambda x:Rosenbrock(x)
     checkDerivative(fctn,x0)
     xOpt = GaussNewton(fctn,x0,maxIter=20)
     print "xOpt=[%f,%f]" % (xOpt[0],xOpt[1])
-   
\ No newline at end of file
+
diff --git a/SimPEG/__init__.py b/SimPEG/__init__.py
index ca9b9f7b..f36000a7 100644
--- a/SimPEG/__init__.py
+++ b/SimPEG/__init__.py
@@ -1,3 +1,4 @@
 from TensorMesh import TensorMesh
 from LogicallyOrthogonalMesh import LogicallyOrthogonalMesh
 import utils
+import inverse
diff --git a/SimPEG/forward/Problem.py b/SimPEG/forward/Problem.py
new file mode 100644
index 00000000..f83ea505
--- /dev/null
+++ b/SimPEG/forward/Problem.py
@@ -0,0 +1,115 @@
+import numpy as np
+from SimPEG.utils import mkvc, sdiag
+norm = np.linalg.norm
+
+
+class Problem(object):
+    """Problem is the base class for all geophysical forward problems in SimPEG"""
+    def __init__(self, mesh):
+        self.mesh = mesh
+        pass
+
+    def residual(self, m):
+        pass
+
+    def modelTransform(self, m):
+        """
+            :param numpy.array m: model
+            :rtype: numpy.array
+            :return: transformed model
+
+            The modelTransform changes the model into the physical property.
+
+            A common example of this is to invert for electrical conductivity
+            in log space. In this case, your model will be log(sigma) and to
+            get back to sigma, you can take the exponential:
+
+            .. math::
+
+                m = \log{\sigma}
+
+                \exp{m} = \exp{\log{\sigma}} = \sigma
+        """
+        return np.exp(mkvc(m))
+
+    def modelTransformDeriv(self, m):
+        """
+            :param numpy.array m: model
+            :rtype: scipy.csr_matrix
+            :return: derivative of transformed model
+
+            The modelTransform changes the model into the physical property.
+            The modelTransformDeriv provides the derivative of the modelTransform.
+
+            If the model transform is:
+
+            .. math::
+
+                m = \log{\sigma}
+
+                \exp{m} = \exp{\log{\sigma}} = \sigma
+
+            Then the derivative is:
+
+            .. math::
+
+                \\frac{\partial \exp{m}}{\partial m} = \\text{sdiag}(\exp{m})
+        """
+        return sdiag(np.exp(mkvc(m)))
+
+    def _test_modelTransformDeriv(self):
+        m = np.random.rand(5)
+        return checkDerivative(lambda m : [self.modelTransform(m), self.modelTransformDeriv(m)], m)
+
+    def misfit(self, field):
+        """
+            :param numpy.array field: geophysical field of interest
+            :rtype: float
+            :return: data misfit
+
+            The data misfit using an l_2 norm is:
+
+            .. math::
+
+                \mu_\\text{data} = {1\over 2}\left| \mathbf{W} (\mathbf{Pu} - d_\\text{obs}) \\right|_2^2
+
+            Where P is a projection matrix that brings the field on the full domain to the data measurement locations;
+            u is the field of interest; d_obs is the observed data; and W is the weighting matrix.
+        """
+        R = self.W*(self.P*field - self.dobs)
+        return 0.5*mkvc(R).inner(mkvc(R))
+
+    def misfitDeriv(self, field):
+        """
+            TODO: Change this documentation.
+
+            :param numpy.array field: geophysical field of interest
+            :rtype: float
+            :return: data misfit derivative
+
+            The data misfit using an l_2 norm is:
+
+            .. math::
+
+                \mu_\\text{data} = {1\over 2}\left| \mathbf{W} (\mathbf{Pu} - d_\\text{obs}) \\right|_2^2
+
+            Where P is a projection matrix that brings the field on the full domain to the data measurement locations;
+            u is the field of interest; d_obs is the observed data; and W is the weighting matrix.
+        """
+
+        R = self.W*(self.P*field - self.dobs)
+        # TODO: make in terms of the field and call Jt, e.g. if looping over RHSs using i: self.Jt(field[:,i],self.W[:,i]*R[:,i])
+        return mkvc(R)
+
+    def J(self, u):
+        pass
+
+    def Jt(self, v):
+        pass
+
+if __name__ == '__main__':
+    from SimPEG.inverse import checkDerivative
+
+    p = Problem(None)
+    m = np.random.rand(5)
+    checkDerivative(lambda m : [p.modelTransform(m), p.modelTransformDeriv(m)], m)
diff --git a/SimPEG/forward/__init__.py b/SimPEG/forward/__init__.py
new file mode 100644
index 00000000..3c4d3728
--- /dev/null
+++ b/SimPEG/forward/__init__.py
@@ -0,0 +1 @@
+from Problem import *
diff --git a/SimPEG/inverse/Optimize.py b/SimPEG/inverse/Optimize.py
new file mode 100644
index 00000000..53a11410
--- /dev/null
+++ b/SimPEG/inverse/Optimize.py
@@ -0,0 +1,222 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from SimPEG.utils import mkvc, sdiag
+norm = np.linalg.norm
+
+
+class Minimize(object):
+    """docstring for Minimize"""
+
+    name = "GeneralOptimizationAlgorithm"
+
+    maxIter = 20
+    maxIterLS = 10
+    LSreduction = 1e-4
+    LSshorten = 0.5
+    tolF = 1e-4
+    tolX = 1e-4
+    tolG = 1e-4
+    eps = 1e-16
+
+    def __init__(self, problem, **kwargs):
+        self.problem = problem
+
+        # Set the variables, throw an error if they don't exist.
+        for attr in kwargs:
+            if hasattr(self, attr):
+                setattr(self, attr, kwargs[attr])
+            else:
+                raise Exception('%s attr is not recognized' % attr)
+
+    def minimize(self, x0):
+
+        self.startup(x0)
+        self.printInit()
+
+        while True:
+            self.f, self.g, self.H = self.evalFunction(self.xc)
+            self.printIter()
+            if self.stoppingCriteria(): break
+            p = self.findSearchDirection()
+            xt, passLS = self.linesearch(p)
+            if not passLS:
+                xt = self.linesearchBreak(p)
+            self.doEndIteration(xt)
+
+        self.printDone()
+
+        return self.xc
+
+    def startup(self, x0):
+        self._iter = 0
+        self._iterLS = 0
+        self._STOP = np.zeros((5,1),dtype=bool)
+
+        self.x0 = x0
+        self.xc = x0
+        self.xOld = x0
+
+    def printInit(self):
+        print "%s %s %s" % ('='*22, self.name, '='*22)
+        print "iter\tJc\t\tnorm(dJ)\tLS"
+        print "%s" % '-'*57
+
+    def printIter(self):
+        print "%3d\t%1.2e\t%1.2e\t%d" % (self._iter, self.f, norm(self.g), self._iterLS)
+
+    def printDone(self):
+        print "%s STOP! %s" % ('-'*25,'-'*25)
+        print "%d : |fc-fOld| = %1.4e <= tolF*(1+|fStop|) = %1.4e"  % (self._STOP[0], abs(self.f-self.fOld), self.tolF*(1+abs(self.fStop)))
+        print "%d : |xc-xOld| = %1.4e <= tolX*(1+|x0|)    = %1.4e"  % (self._STOP[1], norm(self.xc-self.xOld), self.tolX*(1+norm(self.x0)))
+        print "%d : |g|       = %1.4e <= tolG*(1+|fStop|) = %1.4e"  % (self._STOP[2], norm(self.g), self.tolG*(1+abs(self.fStop)))
+        print "%d : |g|       = %1.4e <= 1e3*eps          = %1.4e"  % (self._STOP[3], norm(self.g), 1e3*self.eps)
+        print "%d : iter      = %3d\t <= maxIter\t       = %3d"     % (self._STOP[4], self._iter, self.maxIter)
+        print "%s DONE! %s\n" % ('='*25,'='*25)
+
+    def evalFunction(self, x, doDerivative=True):
+        f, g, H = self.problem(x)
+        return f, g, H
+
+    def findSearchDirection(self):
+        return -self.g
+
+    def stoppingCriteria(self):
+        if self._iter == 0:
+            self.fStop = self.f  # Save this for stopping criteria
+
+        # check stopping rules
+        self._STOP[0] = self._iter > 0 and (abs(self.f-self.fOld) <= self.tolF*(1+abs(self.fStop)))
+        self._STOP[1] = self._iter > 0 and (norm(self.xc-self.xOld) <= self.tolX*(1+norm(self.x0)))
+        self._STOP[2] = norm(self.g) <= self.tolG*(1+abs(self.fStop))
+        self._STOP[3] = norm(self.g) <= 1e3*self.eps
+        self._STOP[4] = self._iter >= self.maxIter
+        return all(self._STOP[0:3]) | any(self._STOP[3:])
+
+    def linesearch(self, p):
+        # Armijo linesearch
+        descent = np.inner(self.g, p)
+        t = 1
+        iterLS = 0
+        while iterLS < self.maxIterLS:
+            xt = self.xc + t*p
+            ft, temp, temp = self.evalFunction(xt, doDerivative=False)
+            if ft < self.f + t*self.LSreduction*descent:
+                break
+            iterLS += 1
+            t = self.LSshorten*t
+
+        self._iterLS = iterLS
+        return xt, iterLS < self.maxIterLS
+
+    def linesearchBreak(self, p):
+        raise Exception('The linesearch got broken. Boo.')
+
+    def doEndIteration(self, xt):
+        # store old values
+        self.fOld = self.f
+        self.xOld, self.xc = self.xc, xt
+        self._iter += 1
+
+
+class GaussNewton(Minimize):
+    name = 'GaussNewton'
+    def findSearchDirection(self):
+        return np.linalg.solve(self.H,-self.g)
+
+
+class SteepestDescent(Minimize):
+    name = 'SteepestDescent'
+    def findSearchDirection(self):
+        return -self.g
+
+
+
+def Rosenbrock(x):
+    """Rosenbrock function for testing GaussNewton scheme"""
+
+    f = 100*(x[1]-x[0]**2)**2+(1-x[0])**2
+    g = np.array([2*(200*x[0]**3-200*x[0]*x[1]+x[0]-1), 200*(x[1]-x[0]**2)])
+    H = np.array([[-400*x[1]+1200*x[0]**2+2, -400*x[0]], [-400*x[0], 200]])
+    return f, g, H
+
+
+def checkDerivative(fctn, x0, num=7, plotIt=True, dx=None):
+    """
+        Basic derivative check
+
+        Compares error decay of 0th and 1st order Taylor approximation at point
+        x0 for a randomized search direction.
+
+       Input:
+       ------
+         fctn  -  function handle
+         x0    -  point at which to check derivative
+    """
+
+    print "%s checkDerivative %s" % ('='*20, '='*20)
+    print "iter\th\t\t|J0-Jt|\t\t|J0+h*dJ'*dx-Jt|\tOrder\n%s" % ('-'*57)
+
+    Jc = fctn(x0)
+
+    x0 = mkvc(x0)
+
+    if dx is None:
+        dx = np.random.randn(len(x0))
+
+    t  = np.logspace(-1, -num, num)
+    E0 = np.ones(t.shape)
+    E1 = np.ones(t.shape)
+
+    l2norm = lambda x: np.sqrt(np.inner(x, x))  # because np.norm breaks if they are scalars?
+    for i in range(num):
+        Jt = fctn(x0+t[i]*dx)
+        E0[i] = l2norm(Jt[0]-Jc[0])               # 0th order Taylor
+        E1[i] = l2norm(Jt[0]-Jc[0]-t[i]*Jc[1].dot(dx))  # 1st order Taylor
+        order0 = np.log10(E0[:-1]/E0[1:])
+        order1 = np.log10(E1[:-1]/E1[1:])
+        print "%d\t%1.2e\t%1.3e\t\t%1.3e\t\t%1.3f" % (i, t[i], E0[i], E1[i], np.nan if i == 0 else order1[i-1])
+
+    tolerance = 0.9
+    expectedOrder = 2
+    eps = 1e-10
+    order0 = order0[E0[1:] > eps]
+    order1 = order1[E1[1:] > eps]
+    belowTol = order1.size == 0 and order0.size > 0
+    correctOrder = order1.size > 0 and np.mean(order1) > tolerance * expectedOrder
+
+    passTest = belowTol or correctOrder
+
+    if passTest:
+        print "%s PASS! %s\n" % ('='*25, '='*25)
+    else:
+        print "%s\n%s FAIL! %s\n%s" % ('*'*57, '<'*25, '>'*25, '*'*57)
+
+    if plotIt:
+        plt.figure()
+        plt.clf()
+        plt.loglog(t, E0, 'b')
+        plt.loglog(t, E1, 'g--')
+        plt.title('checkDerivative')
+        plt.xlabel('h')
+        plt.ylabel('error of Taylor approximation')
+        plt.legend(['0th order', '1st order'], loc='upper left')
+        plt.show()
+
+    return passTest
+
+if __name__ == '__main__':
+    x0 = np.array([2.6, 3.7])
+    checkDerivative(Rosenbrock, x0, plotIt=False)
+    xOpt = GaussNewton(Rosenbrock, maxIter=20).minimize(x0)
+    print "xOpt=[%f, %f]" % (xOpt[0], xOpt[1])
+    xOpt = SteepestDescent(Rosenbrock, maxIter=20, maxIterLS=15).minimize(x0)
+    print "xOpt=[%f, %f]" % (xOpt[0], xOpt[1])
+
+    def simplePass(x):
+        return np.sin(x), sdiag(np.cos(x))
+
+    def simpleFail(x):
+        return np.sin(x), -sdiag(np.cos(x))
+
+    checkDerivative(simplePass, np.random.randn(5), plotIt=False)
+    checkDerivative(simpleFail, np.random.randn(5), plotIt=False)
diff --git a/SimPEG/inverse/__init__.py b/SimPEG/inverse/__init__.py
new file mode 100644
index 00000000..b2a5e506
--- /dev/null
+++ b/SimPEG/inverse/__init__.py
@@ -0,0 +1 @@
+from Optimize import *
diff --git a/docs/api_Problem.rst b/docs/api_Problem.rst
new file mode 100644
index 00000000..d43616d9
--- /dev/null
+++ b/docs/api_Problem.rst
@@ -0,0 +1,8 @@
+.. _api_Problem:
+
+Problem
+*******
+
+.. automodule:: SimPEG.forward.Problem
+    :members:
+    :undoc-members:
diff --git a/docs/index.rst b/docs/index.rst
index 22c9d5e9..48ebe940 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -38,12 +38,13 @@ Inversion
 
    api_GaussNewton
 
-Example Problems
+Forward Problems
 ================
 
 .. toctree::
    :maxdepth: 2
 
+   api_Problem
 
 
 Project Index & Search