Merge branch 'random_walker'

This commit is contained in:
emmanuelle
2012-01-23 19:49:39 +01:00
6 changed files with 572 additions and 8 deletions
+2 -1
View File
@@ -47,7 +47,8 @@
- Emmanuelle Guillart
Total variation noise filtering, integration of CellProfiler's
mathematical morphology tools, tutorials, and more.
mathematical morphology tools, random walker segmentation,
tutorials, and more.
- Maël Primet
Total variation noise filtering
@@ -0,0 +1,77 @@
"""
==========================
Random walker segmentation
==========================
The random walker algorithm [1]_ determines the segmentation of an image from
a set of markers labeling several phases (2 or more). An anisotropic diffusion
equation is solved with tracers initiated at the markers' position. The local
diffusivity coefficient is greater if neighboring pixels have similar values,
so that diffusion is difficult across high gradients. The label of each unknown
pixel is attributed to the label of the known marker that has the highest
probability to be reached first during this diffusion process.
In this example, two phases are clearly visible, but the data are too
noisy to perform the segmentation from the histogram only. We determine
markers of the two phases from the extreme tails of the histogram of gray
values, and use the random walker for the segmentation.
.. [1] *Random walks for image segmentation*, Leo Grady, IEEE Trans. Pattern
Anal. Mach. Intell. 2006 Nov; 28(11):1768-83
"""
print __doc__
import numpy as np
from scipy import ndimage
from skimage.segmentation import random_walker
import matplotlib.pyplot as plt
def microstructure(l=256):
"""
Synthetic binary data: binary microstructure with blobs.
Parameters
----------
l: int, optional
linear size of the returned image
"""
n = 5
x, y = np.ogrid[0:l, 0:l]
mask = np.zeros((l, l))
generator = np.random.RandomState(1)
points = l * generator.rand(2, n ** 2)
mask[(points[0]).astype(np.int), (points[1]).astype(np.int)] = 1
mask = ndimage.gaussian_filter(mask, sigma=l / (4. * n))
return (mask > mask.mean()).astype(np.float)
# Generate noisy synthetic data
data = microstructure(l=128)
data += 0.35 * np.random.randn(*data.shape)
markers = np.zeros(data.shape, dtype=np.uint)
markers[data < -0.3] = 1
markers[data > 1.3] = 2
# Run random walker algorithm
labels = random_walker(data, markers, beta=10, mode='bf')
# Plot results
plt.figure(figsize=(9, 3.5))
plt.subplot(131)
plt.imshow(data, cmap='gray', interpolation='nearest')
plt.axis('off')
plt.title('Noisy data')
plt.subplot(132)
plt.imshow(markers, cmap='hot', interpolation='nearest')
plt.axis('off')
plt.title('Markers')
plt.subplot(133)
plt.imshow(labels, cmap='gray', interpolation='nearest')
plt.axis('off')
plt.title('Segmentation')
plt.subplots_adjust(hspace=0.01, wspace=0.01, top=1, bottom=0, left=0,
right=1)
plt.show()
+14 -7
View File
@@ -42,25 +42,32 @@ def watershed(image, markers, connectivity=None, offset=None, mask=None):
image: ndarray (2-D, 3-D, ...) of integers
Data array where the lowest value points are labeled first.
markers: ndarray of the same shape as `image`
An array marking the basins with the values to be assigned in the
label matrix. Zero means not a marker. This array should be of an
An array marking the basins with the values to be assigned in the
label matrix. Zero means not a marker. This array should be of an
integer type.
connectivity: ndarray, optional
An array with the same number of dimensions as `image` whose
non-zero elements indicate neighbors for connection.
Following the scipy convention, default is a one-connected array of
Following the scipy convention, default is a one-connected array of
the dimension of the image.
offset: array_like of shape image.ndim, optional
offset of the connectivity (one offset per dimension)
mask: ndarray of bools or 0s and 1s, optional
Array of same shape as `image`. Only points at which mask == True
Array of same shape as `image`. Only points at which mask == True
will be labeled.
Returns
Returns
-------
out: ndarray
A labeled matrix of the same type and shape as markers
See also
--------
skimage.segmentation.random_walker: random walker segmentation
A segmentation algorithm based on anisotropic diffusion, usually
slower than the watershed but with good results on noisy data and
boundaries with holes.
Notes
-----
+1
View File
@@ -0,0 +1 @@
from random_walker_segmentation import random_walker
@@ -0,0 +1,367 @@
"""
Random walker segmentation algorithm
from *Random walks for image segmentation*, Leo Grady, IEEE Trans
Pattern Anal Mach Intell. 2006 Nov;28(11):1768-83.
Installing pyamg and using the 'cg_mg' mode of random_walker improves
significantly the performance.
"""
import warnings
import numpy as np
from scipy import sparse, ndimage
try:
from scipy.sparse.linalg.dsolve import umfpack
u = umfpack.UmfpackContext()
except:
warnings.warn("""Scipy was built without UMFPACK. Consider rebuilding
Scipy with UMFPACK, this will greatly speed up the random walker
functions. You may also install pyamg and run the random walker function
in cg_mg mode (see the docstrings)
""")
try:
from pyamg import ruge_stuben_solver
amg_loaded = True
except ImportError:
amg_loaded = False
from scipy.sparse.linalg import cg
#-----------Laplacian--------------------
def _make_graph_edges_3d(n_x, n_y, n_z):
"""
Returns a list of edges for a 3D image.
Parameters
----------
n_x: integer
The size of the grid in the x direction.
n_y: integer
The size of the grid in the y direction
n_z: integer
The size of the grid in the z direction
Returns
-------
edges : (2, N) ndarray
with the total number of edges N = n_x * n_y * (nz - 1) +
n_x * (n_y - 1) * nz +
(n_x - 1) * n_y * nz
Graph edges with each column describing a node-id pair.
"""
vertices = np.arange(n_x * n_y * n_z).reshape((n_x, n_y, n_z))
edges_deep = np.vstack((vertices[:, :, :-1].ravel(),
vertices[:, :, 1:].ravel()))
edges_right = np.vstack((vertices[:, :-1].ravel(),
vertices[:, 1:].ravel()))
edges_down = np.vstack((vertices[:-1].ravel(), vertices[1:].ravel()))
edges = np.hstack((edges_deep, edges_right, edges_down))
return edges
def _compute_weights_3d(data, beta=130, eps=1.e-6):
gradients = _compute_gradients_3d(data) ** 2
beta /= 10 * data.std()
gradients *= beta
weights = np.exp(- gradients)
weights += eps
return weights
def _compute_gradients_3d(data):
gr_deep = np.abs(data[:, :, :-1] - data[:, :, 1:]).ravel()
gr_right = np.abs(data[:, :-1] - data[:, 1:]).ravel()
gr_down = np.abs(data[:-1] - data[1:]).ravel()
return np.r_[gr_deep, gr_right, gr_down]
def _make_laplacian_sparse(edges, weights):
"""
Sparse implementation
"""
pixel_nb = edges.max() + 1
diag = np.arange(pixel_nb)
i_indices = np.hstack((edges[0], edges[1]))
j_indices = np.hstack((edges[1], edges[0]))
data = np.hstack((-weights, -weights))
lap = sparse.coo_matrix((data, (i_indices, j_indices)),
shape=(pixel_nb, pixel_nb))
connect = - np.ravel(lap.sum(axis=1))
lap = sparse.coo_matrix((np.hstack((data, connect)),
(np.hstack((i_indices, diag)), np.hstack((j_indices, diag)))),
shape=(pixel_nb, pixel_nb))
return lap.tocsr()
def _clean_labels_ar(X, labels):
labels = np.ravel(labels)
labels[labels == 0] = X
return labels
def _buildAB(lap_sparse, labels):
"""
Build the matrix A and rhs B of the linear system to solve.
A and B are two block of the laplacian of the image graph.
"""
labels = labels[labels >= 0]
indices = np.arange(labels.size)
unlabeled_indices = indices[labels == 0]
seeds_indices = indices[labels > 0]
# The following two lines take most of the time in this function
B = lap_sparse[unlabeled_indices][:, seeds_indices]
lap_sparse = lap_sparse[unlabeled_indices][:, unlabeled_indices]
nlabels = labels.max()
rhs = []
for lab in range(1, nlabels + 1):
mask = (labels[seeds_indices] == lab)
fs = sparse.csr_matrix(mask)
fs = fs.transpose()
rhs.append(B * fs)
return lap_sparse, rhs
def _mask_edges_weights(edges, weights, mask):
"""
Remove edges of the graph connected to masked nodes, as well as
corresponding weights of the edges.
"""
mask0 = np.hstack((mask[:, :, :-1].ravel(), mask[:, :-1].ravel(),
mask[:-1].ravel()))
mask1 = np.hstack((mask[:, :, 1:].ravel(), mask[:, 1:].ravel(),
mask[1:].ravel()))
ind_mask = np.logical_and(mask0, mask1)
edges, weights = edges[:, ind_mask], weights[ind_mask]
max_node_index = edges.max()
# Reassign edges labels to 0, 1, ... edges_number - 1
order = np.searchsorted(np.unique(edges.ravel()),
np.arange(max_node_index + 1))
edges = order[edges]
return edges, weights
def _build_laplacian(data, mask=None, beta=50):
l_x, l_y, l_z = data.shape
edges = _make_graph_edges_3d(l_x, l_y, l_z)
weights = _compute_weights_3d(data, beta=beta, eps=1.e-10)
if mask is not None:
edges, weights = _mask_edges_weights(edges, weights, mask)
lap = _make_laplacian_sparse(edges, weights)
del edges, weights
return lap
#----------- Random walker algorithm --------------------------------
def random_walker(data, labels, beta=130, mode='bf', tol=1.e-3, copy=True):
"""
Random walker algorithm for segmentation from markers.
Parameters
----------
data : array_like
Image to be segmented in phases. `data` can be two- or
three-dimensional.
labels : array of ints, of same shape as `data`
Array of seed markers labeled with different positive integers
for different phases. Zero-labeled pixels are unlabeled pixels.
Negative labels correspond to inactive pixels that are not taken
into account (they are removed from the graph).
beta : float
Penalization coefficient for the random walker motion
(the greater `beta`, the more difficult the diffusion).
mode : {'bf', 'cg_mg', 'cg'} (default: 'bf')
Mode for solving the linear system in the random walker
algorithm.
- 'bf' (brute force, default): an LU factorization of the
Laplacian is computed. This is fast for small images (<1024x1024),
but very slow (due to the memory cost) and memory-consuming for
big images (in 3-D for example).
- 'cg' (conjugate gradient): the linear system is solved
iteratively using the Conjugate Gradient method from
scipy.sparse.linalg. This is less memory-consuming than the
brute force method for large images, but it is quite slow.
- 'cg_mg' (conjugate gradient with multigrid preconditioner):
a preconditioner is computed using a multigrid solver, then
the solution is computed with the Conjugate Gradient method.
This mode requires that the pyamg module
(http://code.google.com/p/pyamg/) is installed. For images of
size > 512x512, this is the recommended (fastest) mode.
tol : float
tolerance to achieve when solving the linear system, in
cg' and 'cg_mg' modes.
copy : bool
If copy is False, the `labels` array will be overwritten with
the result of the segmentation. Use copy=False if you want to
save on memory.
Returns
-------
output : ndarray of ints
Array in which each pixel has been labeled according to the marker
that reached the pixel first by anisotropic diffusion.
See also
--------
skimage.morphology.watershed: watershed segmentation
A segmentation algorithm based on mathematical morphology
and "flooding" of regions from markers.
Notes
-----
The algorithm was first proposed in *Random walks for image
segmentation*, Leo Grady, IEEE Trans Pattern Anal Mach Intell.
2006 Nov;28(11):1768-83.
The algorithm solves the diffusion equation at infinite times for
sources placed on markers of each phase in turn. A pixel is labeled with
the phase that has the greatest probability to diffuse first to the pixel.
The diffusion equation is solved by minimizing x.T L x for each phase,
where L is the Laplacian of the weighted graph of the image, and x is
the probability that a marker of the given phase arrives first at a pixel
by diffusion (x=1 on markers of the phase, x=0 on the other markers, and
the other coefficients are looked for). Each pixel is attributed the label
for which it has a maximal value of x. The Laplacian L of the image
is defined as:
- L_ii = d_i, the number of neighbors of pixel i (the degree of i)
- L_ij = -w_ij if i and j are adjacent pixels
The weight w_ij is a decreasing function of the norm of the local gradient.
This ensures that diffusion is easier between pixels of similar values.
When the Laplacian is decomposed into blocks of marked and unmarked pixels
L = M B.T
B A
with first indices corresponding to marked pixels, and then to unmarked
pixels, minimizing x.T L x for one phase amount to solving
A x = - B x_m
where x_m=1 on markers of the given phase, and 0 on other markers.
This linear system is solved in the algorithm using a direct method for
small images, and an iterative method for larger images.
Examples
--------
>>> a = np.zeros((10, 10)) + 0.2*np.random.random((10, 10))
>>> a[5:8, 5:8] += 1
>>> b = np.zeros_like(a)
>>> b[3,3] = 1 #Marker for first phase
>>> b[6,6] = 2 #Marker for second phase
>>> random_walker(a, b)
array([[ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
[ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
[ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
[ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
[ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
[ 1., 1., 1., 1., 1., 2., 2., 2., 1., 1.],
[ 1., 1., 1., 1., 1., 2., 2., 2., 1., 1.],
[ 1., 1., 1., 1., 1., 2., 2., 2., 1., 1.],
[ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
[ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
"""
# We work with 3-D arrays
data = np.atleast_3d(data)
if copy:
labels = np.copy(labels)
labels = labels.astype(np.int)
# If the array has pruned zones, be sure that no isolated pixels
# exist between pruned zones (they could not be determined)
if np.any(labels < 0):
filled = ndimage.binary_propagation(labels > 0, mask=labels >= 0)
labels[np.logical_and(np.logical_not(filled), labels == 0)] = -1
del filled
labels = np.atleast_3d(labels)
if np.any(labels < 0):
lap_sparse = _build_laplacian(data, mask=labels >= 0, beta=beta)
else:
lap_sparse = _build_laplacian(data, beta=beta)
lap_sparse, B = _buildAB(lap_sparse, labels)
# We solve the linear system
# lap_sparse X = B
# where X[i, j] is the probability that a marker of label i arrives
# first at pixel j by anisotropic diffusion.
if mode == 'cg':
X = _solve_cg(lap_sparse, B, tol=tol)
if mode == 'cg_mg':
if not amg_loaded:
warnings.warn(
"""pyamg (http://code.google.com/p/pyamg/)) is needed to use
this mode, but is not installed. The 'cg' mode will be used
instead.""")
X = _solve_cg(lap_sparse, B, tol=tol)
else:
X = _solve_cg_mg(lap_sparse, B, tol=tol)
if mode == 'bf':
X = _solve_bf(lap_sparse, B)
X = _clean_labels_ar(X + 1, labels)
data = np.squeeze(data)
return X.reshape(data.shape)
def _solve_bf(lap_sparse, B):
"""
solves lap_sparse X_i = B_i for each phase i. An LU decomposition
of lap_sparse is computed first. For each pixel, the label i
corresponding to the maximal X_i is returned.
"""
lap_sparse = lap_sparse.tocsc()
solver = sparse.linalg.factorized(lap_sparse.astype(np.double))
X = np.array([solver(np.array((-B[i]).todense()).ravel())\
for i in range(len(B))])
X = np.argmax(X, axis=0)
return X
def _solve_cg(lap_sparse, B, tol):
"""
solves lap_sparse X_i = B_i for each phase i, using the conjugate
gradient method. For each pixel, the label i corresponding to the
maximal X_i is returned.
"""
lap_sparse = lap_sparse.tocsc()
X = []
for i in range(len(B)):
x0 = cg(lap_sparse, -B[i].todense(), tol=tol)[0]
X.append(x0)
X = np.array(X)
X = np.argmax(X, axis=0)
return X
def _solve_cg_mg(lap_sparse, B, tol):
"""
solves lap_sparse X_i = B_i for each phase i, using the conjugate
gradient method with a multigrid preconditioner (ruge-stuben from
pyamg). For each pixel, the label i corresponding to the maximal
X_i is returned.
"""
X = []
ml = ruge_stuben_solver(lap_sparse)
M = ml.aspreconditioner(cycle='V')
for i in range(len(B)):
x0 = cg(lap_sparse, -B[i].todense(), tol=tol, M=M, maxiter=30)[0]
X.append(x0)
X = np.array(X)
X = np.argmax(X, axis=0)
return X
@@ -0,0 +1,111 @@
import numpy as np
from skimage.segmentation import random_walker
try:
import pyamg
amg_loaded = True
except ImportError:
amg_loaded = False
def make_2d_syntheticdata(lx, ly=None):
if ly is None:
ly = lx
data = np.zeros((lx, ly)) + 0.1 * np.random.randn(lx, ly)
small_l = int(lx / 5)
data[lx / 2 - small_l:lx / 2 + small_l,
ly / 2 - small_l:ly / 2 + small_l] = 1
data[lx / 2 - small_l + 1:lx / 2 + small_l - 1,
ly / 2 - small_l + 1:ly / 2 + small_l - 1] = \
0.1 * np.random.randn(2 * small_l - 2, 2 * small_l - 2)
data[lx / 2 - small_l, ly / 2 - small_l / 8:ly / 2 + small_l / 8] = 0
seeds = np.zeros_like(data)
seeds[lx / 5, ly / 5] = 1
seeds[lx / 2 + small_l / 4, ly / 2 - small_l / 4] = 2
return data, seeds
def make_3d_syntheticdata(lx, ly=None, lz=None):
if ly is None:
ly = lx
if lz is None:
lz = lx
data = np.zeros((lx, ly, lz)) + 0.1 * np.random.randn(lx, ly, lz)
small_l = int(lx / 5)
data[lx / 2 - small_l:lx / 2 + small_l,
ly / 2 - small_l:ly / 2 + small_l,
lz / 2 - small_l:lz / 2 + small_l] = 1
data[lx / 2 - small_l + 1:lx / 2 + small_l - 1,
ly / 2 - small_l + 1:ly / 2 + small_l - 1,
lz / 2 - small_l + 1:lz / 2 + small_l - 1] = 0
# make a hole
hole_size = np.max([1, small_l / 8])
data[lx / 2 - small_l,
ly / 2 - hole_size:ly / 2 + hole_size,\
lz / 2 - hole_size:lz / 2 + hole_size] = 0
seeds = np.zeros_like(data)
seeds[lx / 5, ly / 5, lz / 5] = 1
seeds[lx / 2 + small_l / 4, ly / 2 - small_l / 4, lz / 2 - small_l / 4] = 2
return data, seeds
def test_2d_bf():
lx = 70
ly = 100
data, labels = make_2d_syntheticdata(lx, ly)
labels_bf = random_walker(data, labels, beta=90, mode='bf')
assert (labels_bf[25:45, 40:60] == 2).all()
return data, labels_bf
def test_2d_cg():
lx = 70
ly = 100
data, labels = make_2d_syntheticdata(lx, ly)
labels_cg = random_walker(data, labels, beta=90, mode='cg')
assert (labels_cg[25:45, 40:60] == 2).all()
return data, labels_cg
def test_2d_cg_mg():
lx = 70
ly = 100
data, labels = make_2d_syntheticdata(lx, ly)
labels_cg_mg = random_walker(data, labels, beta=90, mode='cg_mg')
assert (labels_cg_mg[25:45, 40:60] == 2).all()
return data, labels_cg_mg
def test_2d_inactive():
lx = 70
ly = 100
data, labels = make_2d_syntheticdata(lx, ly)
labels[10:20, 10:20] = -1
labels[46:50, 33:38] = -2
labels = random_walker(data, labels, beta=90)
assert (labels.reshape((lx, ly))[25:45, 40:60] == 2).all()
return data, labels
def test_3d():
n = 30
lx, ly, lz = n, n, n
data, labels = make_3d_syntheticdata(lx, ly, lz)
labels = random_walker(data, labels, mode='cg')
assert (labels.reshape(data.shape)[13:17, 13:17, 13:17] == 2).all()
return data, labels
def test_3d_inactive():
n = 30
lx, ly, lz = n, n, n
data, labels = make_3d_syntheticdata(lx, ly, lz)
old_labels = np.copy(labels)
labels[5:25, 26:29, 26:29] = -1
after_labels = np.copy(labels)
labels = random_walker(data, labels, mode='cg')
assert (labels.reshape(data.shape)[13:17, 13:17, 13:17] == 2).all()
return data, labels, old_labels, after_labels
if __name__ == '__main__':
from numpy import testing
testing.run_module_suite()