diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 3e9d2469..8f479945 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -47,7 +47,8 @@ - Emmanuelle Guillart Total variation noise filtering, integration of CellProfiler's - mathematical morphology tools, tutorials, and more. + mathematical morphology tools, random walker segmentation, + tutorials, and more. - Maƫl Primet Total variation noise filtering diff --git a/doc/examples/plot_random_walker_segmentation.py b/doc/examples/plot_random_walker_segmentation.py new file mode 100644 index 00000000..72a6fb7f --- /dev/null +++ b/doc/examples/plot_random_walker_segmentation.py @@ -0,0 +1,77 @@ +""" +========================== +Random walker segmentation +========================== + +The random walker algorithm [1]_ determines the segmentation of an image from +a set of markers labeling several phases (2 or more). An anisotropic diffusion +equation is solved with tracers initiated at the markers' position. The local +diffusivity coefficient is greater if neighboring pixels have similar values, +so that diffusion is difficult across high gradients. The label of each unknown +pixel is attributed to the label of the known marker that has the highest +probability to be reached first during this diffusion process. + +In this example, two phases are clearly visible, but the data are too +noisy to perform the segmentation from the histogram only. We determine +markers of the two phases from the extreme tails of the histogram of gray +values, and use the random walker for the segmentation. + +.. [1] *Random walks for image segmentation*, Leo Grady, IEEE Trans. Pattern + Anal. Mach. Intell. 2006 Nov; 28(11):1768-83 +""" +print __doc__ + +import numpy as np +from scipy import ndimage +from skimage.segmentation import random_walker +import matplotlib.pyplot as plt + + +def microstructure(l=256): + """ + Synthetic binary data: binary microstructure with blobs. + + Parameters + ---------- + + l: int, optional + linear size of the returned image + """ + n = 5 + x, y = np.ogrid[0:l, 0:l] + mask = np.zeros((l, l)) + generator = np.random.RandomState(1) + points = l * generator.rand(2, n ** 2) + mask[(points[0]).astype(np.int), (points[1]).astype(np.int)] = 1 + mask = ndimage.gaussian_filter(mask, sigma=l / (4. * n)) + return (mask > mask.mean()).astype(np.float) + + +# Generate noisy synthetic data +data = microstructure(l=128) +data += 0.35 * np.random.randn(*data.shape) +markers = np.zeros(data.shape, dtype=np.uint) +markers[data < -0.3] = 1 +markers[data > 1.3] = 2 + +# Run random walker algorithm +labels = random_walker(data, markers, beta=10, mode='bf') + +# Plot results +plt.figure(figsize=(9, 3.5)) +plt.subplot(131) +plt.imshow(data, cmap='gray', interpolation='nearest') +plt.axis('off') +plt.title('Noisy data') +plt.subplot(132) +plt.imshow(markers, cmap='hot', interpolation='nearest') +plt.axis('off') +plt.title('Markers') +plt.subplot(133) +plt.imshow(labels, cmap='gray', interpolation='nearest') +plt.axis('off') +plt.title('Segmentation') + +plt.subplots_adjust(hspace=0.01, wspace=0.01, top=1, bottom=0, left=0, + right=1) +plt.show() diff --git a/skimage/morphology/watershed.py b/skimage/morphology/watershed.py index 5c74374a..1966a1a8 100644 --- a/skimage/morphology/watershed.py +++ b/skimage/morphology/watershed.py @@ -42,25 +42,32 @@ def watershed(image, markers, connectivity=None, offset=None, mask=None): image: ndarray (2-D, 3-D, ...) of integers Data array where the lowest value points are labeled first. markers: ndarray of the same shape as `image` - An array marking the basins with the values to be assigned in the - label matrix. Zero means not a marker. This array should be of an + An array marking the basins with the values to be assigned in the + label matrix. Zero means not a marker. This array should be of an integer type. connectivity: ndarray, optional An array with the same number of dimensions as `image` whose non-zero elements indicate neighbors for connection. - Following the scipy convention, default is a one-connected array of + Following the scipy convention, default is a one-connected array of the dimension of the image. offset: array_like of shape image.ndim, optional offset of the connectivity (one offset per dimension) mask: ndarray of bools or 0s and 1s, optional - Array of same shape as `image`. Only points at which mask == True + Array of same shape as `image`. Only points at which mask == True will be labeled. - - Returns + + Returns ------- out: ndarray A labeled matrix of the same type and shape as markers - + + See also + -------- + + skimage.segmentation.random_walker: random walker segmentation + A segmentation algorithm based on anisotropic diffusion, usually + slower than the watershed but with good results on noisy data and + boundaries with holes. Notes ----- diff --git a/skimage/segmentation/__init__.py b/skimage/segmentation/__init__.py new file mode 100644 index 00000000..f6eaea4a --- /dev/null +++ b/skimage/segmentation/__init__.py @@ -0,0 +1 @@ +from random_walker_segmentation import random_walker diff --git a/skimage/segmentation/random_walker_segmentation.py b/skimage/segmentation/random_walker_segmentation.py new file mode 100644 index 00000000..fb26f2bf --- /dev/null +++ b/skimage/segmentation/random_walker_segmentation.py @@ -0,0 +1,367 @@ +""" +Random walker segmentation algorithm + +from *Random walks for image segmentation*, Leo Grady, IEEE Trans +Pattern Anal Mach Intell. 2006 Nov;28(11):1768-83. + +Installing pyamg and using the 'cg_mg' mode of random_walker improves +significantly the performance. +""" + +import warnings + +import numpy as np +from scipy import sparse, ndimage +try: + from scipy.sparse.linalg.dsolve import umfpack + u = umfpack.UmfpackContext() +except: + warnings.warn("""Scipy was built without UMFPACK. Consider rebuilding + Scipy with UMFPACK, this will greatly speed up the random walker + functions. You may also install pyamg and run the random walker function + in cg_mg mode (see the docstrings) + """) +try: + from pyamg import ruge_stuben_solver + amg_loaded = True +except ImportError: + amg_loaded = False +from scipy.sparse.linalg import cg + +#-----------Laplacian-------------------- + + +def _make_graph_edges_3d(n_x, n_y, n_z): + """ + Returns a list of edges for a 3D image. + + Parameters + ---------- + n_x: integer + The size of the grid in the x direction. + n_y: integer + The size of the grid in the y direction + n_z: integer + The size of the grid in the z direction + + Returns + ------- + edges : (2, N) ndarray + with the total number of edges N = n_x * n_y * (nz - 1) + + n_x * (n_y - 1) * nz + + (n_x - 1) * n_y * nz + Graph edges with each column describing a node-id pair. + """ + vertices = np.arange(n_x * n_y * n_z).reshape((n_x, n_y, n_z)) + edges_deep = np.vstack((vertices[:, :, :-1].ravel(), + vertices[:, :, 1:].ravel())) + edges_right = np.vstack((vertices[:, :-1].ravel(), + vertices[:, 1:].ravel())) + edges_down = np.vstack((vertices[:-1].ravel(), vertices[1:].ravel())) + edges = np.hstack((edges_deep, edges_right, edges_down)) + return edges + + +def _compute_weights_3d(data, beta=130, eps=1.e-6): + gradients = _compute_gradients_3d(data) ** 2 + beta /= 10 * data.std() + gradients *= beta + weights = np.exp(- gradients) + weights += eps + return weights + + +def _compute_gradients_3d(data): + gr_deep = np.abs(data[:, :, :-1] - data[:, :, 1:]).ravel() + gr_right = np.abs(data[:, :-1] - data[:, 1:]).ravel() + gr_down = np.abs(data[:-1] - data[1:]).ravel() + return np.r_[gr_deep, gr_right, gr_down] + + +def _make_laplacian_sparse(edges, weights): + """ + Sparse implementation + """ + pixel_nb = edges.max() + 1 + diag = np.arange(pixel_nb) + i_indices = np.hstack((edges[0], edges[1])) + j_indices = np.hstack((edges[1], edges[0])) + data = np.hstack((-weights, -weights)) + lap = sparse.coo_matrix((data, (i_indices, j_indices)), + shape=(pixel_nb, pixel_nb)) + connect = - np.ravel(lap.sum(axis=1)) + lap = sparse.coo_matrix((np.hstack((data, connect)), + (np.hstack((i_indices, diag)), np.hstack((j_indices, diag)))), + shape=(pixel_nb, pixel_nb)) + return lap.tocsr() + + +def _clean_labels_ar(X, labels): + labels = np.ravel(labels) + labels[labels == 0] = X + return labels + + +def _buildAB(lap_sparse, labels): + """ + Build the matrix A and rhs B of the linear system to solve. + A and B are two block of the laplacian of the image graph. + """ + labels = labels[labels >= 0] + indices = np.arange(labels.size) + unlabeled_indices = indices[labels == 0] + seeds_indices = indices[labels > 0] + # The following two lines take most of the time in this function + B = lap_sparse[unlabeled_indices][:, seeds_indices] + lap_sparse = lap_sparse[unlabeled_indices][:, unlabeled_indices] + nlabels = labels.max() + rhs = [] + for lab in range(1, nlabels + 1): + mask = (labels[seeds_indices] == lab) + fs = sparse.csr_matrix(mask) + fs = fs.transpose() + rhs.append(B * fs) + return lap_sparse, rhs + + +def _mask_edges_weights(edges, weights, mask): + """ + Remove edges of the graph connected to masked nodes, as well as + corresponding weights of the edges. + """ + mask0 = np.hstack((mask[:, :, :-1].ravel(), mask[:, :-1].ravel(), + mask[:-1].ravel())) + mask1 = np.hstack((mask[:, :, 1:].ravel(), mask[:, 1:].ravel(), + mask[1:].ravel())) + ind_mask = np.logical_and(mask0, mask1) + edges, weights = edges[:, ind_mask], weights[ind_mask] + max_node_index = edges.max() + # Reassign edges labels to 0, 1, ... edges_number - 1 + order = np.searchsorted(np.unique(edges.ravel()), + np.arange(max_node_index + 1)) + edges = order[edges] + return edges, weights + + +def _build_laplacian(data, mask=None, beta=50): + l_x, l_y, l_z = data.shape + edges = _make_graph_edges_3d(l_x, l_y, l_z) + weights = _compute_weights_3d(data, beta=beta, eps=1.e-10) + if mask is not None: + edges, weights = _mask_edges_weights(edges, weights, mask) + lap = _make_laplacian_sparse(edges, weights) + del edges, weights + return lap + + +#----------- Random walker algorithm -------------------------------- + + +def random_walker(data, labels, beta=130, mode='bf', tol=1.e-3, copy=True): + """ + Random walker algorithm for segmentation from markers. + + Parameters + ---------- + + data : array_like + Image to be segmented in phases. `data` can be two- or + three-dimensional. + + labels : array of ints, of same shape as `data` + Array of seed markers labeled with different positive integers + for different phases. Zero-labeled pixels are unlabeled pixels. + Negative labels correspond to inactive pixels that are not taken + into account (they are removed from the graph). + + beta : float + Penalization coefficient for the random walker motion + (the greater `beta`, the more difficult the diffusion). + + mode : {'bf', 'cg_mg', 'cg'} (default: 'bf') + Mode for solving the linear system in the random walker + algorithm. + + - 'bf' (brute force, default): an LU factorization of the + Laplacian is computed. This is fast for small images (<1024x1024), + but very slow (due to the memory cost) and memory-consuming for + big images (in 3-D for example). + + - 'cg' (conjugate gradient): the linear system is solved + iteratively using the Conjugate Gradient method from + scipy.sparse.linalg. This is less memory-consuming than the + brute force method for large images, but it is quite slow. + + - 'cg_mg' (conjugate gradient with multigrid preconditioner): + a preconditioner is computed using a multigrid solver, then + the solution is computed with the Conjugate Gradient method. + This mode requires that the pyamg module + (http://code.google.com/p/pyamg/) is installed. For images of + size > 512x512, this is the recommended (fastest) mode. + + tol : float + tolerance to achieve when solving the linear system, in + cg' and 'cg_mg' modes. + + copy : bool + If copy is False, the `labels` array will be overwritten with + the result of the segmentation. Use copy=False if you want to + save on memory. + + Returns + ------- + + output : ndarray of ints + Array in which each pixel has been labeled according to the marker + that reached the pixel first by anisotropic diffusion. + + See also + -------- + + skimage.morphology.watershed: watershed segmentation + A segmentation algorithm based on mathematical morphology + and "flooding" of regions from markers. + + Notes + ----- + + The algorithm was first proposed in *Random walks for image + segmentation*, Leo Grady, IEEE Trans Pattern Anal Mach Intell. + 2006 Nov;28(11):1768-83. + + The algorithm solves the diffusion equation at infinite times for + sources placed on markers of each phase in turn. A pixel is labeled with + the phase that has the greatest probability to diffuse first to the pixel. + + The diffusion equation is solved by minimizing x.T L x for each phase, + where L is the Laplacian of the weighted graph of the image, and x is + the probability that a marker of the given phase arrives first at a pixel + by diffusion (x=1 on markers of the phase, x=0 on the other markers, and + the other coefficients are looked for). Each pixel is attributed the label + for which it has a maximal value of x. The Laplacian L of the image + is defined as: + - L_ii = d_i, the number of neighbors of pixel i (the degree of i) + - L_ij = -w_ij if i and j are adjacent pixels + The weight w_ij is a decreasing function of the norm of the local gradient. + This ensures that diffusion is easier between pixels of similar values. + + When the Laplacian is decomposed into blocks of marked and unmarked pixels + + L = M B.T + B A + + with first indices corresponding to marked pixels, and then to unmarked + pixels, minimizing x.T L x for one phase amount to solving + + A x = - B x_m + + where x_m=1 on markers of the given phase, and 0 on other markers. + This linear system is solved in the algorithm using a direct method for + small images, and an iterative method for larger images. + + Examples + -------- + + >>> a = np.zeros((10, 10)) + 0.2*np.random.random((10, 10)) + >>> a[5:8, 5:8] += 1 + >>> b = np.zeros_like(a) + >>> b[3,3] = 1 #Marker for first phase + >>> b[6,6] = 2 #Marker for second phase + >>> random_walker(a, b) + array([[ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], + [ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], + [ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], + [ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], + [ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], + [ 1., 1., 1., 1., 1., 2., 2., 2., 1., 1.], + [ 1., 1., 1., 1., 1., 2., 2., 2., 1., 1.], + [ 1., 1., 1., 1., 1., 2., 2., 2., 1., 1.], + [ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], + [ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]) + + """ + # We work with 3-D arrays + data = np.atleast_3d(data) + if copy: + labels = np.copy(labels) + labels = labels.astype(np.int) + # If the array has pruned zones, be sure that no isolated pixels + # exist between pruned zones (they could not be determined) + if np.any(labels < 0): + filled = ndimage.binary_propagation(labels > 0, mask=labels >= 0) + labels[np.logical_and(np.logical_not(filled), labels == 0)] = -1 + del filled + labels = np.atleast_3d(labels) + if np.any(labels < 0): + lap_sparse = _build_laplacian(data, mask=labels >= 0, beta=beta) + else: + lap_sparse = _build_laplacian(data, beta=beta) + lap_sparse, B = _buildAB(lap_sparse, labels) + # We solve the linear system + # lap_sparse X = B + # where X[i, j] is the probability that a marker of label i arrives + # first at pixel j by anisotropic diffusion. + if mode == 'cg': + X = _solve_cg(lap_sparse, B, tol=tol) + if mode == 'cg_mg': + if not amg_loaded: + warnings.warn( + """pyamg (http://code.google.com/p/pyamg/)) is needed to use + this mode, but is not installed. The 'cg' mode will be used + instead.""") + X = _solve_cg(lap_sparse, B, tol=tol) + else: + X = _solve_cg_mg(lap_sparse, B, tol=tol) + if mode == 'bf': + X = _solve_bf(lap_sparse, B) + X = _clean_labels_ar(X + 1, labels) + data = np.squeeze(data) + return X.reshape(data.shape) + + +def _solve_bf(lap_sparse, B): + """ + solves lap_sparse X_i = B_i for each phase i. An LU decomposition + of lap_sparse is computed first. For each pixel, the label i + corresponding to the maximal X_i is returned. + """ + lap_sparse = lap_sparse.tocsc() + solver = sparse.linalg.factorized(lap_sparse.astype(np.double)) + X = np.array([solver(np.array((-B[i]).todense()).ravel())\ + for i in range(len(B))]) + X = np.argmax(X, axis=0) + return X + + +def _solve_cg(lap_sparse, B, tol): + """ + solves lap_sparse X_i = B_i for each phase i, using the conjugate + gradient method. For each pixel, the label i corresponding to the + maximal X_i is returned. + """ + lap_sparse = lap_sparse.tocsc() + X = [] + for i in range(len(B)): + x0 = cg(lap_sparse, -B[i].todense(), tol=tol)[0] + X.append(x0) + X = np.array(X) + X = np.argmax(X, axis=0) + return X + + +def _solve_cg_mg(lap_sparse, B, tol): + """ + solves lap_sparse X_i = B_i for each phase i, using the conjugate + gradient method with a multigrid preconditioner (ruge-stuben from + pyamg). For each pixel, the label i corresponding to the maximal + X_i is returned. + """ + X = [] + ml = ruge_stuben_solver(lap_sparse) + M = ml.aspreconditioner(cycle='V') + for i in range(len(B)): + x0 = cg(lap_sparse, -B[i].todense(), tol=tol, M=M, maxiter=30)[0] + X.append(x0) + X = np.array(X) + X = np.argmax(X, axis=0) + return X diff --git a/skimage/segmentation/tests/test_random_walker.py b/skimage/segmentation/tests/test_random_walker.py new file mode 100644 index 00000000..41a86dc3 --- /dev/null +++ b/skimage/segmentation/tests/test_random_walker.py @@ -0,0 +1,111 @@ +import numpy as np +from skimage.segmentation import random_walker +try: + import pyamg + amg_loaded = True +except ImportError: + amg_loaded = False + + +def make_2d_syntheticdata(lx, ly=None): + if ly is None: + ly = lx + data = np.zeros((lx, ly)) + 0.1 * np.random.randn(lx, ly) + small_l = int(lx / 5) + data[lx / 2 - small_l:lx / 2 + small_l, + ly / 2 - small_l:ly / 2 + small_l] = 1 + data[lx / 2 - small_l + 1:lx / 2 + small_l - 1, + ly / 2 - small_l + 1:ly / 2 + small_l - 1] = \ + 0.1 * np.random.randn(2 * small_l - 2, 2 * small_l - 2) + data[lx / 2 - small_l, ly / 2 - small_l / 8:ly / 2 + small_l / 8] = 0 + seeds = np.zeros_like(data) + seeds[lx / 5, ly / 5] = 1 + seeds[lx / 2 + small_l / 4, ly / 2 - small_l / 4] = 2 + return data, seeds + + +def make_3d_syntheticdata(lx, ly=None, lz=None): + if ly is None: + ly = lx + if lz is None: + lz = lx + data = np.zeros((lx, ly, lz)) + 0.1 * np.random.randn(lx, ly, lz) + small_l = int(lx / 5) + data[lx / 2 - small_l:lx / 2 + small_l, + ly / 2 - small_l:ly / 2 + small_l, + lz / 2 - small_l:lz / 2 + small_l] = 1 + data[lx / 2 - small_l + 1:lx / 2 + small_l - 1, + ly / 2 - small_l + 1:ly / 2 + small_l - 1, + lz / 2 - small_l + 1:lz / 2 + small_l - 1] = 0 + # make a hole + hole_size = np.max([1, small_l / 8]) + data[lx / 2 - small_l, + ly / 2 - hole_size:ly / 2 + hole_size,\ + lz / 2 - hole_size:lz / 2 + hole_size] = 0 + seeds = np.zeros_like(data) + seeds[lx / 5, ly / 5, lz / 5] = 1 + seeds[lx / 2 + small_l / 4, ly / 2 - small_l / 4, lz / 2 - small_l / 4] = 2 + return data, seeds + + +def test_2d_bf(): + lx = 70 + ly = 100 + data, labels = make_2d_syntheticdata(lx, ly) + labels_bf = random_walker(data, labels, beta=90, mode='bf') + assert (labels_bf[25:45, 40:60] == 2).all() + return data, labels_bf + + +def test_2d_cg(): + lx = 70 + ly = 100 + data, labels = make_2d_syntheticdata(lx, ly) + labels_cg = random_walker(data, labels, beta=90, mode='cg') + assert (labels_cg[25:45, 40:60] == 2).all() + return data, labels_cg + + +def test_2d_cg_mg(): + lx = 70 + ly = 100 + data, labels = make_2d_syntheticdata(lx, ly) + labels_cg_mg = random_walker(data, labels, beta=90, mode='cg_mg') + assert (labels_cg_mg[25:45, 40:60] == 2).all() + return data, labels_cg_mg + + +def test_2d_inactive(): + lx = 70 + ly = 100 + data, labels = make_2d_syntheticdata(lx, ly) + labels[10:20, 10:20] = -1 + labels[46:50, 33:38] = -2 + labels = random_walker(data, labels, beta=90) + assert (labels.reshape((lx, ly))[25:45, 40:60] == 2).all() + return data, labels + + +def test_3d(): + n = 30 + lx, ly, lz = n, n, n + data, labels = make_3d_syntheticdata(lx, ly, lz) + labels = random_walker(data, labels, mode='cg') + assert (labels.reshape(data.shape)[13:17, 13:17, 13:17] == 2).all() + return data, labels + + +def test_3d_inactive(): + n = 30 + lx, ly, lz = n, n, n + data, labels = make_3d_syntheticdata(lx, ly, lz) + old_labels = np.copy(labels) + labels[5:25, 26:29, 26:29] = -1 + after_labels = np.copy(labels) + labels = random_walker(data, labels, mode='cg') + assert (labels.reshape(data.shape)[13:17, 13:17, 13:17] == 2).all() + return data, labels, old_labels, after_labels + +if __name__ == '__main__': + from numpy import testing + testing.run_module_suite()