diff --git a/bento.info b/bento.info index 227e51e8..d0891272 100644 --- a/bento.info +++ b/bento.info @@ -154,6 +154,9 @@ Library: Extension: skimage.feature._hessian_det_appx Sources: skimage/exposure/_hessian_det_appx.pyx + Extension: skimage.graph._ncut_cy + Sources: + skimage/graph/_ncut_cy.pyx Executable: skivi Module: skimage.scripts.skivi diff --git a/doc/examples/plot_ncut.py b/doc/examples/plot_ncut.py new file mode 100644 index 00000000..6b8a62ab --- /dev/null +++ b/doc/examples/plot_ncut.py @@ -0,0 +1,32 @@ +""" +============== +Normalized Cut +============== + +This example constructs a Region Adjacency Graph (RAG) and recursively performs +a Normalized Cut on it. + +References +---------- +.. [1] Shi, J.; Malik, J., "Normalized cuts and image segmentation", + Pattern Analysis and Machine Intelligence, + IEEE Transactions on, vol. 22, no. 8, pp. 888-905, August 2000. +""" +from skimage import graph, data, io, segmentation, color +from matplotlib import pyplot as plt + + +img = data.coffee() + +labels1 = segmentation.slic(img, compactness=30, n_segments=400) +out1 = color.label2rgb(labels1, img, kind='avg') + +g = graph.rag_mean_color(img, labels1, mode='similarity') +labels2 = graph.cut_normalized(labels1, g) +out2 = color.label2rgb(labels2, img, kind='avg') + +plt.figure() +io.imshow(out1) +plt.figure() +io.imshow(out2) +io.show() diff --git a/skimage/graph/__init__.py b/skimage/graph/__init__.py index 68c1206c..aca73bc4 100644 --- a/skimage/graph/__init__.py +++ b/skimage/graph/__init__.py @@ -1,7 +1,8 @@ from .spath import shortest_path from .mcp import MCP, MCP_Geometric, MCP_Connect, MCP_Flexible, route_through_array from .rag import rag_mean_color, RAG -from .graph_cut import cut_threshold +from .graph_cut import cut_threshold, cut_normalized +ncut = cut_normalized __all__ = ['shortest_path', 'MCP', @@ -11,4 +12,6 @@ __all__ = ['shortest_path', 'route_through_array', 'rag_mean_color', 'cut_threshold', + 'cut_normalized', + 'ncut', 'RAG'] diff --git a/skimage/graph/_ncut.py b/skimage/graph/_ncut.py new file mode 100644 index 00000000..2735bbd2 --- /dev/null +++ b/skimage/graph/_ncut.py @@ -0,0 +1,85 @@ +try: + import networkx as nx +except ImportError: + import warnings + warnings.warn('RAGs require networkx') +import numpy as np +from scipy import sparse +from . import _ncut_cy + + +def DW_matrices(graph): + """Returns the diagonal and weight matrices of a graph. + + Parameters + ---------- + graph : RAG + A Region Adjacency Graph. + + Returns + ------- + D : csc_matrix + The diagonal matrix of the graph. ``D[i, i]`` is the sum of weights of + all edges incident on `i`. All other enteries are `0`. + W : csc_matrix + The weight matrix of the graph. ``W[i, j]`` is the weight of the edge + joining `i` to `j`. + """ + # sparse.eighsh is most efficient with CSC-formatted input + W = nx.to_scipy_sparse_matrix(graph, format='csc') + entries = W.sum(axis=0) + D = sparse.dia_matrix((entries, 0), shape=W.shape).tocsc() + return D, W + + +def ncut_cost(cut, D, W): + """Returns the N-cut cost of a bi-partition of a graph. + + Parameters + ---------- + cut : ndarray + The mask for the nodes in the graph. Nodes corressponding to a `True` + value are in one set. + D : csc_matrix + The diagonal matrix of the graph. + W : csc_matrix + The weight matrix of the graph. + + Returns + ------- + cost : float + The cost of performing the N-cut. + + References + ---------- + .. [1] Normalized Cuts and Image Segmentation, Jianbo Shi and + Jitendra Malik, IEEE Transactions on Pattern Analysis and Machine + Intelligence, Page 889, Equation 2. + """ + cut = np.array(cut) + cut_cost = _ncut_cy.cut_cost(cut, W) + + # D has elements only along the diagonal, one per node, so we can directly + # index the data attribute with cut. + assoc_a = D.data[cut].sum() + assoc_b = D.data[~cut].sum() + + return (cut_cost / assoc_a) + (cut_cost / assoc_b) + + +def normalize(a): + """Normalize values in an array between `0` and `1`. + + Parameters + ---------- + a : ndarray + The array to be normalized. + + Returns + ------- + out : ndarray + The normalized array. + """ + mi = a.min() + mx = a.max() + return (a - mi) / (mx - mi) diff --git a/skimage/graph/_ncut_cy.pyx b/skimage/graph/_ncut_cy.pyx new file mode 100644 index 00000000..14f3a94a --- /dev/null +++ b/skimage/graph/_ncut_cy.pyx @@ -0,0 +1,78 @@ +# cython: cdivision=True +# cython: boundscheck=False +# cython: nonecheck=False +# cython: wraparound=False +cimport numpy as cnp +import numpy as np + + +def argmin2(cnp.double_t[:] array): + """Return the index of the 2nd smallest value in an array. + + Parameters + ---------- + array : array + The array to process. + + Returns + ------- + min_idx2 : int + The index of the second smallest value. + """ + cdef cnp.float64_t min1 = np.inf + cdef cnp.float64_t min2 = np.inf + cdef Py_ssize_t min_idx1 = 0 + cdef Py_ssize_t min_idx2 = 0 + cdef Py_ssize_t i = 0 + cdef Py_ssize_t n = array.shape[0] + + for i in range(n): + x = array[i] + if x < min1: + min2 = min1 + min_idx2 = min_idx1 + min1 = x + min_idx1 = i + elif x > min1 and x < min2: + min2 = x + min_idx2 = i + i += 1 + + return min_idx2 + + +def cut_cost(cut, W): + """Return the total weight of crossing edges in a bi-partition. + + Parameters + ---------- + cut : array + A array of booleans. Elements set to `True` belong to one + set. + W : array + The weight matrix of the graph. + + Returns + ------- + cost : float + The total weight of crossing edges. + """ + cdef cnp.ndarray[cnp.uint8_t, cast = True] cut_mask = np.array(cut) + cdef Py_ssize_t num_rows, num_cols + cdef cnp.int32_t row, col + cdef cnp.int32_t[:] indices = W.indices + cdef cnp.int32_t[:] indptr = W.indptr + cdef cnp.double_t[:] data = W.data.astype(np.double) + cdef cnp.int32_t row_index + cdef cnp.double_t cost = 0 + + num_rows = W.shape[0] + num_cols = W.shape[1] + + for col in range(num_cols): + for row_index in range(indptr[col], indptr[col + 1]): + row = indices[row_index] + if cut_mask[row] != cut_mask[col]: + cost += data[row_index] + + return cost * 0.5 diff --git a/skimage/graph/graph_cut.py b/skimage/graph/graph_cut.py index d9fcfdef..3f973e22 100644 --- a/skimage/graph/graph_cut.py +++ b/skimage/graph/graph_cut.py @@ -2,11 +2,14 @@ try: import networkx as nx except ImportError: import warnings - warnings.warn('"cut_threshold" requires networkx') + warnings.warn('RAGs require networkx') import numpy as np +from . import _ncut +from . import _ncut_cy +from scipy.sparse import linalg -def cut_threshold(labels, rag, thresh): +def cut_threshold(labels, rag, thresh, in_place=True): """Combine regions seperated by weight less than threshold. Given an image's labels and its RAG, output new labels by @@ -22,6 +25,10 @@ def cut_threshold(labels, rag, thresh): thresh : float The threshold. Regions connected by edges with smaller weights are combined. + in_place : bool + If set, modifies `rag` in place. The function will remove the edges + with weights less that `thresh`. If set to `False` the function + makes a copy of `rag` before proceeding. Returns ------- @@ -43,6 +50,9 @@ def cut_threshold(labels, rag, thresh): http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.11.5274 """ + if not in_place: + rag = rag.copy() + # Because deleting edges while iterating through them produces an error. to_remove = [(x, y) for x, y, d in rag.edges_iter(data=True) if d['weight'] >= thresh] @@ -60,3 +70,206 @@ def cut_threshold(labels, rag, thresh): map_array[label] = i return map_array[labels] + + +def cut_normalized(labels, rag, thresh=0.001, num_cuts=10, in_place=True): + """Perform Normalized Graph cut on the Region Adjacency Graph. + + Given an image's labels and its similarity RAG, recursively perform + a 2-way normalized cut on it. All nodes belonging to a subgraph + that cannot be cut further are assigned a unique label in the + output. + + Parameters + ---------- + labels : ndarray + The array of labels. + rag : RAG + The region adjacency graph. + thresh : float + The threshold. A subgraph won't be further subdivided if the + value of the N-cut exceeds `thresh`. + num_cuts : int + The number or N-cuts to perform before determining the optimal one. + in_place : bool + If set, modifies `rag` in place. For each node `n` the function will + set a new attribute ``rag.node[n]['ncut label']``. + + Returns + ------- + out : ndarray + The new labeled array. + + Examples + -------- + >>> from skimage import data, graph, segmentation + >>> img = data.lena() + >>> labels = segmentation.slic(img, compactness=30, n_segments=400) + >>> rag = graph.rag_mean_color(img, labels, mode='similarity') + >>> new_labels = graph.cut_normalized(labels, rag) + + References + ---------- + .. [1] Shi, J.; Malik, J., "Normalized cuts and image segmentation", + Pattern Analysis and Machine Intelligence, + IEEE Transactions on, vol. 22, no. 8, pp. 888-905, August 2000. + + """ + if not in_place: + rag = rag.copy() + + _ncut_relabel(rag, thresh, num_cuts) + + map_array = np.zeros(labels.max() + 1) + # Mapping from old labels to new + for n, d in rag.nodes_iter(data=True): + map_array[d['labels']] = d['ncut label'] + + return map_array[labels] + + +def partition_by_cut(cut, rag): + """Compute resulting subgraphs from given bi-parition. + + Parameters + ---------- + cut : array + A array of booleans. Elements set to `True` belong to one + set. + rag : RAG + The Region Adjacency Graph. + + Returns + ------- + sub1, sub2 : RAG + The two resulting subgraphs from the bi-partition. + """ + # `cut` is derived from `D` and `W` matrices, which also follow the + # ordering returned by `rag.nodes()` because we use + # nx.to_scipy_sparse_matrix. + + # Example + # rag.nodes() = [3, 7, 9, 13] + # cut = [True, False, True, False] + # nodes1 = [3, 9] + # nodes2 = [7, 10] + + nodes1 = [n for i, n in enumerate(rag.nodes()) if cut[i]] + nodes2 = [n for i, n in enumerate(rag.nodes()) if not cut[i]] + + sub1 = rag.subgraph(nodes1) + sub2 = rag.subgraph(nodes2) + + return sub1, sub2 + + +def get_min_ncut(ev, d, w, num_cuts): + """Threshold an eigenvector evenly, to determine minimum ncut. + + Parameters + ---------- + ev : array + The eigenvector to threshold. + d : ndarray + The diagonal matrix of the graph. + w : ndarray + The weight matrix of the graph. + num_cuts : int + The number of evenly spaced thresholds to check for. + + Returns + ------- + mask : array + The array of booleans which denotes the bi-partition. + mcut : float + The value of the minimum ncut. + """ + mcut = np.inf + + # Refer Shi & Malik 2001, Section 3.1.3, Page 892 + # Perform evenly spaced n-cuts and determine the optimal one. + for t in np.linspace(0, 1, num_cuts, endpoint=False): + mask = ev > t + cost = _ncut.ncut_cost(mask, d, w) + if cost < mcut: + min_mask = mask + mcut = cost + + return min_mask, mcut + + +def _label_all(rag, attr_name): + """Assign a unique integer to the given attribute in the RAG. + + This function assumes that all labels in `rag` are unique. It + picks up a random label from them and assigns it to the `attr_name` + attribute of all the nodes. + + rag : RAG + The Region Adjacency Graph. + attr_name : string + The attribute to which a unique integer is assigned. + """ + node = rag.nodes()[0] + new_label = rag.node[node]['labels'][0] + for n, d in rag.nodes_iter(data=True): + d[attr_name] = new_label + + +def _ncut_relabel(rag, thresh, num_cuts): + """Perform Normalized Graph cut on the Region Adjacency Graph. + + Recursively partition the graph into 2, until further subdivision + yields a cut greather than `thresh` or such a cut cannot be computed. + For such a subgraph, indices to labels of all its nodes map to a single + unique value. + + Parameters + ---------- + labels : ndarray + The array of labels. + rag : RAG + The region adjacency graph. + thresh : float + The threshold. A subgraph won't be further subdivided if the + value of the N-cut exceeds `thresh`. + num_cuts : int + The number or N-cuts to perform before determining the optimal one. + map_array : array + The array which maps old labels to new ones. This is modified inside + the function. + """ + d, w = _ncut.DW_matrices(rag) + m = w.shape[0] + + if m > 2: + d2 = d.copy() + # Since d is diagonal, we can directly operate on its data + # the inverse of the square root + d2.data = np.reciprocal(np.sqrt(d2.data, out=d2.data), out=d2.data) + + # Refer Shi & Malik 2001, Equation 7, Page 891 + vals, vectors = linalg.eigsh(d2 * (d - w) * d2, which='SM', + k=min(100, m - 2)) + + # Pick second smallest eigenvector. + # Refer Shi & Malik 2001, Section 3.2.3, Page 893 + vals, vectors = np.real(vals), np.real(vectors) + index2 = _ncut_cy.argmin2(vals) + ev = _ncut.normalize(vectors[:, index2]) + + cut_mask, mcut = get_min_ncut(ev, d, w, num_cuts) + if (mcut < thresh): + # Sub divide and perform N-cut again + # Refer Shi & Malik 2001, Section 3.2.5, Page 893 + sub1, sub2 = partition_by_cut(cut_mask, rag) + + _ncut_relabel(sub1, thresh, num_cuts) + _ncut_relabel(sub2, thresh, num_cuts) + return + + # The N-cut wasn't small enough, or could not be computed. + # The remaining graph is a region. + # Assign `ncut label` by picking any label from the existing nodes, since + # `labels` are unique, `new_label` is also unique. + _label_all(rag, 'ncut label') diff --git a/skimage/graph/rag.py b/skimage/graph/rag.py index 7c1b7f07..5ef9a83b 100644 --- a/skimage/graph/rag.py +++ b/skimage/graph/rag.py @@ -2,6 +2,7 @@ try: import networkx as nx except ImportError: msg = "Graph functions require networkx, which is not installed" + class nx: class Graph: def __init__(self, *args, **kwargs): @@ -11,6 +12,7 @@ except ImportError: import numpy as np from scipy.ndimage import filters from scipy import ndimage as nd +import math def min_weight(graph, src, dst, n): @@ -118,14 +120,15 @@ def _add_edge_filter(values, graph): return 0 -def rag_mean_color(image, labels, connectivity=2): +def rag_mean_color(image, labels, connectivity=2, mode='distance', + sigma=255.0): """Compute the Region Adjacency Graph using mean colors. Given an image and its initial segmentation, this method constructs the corresponsing Region Adjacency Graph (RAG). Each node in the RAG represents a set of pixels within `image` with the same label in `labels`. - The weight between two adjacent regions is the difference in their mean - color. + The weight between two adjacent regions represents how similar or + dissimilar two regions are depending on the `mode` parameter. Parameters ---------- @@ -140,6 +143,23 @@ def rag_mean_color(image, labels, connectivity=2): are considered adjacent. It can range from 1 to `labels.ndim`. Its behavior is the same as `connectivity` parameter in `scipy.ndimage.filters.generate_binary_structure`. + mode : {'distance', 'similarity'}, optional + The strategy to assign edge weights. + + 'distance' : The weight between two adjacent regions is the + :math:`|c_1 - c_2|`, where :math:`c_1` and :math:`c_2` are the mean + colors of the two regions. It represents the Euclidean distance in + their average color. + + 'similarity' : The weight between two adjacent is + :math:`e^{-d^2/sigma}` where :math:`d=|c_1 - c_2|`, where + :math:`c_1` and :math:`c_2` are the mean colors of the two regions. + It represents how similar two regions are. + sigma : float, optional + Used for computation when `mode` is "similarity". It governs how + close to each other two colors should be, for their corresponding edge + weight to be significant. A very large value of `sigma` could make + any two colors behave as though they were similar. Returns ------- @@ -205,8 +225,14 @@ def rag_mean_color(image, labels, connectivity=2): graph.node[n]['mean color'] = (graph.node[n]['total color'] / graph.node[n]['pixel count']) - for x, y in graph.edges_iter(): + for x, y, d in graph.edges_iter(data=True): diff = graph.node[x]['mean color'] - graph.node[y]['mean color'] - graph[x][y]['weight'] = np.linalg.norm(diff) + diff = np.linalg.norm(diff) + if mode == 'similarity': + d['weight'] = math.e ** (-(diff ** 2) / sigma) + elif mode == 'distance': + d['weight'] = diff + else: + raise ValueError("The mode '%s' is not recognised" % mode) return graph diff --git a/skimage/graph/setup.py b/skimage/graph/setup.py index 463d2739..edc7b653 100644 --- a/skimage/graph/setup.py +++ b/skimage/graph/setup.py @@ -17,6 +17,7 @@ def configuration(parent_package='', top_path=None): cython(['_spath.pyx'], working_path=base_path) cython(['_mcp.pyx'], working_path=base_path) cython(['heap.pyx'], working_path=base_path) + cython(['_ncut_cy.pyx'], working_path=base_path) config.add_extension('_spath', sources=['_spath.c'], include_dirs=[get_numpy_include_dirs()]) @@ -24,7 +25,8 @@ def configuration(parent_package='', top_path=None): include_dirs=[get_numpy_include_dirs()]) config.add_extension('heap', sources=['heap.c'], include_dirs=[get_numpy_include_dirs()]) - + config.add_extension('_ncut_cy', sources=['_ncut_cy.c'], + include_dirs=[get_numpy_include_dirs()]) return config if __name__ == '__main__': diff --git a/skimage/graph/tests/test_rag.py b/skimage/graph/tests/test_rag.py index 753bcb74..0e62aa1a 100644 --- a/skimage/graph/tests/test_rag.py +++ b/skimage/graph/tests/test_rag.py @@ -2,6 +2,8 @@ import numpy as np from skimage import graph from skimage._shared.version_requirements import is_installed from numpy.testing.decorators import skipif +from skimage import segmentation +from numpy import testing def max_edge(g, src, dst, n): @@ -62,7 +64,45 @@ def test_threshold_cut(): labels[50:, 50:] = 3 rag = graph.rag_mean_color(img, labels) - new_labels = graph.cut_threshold(labels, rag, 10) - + new_labels = graph.cut_threshold(labels, rag, 10, in_place=False) # Two labels assert new_labels.max() == 1 + + new_labels = graph.cut_threshold(labels, rag, 10) + # Two labels + assert new_labels.max() == 1 + + +@skipif(not is_installed('networkx')) +def test_cut_normalized(): + + img = np.zeros((100, 100, 3), dtype='uint8') + img[:50, :50] = 255, 255, 255 + img[:50, 50:] = 254, 254, 254 + img[50:, :50] = 2, 2, 2 + img[50:, 50:] = 1, 1, 1 + + labels = np.zeros((100, 100), dtype='uint8') + labels[:50, :50] = 0 + labels[:50, 50:] = 1 + labels[50:, :50] = 2 + labels[50:, 50:] = 3 + + rag = graph.rag_mean_color(img, labels, mode='similarity') + + new_labels = graph.cut_normalized(labels, rag, in_place=False) + new_labels, _, _ = segmentation.relabel_sequential(new_labels) + # Two labels + assert new_labels.max() == 1 + + new_labels = graph.cut_normalized(labels, rag) + new_labels, _, _ = segmentation.relabel_sequential(new_labels) + assert new_labels.max() == 1 + + +@skipif(not is_installed('networkx')) +def test_rag_error(): + img = np.zeros((10, 10, 3), dtype='uint8') + labels = np.zeros((10, 10), dtype='uint8') + testing.assert_raises(ValueError, graph.rag_mean_color, img, labels, + 2, 'non existant mode')