diff --git a/skimage/measure/_ccomp.pyx b/skimage/measure/_ccomp.pyx index 26b9645a..4ccb932a 100644 --- a/skimage/measure/_ccomp.pyx +++ b/skimage/measure/_ccomp.pyx @@ -24,15 +24,166 @@ See also: """ +DTYPE = np.intp + +# Short int - could be more graceful to the CPU cache +ctypedef cnp.int32_t INTS_t + +cdef struct s_shpinfo + +ctypedef s_shpinfo shpinfo +ctypedef s_bginfo bginfo +ctypedef int (* fun_ravel)(int, int, int, shpinfo *) + + +cdef struct s_bginfo: + DTYPE_t background_val + DTYPE_t background_node + + +# Structure for centralised access to shape data +cdef struct s_shpinfo: + INTS_t x + INTS_t y + INTS_t z + + DTYPE_t numels + INTS_t ndim + + #INTS_t Dee + INTS_t Ded + + INTS_t Dea + INTS_t Deb + INTS_t Dec + + INTS_t Def + INTS_t Deg + INTS_t Deh + INTS_t Dei + INTS_t Dej + INTS_t Dek + INTS_t Del + INTS_t Dem + INTS_t Den + + fun_ravel ravel_index + + +cdef shpinfo get_triple(inarr_shape): + cdef shpinfo res = shpinfo() + + res.y = 1 + res.z = 1 + res.ravel_index = ravel_index2D + + res.ndim = len(inarr_shape) + if res.ndim == 1: + res.x = inarr_shape[0] + res.ravel_index = ravel_index1D + elif res.ndim == 2: + res.x = inarr_shape[1] + res.y = inarr_shape[0] + res.ravel_index = ravel_index2D + elif res.ndim == 3: + res.x = inarr_shape[2] + res.y = inarr_shape[1] + res.z = inarr_shape[0] + res.ravel_index = ravel_index3D + else: + assert "Only for images of dimension 1-3 (got %s)" % res.ndim + + res.numels = res.x * res.y * res.z + + # Our point of interest is E. + # z=1 z=0 x + # ----------------------> + # | A B C F G H + # | D E . I J K + # | . . . L M N + # | + # y V + # + # Difference between E and G is (x=0, y=-1, z=-1), E and A (-1, -1, 0) etc. + # Here, it is recalculated to linear (raveled) indices of flattened arrays + # with their last (=contiguous) dimension is x. + + # So now the 1st (needed for 1D, 2D and 3D) part, y = 1, z = 1 + res.Ded = - 1 + + # Not needed, just for illustration + # + enabling it prolongs the exec time quite considerably - why? + #res.Dee = 0 + + # So now the 2nd (needed for 2D and 3D) part, y = 0, z = 1 + res.Dea = res.ravel_index(-1, -1, 0, & res) + res.Deb = res.Dea + 1 + res.Dec = res.Deb + 1 + + # And now the 3rd (needed only for 3D) part, z = 0 + res.Def = res.ravel_index(-1, -1, -1, & res) + res.Deg = res.Def + 1 + res.Deh = res.Def + 2 + res.Dei = res.Def - res.Deb # Deb = one row up, remember? + res.Dej = res.Dei + 1 + res.Dek = res.Dei + 2 + res.Del = res.Dei - 2 * res.Deb + res.Dem = res.Del + 1 + res.Den = res.Del + 2 + + return res + + +cdef int ravel_index1D(int x, int y, int z, shpinfo * shapeinfo): + """ + Ravel index of a 1D array - trivial. y and z are ignored. + """ + return x + + +cdef int ravel_index2D(int x, int y, int z, shpinfo * shapeinfo): + """ + Ravel index of a 2D array. z is ignored + """ + cdef int ret = x + y * shapeinfo.x + return ret + + +cdef int ravel_index3D(int x, int y, int z, shpinfo * shapeinfo): + """ + Ravel index of a 3D array + """ + cdef int ret = x + y * shapeinfo.x + z * shapeinfo.y * shapeinfo.x + return ret + + # Tree operations implemented by an array as described in Wu et al. # The term "forest" is used to indicate an array that stores one or more trees -DTYPE = np.intp - +# Consider a following tree: +# +# 5 ----> 3 ----> 2 ----> 1 <---- 6 <---- 7 +# | | +# 4 >----/ \----< 8 <---- 9 +# +# The vertices are a unique number, so the tree can be represented by an +# array where a the tuple (index, array[index]) represents an edge, +# so for our example, array[2] == 1, array[7] == 6 and array[1] == 1, because +# 1 is the root. +# Last but not least, one array can hold more than one tree as long as their +# indices are different. It is the case in this algorithm, so for that reason +# the array is referred to as the "forrest" = multiple trees next to each +# other. +# +# In this algorithm, there are as many indices as there are elements in the +# array to label and array[x] == x for all x. As the labelling progresses, +# equivalence between so-called provisional (i.e. not final) labels is +# discovered and trees begin to surface. +# When we found out that label 5 and 3 are the same, we assign array[5] = 3. cdef DTYPE_t find_root(DTYPE_t *forest, DTYPE_t n): """Find the root of node n. - + Given the example above, for any integer from 1 to 9, 1 is always returned """ cdef DTYPE_t root = n while (forest[root] < root): @@ -43,7 +194,10 @@ cdef DTYPE_t find_root(DTYPE_t *forest, DTYPE_t n): cdef inline void set_root(DTYPE_t *forest, DTYPE_t n, DTYPE_t root): """ Set all nodes on a path to point to new_root. - + Given the example above, given n=9, root=6, it would "reconnect" the tree. + so forest[9] = 6 and forest[8] = 6 + The ultimate goal is that all tree nodes point to the real root, + which is element 1 in this case. """ cdef DTYPE_t j while (forest[n] < n): @@ -56,12 +210,17 @@ cdef inline void set_root(DTYPE_t *forest, DTYPE_t n, DTYPE_t root): cdef inline void join_trees(DTYPE_t *forest, DTYPE_t n, DTYPE_t m): """Join two trees containing nodes n and m. - + If we imagine that in the example tree, the root 1 is not known, we + rather have two disjoint trees with roots 2 and 6. + Joining them would mean that all elements of both trees become connected + to the element 2, so forest[9] == 2, forest[6] == 2 etc. + However, when the relationship between 1 and 2 can still be discovered later. """ - cdef DTYPE_t root = find_root(forest, n) + cdef DTYPE_t root cdef DTYPE_t root_m if (n != m): + root = find_root(forest, n) root_m = find_root(forest, m) if (root > root_m): @@ -147,94 +306,153 @@ def label(input, DTYPE_t neighbors=8, background=None, return_num=False): [-1 -1 -1]] """ - cdef DTYPE_t rows = input.shape[0] - cdef DTYPE_t cols = input.shape[1] - cdef cnp.ndarray[DTYPE_t, ndim=2] data = np.array(input, copy=True, - dtype=DTYPE) - cdef cnp.ndarray[DTYPE_t, ndim=2] forest + cdef cnp.ndarray[DTYPE_t, ndim=1] data + cdef cnp.ndarray[DTYPE_t, ndim=1] forest - forest = np.arange(data.size, dtype=DTYPE).reshape((rows, cols)) + # Having data a 2D array slows down access considerably using linear + # indices even when using the data_p pointer :-( + data = input.flatten().astype(DTYPE, copy=True) + forest = np.arange(data.size, dtype=DTYPE) cdef DTYPE_t *forest_p = forest.data cdef DTYPE_t *data_p = data.data - cdef DTYPE_t i, j + cdef shpinfo shapeinfo + cdef bginfo bg - cdef DTYPE_t background_val + shapeinfo = get_triple(input.shape) + + bg.background_val = 0 + bg.background_node = -999 if background is None: - background_val = -1 + bg.background_val = -1 warnings.warn(DeprecationWarning( 'The default value for `background` will change to 0 in v0.12' )) else: - background_val = background - - cdef DTYPE_t background_node = -999 + bg.background_val = background if neighbors != 4 and neighbors != 8: raise ValueError('Neighbors must be either 4 or 8.') - # Initialize the first row - if data[0, 0] == background_val: - link_bg(forest_p, 0, &background_node) - - for j in range(1, cols): - if data[0, j] == background_val: - link_bg(forest_p, j, &background_node) - - if data[0, j] == data[0, j-1]: - join_trees(forest_p, j, j-1) - - for i in range(1, rows): - # Handle the first column - if data[i, 0] == background_val: - link_bg(forest_p, i * cols, &background_node) - - if data[i, 0] == data[i-1, 0]: - join_trees(forest_p, i*cols, (i-1)*cols) - - if neighbors == 8: - if data[i, 0] == data[i-1, 1]: - join_trees(forest_p, i*cols, (i-1)*cols + 1) - - for j in range(1, cols): - if data[i, j] == background_val: - link_bg(forest_p, i * cols + j, &background_node) - - if neighbors == 8: - if data[i, j] == data[i-1, j-1]: - join_trees(forest_p, i*cols + j, (i-1)*cols + j - 1) - - if data[i, j] == data[i-1, j]: - join_trees(forest_p, i*cols + j, (i-1)*cols + j) - - if neighbors == 8: - if j < cols - 1: - if data[i, j] == data[i - 1, j + 1]: - join_trees(forest_p, i*cols + j, (i-1)*cols + j + 1) - - if data[i, j] == data[i, j-1]: - join_trees(forest_p, i*cols + j, i*cols + j - 1) + if shapeinfo.ndim == 1: + scan1D(data_p, forest_p, & shapeinfo, & bg, neighbors, 0, 0) + elif shapeinfo.ndim == 2: + scan2D(data_p, forest_p, & shapeinfo, & bg, neighbors, 0) # Label output cdef DTYPE_t ctr = 0 - for i in range(rows): - for j in range(cols): - if (i*cols + j) == background_node: - data[i, j] = -1 - elif (i*cols + j) == forest[i, j]: - data[i, j] = ctr - ctr = ctr + 1 - else: - data[i, j] = data_p[forest[i, j]] + ctr = resolve_labels(data_p, forest_p, & shapeinfo, & bg) # Work around a bug in ndimage's type checking on 32-bit platforms if data.dtype == np.int32: data = data.view(np.int32) + res = data.reshape(input.shape) + if return_num: - return data, ctr + return res, ctr else: - return data + return res + + +cdef DTYPE_t resolve_labels(DTYPE_t * data_p, DTYPE_t * forest_p, + shpinfo * shapeinfo, bginfo * bg): + """ + We iterate through the provisional labels and assign final labels based on + our knowledge of prov. labels relationship. + We also track how many distinct final labels we have. + """ + cdef DTYPE_t counter = 0 + for i in range(shapeinfo.numels): + if i == bg.background_node: + data_p[i] = -1 + elif i == forest_p[i]: + data_p[i] = counter + counter += 1 + else: + data_p[i] = data_p[forest_p[i]] + return counter + + +# Here, we work with flat arrays regardless whether the data is 1, 2 or 3D. +# The lookup to the neighbor in a 2D array is achieved by precalculating an +# offset and ading it to the index. +# The forward scan mask looks like this (the center point is actually E): +# (take a look at shpinfo docs for more exhaustive info) +# A B C +# D E +# +# So if I am in the point E and want to take a look to A, I take the index of +# E and add shapeinfo.Dea to it and teg the index of A. +# The 1D indices are "raveled" or "linear", that's where "rindex" comes from. + + +cdef scan1D(DTYPE_t * data_p, DTYPE_t * forest_p, shpinfo * shapeinfo, + bginfo * bg, DTYPE_t neighbors, DTYPE_t y, DTYPE_t z): + """ + Perform forward scan on a 1D object, usually the first row of an image + """ + # Initialize the first row + cdef DTYPE_t x, rindex + rindex = shapeinfo.ravel_index(0, y, z, shapeinfo) + + if data_p[rindex] == bg.background_val: + link_bg(forest_p, rindex, & bg.background_node) + + for x in range(1, shapeinfo.x): + rindex += 1 + # Handle the first row + # First row => rindex == j + if data_p[rindex] == bg.background_val: + link_bg(forest_p, rindex, & bg.background_node) + + if data_p[rindex] == data_p[rindex + shapeinfo.Ded]: + join_trees(forest_p, rindex, rindex + shapeinfo.Ded) + + +cdef scan2D(DTYPE_t * data_p, DTYPE_t * forest_p, shpinfo * shapeinfo, + bginfo * bg, DTYPE_t neighbors, DTYPE_t z): + """ + Perform forward scan on a 2D array. + """ + cdef DTYPE_t x, y, rindex + scan1D(data_p, forest_p, shapeinfo, bg, neighbors, 0, z) + for y in range(1, shapeinfo.y): + rindex = shapeinfo.ravel_index(0, y, 0, shapeinfo) + # Handle the first column + if data_p[rindex] == bg.background_val: + link_bg(forest_p, rindex, & bg.background_node) + + if data_p[rindex] == data_p[rindex + shapeinfo.Deb]: + join_trees(forest_p, rindex, rindex + shapeinfo.Deb) + + if neighbors == 8: + if data_p[rindex] == data_p[rindex + shapeinfo.Dec]: + join_trees(forest_p, rindex, rindex + shapeinfo.Dec) + + # Handle the rest of columns + for x in range(1, shapeinfo.x): + # We have just moved to another column (of the same row) + # so we increment the raveled index. It will be reset when we get + # to another row, so we don't have to worry about altering it here. + rindex += 1 + if data_p[rindex] == bg.background_val: + link_bg(forest_p, rindex, & bg.background_node) + + if neighbors == 8: + if data_p[rindex] == data_p[rindex + shapeinfo.Dea]: + join_trees(forest_p, rindex, rindex + shapeinfo.Dea) + + if data_p[rindex] == data_p[rindex + shapeinfo.Deb]: + join_trees(forest_p, rindex, rindex + shapeinfo.Deb) + + if neighbors == 8: + if x < shapeinfo.x - 1: + if data_p[rindex] == data_p[rindex + shapeinfo.Dec]: + join_trees(forest_p, rindex, rindex + shapeinfo.Dec) + + if data_p[rindex] == data_p[rindex + shapeinfo.Ded]: + join_trees(forest_p, rindex, rindex + shapeinfo.Ded)