Add initial 3D modifications (not working)

2026-07-01 05:08:02 +08:00 · 2013-03-14 01:56:14 +11:00
parent 0132998ca1
commit a2e32cc90c
2 changed files with 98 additions and 56 deletions
@@ -2,6 +2,7 @@
 #cython: boundscheck=False
 #cython: nonecheck=False
 #cython: wraparound=False
+import collections as coll
 import numpy as np
 from time import time
 from scipy import ndimage
@@ -13,24 +14,28 @@ from ..color import rgb2lab, gray2rgb


 def slic(image, n_segments=100, ratio=10., max_iter=10, sigma=1,
-         convert2lab=True):
+         multichannel=True, convert2lab=True):
    """Segments image using k-means clustering in Color-(x,y) space.

    Parameters
    ----------
-    image : (width, height [, 3]) ndarray
-        Input image.
-    n_segments : int, optional (default 100)
+    image : (width, height [, depth] [, 3]) ndarray
+        Input image, which can be 2D or 3D, and grayscale or multi-channel
+        (see `multichannel` parameter).
+    n_segments : int, optional (default: 100)
        The (approximate) number of labels in the segmented output image.
-    ratio: float, optional (default 10)
+    ratio: float, optional (default: 10)
        Balances color-space proximity and image-space proximity.
        Higher values give more weight to color-space.
-    max_iter : int, optional (default 10)
+    max_iter : int, optional (default: 10)
        Maximum number of iterations of k-means.
-    sigma : float, optional (default 1)
+    sigma : float, optional (default: 1)
        Width of Gaussian smoothing kernel for preprocessing. Zero means no
        smoothing.
-    convert2lab : bool, optional (default True)
+    multichannel : bool, optional (default: True)
+        Whether the last axis of the image is to be interpreted as multiple
+        channels. Only 3 channels are supported.
+    convert2lab : bool, optional (default: True)
        Whether the input should be converted to Lab colorspace prior to
        segmentation.  For this purpose, the input is assumed to be RGB. Highly
        recommended.
@@ -40,9 +45,19 @@ def slic(image, n_segments=100, ratio=10., max_iter=10, sigma=1,
    segment_mask : (width, height) ndarray
        Integer mask indicating segment labels.

+    Raises
+    ------
+    ValueError
+        If:
+            - the image dimension is not 2 or 3 and `multichannel == False`, OR
+            - the image dimension is not 3 or 4 and `multichannel == True`, OR
+            - `multichannel == True` and the length of the last dimension of
+            the image is not 3.
+
    Notes
    -----
-    The image is smoothed using a Gaussian kernel prior to segmentation.
+    The image is optionally smoothed using a Gaussian kernel prior to
+    segmentation.

    References
    ----------
@@ -59,42 +74,64 @@ def slic(image, n_segments=100, ratio=10., max_iter=10, sigma=1,
    >>> # Increasing the ratio parameter yields more square regions
    >>> segments = slic(img, n_segments=100, ratio=20)
    """
-    if image.ndim == 2:
+    if ((not multichannel and image.ndim not in [2, 3]) or
+            (multichannel and image.ndim not in [3, 4]) or
+            (multichannel and image.shape[-1] != 3)):
+        ValueError("Only 1- or 3-channel 2- or 3-D images are supported.")
+    if image.ndim in [2, 3] and not multichannel:
        image = gray2rgb(image)
-    if image.ndim != 3 or image.shape[2] != 3:
-        ValueError("Only 1- or 3-channel 2D images are supported.")
-    image = ndimage.gaussian_filter(img_as_float(image), [sigma, sigma, 0])
+    if image.ndim == 3:
+        # See 2D RGB image as 3D RGB image with Z = 1
+        image = image[np.newaxis, ...]
+    if not isinstance(sigma, coll.Iterable):
+        sigma = np.array([sigma, sigma, sigma, 0])
+    if (sigma > 0).any():
+        image = ndimage.gaussian_filter(img_as_float(image), sigma)
    if convert2lab:
        image = rgb2lab(image)

    # initialize on grid:
-    cdef Py_ssize_t height, width
-    height, width = image.shape[:2]
+    cdef Py_ssize_t depth, height, width
+    depth, height, width = image.shape[:3]
    # approximate grid size for desired n_segments
-    cdef Py_ssize_t step = int(np.ceil(np.sqrt(height * width / n_segments)))
-    grid_y, grid_x = np.mgrid[:height, :width]
-    means_y = grid_y[::step, ::step]
-    means_x = grid_x[::step, ::step]
+    cdef Py_ssize_t step = int(np.ceil(
+                              (depth * height * width / n_segments) **
+                              (1.0/3)))
+    grid_z, grid_y, grid_x = np.mgrid[:depth, :height, :width]
+    means_z = grid_z[::step, ::step, ::step]
+    means_y = grid_y[::step, ::step, ::step]
+    means_x = grid_x[::step, ::step, ::step]

-    means_color = np.zeros((means_y.shape[0], means_y.shape[1], 3))
-    cdef cnp.ndarray[dtype=cnp.float_t, ndim=2] means \
-            = np.dstack([means_y, means_x, means_color]).reshape(-1, 5)
+    means_color = np.zeros(means_z.shape + (3,))
+    cdef cnp.ndarray[dtype=cnp.float_t, ndim=2] means = \
+            np.concatenate([
+                            means_z[..., np.newaxis],
+                            means_y[..., np.newaxis],
+                            means_x[..., np.newaxis],
+                            means_color
+                           ], axis=-1).reshape(-1, 6)
    cdef cnp.float_t* current_mean
    cdef cnp.float_t* mean_entry
    n_means = means.shape[0]
    # we do the scaling of ratio in the same way as in the SLIC paper
    # so the values have the same meaning
    ratio = (ratio / float(step)) ** 2
-    cdef cnp.ndarray[dtype=cnp.float_t, ndim=3] image_yx \
-            = np.dstack([grid_y, grid_x, image / ratio]).copy("C")
-    cdef Py_ssize_t i, k, x, y, x_min, x_max, y_min, y_max, changes
+    cdef cnp.ndarray[dtype=cnp.float_t, ndim=4] image_zyx \
+            = np.concatenate([
+                              grid_y[..., np.newaxis],
+                              grid_x[..., np.newaxis],
+                              grid_z[..., np.newaxis],
+                              image / ratio
+                             ], axis=-1).copy("C")
+    cdef Py_ssize_t i, k, x, y, z, x_min, x_max, y_min, y_max, z_min, z_max, \
+            changes
    cdef double dist_mean

-    cdef cnp.ndarray[dtype=cnp.intp_t, ndim=2] nearest_mean \
-            = np.zeros((height, width), dtype=np.intp)
-    cdef cnp.ndarray[dtype=cnp.float_t, ndim=2] distance \
-            = np.empty((height, width))
-    cdef cnp.float_t* image_p = <cnp.float_t*> image_yx.data
+    cdef cnp.ndarray[dtype=cnp.intp_t, ndim=3] nearest_mean \
+            = np.zeros((depth, height, width), dtype=np.intp)
+    cdef cnp.ndarray[dtype=cnp.float_t, ndim=3] distance \
+            = np.empty((depth, height, width))
+    cdef cnp.float_t* image_p = <cnp.float_t*> image_zyx.data
    cdef cnp.float_t* distance_p = <cnp.float_t*> distance.data
    cdef cnp.float_t* current_distance
    cdef cnp.float_t* current_pixel
@@ -106,35 +143,40 @@ def slic(image, n_segments=100, ratio=10., max_iter=10, sigma=1,
        # assign pixels to means
        for k in range(n_means):
            # compute windows:
-            y_min = int(max(current_mean[0] - 2 * step, 0))
-            y_max = int(min(current_mean[0] + 2 * step, height))
-            x_min = int(max(current_mean[1] - 2 * step, 0))
-            x_max = int(min(current_mean[1] + 2 * step, width))
-            for y in range(y_min, y_max):
-                current_pixel = &image_p[5 * (y * width + x_min)]
-                current_distance = &distance_p[y * width + x_min]
-                for x in range(x_min, x_max):
-                    mean_entry = current_mean
-                    dist_mean = 0
-                    for c in range(5):
-                        # you would think the compiler can optimize the squaring
-                        # itself. mine can't (with O2)
-                        tmp = current_pixel[0] - mean_entry[0]
-                        dist_mean += tmp * tmp
-                        current_pixel += 1
-                        mean_entry += 1
-                    # some precision issue here. Doesnt work if testing ">"
-                    if current_distance[0] - dist_mean > 1e-10:
-                        nearest_mean[y, x] = k
-                        current_distance[0] = dist_mean
-                        changes += 1
-                    current_distance += 1
-            current_mean += 5
+            z_min = int(max(current_mean[0] - 2 * step, 0))
+            z_max = int(min(current_mean[0] + 2 * step, depth))
+            y_min = int(max(current_mean[1] - 2 * step, 0))
+            y_max = int(min(current_mean[1] + 2 * step, height))
+            x_min = int(max(current_mean[2] - 2 * step, 0))
+            x_max = int(min(current_mean[2] + 2 * step, width))
+            for z in range(z_min, z_max):
+                for y in range(y_min, y_max):
+                    current_pixel = \
+                            &image_p[5 * ((z * height + y) * width + x_min)]
+                    current_distance = \
+                            &distance_p[(z * height + y) * width + x_min]
+                    for x in range(x_min, x_max):
+                        mean_entry = current_mean
+                        dist_mean = 0
+                        for c in range(5):
+                            # you would think the compiler can optimize the
+                            # squaring itself. mine can't (with O2)
+                            tmp = current_pixel[0] - mean_entry[0]
+                            dist_mean += tmp * tmp
+                            current_pixel += 1
+                            mean_entry += 1
+                        # some precision issue here. Doesnt work if testing ">"
+                        if current_distance[0] - dist_mean > 1e-10:
+                            nearest_mean[z, y, x] = k
+                            current_distance[0] = dist_mean
+                            changes += 1
+                        current_distance += 1
+            current_mean += 6
        if changes == 0:
            break
        # recompute means:
        means_list = [np.bincount(nearest_mean.ravel(),
-                      image_yx[:, :, j].ravel()) for j in range(5)]
+                      image_zyx[:, :, :, j].ravel()) for j in range(6)]
        in_mean = np.bincount(nearest_mean.ravel())
        in_mean[in_mean == 0] = 1
        means = (np.vstack(means_list) / in_mean).T.copy("C")
@@ -30,7 +30,7 @@ def test_gray():
    img += 0.0033 * rnd.normal(size=img.shape)
    img[img > 1] = 1
    img[img < 0] = 0
-    seg = slic(img, sigma=0, n_segments=4, ratio=50.0)
+    seg = slic(img, sigma=0, n_segments=4, ratio=50.0, multichannel=False)

    assert_equal(len(np.unique(seg)), 4)
    assert_array_equal(seg[:10, :10], 0)