diff --git a/doc/examples/plot_watershed.py b/doc/examples/plot_watershed.py
new file mode 100644
index 00000000..0fea79f2
--- /dev/null
+++ b/doc/examples/plot_watershed.py
@@ -0,0 +1,59 @@
+"""
+======================
+Watershed segmentation
+======================
+
+The watershed is a classical algorithm used for **segmentation**, that is, for 
+separating different objects in an image.
+
+Starting from user-defined markers, the watershed algorithm treats pixels
+values as a local topography (elevation). The algorithm floods basins from the
+markers, until basins attributed to different markers meet on watershed lines.
+In many cases, markers are chosen as local minima of the image, from which
+basins are flooded.
+
+In the example below, two overlapping circles are to be separated. To do so,
+one computes an image that is the distance to the background. The maxima of
+this distance (i.e., the minima of the opposite of the distance) are chosen as
+markers, and the flooding of basins from such markers separates the two circles
+along a watershed line.
+
+See http://en.wikipedia.org/wiki/Watershed_(image_processing) for more details
+on the algorithm.
+"""
+
+import numpy as np
+from scipy import ndimage
+import matplotlib.pyplot as plt
+from scikits.image.morphology import watershed, is_local_maximum
+
+# Generate an initial image with two overlapping circles
+x, y = np.indices((80, 80))
+x1, y1, x2, y2 = 28, 28, 44, 52
+r1, r2 = 16, 20
+mask_circle1 = (x - x1)**2 + (y - y1)**2 < r1**2
+mask_circle2 = (x - x2)**2 + (y - y2)**2 < r2**2
+image = np.logical_or(mask_circle1, mask_circle2)
+# Now we want to separate the two objects in image
+# Generate the markers as local maxima of the distance
+# to the background
+from scipy import ndimage
+distance = ndimage.distance_transform_edt(image)
+local_maxi = is_local_maximum(distance, image, np.ones((3, 3)))
+markers = ndimage.label(local_maxi)[0]
+labels = watershed(-distance, markers, mask=image)
+
+plt.figure(figsize=(9, 3))
+plt.subplot(131)
+plt.imshow(image, cmap=plt.cm.gray, interpolation='nearest')
+plt.axis('off')
+plt.subplot(132)
+plt.imshow(-distance, cmap=plt.cm.jet, interpolation='nearest')
+plt.axis('off')
+plt.subplot(133)
+plt.imshow(labels, cmap=plt.cm.spectral, interpolation='nearest')
+plt.axis('off')
+
+plt.subplots_adjust(hspace=0.01, wspace=0.01, top=1, bottom=0, left=0,
+                    right=1)
+plt.show()
diff --git a/scikits/image/filter/__init__.py b/scikits/image/filter/__init__.py
index bec0034d..d3baf934 100644
--- a/scikits/image/filter/__init__.py
+++ b/scikits/image/filter/__init__.py
@@ -3,3 +3,4 @@ from ctmf import median_filter
 from canny import canny
 from edges import sobel, hsobel, vsobel, hprewitt, vprewitt, prewitt
 from tv_denoise import tv_denoise
+from rank_order import rank_order
diff --git a/scikits/image/filter/rank_order.py b/scikits/image/filter/rank_order.py
index 399641f8..3da98974 100644
--- a/scikits/image/filter/rank_order.py
+++ b/scikits/image/filter/rank_order.py
@@ -11,10 +11,39 @@ Original author: Lee Kamentstky
 import numpy
 
 def rank_order(image):
-    """Return an image of the same shape where each pixel has the
-    rank-order value of the corresponding pixel in the image.
-    The returned image's elements are of type numpy.uint32 which
-    simplifies processing in C code.
+    """Return an image of the same shape where each pixel is the
+    index of the pixel value in the ascending order of the unique 
+    values of `image`, aka the rank-order value.
+
+    Parameters
+    ----------
+    image: ndarray
+
+    Returns
+    -------
+    labels: ndarray of type np.uint32, of shape image.shape
+        New array where each pixel has the rank-order value of the
+        corresponding pixel in `image`. Pixel values are between 0 and
+        n - 1, where n is the number of distinct unique values in
+        `image`.
+
+    original_values: 1-d ndarray
+        Unique original values of `image`
+
+    Examples
+    --------
+    >>> a = np.array([[1, 4, 5], [4, 4, 1], [5, 1, 1]])
+    >>> a
+    array([[1, 4, 5],
+        [4, 4, 1],
+        [5, 1, 1]])
+    >>> rank_order(a)
+    (array([[0, 1, 2],
+        [1, 1, 0],
+        [2, 0, 0]], dtype=uint32), array([1, 4, 5]))
+    >>> b = np.array([-1., 2.5, 3.1, 2.5])
+    >>> rank_order(b)
+    (array([0, 1, 2, 1], dtype=uint32), array([-1. ,  2.5,  3.1]))
     """
     flat_image = image.ravel()
     sort_order = flat_image.argsort().astype(numpy.uint32)
diff --git a/scikits/image/morphology/__init__.py b/scikits/image/morphology/__init__.py
index 33f96662..4036cc25 100644
--- a/scikits/image/morphology/__init__.py
+++ b/scikits/image/morphology/__init__.py
@@ -1,3 +1,4 @@
 from grey import *
 from selem import *
 from ccomp import label
+from watershed import watershed, is_local_maximum
diff --git a/scikits/image/morphology/_watershed.pyx b/scikits/image/morphology/_watershed.pyx
new file mode 100644
index 00000000..c86d8744
--- /dev/null
+++ b/scikits/image/morphology/_watershed.pyx
@@ -0,0 +1,109 @@
+"""watershed.pyx - scithon implementation of guts of watershed
+
+Originally part of CellProfiler, code licensed under both GPL and BSD licenses.
+Website: http://www.cellprofiler.org
+
+Copyright (c) 2003-2009 Massachusetts Institute of Technology
+Copyright (c) 2009-2011 Broad Institute
+All rights reserved.
+
+Original author: Lee Kamentsky
+"""
+
+cdef extern from "numpy/arrayobject.h":
+        cdef void import_array()
+import_array()
+
+import numpy as np
+cimport numpy as np
+cimport cython
+
+DTYPE_INT32 = np.int32
+ctypedef np.int32_t DTYPE_INT32_t
+DTYPE_BOOL = np.bool
+ctypedef np.int8_t DTYPE_BOOL_t
+
+include "heap_watershed.pxi"
+
+@cython.boundscheck(False)
+def watershed(np.ndarray[DTYPE_INT32_t, ndim=1, negative_indices=False, 
+              mode='c'] image,
+              np.ndarray[DTYPE_INT32_t, ndim=2, negative_indices=False, 
+              mode='c'] pq,
+              DTYPE_INT32_t age,
+              np.ndarray[DTYPE_INT32_t, ndim=2, negative_indices=False, 
+              mode='c'] structure,
+              DTYPE_INT32_t ndim,
+              np.ndarray[DTYPE_BOOL_t, ndim=1, negative_indices=False, 
+              mode='c'] mask,
+              np.ndarray[DTYPE_INT32_t, ndim=1, negative_indices=False, 
+              mode='c'] image_shape,
+              np.ndarray[DTYPE_INT32_t, ndim=1, negative_indices=False, 
+              mode='c'] output):
+    """Do heavy lifting of watershed algorithm
+   
+    Parameters
+    ----------
+
+    image - the flattened image pixels, converted to rank-order
+    pq    - the priority queue, starts with the marked pixels
+            the first element in each row is the image intensity
+            the second element is the age at entry into the queue
+            the third element is the index into the flattened image or labels
+            the remaining elements are the coordinates of the point
+    age   - the next age to assign to a pixel
+    structure - a numpy int32 array containing the structuring elements
+                that define nearest neighbors. For each row, the first
+                element is the stride from the point to its neighbor
+                in a flattened array. The remaining elements are the
+                offsets from the point to its neighbor in the various
+                dimensions
+    ndim - # of dimensions in the image
+    mask  - numpy boolean (char) array indicating which pixels to consider
+            and which to ignore. Also flattened.
+    image_shape - the dimensions of the image, for boundary checking,
+                  a numpy array of np.int32
+    output - put the image labels in here
+    """
+    cdef Heapitem elem
+    cdef Heapitem new_elem
+    cdef DTYPE_INT32_t nneighbors = structure.shape[0] 
+    cdef DTYPE_INT32_t i = 0
+    cdef DTYPE_INT32_t index = 0
+    cdef DTYPE_INT32_t old_index = 0
+    cdef DTYPE_INT32_t max_index = image.shape[0]
+
+    cdef Heap *hp = <Heap *> heap_from_numpy2()
+
+    for i in range(pq.shape[0]):
+        elem.value = pq[i, 0]
+        elem.age = pq[i, 1]
+        elem.index = pq[i, 2]
+        heappush(hp, &elem)
+
+    while hp.items > 0:
+        #
+        # Pop off an item to work on
+        #
+        heappop(hp, &elem)
+        ####################################################
+        # loop through each of the structuring elements
+        #
+        old_index = elem.index
+        for i in range(nneighbors):
+            # get the flattened address of the neighbor
+            index = structure[i, 0] + old_index
+            if index < 0 or index >= max_index or output[index] or \
+                    not mask[index]:
+                continue
+
+            new_elem.value = image[index]
+            new_elem.age = elem.age + 1
+            new_elem.index = index
+            age += 1
+            output[index] = output[old_index]
+            #
+            # Push the neighbor onto the heap to work on it later
+            #
+            heappush(hp, &new_elem)
+    heap_done(hp)
diff --git a/scikits/image/morphology/heap_general.pxi b/scikits/image/morphology/heap_general.pxi
new file mode 100644
index 00000000..a113b98e
--- /dev/null
+++ b/scikits/image/morphology/heap_general.pxi
@@ -0,0 +1,135 @@
+"""
+Originally part of CellProfiler, code licensed under both GPL and BSD
+licenses.
+Website: http://www.cellprofiler.org
+
+Copyright (c) 2003-2009 Massachusetts Institute of Technology
+Copyright (c) 2009-2011 Broad Institute
+All rights reserved.
+
+Original author: Lee Kamentsky
+"""
+
+cdef extern from "stdlib.h":
+   ctypedef unsigned long size_t
+   void free(void *ptr)
+   void *malloc(size_t size)
+   void *realloc(void *ptr, size_t size)
+
+cdef struct Heap:
+    unsigned int items
+    unsigned int space
+    Heapitem *data
+    Heapitem **ptrs
+
+cdef inline Heap *heap_from_numpy2():
+    cdef unsigned int k
+    cdef Heap *heap 
+    heap = <Heap *> malloc(sizeof (Heap))
+    heap.items = 0
+    heap.space = 1000
+    heap.data = <Heapitem *> malloc(heap.space * sizeof(Heapitem))
+    heap.ptrs = <Heapitem **> malloc(heap.space * sizeof(Heapitem *))
+    for k in range(heap.space):
+        heap.ptrs[k] = heap.data + k
+    return heap
+
+cdef inline void heap_done(Heap *heap):
+   free(heap.data)
+   free(heap.ptrs)
+   free(heap)
+
+cdef inline void swap(unsigned int a, unsigned int b, Heap *h):
+    h.ptrs[a], h.ptrs[b] = h.ptrs[b], h.ptrs[a]
+
+
+######################################################
+# heappop - inlined
+#
+# pop an element off the heap, maintaining heap invariant
+# 
+# Note: heap ordering is the same as python heapq, i.e., smallest first.
+######################################################
+cdef inline void heappop(Heap *heap,
+                  Heapitem *dest):
+    cdef unsigned int i, smallest, l, r # heap indices
+    
+    #
+    # Start by copying the first element to the destination
+    #
+    dest[0] = heap.ptrs[0][0]
+    heap.items -= 1
+
+    # if the heap is now empty, we can return, no need to fix heap.
+    if heap.items == 0:
+        return
+
+    #
+    # Move the last element in the heap to the first.
+    #
+    swap(0, heap.items, heap)
+
+    #
+    # Restore the heap invariant.
+    #
+    i = 0
+    smallest = i
+    while True:
+        # loop invariant here: smallest == i
+        
+        # find smallest of (i, l, r), and swap it to i's position if necessary
+        l = i*2+1 #__left(i)
+        r = i*2+2 #__right(i)
+        if l < heap.items:
+            if smaller(heap.ptrs[l], heap.ptrs[i]):
+                smallest = l
+            if r < heap.items and smaller(heap.ptrs[r], heap.ptrs[smallest]):
+                smallest = r
+        else:
+            # this is unnecessary, but trims 0.04 out of 0.85 seconds...
+            break
+        # the element at i is smaller than either of its children, heap invariant restored.
+        if smallest == i:
+                break
+        # swap
+        swap(i, smallest, heap)
+        i = smallest
+        
+##################################################
+# heappush - inlined
+#
+# push the element onto the heap, maintaining the heap invariant
+#
+# Note: heap ordering is the same as python heapq, i.e., smallest first.
+##################################################
+cdef inline void heappush(Heap *heap,
+                          Heapitem *new_elem):
+  cdef unsigned int child         = heap.items
+  cdef unsigned int parent
+  cdef unsigned int k
+  cdef Heapitem *new_data
+
+  # grow if necessary
+  if heap.items == heap.space:
+      heap.space = heap.space * 2
+      new_data = <Heapitem *> realloc(<void *> heap.data, <size_t> (heap.space * sizeof(Heapitem)))
+      heap.ptrs = <Heapitem **> realloc(<void *> heap.ptrs, <size_t> (heap.space * sizeof(Heapitem *)))
+      for k in range(heap.items):
+          heap.ptrs[k] = new_data + (heap.ptrs[k] - heap.data)
+      for k in range(heap.items, heap.space):
+          heap.ptrs[k] = new_data + k
+      heap.data = new_data
+
+  # insert new data at child
+  heap.ptrs[child][0] = new_elem[0]
+  heap.items += 1
+
+  # restore heap invariant, all parents <= children
+  while child>0:
+      parent = (child + 1) / 2 - 1 # __parent(i)
+      
+      if smaller(heap.ptrs[child], heap.ptrs[parent]):
+          swap(parent, child, heap)
+          child = parent
+      else:
+          break
diff --git a/scikits/image/morphology/heap_watershed.pxi b/scikits/image/morphology/heap_watershed.pxi
new file mode 100644
index 00000000..ea66da26
--- /dev/null
+++ b/scikits/image/morphology/heap_watershed.pxi
@@ -0,0 +1,26 @@
+"""
+Originally part of CellProfiler, code licensed under both GPL and BSD
+licenses.
+Website: http://www.cellprofiler.org
+
+Copyright (c) 2003-2009 Massachusetts Institute of Technology
+Copyright (c) 2009-2011 Broad Institute
+All rights reserved.
+
+Original author: Lee Kamentsky
+"""
+import numpy as np
+cimport numpy as np
+cimport cython
+
+cdef struct Heapitem:
+    np.int32_t value
+    np.int32_t age
+    np.int32_t index
+
+cdef inline int smaller(Heapitem *a, Heapitem *b):
+    if a.value <> b.value:
+      return a.value < b.value
+    return a.age < b.age
+
+include "heap_general.pxi"
diff --git a/scikits/image/morphology/setup.py b/scikits/image/morphology/setup.py
index 24a41851..3764fe0c 100644
--- a/scikits/image/morphology/setup.py
+++ b/scikits/image/morphology/setup.py
@@ -13,11 +13,14 @@ def configuration(parent_package='', top_path=None):
 
     cython(['ccomp.pyx'], working_path=base_path)
     cython(['cmorph.pyx'], working_path=base_path)
+    cython(['_watershed.pyx'], working_path=base_path)
 
     config.add_extension('ccomp', sources=['ccomp.c'],
                          include_dirs=[get_numpy_include_dirs()])
     config.add_extension('cmorph', sources=['cmorph.c'],
                          include_dirs=[get_numpy_include_dirs()])
+    config.add_extension('_watershed', sources=['_watershed.c'],
+                         include_dirs=[get_numpy_include_dirs()])
 
     return config
 
diff --git a/scikits/image/morphology/tests/test_watershed.py b/scikits/image/morphology/tests/test_watershed.py
new file mode 100644
index 00000000..c275a281
--- /dev/null
+++ b/scikits/image/morphology/tests/test_watershed.py
@@ -0,0 +1,495 @@
+"""test_watershed.py - tests the watershed function
+
+Originally part of CellProfiler, code licensed under both GPL and BSD licenses.
+Website: http://www.cellprofiler.org
+
+Copyright (c) 2003-2009 Massachusetts Institute of Technology
+Copyright (c) 2009-2011 Broad Institute
+All rights reserved.
+
+Original author: Lee Kamentsky
+"""
+#Portions of this test were taken from scipy's watershed test in test_ndimage.py
+#
+# Copyright (C) 2003-2005 Peter J. Verveer
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above
+#    copyright notice, this list of conditions and the following
+#    disclaimer in the documentation and/or other materials provided
+#    with the distribution.
+#
+# 3. The name of the author may not be used to endorse or promote
+#    products derived from this software without specific prior
+#    written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import math
+import time
+import unittest
+
+import numpy as np
+import scipy.ndimage
+
+from scikits.image.morphology.watershed import watershed \
+        _slow_watershed, is_local_maximum
+
+eps = 1e-12
+
+def diff(a, b):
+    if not isinstance(a, np.ndarray):
+        a = np.asarray(a)
+    if not isinstance(b, np.ndarray):
+        b = np.asarray(b)
+    if (0 in a.shape) and (0 in b.shape):
+        return 0.0
+    b[a==0] = 0
+    if (a.dtype in [np.complex64, np.complex128] or
+        b.dtype in [np.complex64, np.complex128]):
+        a = np.asarray(a, np.complex128)
+        b = np.asarray(b, np.complex128)
+        t = ((a.real - b.real)**2).sum() + ((a.imag - b.imag)**2).sum()
+    else:
+        a = np.asarray(a)
+        a = a.astype(np.float64)
+        b = np.asarray(b)
+        b = b.astype(np.float64)
+        t = ((a - b)**2).sum()
+    return math.sqrt(t)
+
+class TestWatershed(unittest.TestCase):
+    eight = np.ones((3, 3),bool)
+    def test_watershed01(self):
+        "watershed 1"
+        data = np.array([[0, 0, 0, 0, 0, 0, 0],
+                            [0, 0, 0, 0, 0, 0, 0],
+                            [0, 0, 0, 0, 0, 0, 0],
+                               [0, 1, 1, 1, 1, 1, 0],
+                               [0, 1, 0, 0, 0, 1, 0],
+                               [0, 1, 0, 0, 0, 1, 0],
+                               [0, 1, 0, 0, 0, 1, 0],
+                               [0, 1, 1, 1, 1, 1, 0],
+                               [0, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0]], np.uint8)
+        markers = np.array([[ -1, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 1, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0]],
+                                 np.int8)
+        out = watershed(data, markers,self.eight)
+        expected = np.array([[-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1,  1,  1,  1,  1,  1, -1],
+                      [-1,  1,  1,  1,  1,  1, -1],
+                      [-1,  1,  1,  1,  1,  1, -1],
+                      [-1,  1,  1,  1,  1,  1, -1],
+                      [-1,  1,  1,  1,  1,  1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1]])
+        error = diff(expected, out)
+        assert error < eps
+        out = _slow_watershed(data, markers, 8)
+        error = diff(expected, out)
+        assert error < eps
+
+    def test_watershed02(self):
+        "watershed 2"
+        data = np.array([[0, 0, 0, 0, 0, 0, 0],
+                            [0, 0, 0, 0, 0, 0, 0],
+                            [0, 0, 0, 0, 0, 0, 0],
+                            [0, 0, 0, 0, 0, 0, 0],
+                            [0, 1, 1, 1, 1, 1, 0],
+                               [0, 1, 0, 0, 0, 1, 0],
+                               [0, 1, 0, 0, 0, 1, 0],
+                               [0, 1, 0, 0, 0, 1, 0],
+                               [0, 1, 1, 1, 1, 1, 0],
+                               [0, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0]], np.uint8)
+        markers = np.array([[ -1, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0],
+                               [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 1, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0]],
+                                 np.int8)
+        out = watershed(data, markers)
+        error = diff([[-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1,  1,  1,  1, -1, -1],
+                      [-1,  1,  1,  1,  1,  1, -1],
+                      [-1,  1,  1,  1,  1,  1, -1],
+                      [-1,  1,  1,  1,  1,  1, -1],
+                      [-1, -1,  1,  1,  1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1]], out)
+        self.failUnless(error < eps)
+
+    def test_watershed03(self):
+        "watershed 3"
+        data = np.array([[0, 0, 0, 0, 0, 0, 0],
+                               [0, 1, 1, 1, 1, 1, 0],
+                               [0, 1, 0, 1, 0, 1, 0],
+                               [0, 1, 0, 1, 0, 1, 0],
+                               [0, 1, 0, 1, 0, 1, 0],
+                               [0, 1, 1, 1, 1, 1, 0],
+                               [0, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0]], np.uint8)
+        markers = np.array([[ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 2, 0, 3, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, -1]],
+                                 np.int8)
+        out = watershed(data, markers)
+        error = diff([[-1, -1, -1, -1, -1, -1, -1],
+                      [-1,  0,  2,  0,  3,  0, -1],
+                      [-1,  2,  2,  0,  3,  3, -1],
+                      [-1,  2,  2,  0,  3,  3, -1],
+                      [-1,  2,  2,  0,  3,  3, -1],
+                      [-1,  0,  2,  0,  3,  0, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1]], out)
+        self.failUnless(error < eps)
+
+    def test_watershed04(self):
+        "watershed 4"
+        data = np.array([[0, 0, 0, 0, 0, 0, 0],
+                               [0, 1, 1, 1, 1, 1, 0],
+                               [0, 1, 0, 1, 0, 1, 0],
+                               [0, 1, 0, 1, 0, 1, 0],
+                               [0, 1, 0, 1, 0, 1, 0],
+                               [0, 1, 1, 1, 1, 1, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0]], np.uint8)
+        markers = np.array([[ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 2, 0, 3, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, -1]],
+                                 np.int8)
+        out = watershed(data, markers, self.eight)
+        error = diff([[-1, -1, -1, -1, -1, -1, -1],
+                      [-1,  2,  2,  0,  3,  3, -1],
+                      [-1,  2,  2,  0,  3,  3, -1],
+                      [-1,  2,  2,  0,  3,  3, -1],
+                      [-1,  2,  2,  0,  3,  3, -1],
+                      [-1,  2,  2,  0,  3,  3, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],                      
+                      [-1, -1, -1, -1, -1, -1, -1],                      
+                      [-1, -1, -1, -1, -1, -1, -1],                      
+                      [-1, -1, -1, -1, -1, -1, -1]], out)
+        self.failUnless(error < eps)
+
+    def test_watershed05(self):
+        "watershed 5"
+        data = np.array([[0, 0, 0, 0, 0, 0, 0],
+                               [0, 1, 1, 1, 1, 1, 0],
+                               [0, 1, 0, 1, 0, 1, 0],
+                               [0, 1, 0, 1, 0, 1, 0],
+                               [0, 1, 0, 1, 0, 1, 0],
+                               [0, 1, 1, 1, 1, 1, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0]], np.uint8)
+        markers = np.array([[ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 3, 0, 2, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, 0],
+                                  [ 0, 0, 0, 0, 0, 0, -1]],
+                                 np.int8)
+        out = watershed(data, markers, self.eight)
+        error = diff([[-1, -1, -1, -1, -1, -1, -1],
+                      [-1,  3,  3,  0,  2,  2, -1],
+                      [-1,  3,  3,  0,  2,  2, -1],
+                      [-1,  3,  3,  0,  2,  2, -1],
+                      [-1,  3,  3,  0,  2,  2, -1],
+                      [-1,  3,  3,  0,  2,  2, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1]], out)
+        self.failUnless(error < eps)
+
+    def test_watershed06(self):
+        "watershed 6"
+        data = np.array([[0, 1, 0, 0, 0, 1, 0],
+                               [0, 1, 0, 0, 0, 1, 0],
+                               [0, 1, 0, 0, 0, 1, 0],
+                               [0, 1, 1, 1, 1, 1, 0],
+                               [0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                               [0, 0, 0, 0, 0, 0, 0]], np.uint8)
+        markers = np.array([[ 0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 1, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  0, 0, 0, 0, 0, 0, 0],
+                                  [  -1, 0, 0, 0, 0, 0, 0]],
+                                 np.int8)
+        out = watershed(data, markers, self.eight)
+        error = diff([[-1,  1,  1,  1,  1,  1, -1],
+                      [-1,  1,  1,  1,  1,  1, -1],
+                      [-1,  1,  1,  1,  1,  1, -1],
+                      [-1,  1,  1,  1,  1,  1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1],
+                      [-1, -1, -1, -1, -1, -1, -1]], out)
+        self.failUnless(error < eps)
+
+    def test_watershed07(self):
+        "A regression test of a competitive case that failed"
+        data = np.array([[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255],
+                            [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255],
+                            [255,255,255,255,255,204,204,204,204,204,204,255,255,255,255,255],
+                            [255,255,255,204,204,183,153,153,153,153,183,204,204,255,255,255],
+                            [255,255,204,183,153,141,111,103,103,111,141,153,183,204,255,255],
+                            [255,255,204,153,111, 94, 72, 52, 52, 72, 94,111,153,204,255,255],
+                            [255,255,204,153,111, 72, 39,  1, 1, 39, 72,111,153,204,255,255],
+                            [255,255,204,183,141,111, 72, 39, 39, 72,111,141,183,204,255,255],
+                            [255,255,255,204,183,141,111, 72, 72,111,141,183,204,255,255,255],
+                            [255,255,255,255,204,183,141, 94, 94,141,183,204,255,255,255,255],
+                            [255,255,255,255,255,204,153,103,103,153,204,255,255,255,255,255],
+                            [255,255,255,255,204,183,141, 94, 94,141,183,204,255,255,255,255],
+                            [255,255,255,204,183,141,111, 72, 72,111,141,183,204,255,255,255],
+                            [255,255,204,183,141,111, 72, 39, 39, 72,111,141,183,204,255,255],
+                            [255,255,204,153,111, 72, 39,  1,  1, 39, 72,111,153,204,255,255],
+                            [255,255,204,153,111, 94, 72, 52, 52, 72, 94,111,153,204,255,255],
+                            [255,255,204,183,153,141,111,103,103,111,141,153,183,204,255,255],
+                            [255,255,255,204,204,183,153,153,153,153,183,204,204,255,255,255],
+                            [255,255,255,255,255,204,204,204,204,204,204,255,255,255,255,255],
+                            [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255],
+                            [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]])
+        mask = (data!=255)
+        markers = np.zeros(data.shape,int)
+        markers[6, 7] = 1
+        markers[14, 7] = 2
+        out = watershed(data, markers, self.eight, mask=mask)
+        #
+        # The two objects should be the same size, except possibly for the
+        # border region
+        #
+        size1 = np.sum(out == 1)
+        size2 = np.sum(out == 2)
+        self.assertTrue(abs(size1-size2) <= 6)
+    
+    def test_watershed08(self):
+        "The border pixels + an edge are all the same value"
+        data = np.array([[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255],
+                            [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255],
+                            [255,255,255,255,255,204,204,204,204,204,204,255,255,255,255,255],
+                            [255,255,255,204,204,183,153,153,153,153,183,204,204,255,255,255],
+                            [255,255,204,183,153,141,111,103,103,111,141,153,183,204,255,255],
+                            [255,255,204,153,111, 94, 72, 52, 52, 72, 94,111,153,204,255,255],
+                            [255,255,204,153,111, 72, 39,  1, 1, 39, 72,111,153,204,255,255],
+                            [255,255,204,183,141,111, 72, 39, 39, 72,111,141,183,204,255,255],
+                            [255,255,255,204,183,141,111, 72, 72,111,141,183,204,255,255,255],
+                            [255,255,255,255,204,183,141, 94, 94,141,183,204,255,255,255,255],
+                            [255,255,255,255,255,204,153,141,141,153,204,255,255,255,255,255],
+                            [255,255,255,255,204,183,141, 94, 94,141,183,204,255,255,255,255],
+                            [255,255,255,204,183,141,111, 72, 72,111,141,183,204,255,255,255],
+                            [255,255,204,183,141,111, 72, 39, 39, 72,111,141,183,204,255,255],
+                            [255,255,204,153,111, 72, 39,  1,  1, 39, 72,111,153,204,255,255],
+                            [255,255,204,153,111, 94, 72, 52, 52, 72, 94,111,153,204,255,255],
+                            [255,255,204,183,153,141,111,103,103,111,141,153,183,204,255,255],
+                            [255,255,255,204,204,183,153,153,153,153,183,204,204,255,255,255],
+                            [255,255,255,255,255,204,204,204,204,204,204,255,255,255,255,255],
+                            [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255],
+                            [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]])
+        mask = (data != 255)
+        markers = np.zeros(data.shape,int)
+        markers[6,7] = 1
+        markers[14,7] = 2
+        out = watershed(data, markers, self.eight, mask=mask)
+        #
+        # The two objects should be the same size, except possibly for the
+        # border region
+        #
+        size1 = np.sum(out == 1)
+        size2 = np.sum(out == 2)
+        self.assertTrue(abs(size1-size2) <= 6)
+    
+    def test_watershed09(self):
+        """Test on an image of reasonable size
+        
+        This is here both for timing (does it take forever?) and to
+        ensure that the memory constraints are reasonable
+        """
+        image = np.zeros((1000, 1000))
+        coords = np.random.uniform(0, 1000, (100, 2)).astype(int)
+        markers = np.zeros((1000, 1000),int)
+        idx = 1
+        for x,y in coords:
+            image[x,y] = 1
+            markers[x, y] = idx
+            idx += 1
+        
+        image = scipy.ndimage.gaussian_filter(image, 4)
+        before = time.clock() 
+        out = watershed(image, markers, self.eight)
+        elapsed = time.clock() - before
+        print "Fast watershed ran a megapixel image in %f seconds"%(elapsed)
+        before = time.clock()
+        out = scipy.ndimage.watershed_ift(image.astype(np.uint16), markers, self.eight)
+        elapsed = time.clock() - before
+        print "Scipy watershed ran a megapixel image in %f seconds"%(elapsed)
+
+
+class TestIsLocalMaximum(unittest.TestCase):
+    def test_00_00_empty(self):
+        image = np.zeros((10, 20))
+        labels = np.zeros((10, 20), int)
+        result = is_local_maximum(image, labels, np.ones((3, 3), bool))
+        self.assertTrue(np.all(~ result))
+        
+    def test_01_01_one_point(self):
+        image = np.zeros((10, 20))
+        labels = np.zeros((10, 20), int)
+        image[5, 5] = 1
+        labels[5, 5] = 1
+        result = is_local_maximum(image, labels, np.ones((3, 3), bool))
+        self.assertTrue(np.all(result == (labels == 1)))
+        
+    def test_01_02_adjacent_and_same(self):
+        image = np.zeros((10, 20))
+        labels = np.zeros((10, 20), int)
+        image[5, 5:6] = 1
+        labels[5, 5:6] = 1
+        result = is_local_maximum(image, labels, np.ones((3, 3), bool))
+        self.assertTrue(np.all(result == (labels == 1)))
+        
+    def test_01_03_adjacent_and_different(self):
+        image = np.zeros((10, 20))
+        labels = np.zeros((10, 20), int)
+        image[5, 5] = 1
+        image[5, 6] = .5
+        labels[5, 5:6] = 1
+        expected = (image == 1)
+        result = is_local_maximum(image, labels, np.ones((3, 3), bool))
+        self.assertTrue(np.all(result == expected))
+        result = is_local_maximum(image, labels)
+        self.assertTrue(np.all(result == expected))
+        
+    def test_01_04_not_adjacent_and_different(self):
+        image = np.zeros((10,20))
+        labels = np.zeros((10,20), int)
+        image[5,5] = 1
+        image[5,8] = .5
+        labels[image > 0] = 1
+        expected = (labels == 1)
+        result = is_local_maximum(image, labels, np.ones((3,3), bool))
+        self.assertTrue(np.all(result == expected))
+        
+    def test_01_05_two_objects(self):
+        image = np.zeros((10,20))
+        labels = np.zeros((10,20), int)
+        image[5,5] = 1
+        image[5,15] = .5
+        labels[5,5] = 1
+        labels[5,15] = 2
+        expected = (labels > 0)
+        result = is_local_maximum(image, labels, np.ones((3,3), bool))
+        self.assertTrue(np.all(result == expected))
+
+    def test_01_06_adjacent_different_objects(self):
+        image = np.zeros((10,20))
+        labels = np.zeros((10,20), int)
+        image[5,5] = 1
+        image[5,6] = .5
+        labels[5,5] = 1
+        labels[5,6] = 2
+        expected = (labels > 0)
+        result = is_local_maximum(image, labels, np.ones((3,3), bool))
+        self.assertTrue(np.all(result == expected))
+        
+    def test_02_01_four_quadrants(self):
+        np.random.seed(21)
+        image = np.random.uniform(size=(40,60))
+        i,j = np.mgrid[0:40,0:60]
+        labels = 1 + (i >= 20) + (j >= 30) * 2
+        i,j = np.mgrid[-3:4,-3:4]
+        footprint = (i*i + j*j <=9)
+        expected = np.zeros(image.shape, float)
+        for imin, imax in ((0, 20), (20, 40)):
+            for jmin, jmax in ((0, 30), (30, 60)):
+                expected[imin:imax,jmin:jmax] = scipy.ndimage.maximum_filter(
+                    image[imin:imax, jmin:jmax], footprint = footprint)
+        expected = (expected == image)
+        result = is_local_maximum(image, labels, footprint)
+        self.assertTrue(np.all(result == expected))
+        
+    def test_03_01_disk_1(self):
+        '''regression test of img-1194, footprint = [1]
+        
+        Test is_local_maximum when every point is a local maximum
+        '''
+        np.random.seed(31)
+        image = np.random.uniform(size=(10,20))
+        footprint = np.array([[1]])
+        result = is_local_maximum(image, np.ones((10,20)), footprint)
+        self.assertTrue(np.all(result))
+        result = is_local_maximum(image, footprint=footprint)
+        self.assertTrue(np.all(result))
+        
+
+
diff --git a/scikits/image/morphology/watershed.py b/scikits/image/morphology/watershed.py
new file mode 100644
index 00000000..759773fe
--- /dev/null
+++ b/scikits/image/morphology/watershed.py
@@ -0,0 +1,449 @@
+"""watershed.py - watershed algorithm
+
+This module implements a watershed algorithm that apportions pixels into
+marked basins. The algorithm uses a priority queue to hold the pixels
+with the metric for the priority queue being pixel value, then the time
+of entry into the queue - this settles ties in favor of the closest marker.
+
+Some ideas taken from
+Soille, "Automated Basin Delineation from Digital Elevation Models Using
+Mathematical Morphology", Signal Processing 20 (1990) 171-182.
+
+The most important insight in the paper is that entry time onto the queue
+solves two problems: a pixel should be assigned to the neighbor with the
+largest gradient or, if there is no gradient, pixels on a plateau should
+be split between markers on opposite sides.
+
+Originally part of CellProfiler, code licensed under both GPL and BSD licenses.
+Website: http://www.cellprofiler.org
+
+Copyright (c) 2003-2009 Massachusetts Institute of Technology
+Copyright (c) 2009-2011 Broad Institute
+All rights reserved.
+
+Original author: Lee Kamentsky
+"""
+
+from _heapq import heapify, heappush, heappop
+import numpy as np
+import scipy.ndimage
+from ..filter import rank_order
+
+import _watershed
+import warnings
+
+def watershed(image, markers, connectivity=None, offset=None, mask=None):
+    """
+    Return a matrix labeled using the watershed segmentation algorithm
+
+    Parameters
+    ----------
+
+    image: ndarray (2-D, 3-D, ...) of integers
+        Data array where the lowest value points are labeled first.
+    markers: ndarray of the same shape as `image`
+        An array marking the basins with the values to be assigned in the 
+        label matrix. Zero means not a marker. This array should be of an 
+        integer type.
+    connectivity: ndarray, optional
+        An array with the same number of dimensions as `image` whose
+        non-zero elements indicate neighbors for connection.
+        Following the scipy convention, default is a one-connected array of 
+        the dimension of the image.
+    offset: array_like of shape image.ndim, optional
+        offset of the connectivity (one offset per dimension)
+    mask: ndarray of bools or 0s and 1s, optional
+        Array of same shape as `image`. Only points at which mask == True 
+        will be labeled.
+    
+    Returns 
+    -------
+    out: ndarray
+        A labeled matrix of the same type and shape as markers
+    
+
+    Notes
+    -----
+    This function implements a watershed algorithm [1]_that apportions pixels
+    into marked basins. The algorithm uses a priority queue to hold the pixels
+    with the metric for the priority queue being pixel value, then the time of
+    entry into the queue - this settles ties in favor of the closest marker.
+
+    Some ideas taken from
+    Soille, "Automated Basin Delineation from Digital Elevation Models Using
+    Mathematical Morphology", Signal Processing 20 (1990) 171-182
+
+    The most important insight in the paper is that entry time onto the queue
+    solves two problems: a pixel should be assigned to the neighbor with the
+    largest gradient or, if there is no gradient, pixels on a plateau should
+    be split between markers on opposite sides.
+
+    This implementation converts all arguments to specific, lowest common
+    denominator types, then passes these to a C algorithm.
+    
+    Markers can be determined manually, or automatically using for example 
+    the local minima of the gradient of the image, or the local maxima of the
+    distance function to the background for separating overlapping objects
+    (see example).
+
+    References
+    ----------
+    .. [1] http://en.wikipedia.org/wiki/Watershed_%28image_processing%29
+
+    .. [2] http://cmm.ensmp.fr/~beucher/wtshed.html
+
+    Examples
+    --------
+    The watershed algorithm is very useful to separate overlapping objects
+
+    >>> # Generate an initial image with two overlapping circles
+    >>> x, y = np.indices((80, 80))
+    >>> x1, y1, x2, y2 = 28, 28, 44, 52
+    >>> r1, r2 = 16, 20
+    >>> mask_circle1 = (x - x1)**2 + (y - y1)**2 < r1**2
+    >>> mask_circle2 = (x - x2)**2 + (y - y2)**2 < r2**2
+    >>> image = np.logical_or(mask_circle1, mask_circle2)
+    >>> # Now we want to separate the two objects in image
+    >>> # Generate the markers as local maxima of the distance
+    >>> # to the background
+    >>> from scipy import ndimage
+    >>> distance = ndimage.distance_transform_edt(image)
+    >>> local_maxi = is_local_maximum(distance, image, np.ones((3, 3)))
+    >>> markers = ndimage.label(local_maxi)[0]
+    >>> labels = watershed(-distance, markers, mask=image)
+
+    The algorithm works also for 3-D images, and can be used for example to
+    separate overlapping spheres.
+    """
+    
+    if connectivity == None:
+        c_connectivity = scipy.ndimage.generate_binary_structure(image.ndim, 1)
+    else:
+        c_connectivity = np.array(connectivity, bool)
+        if c_connectivity.ndim != image.ndim:
+            raise ValueError,"Connectivity dimension must be same as image"
+    if offset == None:
+        if any([x%2==0 for x in c_connectivity.shape]):
+            raise ValueError,"Connectivity array must have an unambiguous \
+            center"
+        #
+        # offset to center of connectivity array
+        #
+        offset = np.array(c_connectivity.shape) / 2
+
+    # pad the image, markers, and mask so that we can use the mask to 
+    # keep from running off the edges
+    pads = offset
+
+    def pad(im):
+        new_im = np.zeros([i + 2*p for i, p in zip(im.shape, pads)], im.dtype)
+        new_im[[slice(p, -p, None) for p in pads]] = im
+        return new_im
+
+    if mask is not None:
+        mask = pad(mask)
+    else:
+        mask = pad(np.ones(image.shape, bool))
+    image = pad(image)
+    markers = pad(markers)
+
+    c_image = rank_order(image)[0].astype(np.int32)
+    c_markers = np.ascontiguousarray(markers, dtype=np.int32)
+    if c_markers.ndim != c_image.ndim:
+        raise ValueError,\
+            "markers (ndim=%d) must have same # of dimensions "\
+            "as image (ndim=%d)"%(c_markers.ndim, cimage.ndim)
+    if c_markers.shape != c_image.shape:
+        raise ValueError("image and markers must have the same shape")
+    if mask != None:
+        c_mask = np.ascontiguousarray(mask, dtype=bool)
+        if c_mask.ndim != c_markers.ndim:
+            raise ValueError, "mask must have same # of dimensions as image"
+        if c_markers.shape != c_mask.shape:
+            raise ValueError, "mask must have same shape as image"
+        c_markers[np.logical_not(mask)]=0
+    else:
+        c_mask = None
+    c_output = c_markers.copy()
+
+    #
+    # We pass a connectivity array that pre-calculates the stride for each
+    # neighbor.
+    #
+    # The result of this bit of code is an array with one row per
+    # point to be considered. The first column is the pre-computed stride
+    # and the second through last are the x,y...whatever offsets
+    # (to do bounds checking).
+    c = []
+    image_stride = np.array(image.strides) / image.itemsize
+    for i in range(np.product(c_connectivity.shape)):
+        multiplier = 1
+        offs = []
+        indexes = []
+        ignore = True
+        for j in range(len(c_connectivity.shape)):
+            elems = c_image.shape[j]
+            idx   = (i / multiplier) % c_connectivity.shape[j]
+            off   = idx - offset[j]
+            if off:
+                ignore = False
+            offs.append(off)
+            indexes.append(idx)
+            multiplier *= c_connectivity.shape[j]
+        if (not ignore) and c_connectivity.__getitem__(tuple(indexes)):
+            stride = np.dot(image_stride, np.array(offs))
+            offs.insert(0, stride)
+            c.append(offs)
+    c = np.array(c, np.int32)
+
+    pq, age = __heapify_markers(c_markers, c_image)
+    pq = np.ascontiguousarray(pq, dtype=np.int32)
+    if np.product(pq.shape) > 0:
+        # If nothing is labeled, the output is empty and we don't have to
+        # do anything
+        c_output = c_output.flatten()
+        if c_mask == None:
+            c_mask = np.ones(c_image.shape, np.int8).flatten()
+        else:
+            c_mask = c_mask.astype(np.int8).flatten()
+        _watershed.watershed(c_image.flatten(),
+                             pq, age, c, 
+                             c_image.ndim, 
+                             c_mask,
+                             np.array(c_image.shape,np.int32),
+                             c_output)
+    c_output = c_output.reshape(c_image.shape)[[slice(1, -1, None)] *
+                                                image.ndim]
+    try:
+        return c_output.astype(markers.dtype)
+    except:
+        return c_output
+
+
+def is_local_maximum(image, labels=None, footprint=None):
+    """
+    Return a boolean array of points that are local maxima
+ 
+    Parameters
+    ----------
+    image: ndarray (2-D, 3-D, ...)
+        intensity image
+        
+    labels: ndarray, optional 
+        find maxima only within labels. Zero is reserved for background.
+    
+    footprint: ndarray of bools, optional
+        binary mask indicating the neighborhood to be examined
+        `footprint` must be a matrix with odd dimensions, the center is taken 
+        to be the point in question.
+
+    Returns
+    -------
+    result: ndarray of bools
+        mask that is True for pixels that are local maxima of `image`
+
+    Examples
+    --------
+    >>> image = np.zeros((4, 4))
+    >>> image[1, 2] = 2
+    >>> image[3, 3] = 1
+    >>> image
+    array([[ 0.,  0.,  0.,  0.],
+        [ 0.,  0.,  2.,  0.],
+        [ 0.,  0.,  0.,  0.],
+        [ 0.,  0.,  0.,  1.]])
+    >>> is_local_maximum(image)
+    array([[ True, False, False, False],
+        [ True, False,  True, False],
+        [ True, False, False, False],
+        [ True,  True, False,  True]], dtype=bool)
+    >>> image = np.arange(16).reshape((4, 4))
+    >>> labels = np.array([[1, 2], [3, 4]])
+    >>> labels = np.repeat(np.repeat(labels, 2, axis=0), 2, axis=1)
+    >>> labels
+    array([[1, 1, 2, 2],
+        [1, 1, 2, 2],
+        [3, 3, 4, 4],
+        [3, 3, 4, 4]])
+    >>> image
+    array([[ 0,  1,  2,  3],
+        [ 4,  5,  6,  7],
+        [ 8,  9, 10, 11],
+        [12, 13, 14, 15]])
+    >>> is_local_maximum(image, labels=labels)
+    array([[False, False, False, False],
+        [False,  True, False,  True],
+        [False, False, False, False],
+        [False,  True, False,  True]], dtype=bool)
+    """
+    if labels is None:
+        labels = np.ones(image.shape, dtype=np.uint8)
+    if footprint is None:
+        footprint = np.ones([3] * image.ndim, dtype=np.uint8)
+    assert((np.all(footprint.shape) & 1) == 1)
+    footprint = (footprint != 0)
+    footprint_extent = (np.array(footprint.shape)-1) / 2
+    if np.all(footprint_extent == 0):
+        return labels > 0
+    result = (labels > 0).copy()
+    #
+    # Create a labels matrix with zeros at the borders that might be
+    # hit by the footprint.
+    #
+    big_labels = np.zeros(np.array(labels.shape) + footprint_extent*2,
+                          labels.dtype)
+    big_labels[[slice(fe,-fe) for fe in footprint_extent]] = labels
+    #
+    # Find the relative indexes of each footprint element
+    #
+    image_strides = np.array(image.strides) / image.dtype.itemsize
+    big_strides = np.array(big_labels.strides) / big_labels.dtype.itemsize
+    result_strides = np.array(result.strides) / result.dtype.itemsize
+    footprint_offsets = np.mgrid[[slice(-fe,fe+1) for fe in footprint_extent]]
+    
+    fp_image_offsets = np.sum(image_strides[:, np.newaxis] *
+                              footprint_offsets[:, footprint], 0)
+    fp_big_offsets = np.sum(big_strides[:, np.newaxis] *
+                            footprint_offsets[:, footprint], 0)
+    #
+    # Get the index of each labeled pixel in the image and big_labels arrays
+    #
+    indexes = np.mgrid[[slice(0,x) for x in labels.shape]][:, labels > 0]
+    image_indexes = np.sum(image_strides[:, np.newaxis] * indexes, 0)
+    big_indexes = np.sum(big_strides[:, np.newaxis] * 
+                         (indexes + footprint_extent[:, np.newaxis]), 0)
+    result_indexes = np.sum(result_strides[:, np.newaxis] * indexes, 0)
+    #
+    # Now operate on the raveled images
+    #
+    big_labels_raveled = big_labels.ravel()
+    image_raveled = image.ravel()
+    result_raveled = result.ravel()
+    #
+    # A hit is a hit if the label at the offset matches the label at the pixel
+    # and if the intensity at the pixel is greater or equal to the intensity
+    # at the offset.
+    #
+    for fp_image_offset, fp_big_offset in zip(fp_image_offsets, fp_big_offsets):
+        same_label = (big_labels_raveled[big_indexes + fp_big_offset] ==
+                      big_labels_raveled[big_indexes])
+        less_than = (image_raveled[image_indexes[same_label]] <
+                     image_raveled[image_indexes[same_label]+ fp_image_offset])
+        result_raveled[result_indexes[same_label][less_than]] = False
+        
+    return result
+
+
+
+# ---------------------- deprecated ------------------------------
+# Deprecate slower pure-Python code, that we keep only for 
+# pedagogical purposes
+
+
+def __heapify_markers(markers, image):
+    """Create a priority queue heap with the markers on it"""
+    stride = np.array(image.strides) / image.itemsize
+    coords = np.argwhere(markers != 0)
+    ncoords = coords.shape[0]
+    if ncoords > 0:
+        pixels = image[markers != 0]
+        age    = np.arange(ncoords)
+        offset = np.zeros(coords.shape[0], int)
+        for i in range(image.ndim):
+            offset = offset + stride[i] * coords[:, i]
+        pq = np.column_stack((pixels, age, offset, coords))
+        # pixels = top priority, age=second
+        ordering = np.lexsort((age, pixels)) 
+        pq = pq[ordering, :]
+    else:
+        pq = np.zeros((0, markers.ndim + 3), int)
+    return (pq, ncoords)
+    
+def _slow_watershed(image, markers, connectivity=8, mask=None):
+    """Return a matrix labeled using the watershed algorithm
+    
+    Use the `watershed` function for a faster execution.
+    This pure Python function is solely for pedagogical purposes.
+    
+    Parameters
+    ----------
+    image: 2-d ndarray of integers
+        a two-dimensional matrix where the lowest value points are
+        labeled first.
+    markers: 2-d ndarray of integers
+        a two-dimensional matrix marking the basins with the values
+        to be assigned in the label matrix. Zero means not a marker.
+    connectivity: {4, 8}, optional 
+        either 4 for four-connected or 8 (default) for eight-connected
+    mask: 2-d ndarray of bools, optional    
+        don't label points in the mask
+
+    Returns 
+    -------
+    out: ndarray
+        A labeled matrix of the same type and shape as markers
+    
+
+    Notes
+    -----
+    This function implements a watershed algorithm [1]_that apportions pixels
+    into marked basins. The algorithm uses a priority queue to hold the pixels
+    with the metric for the priority queue being pixel value, then the time of
+    entry into the queue - this settles ties in favor of the closest marker.
+
+    Some ideas taken from
+    Soille, "Automated Basin Delineation from Digital Elevation Models Using
+    Mathematical Morphology", Signal Processing 20 (1990) 171-182
+
+    The most important insight in the paper is that entry time onto the queue
+    solves two problems: a pixel should be assigned to the neighbor with the
+    largest gradient or, if there is no gradient, pixels on a plateau should
+    be split between markers on opposite sides.
+
+    This implementation converts all arguments to specific, lowest common
+    denominator types, then passes these to a C algorithm.
+    
+    Markers can be determined manually, or automatically using for example 
+    the local minima of the gradient of the image, or the local maxima of the
+    distance function to the background for separating overlapping objects.
+    """
+    if connectivity not in (4, 8):
+        raise ValueError("Connectivity was %d: it should be either \
+        four or eight" %(connectivity))
+    
+    image = np.array(image)
+    markers = np.array(markers)
+    labels = markers.copy()
+    max_x  = markers.shape[0]
+    max_y  = markers.shape[1]
+    if connectivity == 4:
+        connect_increments = ((1, 0), (0, 1), (-1, 0), (0, -1))
+    else:
+        connect_increments = ((1, 0), (1, 1), (0, 1), (-1, 1),
+                              (-1, 0), (-1, -1), (0, -1), (1, -1))
+    pq, age = __heapify_markers(markers, image)
+    pq = pq.tolist()
+    #
+    # The second step pops a value off of the queue, then labels and pushes
+    # the neighbors
+    #
+    while len(pq):
+        pix_value, pix_age, ignore, pix_x, pix_y = heappop(pq)
+        pix_label = labels[pix_x, pix_y]
+        for xi, yi in connect_increments:
+            x = pix_x + xi
+            y = pix_y + yi
+            if x < 0 or y < 0 or x >= max_x or y >= max_y:
+                continue
+            if labels[x, y]:
+                continue
+            if mask != None and not mask[x, y]:
+                continue
+            # label the pixel
+            labels[x, y] = pix_label
+            # put the pixel onto the queue
+            heappush(pq, (image[x, y], age, 0, x, y))
+            age += 1
+    return labels
+
+