From bdb2df89c030a16bf9d59eb7dc3602d935a96d0f Mon Sep 17 00:00:00 2001
From: "Thouis (Ray) Jones"
Date: Tue, 12 Feb 2013 14:32:08 -0500
Subject: [PATCH] use SSE to add/subtract histograms

---
Review notes (everything between the three-dash line above and the diff
below is commentary only; it is not part of the commit message and is not
applied to the tree):

  * Hunk 1 declares four SSE2 intrinsics from "emmintrin.h" to Cython:
    _mm_load_si128, _mm_adds_epu16, _mm_subs_epu16 and _mm_store_si128.
    "ctypedef long __m128i" only introduces the type name to Cython;
    presumably the real 128-bit definition comes from the C header at
    compile time -- confirm against the generated C.

  * Hunk 2 replaces the 16-iteration scalar loops in add16() and sub16()
    with two 128-bit operations each, at pd/ps and at pd + 1/ps + 1.
    One __m128i holds eight 16-bit lanes, so the two steps together cover
    the same 16 np.uint16_t elements as the removed loops.

  * Semantics differ at the limits: _mm_adds_epu16 and _mm_subs_epu16 are
    unsigned *saturating* operations (clamp to 65535 / 0), while the
    removed "dest[i] += src[i]" and "dest[i] -= src[i]" on np.uint16_t
    wrap modulo 2**16. Results are identical only if no lane overflows or
    underflows. NOTE(review): presumably histogram bin counts stay within
    range -- verify against the callers.

  * _mm_load_si128 and _mm_store_si128 require 16-byte-aligned addresses.
    Nothing in this patch shows how the dest/src pointers are allocated.
    NOTE(review): confirm every caller passes aligned pointers, otherwise
    the unaligned variants (_mm_loadu_si128 / _mm_storeu_si128) are needed.

  * The scalar implementations are removed rather than kept as a
    fallback, so after this patch the module needs "emmintrin.h" to
    build. NOTE(review): check the impact on non-SSE2 targets.

  * The context comment "TO_DO - optimize using SIMD instructions" above
    add16() is left in place by this patch although the patch performs
    that optimization; it is stale once the patch is applied.

 skimage/filter/_ctmf.pyx | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/skimage/filter/_ctmf.pyx b/skimage/filter/_ctmf.pyx
index c133d8d6..d24a9693 100644
--- a/skimage/filter/_ctmf.pyx
+++ b/skimage/filter/_ctmf.pyx
@@ -17,6 +17,13 @@ cimport cython
 from libc.stdlib cimport malloc, free
 from libc.string cimport memset
 
+cdef extern from "emmintrin.h":
+    ctypedef long __m128i
+    __m128i _mm_load_si128 (__m128i *p)
+    __m128i _mm_adds_epu16 (__m128i a, __m128i b)
+    __m128i _mm_subs_epu16 (__m128i a, __m128i b)
+    void _mm_store_si128 (__m128i *p, __m128i a)
+
 np.import_array()
 
 ##############################################################################
@@ -342,14 +349,30 @@ cdef inline np.int32_t trailing_edge_colidx(Histograms *ph, np.int32_t colidx):
 # TO_DO - optimize using SIMD instructions
 #
 cdef inline void add16(np.uint16_t *dest, np.uint16_t *src):
-    cdef int i
-    for i in range(16):
-        dest[i] += src[i]
+    cdef __m128i d, s, *pd, *ps
+    pd = <__m128i *> dest
+    ps = <__m128i *> src
+    d = _mm_load_si128(pd)
+    s = _mm_load_si128(ps)
+    d = _mm_adds_epu16 (d, s)
+    _mm_store_si128(pd, d)
+    d = _mm_load_si128(pd + 1)
+    s = _mm_load_si128(ps + 1)
+    d = _mm_adds_epu16 (d, s)
+    _mm_store_si128(pd + 1, d)
 
 cdef inline void sub16(np.uint16_t *dest, np.uint16_t *src):
-    cdef int i
-    for i in range(16):
-        dest[i] -= src[i]
+    cdef __m128i d, s, *pd, *ps
+    pd = <__m128i *> dest
+    ps = <__m128i *> src
+    d = _mm_load_si128(pd)
+    s = _mm_load_si128(ps)
+    d = _mm_subs_epu16 (d, s)
+    _mm_store_si128(pd, d)
+    d = _mm_load_si128(pd + 1)
+    s = _mm_load_si128(ps + 1)
+    d = _mm_subs_epu16 (d, s)
+    _mm_store_si128(pd + 1, d)
 
 ############################################################################
 #