Merge pull request #51 from IndicoDataSolutions/development

Development
2026-06-27 16:10:34 +08:00 · 2015-05-28 15:59:28 -04:00
parent 2c7d6f2d16 c82fd16b11
commit 0c4dfbcad6
9 changed files with 167 additions and 102 deletions
@@ -21,4 +21,5 @@ v0.4.15, Sat Dec 20 -- Bug fix release
 v0.5.0, Friday Feb 27 -- Updated to support private cloud, allows for indicorc file to reduce redundant authorization calls, README updates
 v0.5.1, Friday Feb 27 -- More README updates, fixed rst formatting issue, added classifiers
 v0.5.2, Tue March 7 -- Required API keys, configuration settings
-v0.5.3, Wed Apr 15 -- Added scipy to requirements, edited Readme to not break pypi page
+v0.5.3, Wed Apr 15 -- Added scipy to requirements, edited Readme to not break pypi page
+v0.6.0, Thu May 29 -- Remove numpy / scipy dependency in favor of Pillow
@@ -1,8 +1,13 @@
 from functools import partial

-JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain'}
+# Default headers for JSON requests. NOTE(review): presumably attached to
+# every API call by indicoio.utils -- confirm against api_handler.
+# 'version-number' repeats the package version assigned just below, so both
+# literals have to be bumped together on release.
+JSON_HEADERS = {
+    'Content-type': 'application/json',
+    'Accept': 'application/json',
+    'client-lib': 'python',
+    'version-number': '0.6.0'
+}

-Version, version, __version__, VERSION = ('0.5.3',) * 4
+# All four spellings of the version attribute share the same string.
+Version, version, __version__, VERSION = ('0.6.0',) * 4

 from indicoio.text.sentiment import political, posneg
 from indicoio.text.sentiment import posneg as sentiment
@@ -1,8 +1,6 @@
 import requests
-import numpy as np

-from indicoio.utils import image_preprocess, api_handler, is_url
-import indicoio.config as config
+from indicoio.utils import image_preprocess, api_handler

 def facial_features(image, cloud=None, batch=False, api_key=None, **kwargs):
    """
@@ -26,6 +24,7 @@ def facial_features(image, cloud=None, batch=False, api_key=None, **kwargs):
    :type image: list of lists
    :rtype: List containing feature responses
    """
+    image = image_preprocess(image, batch=batch)
    return api_handler(image, cloud=cloud, api="facialfeatures", batch=batch, api_key=api_key, **kwargs)

 def image_features(image, cloud=None, batch=False, api_key=None, **kwargs):
@@ -58,5 +57,5 @@ def image_features(image, cloud=None, batch=False, api_key=None, **kwargs):
    :type image: numpy.ndarray
    :rtype: List containing features
    """
-    image = image_preprocess(image, batch=batch)
+    image = image_preprocess(image, batch=batch, size=(64,64))
    return api_handler(image, cloud=cloud, api="imagefeatures", batch=batch, api_key=api_key, **kwargs)
@@ -1,7 +1,6 @@
 import requests
-import numpy as np

-from indicoio.utils import api_handler
+from indicoio.utils import api_handler, image_preprocess
 import indicoio.config as config

 def fer(image, cloud=None, batch=False, api_key=None, **kwargs):
@@ -27,5 +26,5 @@ def fer(image, cloud=None, batch=False, api_key=None, **kwargs):
    :type image: list of lists
    :rtype: Dictionary containing emotion probability pairs
    """
-
+    image = image_preprocess(image, batch=batch)
    return api_handler(image, cloud=cloud, api="fer", batch=batch, api_key=api_key, **kwargs)
@@ -1,6 +1,4 @@
-from indicoio import JSON_HEADERS
 from indicoio.utils import api_handler
-import indicoio.config as config

 def political(text, cloud=None, batch=False, api_key=None, **kwargs):
    """
@@ -1,11 +1,13 @@
-import inspect, json, getpass, os
+import inspect, json, getpass, os.path, base64, StringIO, re, warnings
 import requests
-import numpy as np
-from skimage.transform import resize
+from PIL import Image

 from indicoio import JSON_HEADERS
 from indicoio import config

+# Matches a complete, correctly padded base64 string. The trailing ``$`` is
+# required: without it the pattern only has to match a prefix, so any input
+# that merely starts with a valid 4-character group (for example the relative
+# path "data/unhappy.png") would be accepted as base64.
+B64_PATTERN = re.compile(r"^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{4}|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{2}==)$")
+
+
 def api_handler(arg, cloud, api, batch=False, api_key=None, **kwargs):
    data = {'data': arg}
    data.update(**kwargs)
@@ -81,69 +83,102 @@ class DataStructureException(Exception):
        """ % (self.callback, self.structure, str(self.accepted))


-@TypeCheck((list, dict, np.ndarray), 'array')
-def normalize(array, distribution=1, norm_range=(0, 1), **kwargs):
-    """
-    First arg is an array, whether that's in the form of a numpy array,
-    a list, or a dictionary that contains the data in its values.
-
-    Second arg is the desired distribution which would be applied before
-    normalization.
-        Supports linear, exponential, logarithmic and raising to whatever
-        power specified (in which case you just put a number)
-
-    Third arg is the range across which you want the data normalized
-    """
-    # Handling dictionary array input
-    # Note: lists and numpy arrays behave the same in this program
-    dict_array = isinstance(array, dict)
-
-    if dict_array:
-        keys = array.keys()
-        array = np.array(array.values()).astype('float')
-    else:  # Decorator errors if this isn't a list or a numpy array
-        array = np.array(array).astype('float')
-
-    # Handling various distributions
-    if type(distribution) in [float, int]:
-        array = np.power(array, distribution)
-    else:
-        array = getattr(np, distribution)(array, **kwargs)
-
-    # Prep for normalization
-    x_max, x_min = (np.max(array), np.min(array))
-
-    def norm(element,x_min,x_max):
-        base_span = (element - x_min)*(norm_range[-1] - norm_range[0])
-        return norm_range[0] + base_span / (x_max - x_min)
-
-    norm_array = np.vectorize(norm)(array, x_min, x_max)
-
-    if dict_array:
-        return dict(zip(keys, norm_array))
-    return norm_array
-
-
-def image_preprocess(image, batch=False):
+def image_preprocess(image, size=(48,48), batch=False):
    """
    Takes an image and prepares it for sending to the api including
    resizing and image data/structure standardizing.
+
+    :param image: a filepath, a base64 encoded string (optionally carrying a
+        ``data:image/...;base64,`` prefix), a nested list of pixels
+        (deprecated) or a numpy array; a list of those when ``batch`` is True
+    :param size: (width, height) the image is resized to before encoding
+    :param batch: when True every entry of ``image`` is processed separately
+    :rtype: base64 encoded PNG string, or a list of them when ``batch`` is True.
+        Input that already is base64 is returned without its data-url prefix
+        and is NOT decoded or resized.
+    :raises ValueError: if the input is not one of the accepted forms
    """
    if batch:
-        return [image_preprocess(img, batch=False) for img in image]
+        # forward ``size`` so batched calls honour the caller's dimensions
+        return [image_preprocess(img, size=size, batch=False) for img in image]
-    if isinstance(image,list):
-        image = np.asarray(image)
-    if type(image).__module__ != np.__name__:
-        raise ValueError('Image was not of type numpy.ndarray or list.')
-    if str(image.dtype) in ['int64','uint8']:
-        image = image/255.
-    if len(image.shape) == 2:
-        image = np.dstack((image,image,image))
-    if len(image.shape) == 4:
-        image = image[:,:,:3]
-    image = resize(image,(64,64))
-    image = image.tolist()
-    return image
+
+    if isinstance(image, basestring):
+        # strip an optional data-url prefix before testing for base64
+        b64_str = re.sub('^data:image/.+;base64,', '', image)
+        if os.path.isfile(image):
+            outImage = Image.open(image)
+        elif B64_PATTERN.match(b64_str) is not None:
+            # already encoded: handed back as-is
+            return b64_str
+        else:
+            raise ValueError("String provided must be a valid filepath or base64 encoded string")
+
+    elif isinstance(image, list): # image passed in is a list and not np.array
+        warnings.warn(
+            "Input as lists of pixels will be deprecated in the next major update",
+            DeprecationWarning
+        )
+        if not image:
+            # process_list_image hands an empty list straight back, which
+            # cannot be resized; fail with a clear message instead
+            raise ValueError("Image list must not be empty")
+        outImage = process_list_image(image)
+    elif type(image).__name__ == "ndarray": # image is from numpy/scipy
+        # checked by type name so that numpy does not have to be imported
+        outImage = Image.fromarray(image)
+    else:
+        raise ValueError("Image must be a filepath, base64 encoded string, or a numpy array")
+
+    # image resizing
+    outImage = outImage.resize(size)
+
+    # convert to base64
+    temp_output = StringIO.StringIO()
+    outImage.save(temp_output, format='PNG')
+
+    return base64.b64encode(temp_output.getvalue())
+
+
+def get_list_dimensions(_list):
+    """
+    Takes a nested list (or tuple) and returns the size of each dimension,
+    outermost first, measured along the first element of every level.
+
+    :param _list: nested list/tuple; any other value has no dimensions
+    :rtype: list of ints, e.g. [[1, 2, 3], [4, 5, 6]] -> [2, 3]
+    """
+    if not isinstance(_list, (list, tuple)):
+        return []
+    if not _list:
+        # an empty level has size 0 and nothing to descend into
+        return [0]
+    return [len(_list)] + get_list_dimensions(_list[0])
+
+
+def get_element_type(_list, dimens):
+    """
+    Returns the type of the innermost element of a nested list, found by
+    following the first entry once for every dimension listed in ``dimens``.
+    """
+    innermost = _list
+    remaining = len(dimens)
+    while remaining > 0:
+        innermost = innermost[0]
+        remaining -= 1
+    return type(innermost)
+
+
+def process_list_image(_list):
+    """
+    Converts a nested list of pixels into an RGB PIL image.
+
+    Accepts a 2D list of grayscale values or a 3D list of RGB(A) pixels.
+    Floats are scaled from 0 - 1.0 to 0 - 255, other values are used as given;
+    the element type is taken from the very first value only. A fourth
+    (alpha) channel is dropped.
+
+    :param _list: rows of pixels, indexed as _list[row][column]
+    :rtype: PIL Image in "RGB" mode, or the input itself when it is empty
+    """
+    # Check if list is empty
+    if not _list:
+        return _list
+
+    dimens = get_list_dimensions(_list)
+    data_type = get_element_type(_list, dimens)
+    height, width = dimens[0], dimens[1]
+
+    seq_obj = []
+
+    # PIL sizes are (width, height); the list is indexed [row][column], so the
+    # dimensions have to be swapped or non-square images come out scrambled
+    outImage = Image.new("RGB", (width, height))
+    for i in xrange(height):
+        for j in xrange(width):
+            elem = _list[i][j]
+            if len(dimens) >= 3:
+                #RGB(A)
+                if data_type == float:
+                    seq_obj.append((int(elem[0] * 255), int(elem[1] * 255), int(elem[2] * 255)))
+                else:
+                    # a slice of a list pixel is still a list; store every
+                    # pixel as a tuple like the other branches do
+                    seq_obj.append(tuple(elem[0:3]))
+            elif data_type == float:
+                #Grayscale 0 - 1.0f
+                seq_obj.append((int(elem * 255), ) * 3)
+            else:
+                #Grayscale 0 - 255
+                seq_obj.append((elem, ) * 3)
+
+    #Needs to be 0 - 255 in flattened list of (R, G, B)
+    outImage.putdata(data = seq_obj)
+
+    return outImage


 def is_url(data, batch=False):
@@ -152,5 +187,3 @@ def is_url(data, batch=False):
    if not batch and isinstance(data, basestring):
        return True
    return False
-
-
@@ -1,5 +1,3 @@
-numpy>=1.8.0
 six>=1.3.0
-scikit-image>=0.10.1
 requests>=2.2.1
-scipy>=0.14.0
+Pillow>=2.8.1
@@ -8,7 +8,7 @@ except ImportError:

 setup(
    name="IndicoIo",
-    version='0.5.3',
+    version='0.6.0',
    packages=[
        "indicoio",
        "indicoio.text",
@@ -46,14 +46,12 @@ setup(
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    setup_requires=[
-        "numpy >= 1.8.1",
        "six >= 1.3.0",
+        "pillow >= 2.8.1"
    ],
    install_requires=[
        "requests >= 1.2.3",
        "six >= 1.3.0",
-        "numpy >= 1.8.1",
-        "scipy >= 0.14.0",
-        "scikit-image >= 0.10.1",
+        "pillow >= 2.8.1"
    ],
 )
@@ -1,9 +1,8 @@
 import unittest
-import os
+import os, random
+from PIL import Image
 from requests import ConnectionError

-import numpy as np
-import skimage.io
 from nose.plugins.skip import Skip, SkipTest

 from indicoio import config
@@ -42,13 +41,34 @@ class BatchAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, list))

    def test_batch_fer(self):
-        test_data = [np.random.rand(48, 48).tolist()]
+        test_data = [generate_array((48,48))]
        response = batch_fer(test_data, api_key=self.api_key)
        self.assertTrue(isinstance(response, list))
        self.assertTrue(isinstance(response[0], dict))

+    def test_batch_fer_bad_b64(self):
+        """A string that is not valid base64 must make batch_fer raise ValueError."""
+        test_data = ["$bad#FI jeaf9(#0"]
+        self.assertRaises(ValueError, batch_fer, test_data, api_key=self.api_key)
+
+    def test_batch_fer_good_b64(self):
+        """A well-formed base64 string must yield a list whose first entry is a dict."""
+        test_data = ["iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAg5JREFUeNrEV4uNgzAMpegGyAgZgQ3KBscIjMAGx03QEdqbgG5AOwG3AWwAnSCXqLZkuUkwhfYsvaLm5xc7sZ1dIhdtUVjsLZRFTvp+LSaLq8UZ/s+KMSbZCcY5RV9E4QQKHG7QtgeCGv4PFt8WpzkCcztu3TiL0eJgkQmsVFn0MK+LzYkRKEGpG1GDyZdKRdaolhAoJewXnJsO1jtKCFDlChZAFxyJj2PnBRU20KZg7oMlOAENijpi8hwmGkKkZW2GzONtVLA/DxHAhTO2I7MCVBSQ6nGDlEBJDhyVYiUBHXBxzQm0wE4FzPYsGs856dA9SAAP2oENzFYqR6iAFQpHIAUzO/nxnOgthF/lM3w/3U8KYXTwxG/1IgIulF+wPQUXDMl75UoJZIHstRWpaGb8IGYqwBoKlG/lgpzoUEBoj50p8QtVrmHgaaXyC/H3BFC+e9kGFlCB0CtBF7FifQ8D9zjQQHj0pdOM3F1pUBoFKdxtqkMClScHJCSDlSxhHSNRT5K+FaZnHglrz+AGoxZLKNLYH6s3CkkuyJlp58wviZ4PuSCWDXl5hmjZtxcSCGbDUD3gK7EMOZBLCETrgVBF5K0lI5bIZ0wfrYh8NWHIAiNTPHpuTOKpCes1VTFaiNaFdGwPfdmaqlj6LmjJbgoSSfUW74K3voz+/W0oIeB7HWu2s+dfx3N+eLX8CTAAwUmKjK/dHS4AAAAASUVORK5CYII="]
+        response = batch_fer(test_data, api_key=self.api_key)
+        self.assertTrue(isinstance(response, list))
+        self.assertTrue(isinstance(response[0], dict))
+
+    def test_batch_fer_filepath(self):
+        """An image given as a filepath (data/fear.png under DIR) must yield a list of dicts."""
+        test_data = [os.path.normpath(os.path.join(DIR, "data/fear.png"))]
+        response = batch_fer(test_data, api_key=self.api_key)
+        self.assertTrue(isinstance(response, list))
+        self.assertTrue(isinstance(response[0], dict))
+
+    def test_batch_fer_nonexistant_filepath(self):
+        """
+        A path that does not point to a file must make batch_fer raise
+        ValueError. NOTE(review): assumes data/unhappy.png does not exist
+        relative to the working directory -- confirm.
+        """
+        test_data = ["data/unhappy.png"]
+        self.assertRaises(ValueError, batch_fer, test_data, api_key=self.api_key)
+
+
    def test_batch_facial_features(self):
-        test_data = [np.random.rand(48, 48).tolist()]
+        test_data = [generate_array((48,48))]
        response = batch_facial_features(test_data, api_key=self.api_key)
        self.assertTrue(isinstance(response, list))
        self.assertTrue(isinstance(response[0], list))
@@ -68,14 +88,14 @@ class BatchAPIRun(unittest.TestCase):
    # have decided how we are dealing with them

    def test_batch_image_features_greyscale(self):
-        test_data = [np.random.rand(64, 64).tolist()]
+        test_data = [generate_array((48,48))]
        response = batch_image_features(test_data, api_key=self.api_key)
        self.assertTrue(isinstance(response, list))
        self.assertTrue(isinstance(response[0], list))
        self.assertEqual(len(response[0]), 2048)

    def test_batch_image_features_rgb(self):
-        test_data = [np.random.rand(64, 64, 3).tolist()]
+        test_data = [generate_array((48,48))]
        response = batch_image_features(test_data, api_key=self.api_key)
        self.assertTrue(isinstance(response, list))
        self.assertTrue(isinstance(response[0], list))
@@ -99,15 +119,19 @@ class BatchAPIRun(unittest.TestCase):
 class FullAPIRun(unittest.TestCase):

    def load_image(self, relpath, as_grey=False):
-        image_path = os.path.normpath(os.path.join(DIR, relpath))
-        image = skimage.io.imread(image_path, as_grey=True).tolist()
-        return image
+        """
+        Opens the image at ``relpath`` (relative to DIR), converts it to
+        greyscale and returns it as a list of pixel rows. ``as_grey`` is
+        accepted but not consulted; the conversion always happens.
+        """
+        image_path = os.path.normpath(os.path.join(DIR, relpath))
+        grey = Image.open(image_path).convert('L')
+        flat = list(grey.getdata())
+        width, height = grey.size
+        rows = []
+        for row in xrange(height):
+            # getdata() is one flat sequence; cut it into ``width`` sized rows
+            rows.append(flat[row * width:(row + 1) * width])
+        return rows

-    def check_range(self, list, minimum=0.9, maximum=0.1, span=0.5):
-        vector = np.asarray(list)
-        self.assertTrue(vector.max() > maximum)
-        self.assertTrue(vector.min() < minimum)
-        self.assertTrue(np.ptp(vector) > span)
+    def check_range(self, _list, minimum=0.9, maximum=0.1, span=0.5):
+        """
+        Asserts that the flattened values rise above ``maximum``, fall below
+        ``minimum`` and spread over more than ``span``.
+        """
+        values = list(flatten(_list))
+        highest = max(values)
+        lowest = min(values)
+        self.assertTrue(highest > maximum)
+        self.assertTrue(lowest < minimum)
+        self.assertTrue(highest - lowest > span)

    def test_text_tags(self):
        text = "On Monday, president Barack Obama will be..."
@@ -148,7 +172,7 @@ class FullAPIRun(unittest.TestCase):

    def test_good_fer(self):
        fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy'])
-        test_face = np.random.rand(48,48).tolist()
+        test_face = generate_array((48,48))
        response = fer(test_face)

        self.assertTrue(isinstance(response, dict))
@@ -168,14 +192,14 @@ class FullAPIRun(unittest.TestCase):

    def test_bad_fer(self):
        fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy'])
-        test_face = np.random.rand(56,56).tolist()
+        test_face = generate_array((56, 56))
        response = fer(test_face)

        self.assertTrue(isinstance(response, dict))
        self.assertEqual(fer_set, set(response.keys()))

    def test_good_facial_features(self):
-        test_face = np.random.rand(48,48).tolist()
+        test_face = generate_array((48,48))
        response = facial_features(test_face)

        self.assertTrue(isinstance(response, list))
@@ -193,7 +217,7 @@ class FullAPIRun(unittest.TestCase):
    #     self.check_range(response)

    def test_good_image_features_greyscale(self):
-        test_image = np.random.rand(64, 64).tolist()
+        test_image = generate_array((48,48))
        response = image_features(test_image)

        self.assertTrue(isinstance(response, list))
@@ -201,7 +225,7 @@ class FullAPIRun(unittest.TestCase):
        self.check_range(response)

    def test_good_image_features_rgb(self):
-        test_image = np.random.rand(64, 64, 3).tolist()
+        test_image = [[(random.random(),) * 3 for _ in xrange(48)] for _ in xrange(48)]
        response = image_features(test_image)

        self.assertTrue(isinstance(response, list))
@@ -288,6 +312,16 @@ class FullAPIRun(unittest.TestCase):

        config.api_key = temp_api_key

+def flatten(container):
+    """
+    Lazily yields the leaf values of arbitrarily nested lists and tuples,
+    depth first and in their original order.
+    """
+    for item in container:
+        if not isinstance(item, (list, tuple)):
+            yield item
+            continue
+        for leaf in flatten(item):
+            yield leaf
+
+def generate_array(size):
+    """
+    Builds a nested list of random floats: size[1] rows holding size[0]
+    values each, every value drawn from random.random().
+    """
+    rows = []
+    for _ in xrange(size[1]):
+        rows.append([random.random() for _col in xrange(size[0])])
+    return rows


 if __name__ == "__main__":