FIX: Moved image processing to PIL

This commit is contained in:
Chris Lee
2015-05-24 10:00:16 -04:00
committed by Madison May
parent 159152a200
commit dd09f34a89
9 changed files with 139 additions and 100 deletions
+2 -1
View File
@@ -21,4 +21,5 @@ v0.4.15, Sat Dec 20 -- Bug fix release
v0.5.0, Friday Feb 27 -- Updated to support private cloud, allows for indicorc file to reduce redundant authorization calls, README updates
v0.5.1, Friday Feb 27 -- More README updates, fixed rst formatting issue, added classifiers
v0.5.2, Tue March 7 -- Required API keys, configuration settings
v0.5.3, Wed Apr 15 -- Added scipy to requirements, edited Readme to not break pypi page
v0.5.3, Wed Apr 15 -- Added scipy to requirements, edited Readme to not break pypi page
v0.6.0, Thu May 29 -- Removed numpy / scipy / scikit-image dependencies in favor of Pillow
+6 -1
View File
@@ -1,6 +1,11 @@
from functools import partial
JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain', 'client-lib': 'python'}
JSON_HEADERS = {
'Content-type': 'application/json',
'Accept': 'application/json',
'client-lib': 'python',
'version-number': '0.6.0'
}
Version, version, __version__, VERSION = ('0.5.3',) * 4
+2 -4
View File
@@ -1,8 +1,6 @@
import requests
import numpy as np
from indicoio.utils import image_preprocess, api_handler, is_url
import indicoio.config as config
from indicoio.utils import image_preprocess, api_handler
def facial_features(image, cloud=None, batch=False, api_key=None, **kwargs):
"""
@@ -58,5 +56,5 @@ def image_features(image, cloud=None, batch=False, api_key=None, **kwargs):
:type image: numpy.ndarray
:rtype: List containing features
"""
image = image_preprocess(image, batch=batch)
image = image_preprocess(image, batch=batch, size=(64,64))
return api_handler(image, cloud=cloud, api="imagefeatures", batch=batch, api_key=api_key, **kwargs)
-1
View File
@@ -1,5 +1,4 @@
import requests
import numpy as np
from indicoio.utils import api_handler
import indicoio.config as config
-2
View File
@@ -1,6 +1,4 @@
from indicoio import JSON_HEADERS
from indicoio.utils import api_handler
import indicoio.config as config
def political(text, cloud=None, batch=False, api_key=None, **kwargs):
"""
+92 -63
View File
@@ -1,11 +1,13 @@
import inspect, json, getpass, os
import inspect, json, getpass, os.path, base64, StringIO, re, warnings
import requests
import numpy as np
from skimage.transform import resize
from PIL import Image
from indicoio import JSON_HEADERS
from indicoio import config
# Matches a string made up entirely of base64 quads, the last of which may
# carry "=" padding. The trailing "$" is essential: without it any string
# that merely *starts* with four base64 characters (e.g. a mistyped file
# path such as "missing_file.png") would be accepted as base64 data.
B64_PATTERN = re.compile(r"^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{4}|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{2}==)$")
def api_handler(arg, cloud, api, batch=False, api_key=None, **kwargs):
data = {'data': arg}
data.update(**kwargs)
@@ -80,70 +82,99 @@ class DataStructureException(Exception):
function %s does not accept %s, accepted types are: %s
""" % (self.callback, self.structure, str(self.accepted))
@TypeCheck((list, dict, np.ndarray), 'array')
def normalize(array, distribution=1, norm_range=(0, 1), **kwargs):
    """
    Rescales numeric data so that it spans `norm_range`.

    First arg is the data: a numpy array, a list, or a dictionary whose
    values hold the data (the result is then a dictionary with the same keys).

    Second arg is the distribution applied before normalization: either a
    number, in which case the data is raised to that power, or the name of a
    numpy function (e.g. 'log' or 'exp') that is called on the data together
    with any extra keyword arguments.

    Third arg is the range across which the data is normalized; its first
    and last items are the lower and upper bounds.

    If every value is identical there is no spread to rescale, so every
    element maps to the lower bound instead of dividing by zero.
    """
    # Handling dictionary array input
    # Note: lists and numpy arrays behave the same in this program
    dict_array = isinstance(array, dict)
    if dict_array:
        # list() so that dict views are turned into real sequences before
        # numpy sees them
        keys = list(array.keys())
        array = np.array(list(array.values())).astype('float')
    else:  # Decorator errors if this isn't a list or a numpy array
        array = np.array(array).astype('float')

    # Handling various distributions
    if type(distribution) in [float, int]:
        array = np.power(array, distribution)
    else:
        array = getattr(np, distribution)(array, **kwargs)

    # Normalization: plain array arithmetic, no per-element Python calls
    x_max, x_min = (np.max(array), np.min(array))
    low, high = norm_range[0], norm_range[-1]
    spread = x_max - x_min
    if spread == 0:
        norm_array = np.full(array.shape, float(low))
    else:
        norm_array = low + (array - x_min) * (high - low) / spread

    if dict_array:
        return dict(zip(keys, norm_array))
    return norm_array
def image_preprocess(image, size=(48,48), batch=False):
    """
    Takes an image and prepares it for sending to the api including
    resizing and image data/structure standardizing.

    `image` may be a path to an image file, a base64 encoded string
    (optionally prefixed with a "data:image/...;base64," header), a nested
    list of pixels (deprecated) or a numpy array. With `batch=True`, `image`
    is an iterable of such inputs and a list of results is returned.

    `size` is the (width, height) the image is resized to.

    Returns the base64 encoding of the resized image saved as PNG. A string
    that is already base64 encoded is returned as-is (minus any data-URI
    header), without resizing.

    Raises ValueError for strings that are neither an existing file nor
    base64, for empty lists and for unsupported input types.
    """
    if batch:
        # forward `size` so batch calls resize exactly like single calls
        return [image_preprocess(img, size=size, batch=False) for img in image]

    # basestring (as in is_url) so unicode paths / payloads are accepted too
    if isinstance(image, basestring):
        b64_str = re.sub('^data:image/.+;base64,', '', image)
        if os.path.isfile(image):
            out_image = Image.open(image)
        elif B64_PATTERN.match(b64_str) is not None:
            return b64_str
        else:
            raise ValueError("string provided must be a valid filepath or base64 encoded string")
    elif isinstance(image, list):  # image passed in is a list and not np.array
        warnings.warn(
            "Input as lists of pixels will be deprecated in the next major update",
            DeprecationWarning
        )
        if not image:
            # process_list_image hands an empty list straight back, which
            # would otherwise fail below with an opaque AttributeError
            raise ValueError("image must not be an empty list")
        out_image = process_list_image(image)
    elif type(image).__name__ == "ndarray":  # image is from numpy/scipy
        # checked by name so numpy does not have to be importable here
        if image.dtype.kind == 'u' and image.dtype.itemsize == 1:
            out_image = Image.fromarray(image)
        else:
            # NOTE(review): PIL is only relied upon for 8-bit arrays here;
            # float / wide integer arrays go through the list path, which
            # scales floats from 0 - 1.0 up to 0 - 255. Confirm this matches
            # the value ranges callers actually send.
            out_image = process_list_image(image.tolist())
    else:
        raise ValueError("image must be a filepath, base64 encoded string, or a numpy array")

    # image resizing
    out_image = out_image.resize(size)

    # convert to base64
    temp_output = StringIO.StringIO()
    out_image.save(temp_output, format='PNG')
    return base64.b64encode(temp_output.getvalue())
def get_list_dimensions(_list):
    """
    Takes a nested list (or tuple) and returns the size of each dimension,
    outermost first, measured along the first element of every level.

    A value that is neither a list nor a tuple has no dimensions ([]).
    An empty sequence contributes a dimension of 0 and ends the walk, since
    there is no first element to descend into.
    """
    dimens = []
    while isinstance(_list, (list, tuple)):
        dimens.append(len(_list))
        if not _list:
            break
        _list = _list[0]
    return dimens
def get_element_type(_list, dimens):
    """
    Given a nested list and its dimensions (one entry per nesting level),
    returns the type of the elements in the innermost list, sampled from the
    first element along every dimension.
    """
    elem = _list
    # Only the number of dimensions matters: descend one level per entry.
    for _ in dimens:
        elem = elem[0]
    return type(elem)
def process_list_image(_list):
    """
    Builds an RGB PIL image from a nested list of pixels.

    `_list` is indexed as _list[row][column]. Each pixel is either a single
    grayscale value or a sequence whose first three items are R, G and B.
    Float values are taken to be in 0 - 1.0 and are scaled to 0 - 255;
    any other values are used as they are.

    An empty list is returned unchanged. Raises ValueError when the list is
    not at least two-dimensional or contains an empty dimension.
    """
    # Check if list is empty
    if not _list:
        return _list

    dimens = get_list_dimensions(_list)
    if len(dimens) < 2 or 0 in dimens:
        raise ValueError("list image must have at least two non-empty dimensions")
    height, width = dimens[0], dimens[1]

    is_float = get_element_type(_list, dimens) == float
    has_channels = len(dimens) >= 3

    # Needs to be 0 - 255 in flattened, row-by-row list of (R, G, B) tuples
    pixels = []
    for i in range(height):
        for j in range(width):
            elem = _list[i][j]
            if has_channels:
                # RGB(A): any alpha channel is dropped
                if is_float:
                    pixels.append((int(elem[0] * 255), int(elem[1] * 255), int(elem[2] * 255)))
                else:
                    # tuple() because pixels given as lists would otherwise
                    # be handed to PIL as lists
                    pixels.append(tuple(elem[0:3]))
            elif is_float:
                # Grayscale 0 - 1.0f
                pixels.append((int(elem * 255), ) * 3)
            else:
                # Grayscale 0 - 255
                pixels.append((elem, ) * 3)

    # PIL sizes are (width, height); the pixel data above is emitted one
    # list row at a time, so the row length must be the image width.
    out_image = Image.new("RGB", (width, height))
    out_image.putdata(pixels)
    return out_image
def is_url(data, batch=False):
@@ -152,5 +183,3 @@ def is_url(data, batch=False):
if not batch and isinstance(data, basestring):
return True
return False
+1 -3
View File
@@ -1,5 +1,3 @@
numpy>=1.8.0
six>=1.3.0
scikit-image>=0.10.1
requests>=2.2.1
scipy>=0.14.0
Pillow>=2.8.1
+3 -5
View File
@@ -8,7 +8,7 @@ except ImportError:
setup(
name="IndicoIo",
version='0.5.3',
version='0.6.0',
packages=[
"indicoio",
"indicoio.text",
@@ -46,14 +46,12 @@ setup(
"Topic :: Software Development :: Libraries :: Python Modules",
],
setup_requires=[
"numpy >= 1.8.1",
"six >= 1.3.0",
"pillow >= 2.8.1"
],
install_requires=[
"requests >= 1.2.3",
"six >= 1.3.0",
"numpy >= 1.8.1",
"scipy >= 0.14.0",
"scikit-image >= 0.10.1",
"pillow >= 2.8.1"
],
)
+33 -20
View File
@@ -1,9 +1,8 @@
import unittest
import os
import os, random
from PIL import Image
from requests import ConnectionError
import numpy as np
import skimage.io
from nose.plugins.skip import Skip, SkipTest
from indicoio import config
@@ -42,13 +41,13 @@ class BatchAPIRun(unittest.TestCase):
self.assertTrue(isinstance(response, list))
def test_batch_fer(self):
    """batch_fer answers with a list holding one dict per submitted image."""
    # one random 48x48 grayscale image as nested lists of floats
    test_data = [generate_array((48,48))]
    response = batch_fer(test_data, api_key=self.api_key)
    self.assertTrue(isinstance(response, list))
    self.assertTrue(isinstance(response[0], dict))
def test_batch_facial_features(self):
test_data = [np.random.rand(48, 48).tolist()]
test_data = [generate_array((48,48))]
response = batch_facial_features(test_data, api_key=self.api_key)
self.assertTrue(isinstance(response, list))
self.assertTrue(isinstance(response[0], list))
@@ -68,14 +67,14 @@ class BatchAPIRun(unittest.TestCase):
# have decided how we are dealing with them
def test_batch_image_features_greyscale(self):
    """batch_image_features yields a 2048-long feature list per grayscale image."""
    # one random 48x48 grayscale image as nested lists of floats
    test_data = [generate_array((48,48))]
    response = batch_image_features(test_data, api_key=self.api_key)
    self.assertTrue(isinstance(response, list))
    self.assertTrue(isinstance(response[0], list))
    self.assertEqual(len(response[0]), 2048)
def test_batch_image_features_rgb(self):
test_data = [np.random.rand(64, 64, 3).tolist()]
test_data = [generate_array((48,48))]
response = batch_image_features(test_data, api_key=self.api_key)
self.assertTrue(isinstance(response, list))
self.assertTrue(isinstance(response[0], list))
@@ -99,15 +98,19 @@ class BatchAPIRun(unittest.TestCase):
class FullAPIRun(unittest.TestCase):
def load_image(self, relpath, as_grey=False):
    """
    Loads the image at `relpath` (relative to DIR) and returns its pixels
    as a list of rows, each row being a list of grayscale values.

    NOTE(review): `as_grey` is accepted but not consulted; the image is
    always converted to grayscale ('L' mode).
    """
    image_path = os.path.normpath(os.path.join(DIR, relpath))
    im = Image.open(image_path).convert('L')
    width, height = im.size
    flat = list(im.getdata())
    # getdata() is one flat sequence; cut it into `height` rows of `width`
    return [flat[row * width:(row + 1) * width] for row in range(height)]
def check_range(self, _list, minimum=0.9, maximum=0.1, span=0.5):
    """
    Asserts that the values in the (possibly nested) `_list` are spread out:
    the largest value must exceed `maximum`, the smallest must be below
    `minimum`, and their difference must exceed `span`.
    """
    vector = list(flatten(_list))
    _max = max(vector)
    _min = min(vector)
    self.assertTrue(_max > maximum)
    self.assertTrue(_min < minimum)
    self.assertTrue(_max - _min > span)
def test_text_tags(self):
text = "On Monday, president Barack Obama will be..."
@@ -148,7 +151,7 @@ class FullAPIRun(unittest.TestCase):
def test_good_fer(self):
fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy'])
test_face = np.random.rand(48,48).tolist()
test_face = generate_array((48,48))
response = fer(test_face)
self.assertTrue(isinstance(response, dict))
@@ -168,14 +171,14 @@ class FullAPIRun(unittest.TestCase):
def test_bad_fer(self):
    """fer still answers with the full emotion set for a 56x56 input."""
    fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy'])
    # 56x56 rather than the 48x48 used by the "good" tests
    test_face = generate_array((56, 56))
    response = fer(test_face)

    self.assertTrue(isinstance(response, dict))
    self.assertEqual(fer_set, set(response.keys()))
def test_good_facial_features(self):
test_face = np.random.rand(48,48).tolist()
test_face = generate_array((48,48))
response = facial_features(test_face)
self.assertTrue(isinstance(response, list))
@@ -193,7 +196,7 @@ class FullAPIRun(unittest.TestCase):
# self.check_range(response)
def test_good_image_features_greyscale(self):
test_image = np.random.rand(64, 64).tolist()
test_image = generate_array((48,48))
response = image_features(test_image)
self.assertTrue(isinstance(response, list))
@@ -201,7 +204,7 @@ class FullAPIRun(unittest.TestCase):
self.check_range(response)
def test_good_image_features_rgb(self):
test_image = np.random.rand(64, 64, 3).tolist()
test_image = [[(random.random(),) * 3 for _ in xrange(48)] for _ in xrange(48)]
response = image_features(test_image)
self.assertTrue(isinstance(response, list))
@@ -288,6 +291,16 @@ class FullAPIRun(unittest.TestCase):
config.api_key = temp_api_key
def flatten(container):
    """
    Lazily yields the leaves of `container` in depth-first order, descending
    into any nested lists and tuples; every other item is yielded untouched.
    """
    # Explicit stack of iterators: the top one is the sequence currently
    # being walked, the ones below are suspended parents.
    pending = [iter(container)]
    while pending:
        for item in pending[-1]:
            if isinstance(item, (list, tuple)):
                pending.append(iter(item))
                break
            yield item
        else:
            # top iterator exhausted, resume its parent
            pending.pop()
def generate_array(size):
    """
    Returns a nested list of random floats in [0, 1): `size[1]` rows, each
    holding `size[0]` values.
    """
    # range instead of xrange so the helper runs on Python 2 and 3 alike
    return [[random.random() for _ in range(size[0])] for _ in range(size[1])]
if __name__ == "__main__":