Merge pull request #109 from IndicoDataSolutions/development

Development
This commit is contained in:
Madison May
2015-08-07 16:33:34 -04:00
12 changed files with 175 additions and 36 deletions
+1
View File
@@ -33,3 +33,4 @@ v0.7.6 Tue Jul 7 -- Add Keywords API
v0.8.0 Fri Jul 10 -- Add Content Filtering API, Named Entities API, Facial Emotion with Localization
v0.8.1 Wed Jul 22 -- Add Sentiment HQ to predict_text API
v0.9.0 Tue Jul 28 -- Deprecate batch function calls in favor of type inference
v0.9.1 Mon Aug 3 -- Add Facial Localization API, image resizing updates
+5 -3
View File
@@ -1,7 +1,7 @@
from functools import wraps, partial
import warnings
Version, version, __version__, VERSION = ('0.9.0',) * 4
Version, version, __version__, VERSION = ('0.9.1',) * 4
JSON_HEADERS = {
'Content-type': 'application/json',
@@ -10,6 +10,7 @@ JSON_HEADERS = {
'version-number': VERSION
}
from indicoio.text.twitter_engagement import twitter_engagement
from indicoio.text.sentiment import political, posneg, sentiment_hq
from indicoio.text.sentiment import posneg as sentiment
from indicoio.text.lang import language
@@ -18,6 +19,7 @@ from indicoio.text.keywords import keywords
from indicoio.text.ner import named_entities
from indicoio.images.fer import fer
from indicoio.images.features import facial_features
from indicoio.images.faciallocalization import facial_localization
from indicoio.images.features import image_features
from indicoio.images.filtering import content_filtering
from indicoio.utils.multi import predict_image, predict_text
@@ -41,9 +43,9 @@ def detect_batch_decorator(f):
kwargs['batch'] = True
return f(*args, **kwargs)
return wrapper
apis = dict((api, globals().get(api)) for api in API_NAMES)
for api in apis:
globals()[api] = partial(detect_batch_decorator(apis[api]))
globals()[api] = detect_batch_decorator(apis[api])
globals()['batch_' + api] = partial(deprecation_decorator(apis[api], api), batch=True)
+2 -1
View File
@@ -52,7 +52,8 @@ TEXT_APIS = [
'language',
'sentiment_hq',
'keywords',
'named_entities'
'named_entities',
'twitter_engagement'
]
IMAGE_APIS = [
+31
View File
@@ -0,0 +1,31 @@
import requests
from indicoio.utils.image import image_preprocess
from indicoio.utils.api import api_handler
def facial_localization(image, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Given an image, returns a list of the faces found within it.

    Every face is reported as a dictionary holding its upper left corner
    and lower right corner. If ``crop`` is True, the cropped face is
    included in the dictionary as well.
    Input should be a numpy ndarray or a filename.

    Example usage:

    .. code-block:: python

       >>> from indicoio import facial_localization
       >>> faces = facial_localization("path/to/face.png")
       >>> len(faces)
       1

    :param image: The image to be analyzed.
    :type image: filepath or ndarray
    :param cloud: Private cloud name, passed through to ``api_handler``.
    :param batch: Passed to ``image_preprocess`` and sent as the ``batch`` url parameter.
    :param api_key: Sent as the ``api_key`` url parameter.
    :param kwargs: Extra options (e.g. ``crop``, ``sensitivity``) forwarded
        unchanged to ``api_handler``.
    :rtype: List of faces (dict) found.
    """
    request_params = {"batch": batch, "api_key": api_key}
    prepared = image_preprocess(image, batch=batch)
    return api_handler(
        prepared,
        cloud=cloud,
        api="faciallocalization",
        url_params=request_params,
        **kwargs
    )
+2 -2
View File
@@ -16,7 +16,7 @@ def facial_features(image, cloud=None, batch=False, api_key=None, **kwargs):
>>> from indicoio import facial_features
>>> import numpy as np
>>> face = np.zeros((48,48)).tolist()
>>> face = np.zeros((48,48))
>>> features = facial_features(face)
>>> len(features)
48
@@ -25,7 +25,7 @@ def facial_features(image, cloud=None, batch=False, api_key=None, **kwargs):
:type image: list of lists
:rtype: List containing feature responses
"""
image = image_preprocess(image, batch=batch)
image = image_preprocess(image, batch=batch, size=(48,48))
url_params = {"batch": batch, "api_key": api_key}
return api_handler(image, cloud=cloud, api="facialfeatures", url_params=url_params, **kwargs)
+5 -1
View File
@@ -27,6 +27,10 @@ def fer(image, cloud=None, batch=False, api_key=None, **kwargs):
:type image: list of lists
:rtype: Dictionary containing emotion probability pairs
"""
image = image_preprocess(image, batch=batch)
image = image_preprocess(image, batch=batch,
size=None if kwargs.get("detect") else (48, 48)
)
url_params = {"batch": batch, "api_key": api_key}
return api_handler(image, cloud=cloud, api="fer", url_params=url_params, **kwargs)
+1 -1
View File
@@ -24,6 +24,6 @@ def content_filtering(image, cloud=None, batch=False, api_key=None, **kwargs):
:type image: list of lists
:rtype: float of nsfwness
"""
image = image_preprocess(image, batch=batch, size=None, min_axis=128)
image = image_preprocess(image, batch=batch, min_axis=128)
url_params = {"batch": batch, "api_key": api_key}
return api_handler(image, cloud=cloud, api="contentfiltering", url_params=url_params, **kwargs)
+22
View File
@@ -0,0 +1,22 @@
from indicoio.utils.api import api_handler
import indicoio.config as config
def twitter_engagement(text, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Given input text, returns an engagement score between 0 and 1.

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
       >>> engagement = indicoio.twitter_engagement(text)

    :param text: The text to be analyzed.
    :type text: str or unicode
    :param cloud: Private cloud name, passed through to ``api_handler``.
    :param batch: Sent as the ``batch`` url parameter.
    :param api_key: Sent as the ``api_key`` url parameter.
    :param kwargs: Extra options forwarded unchanged to ``api_handler``.
    :rtype: Float of engagement between 0 and 1
    """
    request_params = dict(batch=batch, api_key=api_key)
    return api_handler(
        text,
        cloud=cloud,
        api="twitterengagement",
        url_params=request_params,
        **kwargs
    )
+22 -19
View File
@@ -9,36 +9,39 @@ from indicoio import JSON_HEADERS
from indicoio import config
def api_handler(arg, cloud, api, url_params=None, **kwargs):
    """
    Sends finalized request data to ML server and receives response.

    :param arg: Payload stored under the ``data`` key of the JSON body.
    :param cloud: Private cloud name. Falls back to ``config.cloud``; when
        neither is set the public host ``config.PUBLIC_API_HOST`` is used.
    :param api: Name of the API endpoint, used as the url path segment.
    :param url_params: Dict of url options handed to ``create_url``
        (``api_key``, ``batch`` and ``apis`` are read there). Defaults to
        no explicit key and a non-batch request.
    :param kwargs: Additional fields merged into the JSON body.
    :rtype: The value of the ``results`` key of the JSON response.
    :raises IndicoError: If a private cloud answers with status 503, or if
        the response carries no ``results`` entry.
    """
    if url_params is None:
        # The keys must be string literals: a bare ``batch`` here is an
        # undefined name and raised NameError on every default call.
        url_params = {"api_key": None, "batch": False}

    data = {'data': arg}
    data.update(**kwargs)
    json_data = json.dumps(data)

    cloud = cloud or config.cloud
    host = "%s.indico.domains" % cloud if cloud else config.PUBLIC_API_HOST
    url = create_url(host, api, url_params)

    response = requests.post(url, data=json_data, headers=JSON_HEADERS)
    if response.status_code == 503 and cloud is not None:
        raise IndicoError("Private cloud '%s' does not include api '%s'" % (cloud, api))

    json_results = response.json()
    # ``False`` is the "missing" sentinel so that falsy-but-valid results
    # (0, 0.0, empty list) are still returned rather than treated as errors.
    results = json_results.get('results', False)
    if results is False:
        error = json_results.get('error')
        raise IndicoError(error)
    return results
def create_url(host, api, url_params):
    """
    Assemble the request url for an API call.

    The url is built as ``<protocol>//<host>/<api>[/batch]?key=<key>[&apis=a,b]``.

    :param host: Host name the request is sent to.
    :param api: Name of the API endpoint, used as the path segment.
    :param url_params: Dict read for ``api_key`` (falls back to
        ``config.api_key`` when missing or falsy), ``batch`` (adds the
        ``/batch`` segment when truthy) and ``apis`` (joined with commas
        into the ``apis`` query argument when non-empty).
    :rtype: The url as a string.
    """
    key = url_params.get("api_key") or config.api_key
    segments = [
        config.url_protocol + "//%s" % host,
        "/%s" % api,
    ]
    if url_params.get("batch"):
        segments.append("/batch")
    segments.append("?key=%s" % key)
    api_names = url_params.get("apis")
    if api_names:
        segments.append("&apis=%s" % ",".join(api_names))
    return "".join(segments)
+6 -7
View File
@@ -10,7 +10,7 @@ from indicoio.utils.errors import IndicoError, DataStructureException
B64_PATTERN = re.compile("^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{4}|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{2}==)")
def image_preprocess(image, size=(48,48), min_axis=None, batch=False):
def image_preprocess(image, size=None, min_axis=None, batch=False):
"""
Takes an image and prepares it for sending to the api including
resizing and image data/structure standardizing.
@@ -41,7 +41,8 @@ def image_preprocess(image, size=(48,48), min_axis=None, batch=False):
else:
raise IndicoError("Image must be a filepath, base64 encoded string, or a numpy array")
out_image = resize_image(out_image, size, min_axis)
if size or min_axis:
out_image = resize_image(out_image, size, min_axis)
# convert to base64
temp_output = StringIO.StringIO()
@@ -53,8 +54,6 @@ def image_preprocess(image, size=(48,48), min_axis=None, batch=False):
def resize_image(image, size, min_axis):
if size:
image = image.resize(size)
if min_axis:
min_idx, other_idx = (0,1) if image.size[0] < image.size[1] else (1,0)
aspect = image.size[other_idx]/float(image.size[min_idx])
@@ -62,12 +61,13 @@ def resize_image(image, size, min_axis):
warnings.warn(
"An aspect ratio greater than 10:1 is not recommended",
Warning
)
)
size_arr = [0,0]
size_arr[min_idx] = min_axis
size_arr[other_idx] = int(min_axis * aspect)
image = image.resize(tuple(size_arr))
elif size:
image = image.resize(size)
return image
@@ -90,4 +90,3 @@ def get_element_type(_list, dimens):
for _ in xrange(len(dimens)):
elem = elem[0]
return type(elem)
+1 -1
View File
@@ -9,7 +9,7 @@ except ImportError:
setup(
name="IndicoIo",
version="0.9.0",
version="0.9.1",
packages=[
"indicoio",
"indicoio.text",
+77 -1
View File
@@ -1,3 +1,5 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import unittest
import os, random
from PIL import Image
@@ -6,11 +8,12 @@ from requests import ConnectionError
from nose.plugins.skip import Skip, SkipTest
from indicoio import config
from indicoio import political, sentiment, fer, facial_features, content_filtering, language, image_features, text_tags
from indicoio import political, sentiment, fer, facial_features, facial_localization, content_filtering, language, image_features, text_tags
from indicoio import batch_political, batch_sentiment, batch_fer, batch_content_filtering, batch_facial_features
from indicoio import batch_language, batch_image_features, batch_text_tags
from indicoio import keywords, batch_keywords
from indicoio import sentiment_hq, batch_sentiment_hq
from indicoio import twitter_engagement, batch_twitter_engagement
from indicoio import named_entities, batch_named_entities
from indicoio import predict_image, predict_text, batch_predict_image, batch_predict_text
from indicoio.utils.errors import IndicoError
@@ -86,6 +89,13 @@ class BatchAPIRun(unittest.TestCase):
self.assertTrue(isinstance(response, list))
self.assertTrue(isinstance(response[0], dict))
def test_fer_detect(self):
    """fer with detect=True returns a one-element list whose entry has a "location" key."""
    image_path = os.path.normpath(os.path.join(DIR, "data/fear.png"))
    detections = fer(image_path, api_key=self.api_key, detect=True)
    self.assertIsInstance(detections, list)
    self.assertEqual(len(detections), 1)
    first_face = detections[0]
    self.assertIn("location", first_face)
def test_batch_fer_pil_image(self):
test_data = [Image.open(os.path.normpath(os.path.join(DIR, "data/fear.png")))]
response = fer(test_data, api_key=self.api_key)
@@ -242,6 +252,38 @@ class FullAPIRun(unittest.TestCase):
for v in results.values():
assert v >= .1
def test_keywords_language_detect(self):
    """
    keywords with language='detect' on French text returns only words from the text.

    Also checks that top_n=3 yields exactly three keywords and that every
    score returned with threshold=.1 is at least .1.

    The name carries a ``_detect`` suffix so the method is not silently
    replaced by another ``test_keywords_language`` definition in the class.
    """
    text = "La semaine suivante, il remporte sa premiere victoire, dans la descente de Val Gardena en Italie, près de cinq ans après la dernière victoire en Coupe du monde d'un Français dans cette discipline, avec le succès de Nicolas Burtin à Kvitfjell."
    words = set(text.lower().split())

    results = keywords(text, language = 'detect')
    self.assertTrue(set(results.keys()).issubset(words))

    results = keywords(text, top_n=3)
    # Compare by value: ``is`` on an int only works through interning.
    self.assertEqual(len(results), 3)

    results = keywords(text, threshold=.1)
    for v in results.values():
        self.assertGreaterEqual(v, .1)
def test_keywords_language(self):
    """
    keywords with language='French' on French text returns only words from the text.

    Also checks that top_n=3 yields exactly three keywords and that every
    score returned with threshold=.1 is at least .1.
    """
    text = "La semaine suivante, il remporte sa premiere victoire, dans la descente de Val Gardena en Italie, près de cinq ans après la dernière victoire en Coupe du monde d'un Français dans cette discipline, avec le succès de Nicolas Burtin à Kvitfjell."
    words = set(text.lower().split())

    results = keywords(text, language = 'French')
    self.assertTrue(set(results.keys()).issubset(words))

    results = keywords(text, top_n=3)
    # Compare by value: ``is`` on an int only works through interning.
    self.assertEqual(len(results), 3)

    results = keywords(text, threshold=.1)
    for v in results.values():
        self.assertGreaterEqual(v, .1)
def test_named_entities(self):
text = "London Underground's boss Mike Brown warned that the strike ..."
expected_entities = ("London Underground", "Mike Brown")
@@ -289,6 +331,22 @@ class FullAPIRun(unittest.TestCase):
self.assertTrue(isinstance(response, float))
self.assertTrue(response > 0.5)
def test_twitter_engagement(self):
    """twitter_engagement returns a float between 0 and 1 inclusive."""
    score = twitter_engagement("Worst song ever.")
    self.assertIsInstance(score, float)
    self.assertTrue(score <= 1)
    self.assertTrue(score >= 0)
def test_batch_twitter_engagement(self):
    """batch_twitter_engagement returns a list of floats, equal for equal inputs."""
    sample = "Worst song ever."
    scores = batch_twitter_engagement([sample, sample])
    self.assertTrue(isinstance(scores, list))
    first, second = scores[0], scores[1]
    self.assertIsInstance(first, float)
    self.assertEqual(first, second)
def test_good_fer(self):
fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy'])
test_face = os.path.normpath(os.path.join(DIR, "data/48by48.png"))
@@ -331,6 +389,24 @@ class FullAPIRun(unittest.TestCase):
self.assertTrue(isinstance(response, dict))
self.assertEqual(fer_set, set(response.keys()))
def test_facial_localization(self):
    """The first face's top-left corner lies above and left of its bottom-right corner."""
    image_path = os.path.normpath(os.path.join(DIR, "data/happy.png"))
    face = facial_localization(image_path)[0]
    # Check the x coordinate first, then y, one assertion per axis.
    for axis in (0, 1):
        self.assertTrue(face["top_left_corner"][axis] < face["bottom_right_corner"][axis])
def test_facial_localization_sensitivity(self):
    """sensitivity=0.1 finds exactly one face in data/happy.png; sensitivity=0.9 finds more than one."""
    image_path = os.path.normpath(os.path.join(DIR, "data/happy.png"))
    strict_faces = facial_localization(image_path, sensitivity=0.1)
    lenient_faces = facial_localization(image_path, sensitivity=0.9)
    self.assertEqual(len(strict_faces), 1)
    self.assertTrue(len(lenient_faces) > 1)
def test_facial_localization_crop(self):
    """With crop=True the first returned face carries a truthy "image" entry."""
    image_path = os.path.normpath(os.path.join(DIR, "data/happy.png"))
    faces = facial_localization(image_path, crop=True)
    first_face = faces[0]
    self.assertTrue(first_face.get("image"))
def test_safe_content_filtering(self):
test_face = os.path.normpath(os.path.join(DIR, "data/happy.png"))
response = content_filtering(test_face)