Merge pull request #109 from IndicoDataSolutions/development

Development
2026-06-27 16:10:34 +08:00 · 2015-08-07 16:33:34 -04:00
parent c8a11f901a b201eb6841
commit 18a67c966f
12 changed files with 175 additions and 36 deletions
@@ -33,3 +33,4 @@ v0.7.6 Tue Jul 7 -- Add Keywords API
 v0.8.0 Fri Jul 10 -- Add Content Filtering API, Named Entities API, Facial Emotion with Localization
 v0.8.1 Wed Jul 22 -- Add Sentiment HQ to predict_text API
 v0.9.0 Tue Jul 28 -- Deprecate batch function calls in favor of type inference
+v0.9.1 Mon Aug 3 -- Add Facial Localization API, image resizing updates
@@ -1,7 +1,7 @@
 from functools import wraps, partial
 import warnings

-Version, version, __version__, VERSION = ('0.9.0',) * 4
+Version, version, __version__, VERSION = ('0.9.1',) * 4

 JSON_HEADERS = {
    'Content-type': 'application/json',
@@ -10,6 +10,7 @@ JSON_HEADERS = {
    'version-number': VERSION
 }

+from indicoio.text.twitter_engagement import twitter_engagement
 from indicoio.text.sentiment import political, posneg, sentiment_hq
 from indicoio.text.sentiment import posneg as sentiment
 from indicoio.text.lang import language
@@ -18,6 +19,7 @@ from indicoio.text.keywords import keywords
 from indicoio.text.ner import named_entities
 from indicoio.images.fer import fer
 from indicoio.images.features import facial_features
+from indicoio.images.faciallocalization import facial_localization
 from indicoio.images.features import image_features
 from indicoio.images.filtering import content_filtering
 from indicoio.utils.multi import predict_image, predict_text
@@ -41,9 +43,9 @@ def detect_batch_decorator(f):
            kwargs['batch'] = True
        return f(*args, **kwargs)
    return wrapper
-    
+
 apis = dict((api, globals().get(api)) for api in API_NAMES)

 for api in apis:
-    globals()[api] = partial(detect_batch_decorator(apis[api]))
+    globals()[api] = detect_batch_decorator(apis[api])
    globals()['batch_' + api] = partial(deprecation_decorator(apis[api], api), batch=True)
@@ -52,7 +52,8 @@ TEXT_APIS = [
    'language',
    'sentiment_hq',
    'keywords',
-    'named_entities'
+    'named_entities',
+    'twitter_engagement'
 ]

 IMAGE_APIS = [
@@ -0,0 +1,31 @@
+import requests
+
+from indicoio.utils.image import image_preprocess
+from indicoio.utils.api import api_handler
+
+
+def facial_localization(image, cloud=None, batch=False, api_key=None, **kwargs):
+    """
+    Given an image, returns a list of faces found within the image.
+    Each face is a dictionary holding the upper left corner and the lower
+    right corner of the face (keys "top_left_corner" and "bottom_right_corner").
+    Extra keyword arguments such as ``crop`` or ``sensitivity`` are forwarded
+    to the API unchanged; with ``crop=True`` the cropped face is included in
+    each dictionary under "image".
+    Input should be in a numpy ndarray or a filename.
+
+    Example usage:
+
+    .. code-block:: python
+
+       >>> from indicoio import facial_localization
+       >>> faces = facial_localization("path/to/face.png")
+       >>> len(faces)
+       1
+
+    :param image: The image to be analyzed.
+    :type image: filepath or ndarray
+    :param cloud: Private cloud name handed to api_handler.
+    :param batch: Handed to image_preprocess and sent as the "batch" url parameter.
+    :param api_key: Sent as the "api_key" url parameter.
+    :rtype: List of faces (dict) found.
+    """
+    preprocessed = image_preprocess(image, batch=batch)
+    return api_handler(
+        preprocessed,
+        cloud=cloud,
+        api="faciallocalization",
+        url_params={"batch": batch, "api_key": api_key},
+        **kwargs
+    )
@@ -16,7 +16,7 @@ def facial_features(image, cloud=None, batch=False, api_key=None, **kwargs):

       >>> from indicoio import facial_features
       >>> import numpy as np
-       >>> face = np.zeros((48,48)).tolist()
+       >>> face = np.zeros((48,48))
       >>> features = facial_features(face)
       >>> len(features)
       48
@@ -25,7 +25,7 @@ def facial_features(image, cloud=None, batch=False, api_key=None, **kwargs):
    :type image: list of lists
    :rtype: List containing feature responses
    """
-    image = image_preprocess(image, batch=batch)
+    image = image_preprocess(image, batch=batch, size=(48,48))
    url_params = {"batch": batch, "api_key": api_key}
    return api_handler(image, cloud=cloud, api="facialfeatures", url_params=url_params, **kwargs)

@@ -27,6 +27,10 @@ def fer(image, cloud=None, batch=False, api_key=None, **kwargs):
    :type image: list of lists
    :rtype: Dictionary containing emotion probability pairs
    """
-    image = image_preprocess(image, batch=batch)
+
+    image = image_preprocess(image, batch=batch,
+        size=None if kwargs.get("detect") else (48, 48)
+    )
+    
    url_params = {"batch": batch, "api_key": api_key}
    return api_handler(image, cloud=cloud, api="fer", url_params=url_params, **kwargs)
@@ -24,6 +24,6 @@ def content_filtering(image, cloud=None, batch=False, api_key=None, **kwargs):
    :type image: list of lists
    :rtype: float of nsfwness
    """
-    image = image_preprocess(image, batch=batch, size=None, min_axis=128)
+    image = image_preprocess(image, batch=batch, min_axis=128)
    url_params = {"batch": batch, "api_key": api_key}
    return api_handler(image, cloud=cloud, api="contentfiltering", url_params=url_params, **kwargs)
@@ -0,0 +1,22 @@
+from indicoio.utils.api import api_handler
+import indicoio.config as config
+
+def twitter_engagement(text, cloud=None, batch=False, api_key=None, **kwargs):
+    """
+    Given input text, returns an engagement score between 0 and 1.
+
+    The text is posted as-is to the "twitterengagement" endpoint; extra
+    keyword arguments are forwarded to api_handler unchanged.
+
+    Example usage:
+
+    .. code-block:: python
+
+       >>> import indicoio
+       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
+       >>> engagement = indicoio.twitter_engagement(text)
+
+    :param text: The text to be analyzed.
+    :type text: str or unicode
+    :param cloud: Private cloud name handed to api_handler.
+    :param batch: Sent as the "batch" url parameter.
+    :param api_key: Sent as the "api_key" url parameter.
+    :rtype: Float of engagement between 0 and 1
+    """
+    return api_handler(
+        text,
+        cloud=cloud,
+        api="twitterengagement",
+        url_params={"batch": batch, "api_key": api_key},
+        **kwargs
+    )
@@ -9,36 +9,39 @@ from indicoio import JSON_HEADERS
 from indicoio import config

 def api_handler(arg, cloud, api, url_params=None, **kwargs):
-    if url_params is None:
-        url_params = {"api_key":None, batch:False }
+    """
+    Sends finalized request data to ML server and receives response.
+
+    The payload is {'data': arg} merged with any extra keyword arguments,
+    JSON-encoded and POSTed with JSON_HEADERS to the URL built by create_url.
+
+    :param arg: Value sent under the 'data' key of the request body.
+    :param cloud: Private cloud name; when falsy, config.cloud is used, and
+        if that is falsy too the request goes to config.PUBLIC_API_HOST.
+    :param api: Name of the API endpoint, used as a path segment of the URL.
+    :param url_params: Passed to create_url, which reads its "api_key",
+        "batch" and "apis" entries.
+    :raises IndicoError: On a 503 response while cloud is not None, or when
+        the response JSON has no 'results' entry (or it is false); the
+        response's 'error' value becomes the message in the latter case.
+    :rtype: The 'results' value of the response JSON.
+    """

    data = {'data': arg}
    data.update(**kwargs)
    json_data = json.dumps(data)
-    if not cloud:
-        cloud = config.cloud
-
-    if cloud:
-        host = "%s.indico.domains" % cloud
-
-    else:
-        # default to indico public cloud
-        host = config.PUBLIC_API_HOST
-
-    url = config.url_protocol + "//%s/%s" % (host, api)
-    url = url + "/batch" if url_params.get("batch", False) else url
-    url += "?key=%s" % (url_params.get("api_key", None) or config.api_key)
-    apis = url_params.get("apis", [])
-    if apis:
-        url += "&apis=%s" % ",".join(apis)
+    cloud = cloud or config.cloud
+    host = "%s.indico.domains" % cloud if cloud else config.PUBLIC_API_HOST
+    url = create_url(host, api, url_params)

    response = requests.post(url, data=json_data, headers=JSON_HEADERS)
+
    if response.status_code == 503 and cloud != None:
        raise IndicoError("Private cloud '%s' does not include api '%s'" % (cloud, api))
-
+    
    json_results = response.json()
    results = json_results.get('results', False)
    if results is False:
        error = json_results.get('error')
        raise IndicoError(error)
    return results
+
+
+def create_url(host, api, url_params):
+    """
+    Builds the request URL for one API call.
+
+    The result has the form
+    ``<config.url_protocol>//<host>/<api>[/batch]?key=<api_key>[&apis=a,b,...]``.
+
+    :param host: Host name the request is sent to.
+    :param api: Name of the API endpoint, used as a path segment.
+    :param url_params: Dict (or None) with optional "api_key", "batch" and
+        "apis" entries. A missing or falsy "api_key" falls back to
+        config.api_key; a truthy "batch" appends "/batch"; a non-empty
+        "apis" sequence is comma-joined into the "apis" query parameter.
+    :rtype: str
+    """
+    # api_handler declares url_params=None as its default, so a missing dict
+    # must not blow up on .get() below.
+    url_params = url_params or {}
+
+    api_key = url_params.get("api_key") or config.api_key
+    is_batch = url_params.get("batch")
+    apis = url_params.get("apis")
+
+    host_url_seg = config.url_protocol + "//%s" % host
+    api_url_seg = "/%s" % api
+    batch_url_seg = "/batch" if is_batch else ""
+    key_url_seg = "?key=%s" % api_key
+    multi_url_seg = "&apis=%s" % ",".join(apis) if apis else ""
+
+    return host_url_seg + api_url_seg + batch_url_seg + key_url_seg + multi_url_seg
@@ -10,7 +10,7 @@ from indicoio.utils.errors import IndicoError, DataStructureException

 B64_PATTERN = re.compile("^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{4}|[A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{2}==)")

-def image_preprocess(image, size=(48,48), min_axis=None, batch=False):
+def image_preprocess(image, size=None, min_axis=None, batch=False):
    """
    Takes an image and prepares it for sending to the api including
    resizing and image data/structure standardizing.
@@ -41,7 +41,8 @@ def image_preprocess(image, size=(48,48), min_axis=None, batch=False):
    else:
        raise IndicoError("Image must be a filepath, base64 encoded string, or a numpy array")

-    out_image = resize_image(out_image, size, min_axis)
+    if size or min_axis:
+        out_image = resize_image(out_image, size, min_axis)

    # convert to base64
    temp_output = StringIO.StringIO()
@@ -53,8 +54,6 @@ def image_preprocess(image, size=(48,48), min_axis=None, batch=False):


 def resize_image(image, size, min_axis):
-    if size:
-        image = image.resize(size)
    if min_axis:
        min_idx, other_idx = (0,1) if image.size[0] < image.size[1] else (1,0)
        aspect = image.size[other_idx]/float(image.size[min_idx])
@@ -62,12 +61,13 @@ def resize_image(image, size, min_axis):
            warnings.warn(
                "An aspect ratio greater than 10:1 is not recommended",
                Warning
-            )          
+            )
        size_arr = [0,0]
        size_arr[min_idx] = min_axis
        size_arr[other_idx] = int(min_axis * aspect)
        image = image.resize(tuple(size_arr))
-
+    elif size:
+        image = image.resize(size)
    return image


@@ -90,4 +90,3 @@ def get_element_type(_list, dimens):
    for _ in xrange(len(dimens)):
        elem = elem[0]
    return type(elem)
-
@@ -9,7 +9,7 @@ except ImportError:

 setup(
    name="IndicoIo",
-    version="0.9.0",
+    version="0.9.1",
    packages=[
        "indicoio",
        "indicoio.text",
@@ -1,3 +1,5 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
 import unittest
 import os, random
 from PIL import Image
@@ -6,11 +8,12 @@ from requests import ConnectionError
 from nose.plugins.skip import Skip, SkipTest

 from indicoio import config
-from indicoio import political, sentiment, fer, facial_features, content_filtering, language, image_features, text_tags
+from indicoio import political, sentiment, fer, facial_features, facial_localization, content_filtering, language, image_features, text_tags
 from indicoio import batch_political, batch_sentiment, batch_fer, batch_content_filtering, batch_facial_features
 from indicoio import batch_language, batch_image_features, batch_text_tags
 from indicoio import keywords, batch_keywords
 from indicoio import sentiment_hq, batch_sentiment_hq
+from indicoio import twitter_engagement, batch_twitter_engagement
 from indicoio import named_entities, batch_named_entities
 from indicoio import predict_image, predict_text, batch_predict_image, batch_predict_text
 from indicoio.utils.errors import IndicoError
@@ -86,6 +89,13 @@ class BatchAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, list))
        self.assertTrue(isinstance(response[0], dict))

+    def test_fer_detect(self):
+        # With detect=True the response is expected to be a one-element list
+        # whose entry carries a "location" key.
+        image_path = os.path.normpath(os.path.join(DIR, "data/fear.png"))
+        faces = fer(image_path, api_key=self.api_key, detect=True)
+        self.assertIsInstance(faces, list)
+        self.assertEqual(len(faces), 1)
+        self.assertIn("location", faces[0])
+
    def test_batch_fer_pil_image(self):
        test_data = [Image.open(os.path.normpath(os.path.join(DIR, "data/fear.png")))]
        response = fer(test_data, api_key=self.api_key)
@@ -242,6 +252,38 @@ class FullAPIRun(unittest.TestCase):
        for v in results.values():
            assert v >= .1

+    def test_keywords_language_detect(self):
+        # Named distinctly from test_keywords_language: two methods with the
+        # same name in one class leave only the last definition, so the
+        # language='detect' case would otherwise never run.
+        text = "La semaine suivante, il remporte sa premiere victoire, dans la descente de Val Gardena en Italie, près de cinq ans après la dernière victoire en Coupe du monde d'un Français dans cette discipline, avec le succès de Nicolas Burtin à Kvitfjell."
+        words = set(text.lower().split())
+
+        results = keywords(text, language = 'detect')
+
+        self.assertTrue(set(results.keys()).issubset(words))
+
+        results = keywords(text, top_n=3)
+        # Compare by value; `is` tests object identity, not equality.
+        self.assertEqual(len(results), 3)
+
+        results = keywords(text, threshold=.1)
+        for v in results.values():
+            self.assertTrue(v >= .1)
+
+    def test_keywords_language(self):
+        # Keywords extracted with an explicit language='French' must all be
+        # whitespace-separated tokens of the lower-cased input text.
+        text = "La semaine suivante, il remporte sa premiere victoire, dans la descente de Val Gardena en Italie, près de cinq ans après la dernière victoire en Coupe du monde d'un Français dans cette discipline, avec le succès de Nicolas Burtin à Kvitfjell."
+        words = set(text.lower().split())
+
+        results = keywords(text, language = 'French')
+
+        self.assertTrue(set(results.keys()).issubset(words))
+
+        results = keywords(text, top_n=3)
+        # Compare by value; `is` tests object identity, not equality.
+        self.assertEqual(len(results), 3)
+
+        results = keywords(text, threshold=.1)
+        for v in results.values():
+            self.assertTrue(v >= .1)
+
    def test_named_entities(self):
        text = "London Underground's boss Mike Brown warned that the strike ..."
        expected_entities = ("London Underground", "Mike Brown")
@@ -289,6 +331,22 @@ class FullAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, float))
        self.assertTrue(response > 0.5)

+    def test_twitter_engagement(self):
+        # A single string is expected to score as a float within [0, 1].
+        score = twitter_engagement("Worst song ever.")
+
+        self.assertIsInstance(score, float)
+        self.assertTrue(score <= 1)
+        self.assertTrue(0 <= score)
+
+    def test_batch_twitter_engagement(self):
+        # The same text sent twice in one batch is expected to come back as
+        # a list of two equal float scores.
+        sample = "Worst song ever."
+        scores = batch_twitter_engagement([sample] * 2)
+
+        self.assertTrue(isinstance(scores, list))
+        self.assertIsInstance(scores[0], float)
+        self.assertEqual(scores[0], scores[1])
+
    def test_good_fer(self):
        fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy'])
        test_face = os.path.normpath(os.path.join(DIR, "data/48by48.png"))
@@ -331,6 +389,24 @@ class FullAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, dict))
        self.assertEqual(fer_set, set(response.keys()))

+    def test_facial_localization(self):
+        # Both coordinates of the first face's top_left_corner must be
+        # smaller than those of its bottom_right_corner.
+        image_path = os.path.normpath(os.path.join(DIR, "data/happy.png"))
+        face = facial_localization(image_path)[0]
+        top_left, bottom_right = face["top_left_corner"], face["bottom_right_corner"]
+        self.assertTrue(top_left[0] < bottom_right[0])
+        self.assertTrue(top_left[1] < bottom_right[1])
+
+    def test_facial_localization_sensitivity(self):
+        # sensitivity=0.1 is expected to report exactly one face for this
+        # image, sensitivity=0.9 more than one.
+        image_path = os.path.normpath(os.path.join(DIR, "data/happy.png"))
+        few_faces = facial_localization(image_path, sensitivity=0.1)
+        many_faces = facial_localization(image_path, sensitivity=0.9)
+        self.assertEqual(len(few_faces), 1)
+        self.assertTrue(len(many_faces) > 1)
+
+    def test_facial_localization_crop(self):
+        # With crop=True the first face is expected to carry a truthy "image" entry.
+        image_path = os.path.normpath(os.path.join(DIR, "data/happy.png"))
+        face = facial_localization(image_path, crop=True)[0]
+        self.assertTrue(face.get("image"))
+
    def test_safe_content_filtering(self):
        test_face = os.path.normpath(os.path.join(DIR, "data/happy.png"))
        response = content_filtering(test_face)