Merge pull request #86 from IndicoDataSolutions/development

Development
2026-06-27 16:10:34 +08:00 · 2015-07-10 12:58:12 -04:00
parent 99046adbbc 26bbe35e3b
commit 944e15178f
17 changed files with 200 additions and 41 deletions
@@ -29,3 +29,5 @@ v0.7.2 Thu Jun 11 -- Remove sentiment_hq from text apis by default
 v0.7.3 Wed Jun 17 -- Fixes for handling of specific image types
 v0.7.4 Mon Jun 22 -- Fix for setup.py issues
 v0.7.5 Wed Jul 1 -- Public access to sentimentHQ api
+v0.7.6 Tue Jul 7 -- Add Keywords API
+v0.8.0 Fri Jul 10 -- Add Content Filtering API, Named Entities API, Facial Emotion with Localization
@@ -41,7 +41,7 @@ Supported APIs:
 Examples
 --------
 ```python
->>> from indicoio import political, sentiment, language, text_tags, fer, facial_features, image_features
+>>> from indicoio import political, sentiment, language, text_tags, keywords, fer, facial_features, image_features

 >>> indicoio.config.api_key = "YOUR_API_KEY"

@@ -74,6 +74,11 @@ Examples

 >>> language('Quis custodiet ipsos custodes')
 {u'Swedish': 0.00033330636691921914, u'Lithuanian': 0.007328693814717631, u'Vietnamese': 0.0002686116137658802, u'Romanian': 8.133913804076592e-06, ...}
+
+>>> keywords("Facebook blog posts about Android tech make better journalism than most news outlets.", top_n=3)
+{u'android': 0.10602030910588661,
+ u'journalism': 0.13466866170166855,
+ u'outlets': 0.13930405357808642}
 ```

 Batch API
@@ -49,7 +49,7 @@ Examples

 .. code:: python

-    >>> from indicoio import political, sentiment, language, text_tags, fer, facial_features, image_features
+    >>> from indicoio import political, sentiment, language, text_tags, keywords, fer, facial_features, image_features

    >>> indicoio.config.api_key = "YOUR_API_KEY"

@@ -72,7 +72,7 @@ Examples

    >>> import numpy as np

-    >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()
+    >>> test_face = np.linspace(0,50,48*48).reshape(48,48)

    >>> fer(test_face)
    {u'Angry': 0.08843749137458341, u'Sad': 0.39091163159204684, u'Neutral': 0.1947947999669361, u'Surprise': 0.03443785859010413, u'Fear': 0.17574534848440568, u'Happy': 0.11567286999192382}
@@ -83,6 +83,11 @@ Examples
    >>> language('Quis custodiet ipsos custodes')
    {u'Swedish': 0.00033330636691921914, u'Lithuanian': 0.007328693814717631, u'Vietnamese': 0.0002686116137658802, u'Romanian': 8.133913804076592e-06, ...}

+    >>> keywords("Facebook blog posts about Android tech make better journalism than most news outlets.", top_n=3)
+    {u'android': 0.10602030910588661,
+     u'journalism': 0.13466866170166855,
+     u'outlets': 0.13930405357808642}
+
 Batch API
 ---------

@@ -131,3 +136,4 @@ Accepted image API names: ``fer, facial_features, image_features``

    >>> batch_predict_image([test_face, test_face], apis=["fer", "facial_features"])
    {'facial_features': [[0.0, -0.026176479280200796, 0.20707644777495776, ...], [0.0, -0.026176479280200796, 0.20707644777495776, ...]], 'fer': [{u'Angry': 0.08877494466353497, u'Sad': 0.3933999409104264, u'Neutral': 0.1910612654566151, u'Surprise': 0.0346146405941845, u'Fear': 0.17682159820518667, u'Happy': 0.11532761017005204}, { u'Angry': 0.08877494466353497, u'Sad': 0.3933999409104264, u'Neutral': 0.1910612654566151, u'Surprise': 0.0346146405941845, u'Fear': 0.17682159820518667, u'Happy': 0.11532761017005204}]}
+
@@ -1,6 +1,6 @@
 from functools import partial

-Version, version, __version__, VERSION = ('0.7.5',) * 4
+Version, version, __version__, VERSION = ('0.8.0',) * 4

 JSON_HEADERS = {
    'Content-type': 'application/json',
@@ -13,13 +13,17 @@ from indicoio.text.sentiment import political, posneg, sentiment_hq
 from indicoio.text.sentiment import posneg as sentiment
 from indicoio.text.lang import language
 from indicoio.text.tagging import text_tags
+from indicoio.text.keywords import keywords
+from indicoio.text.ner import named_entities
 from indicoio.images.fer import fer
 from indicoio.images.features import facial_features
 from indicoio.images.features import image_features
+from indicoio.images.filtering import content_filtering
 from indicoio.utils.multi import predict_image, predict_text

 from indicoio.config import API_NAMES

+
 apis = dict((api, globals().get(api)) for api in API_NAMES)

 for api in apis:
@@ -50,13 +50,16 @@ TEXT_APIS = [
    'political',
    'sentiment',
    'language',
-    'sentiment_hq'
+    'sentiment_hq',
+    'keywords',
+    'named_entities'
 ]

 IMAGE_APIS = [
    'fer',
    'facial_features',
-    'image_features'
+    'image_features',
+    'content_filtering'
 ]

 API_NAMES = IMAGE_APIS + TEXT_APIS + ["predict_text", "predict_image"]
@@ -26,7 +26,8 @@ def facial_features(image, cloud=None, batch=False, api_key=None, **kwargs):
    :rtype: List containing feature responses
    """
    image = image_preprocess(image, batch=batch)
-    return api_handler(image, cloud=cloud, api="facialfeatures", url_params={"batch":batch, "api_key":api_key}, **kwargs)
+    url_params = {"batch": batch, "api_key": api_key}
+    return api_handler(image, cloud=cloud, api="facialfeatures", url_params=url_params, **kwargs)

 def image_features(image, cloud=None, batch=False, api_key=None, **kwargs):
    """
@@ -59,4 +60,5 @@ def image_features(image, cloud=None, batch=False, api_key=None, **kwargs):
    :rtype: List containing features
    """
    image = image_preprocess(image, batch=batch, size=(64,64))
-    return api_handler(image, cloud=cloud, api="imagefeatures", url_params={"batch":batch, "api_key":api_key}, **kwargs)
+    url_params = {"batch": batch, "api_key": api_key}
+    return api_handler(image, cloud=cloud, api="imagefeatures", url_params=url_params, **kwargs)
@@ -28,4 +28,5 @@ def fer(image, cloud=None, batch=False, api_key=None, **kwargs):
    :rtype: Dictionary containing emotion probability pairs
    """
    image = image_preprocess(image, batch=batch)
-    return api_handler(image, cloud=cloud, api="fer", url_params={"batch":batch, "api_key":api_key}, **kwargs)
+    url_params = {"batch": batch, "api_key": api_key}
+    return api_handler(image, cloud=cloud, api="fer", url_params=url_params, **kwargs)
@@ -0,0 +1,29 @@
+import requests
+
+from indicoio.utils.api import api_handler
+from indicoio.utils.image import image_preprocess
+import indicoio.config as config
+
+def content_filtering(image, cloud=None, batch=False, api_key=None, **kwargs):
+    """
+    Given a grayscale input image, returns how obscene the image is.
+    Input should be in a list of list format.
+
+    Example usage:
+
+    .. code-block:: python
+
+       >>> from indicoio import content_filtering
+       >>> import numpy as np
+       >>> face = np.zeros((48,48)).tolist()
+       >>> res = content_filtering(face)
+       >>> res
+       0.056
+
+    :param image: The image to be analyzed.
+    :type image: list of lists
+    :rtype: float of nsfwness
+    """
+    image = image_preprocess(image, batch=batch, size=None)  # size=None: image_preprocess skips its resize step
+    url_params = {"batch": batch, "api_key": api_key}
+    return api_handler(image, cloud=cloud, api="contentfiltering", url_params=url_params, **kwargs)
@@ -0,0 +1,24 @@
+from indicoio.utils.api import api_handler
+import indicoio.config as config
+
+def keywords(text, cloud=None, batch=False, api_key=None, **kwargs):
+    """
+    Given input text, returns series of keywords and associated scores
+
+    Example usage:
+
+    .. code-block:: python
+
+       >>> import indicoio
+       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
+       >>> keywords = indicoio.keywords(text, top_n=3)
+       >>> print "The keywords are: "+str(keywords.keys())
+       The keywords are: [u'delightful', u'highs', u'skies']
+
+    :param text: The text to be analyzed.
+    :type text: str or unicode
+    :param kwargs: Extra options forwarded to ``api_handler`` (e.g. ``top_n`` as in the example above).
+    :rtype: Dictionary of keyword score pairs
+    """
+    url_params = {'batch': batch, 'api_key': api_key}
+    return api_handler(text, cloud=cloud, api="keywords", url_params=url_params, **kwargs)
@@ -23,5 +23,5 @@ def language(text, cloud=None, batch=False, api_key=None, **kwargs):
    :type text: str or unicode
    :rtype: Dictionary of language probability pairs
    """
-
-    return api_handler(text, cloud=cloud, api="language", url_params={"batch":batch, "api_key":api_key}, **kwargs)
+    url_params = {"batch": batch, "api_key": api_key}
+    return api_handler(text, cloud=cloud, api="language", url_params=url_params, **kwargs)
@@ -0,0 +1,30 @@
+from indicoio.utils.api import api_handler
+import indicoio.config as config
+
+def named_entities(text, cloud=None, batch=False, api_key=None, **kwargs):
+    """
+    Given input text, returns named entities (proper nouns) found in the text
+
+    Example usage:
+
+    .. code-block:: python
+
+       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
+       >>> indicoio.named_entities(text)
+       {u'London Underground': {u'categories': {u'location': 0.583755654607989,
+          u'organization': 0.07460487821791033,
+          u'person': 0.07304850776658672,
+          u'unknown': 0.2685909594075139},
+         u'confidence': 0.846188063604044},
+        u'Mike Brown': {u'categories': {u'location': 0.025813884950623898,
+          u'organization': 0.06661470013014613,
+          u'person': 0.08723850624560824,
+          u'unknown': 0.8203329086736217},
+         u'confidence': 0.8951793008234012}}
+
+    :param text: The text to be analyzed.
+    :type text: str or unicode
+    :rtype: Dictionary mapping each entity to its ``categories`` scores and ``confidence``
+    """
+    url_params = {"batch": batch, "api_key": api_key}
+    return api_handler(text, cloud=cloud, api="namedentities", url_params=url_params, **kwargs)
@@ -25,8 +25,8 @@ def political(text, cloud=None, batch=False, api_key=None, **kwargs):
    :type text: str or unicode
    :rtype: Dictionary of party probability pairs
    """
-
-    return api_handler(text, cloud=cloud, api="political", url_params={"batch":batch, "api_key":api_key}, **kwargs)
+    url_params = {"batch": batch, "api_key": api_key}
+    return api_handler(text, cloud=cloud, api="political", url_params=url_params, **kwargs)

 def posneg(text, cloud=None, batch=False, api_key=None, **kwargs):
    """
@@ -48,8 +48,8 @@ def posneg(text, cloud=None, batch=False, api_key=None, **kwargs):
    :type text: str or unicode
    :rtype: Float
    """
-
-    return api_handler(text, cloud=cloud, api="sentiment", url_params={"batch":batch, "api_key":api_key}, **kwargs)
+    url_params = {"batch": batch, "api_key": api_key}
+    return api_handler(text, cloud=cloud, api="sentiment", url_params=url_params, **kwargs)

 def sentiment_hq(text, cloud=None, batch=False, api_key=None, **kwargs):
    """
@@ -71,5 +71,5 @@ def sentiment_hq(text, cloud=None, batch=False, api_key=None, **kwargs):
    :type text: str or unicode
    :rtype: Float
    """
-
-    return api_handler(text, cloud=cloud, api="sentimenthq", url_params={"batch":batch, "api_key":api_key}, **kwargs)
+    url_params = {"batch": batch, "api_key": api_key}
+    return api_handler(text, cloud=cloud, api="sentimenthq", url_params=url_params, **kwargs)
@@ -22,5 +22,5 @@ def text_tags(text, cloud=None, batch=False, api_key=None, **kwargs):
    :type text: str or unicode
    :rtype: Dictionary of class probability pairs
    """
-
-    return api_handler(text, cloud=cloud, api="texttags", url_params={"batch":batch, "api_key":api_key}, **kwargs)
+    url_params = {"batch": batch, "api_key": api_key}
+    return api_handler(text, cloud=cloud, api="texttags", url_params=url_params, **kwargs)
@@ -48,7 +48,8 @@ def image_preprocess(image, size=(48,48), batch=False):
        raise IndicoError("Image must be a filepath, base64 encoded string, or a numpy array")

    # image resizing
-    out_image = out_image.resize(size)
+    if size:
+        out_image = out_image.resize(size)

    # convert to base64
    temp_output = StringIO.StringIO()
@@ -137,7 +137,7 @@ def predict_image(image, apis=IMAGE_APIS, **kwargs):

 def parsed_response(api, response):
    result = response.get('results', False)
-    if result:
+    if result != False:
        return result
    raise IndicoError(
        "Sorry, the %s API returned an unexpected response.\n\t%s"
@@ -9,7 +9,7 @@ except ImportError:

 setup(
    name="IndicoIo",
-    version="0.7.5",
+    version="0.8.0",
    packages=[
        "indicoio",
        "indicoio.text",
@@ -6,10 +6,12 @@ from requests import ConnectionError
 from nose.plugins.skip import Skip, SkipTest

 from indicoio import config
-from indicoio import political, sentiment, fer, facial_features, language, image_features, text_tags
-from indicoio import batch_political, batch_sentiment, batch_fer, batch_facial_features
+from indicoio import political, sentiment, fer, facial_features, content_filtering, language, image_features, text_tags
+from indicoio import batch_political, batch_sentiment, batch_fer, batch_content_filtering, batch_facial_features
 from indicoio import batch_language, batch_image_features, batch_text_tags
+from indicoio import keywords, batch_keywords
 from indicoio import sentiment_hq, batch_sentiment_hq
+from indicoio import named_entities, batch_named_entities
 from indicoio import predict_image, predict_text, batch_predict_image, batch_predict_text
 from indicoio.utils.errors import IndicoError

@@ -32,18 +34,24 @@ class BatchAPIRun(unittest.TestCase):
        response = batch_text_tags(test_data, api_key=self.api_key)
        self.assertTrue(isinstance(response, list))

+    def test_batch_keywords(self):
+        test_data = ["A working api is key to the success of our young company"]
+        words = [set(text.lower().split()) for text in test_data]  # lowercased tokens of each input
+        response = batch_keywords(test_data, api_key=self.api_key)
+        self.assertTrue(isinstance(response, list))
+        self.assertTrue(set(response[0].keys()).issubset(words[0]))  # every keyword must be a token of the input
+
    def test_batch_posneg(self):
        test_data = ['Worst song ever', 'Best song ever']
        response = batch_sentiment(test_data, api_key=self.api_key)
        self.assertTrue(isinstance(response, list))
        self.assertTrue(response[0] < 0.5)

-    # TODO: uncomment once the high quality sentiment API is publicly released
-    # def test_batch_sentiment_hq(self):
-    #     test_data = ['Worst song ever', 'Best song ever']
-    #     response = batch_sentiment_hq(test_data, api_key=self.api_key)
-    #     self.assertTrue(isinstance(response, list))
-    #     self.assertTrue(response[0] < 0.5)
+    def test_batch_sentiment_hq(self):
+        test_data = ['Worst song ever', 'Best song ever']
+        response = batch_sentiment_hq(test_data, api_key=self.api_key)
+        self.assertTrue(isinstance(response, list))
+        self.assertTrue(response[0] < 0.5)  # first input is the negative sentence, expected below 0.5

    def test_batch_political(self):
        test_data = ["Guns don't kill people, people kill people."]
@@ -56,6 +64,12 @@ class BatchAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, list))
        self.assertTrue(isinstance(response[0], dict))

+    def test_batch_content_filtering(self):
+        test_data = [generate_array((48,48))]
+        response = batch_content_filtering(test_data, api_key=self.api_key)
+        self.assertTrue(isinstance(response, list))
+        self.assertTrue(isinstance(response[0], float))  # score for the single input image should be a float
+
    def test_batch_fer_bad_b64(self):
        test_data = ["$bad#FI jeaf9(#0"]
        self.assertRaises(IndicoError, batch_fer, test_data, api_key=self.api_key)
@@ -82,7 +96,6 @@ class BatchAPIRun(unittest.TestCase):
        test_data = ["data/unhappy.png"]
        self.assertRaises(IndicoError, batch_fer, test_data, api_key=self.api_key)

-
    def test_batch_facial_features(self):
        test_data = [generate_array((48,48))]
        response = batch_facial_features(test_data, api_key=self.api_key)
@@ -123,6 +136,15 @@ class BatchAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, list))
        self.assertTrue(response[0]['English'] > 0.25)

+    def test_batch_named_entities(self):
+        """Each expected entity is in the first batch result with only the known keys."""
+        batch = ["London Underground's boss Mike Brown warned that the strike ..."]
+        expected_keys = set(["categories", "confidence"])
+        entities = batch_named_entities(batch, api_key=self.api_key)[0]
+        for entity in ("London Underground", "Mike Brown"):
+            assert entity in entities  # check the response, not the expected tuple itself
+            assert not (set(entities[entity]) - expected_keys)
+
    def test_batch_multi_api_image(self):
        test_data = [generate_array((48,48)), generate_int_array((48,48))]
        response = batch_predict_image(test_data, apis=config.IMAGE_APIS, api_key=self.api_key)
@@ -202,6 +224,32 @@ class FullAPIRun(unittest.TestCase):
        for v in results.values():
            assert v >= 0.1

+    def test_keywords(self):
+        """Check keyword ranking and the ``top_n`` / ``threshold`` options."""
+        text = "A working api is key to the success of our young company"
+        words = set(text.lower().split())
+        results = keywords(text)
+        sorted_results = sorted(results.keys(), key=lambda x:results.get(x), reverse=True)
+        assert 'api' in sorted_results[:3]
+        # every returned keyword must be a token of the input text
+        self.assertTrue(set(results.keys()).issubset(words))
+
+        results = keywords(text, top_n=3)
+        assert len(results) == 3  # equality, not identity: ``is`` only passes by int caching
+
+        results = keywords(text, threshold=.1)
+        for v in results.values():
+            assert v >= .1
+
+    def test_named_entities(self):
+        """Each expected entity is in the response with only the known keys."""
+        text = "London Underground's boss Mike Brown warned that the strike ..."
+        expected_keys = set(["categories", "confidence"])
+        entities = named_entities(text)
+        for entity in ("London Underground", "Mike Brown"):
+            assert entity in entities  # check the response, not the expected tuple itself
+            assert not (set(entities[entity]) - expected_keys)
+
    def test_political(self):
        political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
        test_string = "Guns don't kill people, people kill people."
@@ -228,18 +276,17 @@ class FullAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, float))
        self.assertTrue(response > 0.5)

-    # TODO: uncomment when the high quality sentiment API is publicly released
-    # def test_sentiment_hq(self):
-    #     test_string = "Worst song ever."
-    #     response = sentiment_hq(test_string)
+    def test_sentiment_hq(self):
+        test_string = "Worst song ever."
+        response = sentiment_hq(test_string)

-    #     self.assertTrue(isinstance(response, float))
-    #     self.assertTrue(response < 0.5)
+        self.assertTrue(isinstance(response, float))
+        self.assertTrue(response < 0.5)

-    #     test_string = "Best song ever."
-    #     response = sentiment_hq(test_string)
-    #     self.assertTrue(isinstance(response, float))
-    #     self.assertTrue(response > 0.5)
+        test_string = "Best song ever."
+        response = sentiment_hq(test_string)
+        self.assertTrue(isinstance(response, float))
+        self.assertTrue(response > 0.5)

    def test_good_fer(self):
        fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy'])
@@ -283,6 +330,11 @@ class FullAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, dict))
        self.assertEqual(fer_set, set(response.keys()))

+    def test_safe_content_filtering(self):
+        test_face = self.load_image("data/happy.png", as_grey=True)
+        response = content_filtering(test_face)
+        self.assertTrue(response < 0.5)  # the happy-face fixture is expected to score as safe (below 0.5)
+
    def test_good_facial_features(self):
        test_face = generate_array((48,48))
        response = facial_features(test_face)