Merge pull request #86 from IndicoDataSolutions/development

Development
This commit is contained in:
Madison May
2015-07-10 12:58:12 -04:00
17 changed files with 200 additions and 41 deletions
+2
View File
@@ -29,3 +29,5 @@ v0.7.2 Thu Jun 11 -- Remove sentiment_hq from text apis by default
v0.7.3 Wed Jun 17 -- Fixes for handling of specific image types
v0.7.4 Mon Jun 22 -- Fix for setup.py issues
v0.7.5 Wed Jul 1 -- Public access to sentimentHQ api
v0.7.6 Tue Jul 7 -- Add Keywords API
v0.8.0 Fri Jul 10 -- Add Content Filtering API, Named Entities API, Facial Emotion with Localization
+6 -1
View File
@@ -41,7 +41,7 @@ Supported APIs:
Examples
--------
```python
>>> from indicoio import political, sentiment, language, text_tags, fer, facial_features, image_features
>>> from indicoio import political, sentiment, language, text_tags, keywords, fer, facial_features, image_features
>>> indicoio.config.api_key = "YOUR_API_KEY"
@@ -74,6 +74,11 @@ Examples
>>> language('Quis custodiet ipsos custodes')
{u'Swedish': 0.00033330636691921914, u'Lithuanian': 0.007328693814717631, u'Vietnamese': 0.0002686116137658802, u'Romanian': 8.133913804076592e-06, ...}
>>> keywords("Facebook blog posts about Android tech make better journalism than most news outlets.", top_n=3)
{u'android': 0.10602030910588661,
u'journalism': 0.13466866170166855,
u'outlets': 0.13930405357808642}
```
Batch API
+8 -2
View File
@@ -49,7 +49,7 @@ Examples
.. code:: python
>>> from indicoio import political, sentiment, language, text_tags, fer, facial_features, image_features
>>> from indicoio import political, sentiment, language, text_tags, keywords, fer, facial_features, image_features
>>> indicoio.config.api_key = "YOUR_API_KEY"
@@ -72,7 +72,7 @@ Examples
>>> import numpy as np
>>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()
>>> test_face = np.linspace(0,50,48*48).reshape(48,48)
>>> fer(test_face)
{u'Angry': 0.08843749137458341, u'Sad': 0.39091163159204684, u'Neutral': 0.1947947999669361, u'Surprise': 0.03443785859010413, u'Fear': 0.17574534848440568, u'Happy': 0.11567286999192382}
@@ -83,6 +83,11 @@ Examples
>>> language('Quis custodiet ipsos custodes')
{u'Swedish': 0.00033330636691921914, u'Lithuanian': 0.007328693814717631, u'Vietnamese': 0.0002686116137658802, u'Romanian': 8.133913804076592e-06, ...}
>>> keywords("Facebook blog posts about Android tech make better journalism than most news outlets.", top_n=3)
{u'android': 0.10602030910588661,
u'journalism': 0.13466866170166855,
u'outlets': 0.13930405357808642}
Batch API
---------
@@ -131,3 +136,4 @@ Accepted image API names: ``fer, facial_features, image_features``
>>> batch_predict_image([test_face, test_face], apis=["fer", "facial_features"])
{'facial_features': [[0.0, -0.026176479280200796, 0.20707644777495776, ...], [0.0, -0.026176479280200796, 0.20707644777495776, ...]], 'fer': [{u'Angry': 0.08877494466353497, u'Sad': 0.3933999409104264, u'Neutral': 0.1910612654566151, u'Surprise': 0.0346146405941845, u'Fear': 0.17682159820518667, u'Happy': 0.11532761017005204}, { u'Angry': 0.08877494466353497, u'Sad': 0.3933999409104264, u'Neutral': 0.1910612654566151, u'Surprise': 0.0346146405941845, u'Fear': 0.17682159820518667, u'Happy': 0.11532761017005204}]}
+5 -1
View File
@@ -1,6 +1,6 @@
from functools import partial
Version, version, __version__, VERSION = ('0.7.5',) * 4
Version, version, __version__, VERSION = ('0.8.0',) * 4
JSON_HEADERS = {
'Content-type': 'application/json',
@@ -13,13 +13,17 @@ from indicoio.text.sentiment import political, posneg, sentiment_hq
from indicoio.text.sentiment import posneg as sentiment
from indicoio.text.lang import language
from indicoio.text.tagging import text_tags
from indicoio.text.keywords import keywords
from indicoio.text.ner import named_entities
from indicoio.images.fer import fer
from indicoio.images.features import facial_features
from indicoio.images.features import image_features
from indicoio.images.filtering import content_filtering
from indicoio.utils.multi import predict_image, predict_text
from indicoio.config import API_NAMES
apis = dict((api, globals().get(api)) for api in API_NAMES)
for api in apis:
+5 -2
View File
@@ -50,13 +50,16 @@ TEXT_APIS = [
'political',
'sentiment',
'language',
'sentiment_hq'
'sentiment_hq',
'keywords',
'named_entities'
]
IMAGE_APIS = [
'fer',
'facial_features',
'image_features'
'image_features',
'content_filtering'
]
API_NAMES = IMAGE_APIS + TEXT_APIS + ["predict_text", "predict_image"]
+4 -2
View File
@@ -26,7 +26,8 @@ def facial_features(image, cloud=None, batch=False, api_key=None, **kwargs):
:rtype: List containing feature responses
"""
image = image_preprocess(image, batch=batch)
return api_handler(image, cloud=cloud, api="facialfeatures", url_params={"batch":batch, "api_key":api_key}, **kwargs)
url_params = {"batch": batch, "api_key": api_key}
return api_handler(image, cloud=cloud, api="facialfeatures", url_params=url_params, **kwargs)
def image_features(image, cloud=None, batch=False, api_key=None, **kwargs):
"""
@@ -59,4 +60,5 @@ def image_features(image, cloud=None, batch=False, api_key=None, **kwargs):
:rtype: List containing features
"""
image = image_preprocess(image, batch=batch, size=(64,64))
return api_handler(image, cloud=cloud, api="imagefeatures", url_params={"batch":batch, "api_key":api_key}, **kwargs)
url_params = {"batch": batch, "api_key": api_key}
return api_handler(image, cloud=cloud, api="imagefeatures", url_params=url_params, **kwargs)
+2 -1
View File
@@ -28,4 +28,5 @@ def fer(image, cloud=None, batch=False, api_key=None, **kwargs):
:rtype: Dictionary containing emotion probability pairs
"""
image = image_preprocess(image, batch=batch)
return api_handler(image, cloud=cloud, api="fer", url_params={"batch":batch, "api_key":api_key}, **kwargs)
url_params = {"batch": batch, "api_key": api_key}
return api_handler(image, cloud=cloud, api="fer", url_params=url_params, **kwargs)
+29
View File
@@ -0,0 +1,29 @@
import requests
from indicoio.utils.api import api_handler
from indicoio.utils.image import image_preprocess
import indicoio.config as config
def content_filtering(image, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Given a grayscale input image, returns how obscene the image is.
    Input should be in a list of list format.

    Example usage:

    .. code-block:: python

       >>> from indicoio import content_filtering
       >>> import numpy as np
       >>> face = np.zeros((48,48)).tolist()
       >>> res = content_filtering(face)
       >>> res
       0.056

    :param image: The image to be analyzed.
    :type image: list of lists
    :param cloud: Forwarded unchanged to ``api_handler``.
    :param batch: Forwarded to ``image_preprocess`` and sent as the
        ``batch`` url parameter.
    :param api_key: Sent as the ``api_key`` url parameter.
    :rtype: float of nsfwness
    """
    # size=None asks image_preprocess not to resize the image.
    encoded = image_preprocess(image, batch=batch, size=None)
    return api_handler(
        encoded,
        cloud=cloud,
        api="contentfiltering",
        url_params=dict(batch=batch, api_key=api_key),
        **kwargs
    )
+24
View File
@@ -0,0 +1,24 @@
from indicoio.utils.api import api_handler
import indicoio.config as config
def keywords(text, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Given input text, returns series of keywords and associated scores

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
       >>> keywords = indicoio.keywords(text, top_n=3)
       >>> print("The keywords are: " + str(sorted(keywords.keys())))
       The keywords are: [u'delightful', u'highs', u'skies']

    :param text: The text to be analyzed.
    :type text: str or unicode
    :param cloud: Forwarded unchanged to ``api_handler``.
    :param batch: Sent as the ``batch`` url parameter.
    :param api_key: Sent as the ``api_key`` url parameter.
    :param kwargs: Any extra keyword arguments (e.g. ``top_n``) are passed
        straight through to ``api_handler``.
    :rtype: Dictionary of feature score pairs
    """
    return api_handler(
        text,
        cloud=cloud,
        api="keywords",
        url_params=dict(batch=batch, api_key=api_key),
        **kwargs
    )
+2 -2
View File
@@ -23,5 +23,5 @@ def language(text, cloud=None, batch=False, api_key=None, **kwargs):
:type text: str or unicode
:rtype: Dictionary of language probability pairs
"""
return api_handler(text, cloud=cloud, api="language", url_params={"batch":batch, "api_key":api_key}, **kwargs)
url_params = {"batch": batch, "api_key": api_key}
return api_handler(text, cloud=cloud, api="language", url_params=url_params, **kwargs)
+30
View File
@@ -0,0 +1,30 @@
from indicoio.utils.api import api_handler
import indicoio.config as config
def named_entities(text, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Given input text, returns named entities (proper nouns) found in the text

    Example usage:

    .. code-block:: python

       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
       >>> entities = indicoio.named_entities(text)
       >>> entities
       {u'London Underground': {u'categories': {u'location': 0.583755654607989,
                                                u'organization': 0.07460487821791033,
                                                u'person': 0.07304850776658672,
                                                u'unknown': 0.2685909594075139},
                                u'confidence': 0.846188063604044},
        u'Mike Brown': {u'categories': {u'location': 0.025813884950623898,
                                        u'organization': 0.06661470013014613,
                                        u'person': 0.08723850624560824,
                                        u'unknown': 0.8203329086736217},
                        u'confidence': 0.8951793008234012}}

    :param text: The text to be analyzed.
    :type text: str or unicode
    :param cloud: Forwarded unchanged to ``api_handler``.
    :param batch: Sent as the ``batch`` url parameter.
    :param api_key: Sent as the ``api_key`` url parameter.
    :rtype: Dictionary mapping each entity to its ``categories``
        probabilities and ``confidence`` score (see the example above)
    """
    return api_handler(
        text,
        cloud=cloud,
        api="namedentities",
        url_params=dict(batch=batch, api_key=api_key),
        **kwargs
    )
+6 -6
View File
@@ -25,8 +25,8 @@ def political(text, cloud=None, batch=False, api_key=None, **kwargs):
:type text: str or unicode
:rtype: Dictionary of party probability pairs
"""
return api_handler(text, cloud=cloud, api="political", url_params={"batch":batch, "api_key":api_key}, **kwargs)
url_params = {"batch": batch, "api_key": api_key}
return api_handler(text, cloud=cloud, api="political", url_params=url_params, **kwargs)
def posneg(text, cloud=None, batch=False, api_key=None, **kwargs):
"""
@@ -48,8 +48,8 @@ def posneg(text, cloud=None, batch=False, api_key=None, **kwargs):
:type text: str or unicode
:rtype: Float
"""
return api_handler(text, cloud=cloud, api="sentiment", url_params={"batch":batch, "api_key":api_key}, **kwargs)
url_params = {"batch": batch, "api_key": api_key}
return api_handler(text, cloud=cloud, api="sentiment", url_params=url_params, **kwargs)
def sentiment_hq(text, cloud=None, batch=False, api_key=None, **kwargs):
"""
@@ -71,5 +71,5 @@ def sentiment_hq(text, cloud=None, batch=False, api_key=None, **kwargs):
:type text: str or unicode
:rtype: Float
"""
return api_handler(text, cloud=cloud, api="sentimenthq", url_params={"batch":batch, "api_key":api_key}, **kwargs)
url_params = {"batch": batch, "api_key": api_key}
return api_handler(text, cloud=cloud, api="sentimenthq", url_params=url_params, **kwargs)
+2 -2
View File
@@ -22,5 +22,5 @@ def text_tags(text, cloud=None, batch=False, api_key=None, **kwargs):
:type text: str or unicode
:rtype: Dictionary of class probability pairs
"""
return api_handler(text, cloud=cloud, api="texttags", url_params={"batch":batch, "api_key":api_key}, **kwargs)
url_params = {"batch": batch, "api_key": api_key}
return api_handler(text, cloud=cloud, api="texttags", url_params=url_params, **kwargs)
+2 -1
View File
@@ -48,7 +48,8 @@ def image_preprocess(image, size=(48,48), batch=False):
raise IndicoError("Image must be a filepath, base64 encoded string, or a numpy array")
# image resizing
out_image = out_image.resize(size)
if size:
out_image = out_image.resize(size)
# convert to base64
temp_output = StringIO.StringIO()
+1 -1
View File
@@ -137,7 +137,7 @@ def predict_image(image, apis=IMAGE_APIS, **kwargs):
def parsed_response(api, response):
result = response.get('results', False)
if result:
if result != False:
return result
raise IndicoError(
"Sorry, the %s API returned an unexpected response.\n\t%s"
+1 -1
View File
@@ -9,7 +9,7 @@ except ImportError:
setup(
name="IndicoIo",
version="0.7.5",
version="0.8.0",
packages=[
"indicoio",
"indicoio.text",
+71 -19
View File
@@ -6,10 +6,12 @@ from requests import ConnectionError
from nose.plugins.skip import Skip, SkipTest
from indicoio import config
from indicoio import political, sentiment, fer, facial_features, language, image_features, text_tags
from indicoio import batch_political, batch_sentiment, batch_fer, batch_facial_features
from indicoio import political, sentiment, fer, facial_features, content_filtering, language, image_features, text_tags
from indicoio import batch_political, batch_sentiment, batch_fer, batch_content_filtering, batch_facial_features
from indicoio import batch_language, batch_image_features, batch_text_tags
from indicoio import keywords, batch_keywords
from indicoio import sentiment_hq, batch_sentiment_hq
from indicoio import named_entities, batch_named_entities
from indicoio import predict_image, predict_text, batch_predict_image, batch_predict_text
from indicoio.utils.errors import IndicoError
@@ -32,18 +34,24 @@ class BatchAPIRun(unittest.TestCase):
response = batch_text_tags(test_data, api_key=self.api_key)
self.assertTrue(isinstance(response, list))
def test_batch_keywords(self):
    """Batch keywords returns a list; the first result only uses input words."""
    documents = ["A working api is key to the success of our young company"]
    vocabularies = [set(doc.lower().split()) for doc in documents]

    response = batch_keywords(documents, api_key=self.api_key)

    self.assertTrue(isinstance(response, list))
    returned_keywords = set(response[0].keys())
    self.assertTrue(returned_keywords.issubset(vocabularies[0]))
def test_batch_posneg(self):
    """Batch sentiment returns a list and scores the negative phrase below 0.5."""
    phrases = ['Worst song ever', 'Best song ever']
    scores = batch_sentiment(phrases, api_key=self.api_key)
    self.assertTrue(isinstance(scores, list))
    worst_score = scores[0]
    self.assertTrue(worst_score < 0.5)
# TODO: uncomment once the high quality sentiment API is publicly released
# def test_batch_sentiment_hq(self):
# test_data = ['Worst song ever', 'Best song ever']
# response = batch_sentiment_hq(test_data, api_key=self.api_key)
# self.assertTrue(isinstance(response, list))
# self.assertTrue(response[0] < 0.5)
def test_batch_sentiment_hq(self):
    """Batch sentiment_hq returns a list and scores the negative phrase below 0.5."""
    phrases = ['Worst song ever', 'Best song ever']
    scores = batch_sentiment_hq(phrases, api_key=self.api_key)
    self.assertTrue(isinstance(scores, list))
    worst_score = scores[0]
    self.assertTrue(worst_score < 0.5)
def test_batch_political(self):
test_data = ["Guns don't kill people, people kill people."]
@@ -56,6 +64,12 @@ class BatchAPIRun(unittest.TestCase):
self.assertTrue(isinstance(response, list))
self.assertTrue(isinstance(response[0], dict))
def test_batch_content_filtering(self):
    """Batch content filtering returns a list whose first element is a float."""
    images = [generate_array((48,48))]
    scores = batch_content_filtering(images, api_key=self.api_key)
    self.assertTrue(isinstance(scores, list))
    first_score = scores[0]
    self.assertTrue(isinstance(first_score, float))
def test_batch_fer_bad_b64(self):
test_data = ["$bad#FI jeaf9(#0"]
self.assertRaises(IndicoError, batch_fer, test_data, api_key=self.api_key)
@@ -82,7 +96,6 @@ class BatchAPIRun(unittest.TestCase):
test_data = ["data/unhappy.png"]
self.assertRaises(IndicoError, batch_fer, test_data, api_key=self.api_key)
def test_batch_facial_features(self):
test_data = [generate_array((48,48))]
response = batch_facial_features(test_data, api_key=self.api_key)
@@ -123,6 +136,15 @@ class BatchAPIRun(unittest.TestCase):
self.assertTrue(isinstance(response, list))
self.assertTrue(response[0]['English'] > 0.25)
def test_batch_named_entities(self):
    """
    Batch named_entities finds the expected entities in the first document.

    Each expected entity must be present in the first batch result, and its
    entry may only contain the keys ``categories`` and ``confidence``.
    """
    batch = ["London Underground's boss Mike Brown warned that the strike ..."]
    expected_entities = ("London Underground", "Mike Brown")
    expected_keys = set(["categories", "confidence"])

    # Pass the API key explicitly, like the other batch tests in this class.
    entities = batch_named_entities(batch, api_key=self.api_key)[0]
    for entity in expected_entities:
        # Membership must be checked against the API response; testing it
        # against expected_entities itself would be a tautology.
        assert entity in entities
        assert not (set(entities[entity]) - expected_keys)
def test_batch_multi_api_image(self):
test_data = [generate_array((48,48)), generate_int_array((48,48))]
response = batch_predict_image(test_data, apis=config.IMAGE_APIS, api_key=self.api_key)
@@ -202,6 +224,32 @@ class FullAPIRun(unittest.TestCase):
for v in results.values():
assert v >= 0.1
def test_keywords(self):
    """
    Exercise the keywords API with default, top_n and threshold arguments.

    Checks that 'api' ranks among the three highest-scoring keywords, that
    every returned keyword is a word of the input text, that ``top_n=3``
    yields exactly three results, and that ``threshold=.1`` leaves no score
    below .1.
    """
    text = "A working api is key to the success of our young company"
    words = set(text.lower().split())

    results = keywords(text)
    sorted_results = sorted(results, key=results.get, reverse=True)
    assert 'api' in sorted_results[:3]
    self.assertTrue(set(results.keys()).issubset(words))

    results = keywords(text, top_n=3)
    # Compare by value: `is` tests object identity, which only happens to
    # hold for small ints as an interpreter implementation detail.
    assert len(results) == 3

    results = keywords(text, threshold=.1)
    for v in results.values():
        assert v >= .1
def test_named_entities(self):
    """
    named_entities finds the expected entities in a single document.

    Each expected entity must be present in the response, and its entry may
    only contain the keys ``categories`` and ``confidence``.
    """
    text = "London Underground's boss Mike Brown warned that the strike ..."
    expected_entities = ("London Underground", "Mike Brown")
    expected_keys = set(["categories", "confidence"])

    entities = named_entities(text)
    for entity in expected_entities:
        # Membership must be checked against the API response; testing it
        # against expected_entities itself would be a tautology.
        assert entity in entities
        assert not (set(entities[entity]) - expected_keys)
def test_political(self):
political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
test_string = "Guns don't kill people, people kill people."
@@ -228,18 +276,17 @@ class FullAPIRun(unittest.TestCase):
self.assertTrue(isinstance(response, float))
self.assertTrue(response > 0.5)
# TODO: uncomment when the high quality sentiment API is publicly released
# def test_sentiment_hq(self):
# test_string = "Worst song ever."
# response = sentiment_hq(test_string)
def test_sentiment_hq(self):
    """sentiment_hq returns a float below 0.5 for negative and above 0.5 for positive text."""
    negative_score = sentiment_hq("Worst song ever.")
    self.assertTrue(isinstance(negative_score, float))
    self.assertTrue(negative_score < 0.5)

    positive_score = sentiment_hq("Best song ever.")
    self.assertTrue(isinstance(positive_score, float))
    self.assertTrue(positive_score > 0.5)
def test_good_fer(self):
fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy'])
@@ -283,6 +330,11 @@ class FullAPIRun(unittest.TestCase):
self.assertTrue(isinstance(response, dict))
self.assertEqual(fer_set, set(response.keys()))
def test_safe_content_filtering(self):
    """Content filtering scores the bundled happy-face image below 0.5."""
    happy_face = self.load_image("data/happy.png", as_grey=True)
    nsfw_score = content_filtering(happy_face)
    self.assertTrue(nsfw_score < 0.5)
def test_good_facial_features(self):
test_face = generate_array((48,48))
response = facial_features(test_face)