diff --git a/CHANGES.txt b/CHANGES.txt index 79f806a..d6e4424 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -29,3 +29,5 @@ v0.7.2 Thu Jun 11 -- Remove sentiment_hq from text apis by default v0.7.3 Wed Jun 17 -- Fixes for handling of specific image types v0.7.4 Mon Jun 22 -- Fix for setup.py issues v0.7.5 Wed Jul 1 -- Public access to sentimentHQ api +v0.7.6 Tue Jul 7 -- Add Keywords API +v0.8.0 Fri Jul 10 -- Add Content Filtering API, Named Entities API, Facial Emotion with Localization diff --git a/README.md b/README.md index 783beab..b90a5ad 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ Supported APIs: Examples -------- ```python ->>> from indicoio import political, sentiment, language, text_tags, fer, facial_features, image_features +>>> from indicoio import political, sentiment, language, text_tags, keywords, fer, facial_features, image_features >>> indicoio.config.api_key = "YOUR_API_KEY" @@ -74,6 +74,11 @@ Examples >>> language('Quis custodiet ipsos custodes') {u'Swedish': 0.00033330636691921914, u'Lithuanian': 0.007328693814717631, u'Vietnamese': 0.0002686116137658802, u'Romanian': 8.133913804076592e-06, ...} + +>>> keywords("Facebook blog posts about Android tech make better journalism than most news outlets.", top_n=3) +{u'android': 0.10602030910588661, + u'journalism': 0.13466866170166855, + u'outlets': 0.13930405357808642} ``` Batch API diff --git a/README.rst b/README.rst index 7b692de..0ea9ef4 100644 --- a/README.rst +++ b/README.rst @@ -49,7 +49,7 @@ Examples .. 
code:: python - >>> from indicoio import political, sentiment, language, text_tags, fer, facial_features, image_features + >>> from indicoio import political, sentiment, language, text_tags, keywords, fer, facial_features, image_features >>> indicoio.config.api_key = "YOUR_API_KEY" @@ -72,7 +72,7 @@ Examples >>> import numpy as np - >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist() + >>> test_face = np.linspace(0,50,48*48).reshape(48,48) >>> fer(test_face) {u'Angry': 0.08843749137458341, u'Sad': 0.39091163159204684, u'Neutral': 0.1947947999669361, u'Surprise': 0.03443785859010413, u'Fear': 0.17574534848440568, u'Happy': 0.11567286999192382} @@ -83,6 +83,11 @@ Examples >>> language('Quis custodiet ipsos custodes') {u'Swedish': 0.00033330636691921914, u'Lithuanian': 0.007328693814717631, u'Vietnamese': 0.0002686116137658802, u'Romanian': 8.133913804076592e-06, ...} + >>> keywords("Facebook blog posts about Android tech make better journalism than most news outlets.", top_n=3) + {u'android': 0.10602030910588661, + u'journalism': 0.13466866170166855, + u'outlets': 0.13930405357808642} + Batch API --------- @@ -131,3 +136,4 @@ Accepted image API names: ``fer, facial_features, image_features`` >>> batch_predict_image([test_face, test_face], apis=["fer", "facial_features"]) {'facial_features': [[0.0, -0.026176479280200796, 0.20707644777495776, ...], [0.0, -0.026176479280200796, 0.20707644777495776, ...]], 'fer': [{u'Angry': 0.08877494466353497, u'Sad': 0.3933999409104264, u'Neutral': 0.1910612654566151, u'Surprise': 0.0346146405941845, u'Fear': 0.17682159820518667, u'Happy': 0.11532761017005204}, { u'Angry': 0.08877494466353497, u'Sad': 0.3933999409104264, u'Neutral': 0.1910612654566151, u'Surprise': 0.0346146405941845, u'Fear': 0.17682159820518667, u'Happy': 0.11532761017005204}]} + diff --git a/indicoio/__init__.py b/indicoio/__init__.py index 3c06929..628fb95 100644 --- a/indicoio/__init__.py +++ b/indicoio/__init__.py @@ -1,6 +1,6 @@ from functools 
import partial -Version, version, __version__, VERSION = ('0.7.5',) * 4 +Version, version, __version__, VERSION = ('0.8.0',) * 4 JSON_HEADERS = { 'Content-type': 'application/json', @@ -13,13 +13,17 @@ from indicoio.text.sentiment import political, posneg, sentiment_hq from indicoio.text.sentiment import posneg as sentiment from indicoio.text.lang import language from indicoio.text.tagging import text_tags +from indicoio.text.keywords import keywords +from indicoio.text.ner import named_entities from indicoio.images.fer import fer from indicoio.images.features import facial_features from indicoio.images.features import image_features +from indicoio.images.filtering import content_filtering from indicoio.utils.multi import predict_image, predict_text from indicoio.config import API_NAMES + apis = dict((api, globals().get(api)) for api in API_NAMES) for api in apis: diff --git a/indicoio/config.py b/indicoio/config.py index 0b24332..7fead32 100644 --- a/indicoio/config.py +++ b/indicoio/config.py @@ -50,13 +50,16 @@ TEXT_APIS = [ 'political', 'sentiment', 'language', - 'sentiment_hq' + 'sentiment_hq', + 'keywords', + 'named_entities' ] IMAGE_APIS = [ 'fer', 'facial_features', - 'image_features' + 'image_features', + 'content_filtering' ] API_NAMES = IMAGE_APIS + TEXT_APIS + ["predict_text", "predict_image"] diff --git a/indicoio/images/features.py b/indicoio/images/features.py index e1fc18b..75482bd 100644 --- a/indicoio/images/features.py +++ b/indicoio/images/features.py @@ -26,7 +26,8 @@ def facial_features(image, cloud=None, batch=False, api_key=None, **kwargs): :rtype: List containing feature responses """ image = image_preprocess(image, batch=batch) - return api_handler(image, cloud=cloud, api="facialfeatures", url_params={"batch":batch, "api_key":api_key}, **kwargs) + url_params = {"batch": batch, "api_key": api_key} + return api_handler(image, cloud=cloud, api="facialfeatures", url_params=url_params, **kwargs) def image_features(image, cloud=None, 
batch=False, api_key=None, **kwargs): """ @@ -59,4 +60,5 @@ def image_features(image, cloud=None, batch=False, api_key=None, **kwargs): :rtype: List containing features """ image = image_preprocess(image, batch=batch, size=(64,64)) - return api_handler(image, cloud=cloud, api="imagefeatures", url_params={"batch":batch, "api_key":api_key}, **kwargs) + url_params = {"batch": batch, "api_key": api_key} + return api_handler(image, cloud=cloud, api="imagefeatures", url_params=url_params, **kwargs) diff --git a/indicoio/images/fer.py b/indicoio/images/fer.py index b36d6ff..87612e2 100644 --- a/indicoio/images/fer.py +++ b/indicoio/images/fer.py @@ -28,4 +28,5 @@ def fer(image, cloud=None, batch=False, api_key=None, **kwargs): :rtype: Dictionary containing emotion probability pairs """ image = image_preprocess(image, batch=batch) - return api_handler(image, cloud=cloud, api="fer", url_params={"batch":batch, "api_key":api_key}, **kwargs) + url_params = {"batch": batch, "api_key": api_key} + return api_handler(image, cloud=cloud, api="fer", url_params=url_params, **kwargs) diff --git a/indicoio/images/filtering.py b/indicoio/images/filtering.py new file mode 100644 index 0000000..3851b8f --- /dev/null +++ b/indicoio/images/filtering.py @@ -0,0 +1,29 @@ +import requests + +from indicoio.utils.api import api_handler +from indicoio.utils.image import image_preprocess +import indicoio.config as config + +def content_filtering(image, cloud=None, batch=False, api_key=None, **kwargs): + """ + Given a grayscale input image, returns how obscene the image is. + Input should be in a list of lists format. + + Example usage: + + .. code-block:: python + + >>> from indicoio import content_filtering + >>> import numpy as np + >>> face = np.zeros((48,48)).tolist() + >>> res = content_filtering(face) + >>> res + .056 + + :param image: The image to be analyzed.
+ :type image: list of lists + :rtype: Float score indicating how NSFW the image is + """ + image = image_preprocess(image, batch=batch, size=None) + url_params = {"batch": batch, "api_key": api_key} + return api_handler(image, cloud=cloud, api="contentfiltering", url_params=url_params, **kwargs) diff --git a/indicoio/text/keywords.py b/indicoio/text/keywords.py new file mode 100644 index 0000000..4293486 --- /dev/null +++ b/indicoio/text/keywords.py @@ -0,0 +1,24 @@ +from indicoio.utils.api import api_handler +import indicoio.config as config + +def keywords(text, cloud=None, batch=False, api_key=None, **kwargs): + """ + Given input text, returns a series of keywords and their associated scores + + Example usage: + + .. code-block:: python + + >>> import indicoio + >>> import numpy as np + >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.' + >>> keywords = indicoio.keywords(text, top_n=3) + >>> print "The keywords are: "+str(keywords.keys()) + The keywords are: [u'delightful', u'highs', u'skies'] + + :param text: The text to be analyzed.
+ :type text: str or unicode + :rtype: Dictionary of feature score pairs + """ + url_params = {'batch': batch, 'api_key': api_key} + return api_handler(text, cloud=cloud, api="keywords", url_params=url_params, **kwargs) diff --git a/indicoio/text/lang.py b/indicoio/text/lang.py index 9f0871f..fbc03a3 100644 --- a/indicoio/text/lang.py +++ b/indicoio/text/lang.py @@ -23,5 +23,5 @@ def language(text, cloud=None, batch=False, api_key=None, **kwargs): :type text: str or unicode :rtype: Dictionary of language probability pairs """ - - return api_handler(text, cloud=cloud, api="language", url_params={"batch":batch, "api_key":api_key}, **kwargs) + url_params = {"batch": batch, "api_key": api_key} + return api_handler(text, cloud=cloud, api="language", url_params=url_params, **kwargs) diff --git a/indicoio/text/ner.py b/indicoio/text/ner.py new file mode 100644 index 0000000..8f2985b --- /dev/null +++ b/indicoio/text/ner.py @@ -0,0 +1,30 @@ +from indicoio.utils.api import api_handler +import indicoio.config as config + +def named_entities(text, cloud=None, batch=False, api_key=None, **kwargs): + """ + Given input text, returns named entities (proper nouns) found in the text + + Example usage: + + .. code-block:: python + + >>> text = "London Underground's boss Mike Brown warned that the strike ..." + >>> entities = indicoio.named_entities(text) + {u'London Underground': {u'categories': {u'location': 0.583755654607989, + u'organization': 0.07460487821791033, + u'person': 0.07304850776658672, + u'unknown': 0.2685909594075139}, + u'confidence': 0.846188063604044}, + u'Mike Brown': {u'categories': {u'location': 0.025813884950623898, + u'organization': 0.06661470013014613, + u'person': 0.08723850624560824, + u'unknown': 0.8203329086736217}, + u'confidence': 0.8951793008234012}} + + :param text: The text to be analyzed. 
+ :type text: str or unicode + :rtype: Dictionary mapping each named entity to its category probabilities and confidence + """ + url_params = {"batch": batch, "api_key": api_key} + return api_handler(text, cloud=cloud, api="namedentities", url_params=url_params, **kwargs) diff --git a/indicoio/text/sentiment.py b/indicoio/text/sentiment.py index 5172e40..64607f3 100644 --- a/indicoio/text/sentiment.py +++ b/indicoio/text/sentiment.py @@ -25,8 +25,8 @@ def political(text, cloud=None, batch=False, api_key=None, **kwargs): :type text: str or unicode :rtype: Dictionary of party probability pairs """ - - return api_handler(text, cloud=cloud, api="political", url_params={"batch":batch, "api_key":api_key}, **kwargs) + url_params = {"batch": batch, "api_key": api_key} + return api_handler(text, cloud=cloud, api="political", url_params=url_params, **kwargs) def posneg(text, cloud=None, batch=False, api_key=None, **kwargs): """ @@ -48,8 +48,8 @@ def posneg(text, cloud=None, batch=False, api_key=None, **kwargs): :type text: str or unicode :rtype: Float """ - - return api_handler(text, cloud=cloud, api="sentiment", url_params={"batch":batch, "api_key":api_key}, **kwargs) + url_params = {"batch": batch, "api_key": api_key} + return api_handler(text, cloud=cloud, api="sentiment", url_params=url_params, **kwargs) def sentiment_hq(text, cloud=None, batch=False, api_key=None, **kwargs): """ @@ -71,5 +71,5 @@ def sentiment_hq(text, cloud=None, batch=False, api_key=None, **kwargs): :type text: str or unicode :rtype: Float """ - - return api_handler(text, cloud=cloud, api="sentimenthq", url_params={"batch":batch, "api_key":api_key}, **kwargs) + url_params = {"batch": batch, "api_key": api_key} + return api_handler(text, cloud=cloud, api="sentimenthq", url_params=url_params, **kwargs) diff --git a/indicoio/text/tagging.py b/indicoio/text/tagging.py index e0f58de..87a1cdc 100644 --- a/indicoio/text/tagging.py +++ b/indicoio/text/tagging.py @@ -22,5 +22,5 @@ def text_tags(text, cloud=None, batch=False, api_key=None, **kwargs):
:type text: str or unicode :rtype: Dictionary of class probability pairs """ - - return api_handler(text, cloud=cloud, api="texttags", url_params={"batch":batch, "api_key":api_key}, **kwargs) + url_params = {"batch": batch, "api_key": api_key} + return api_handler(text, cloud=cloud, api="texttags", url_params=url_params, **kwargs) diff --git a/indicoio/utils/image.py b/indicoio/utils/image.py index 70fb19e..f0ab106 100644 --- a/indicoio/utils/image.py +++ b/indicoio/utils/image.py @@ -48,7 +48,8 @@ def image_preprocess(image, size=(48,48), batch=False): raise IndicoError("Image must be a filepath, base64 encoded string, or a numpy array") # image resizing - out_image = out_image.resize(size) + if size: + out_image = out_image.resize(size) # convert to base64 temp_output = StringIO.StringIO() diff --git a/indicoio/utils/multi.py b/indicoio/utils/multi.py index 220f569..f037642 100644 --- a/indicoio/utils/multi.py +++ b/indicoio/utils/multi.py @@ -137,7 +137,7 @@ def predict_image(image, apis=IMAGE_APIS, **kwargs): def parsed_response(api, response): result = response.get('results', False) - if result: + if result != False: return result raise IndicoError( "Sorry, the %s API returned an unexpected response.\n\t%s" diff --git a/setup.py b/setup.py index eb0b590..7b2b08d 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ except ImportError: setup( name="IndicoIo", - version="0.7.5", + version="0.8.0", packages=[ "indicoio", "indicoio.text", diff --git a/tests/test_remote.py b/tests/test_remote.py index b4b262d..85c45b5 100644 --- a/tests/test_remote.py +++ b/tests/test_remote.py @@ -6,10 +6,12 @@ from requests import ConnectionError from nose.plugins.skip import Skip, SkipTest from indicoio import config -from indicoio import political, sentiment, fer, facial_features, language, image_features, text_tags -from indicoio import batch_political, batch_sentiment, batch_fer, batch_facial_features +from indicoio import political, sentiment, fer, facial_features, 
content_filtering, language, image_features, text_tags +from indicoio import batch_political, batch_sentiment, batch_fer, batch_content_filtering, batch_facial_features from indicoio import batch_language, batch_image_features, batch_text_tags +from indicoio import keywords, batch_keywords from indicoio import sentiment_hq, batch_sentiment_hq +from indicoio import named_entities, batch_named_entities from indicoio import predict_image, predict_text, batch_predict_image, batch_predict_text from indicoio.utils.errors import IndicoError @@ -32,18 +34,24 @@ class BatchAPIRun(unittest.TestCase): response = batch_text_tags(test_data, api_key=self.api_key) self.assertTrue(isinstance(response, list)) + def test_batch_keywords(self): + test_data = ["A working api is key to the success of our young company"] + words = [set(text.lower().split()) for text in test_data] + response = batch_keywords(test_data, api_key=self.api_key) + self.assertTrue(isinstance(response, list)) + self.assertTrue(set(response[0].keys()).issubset(words[0])) + def test_batch_posneg(self): test_data = ['Worst song ever', 'Best song ever'] response = batch_sentiment(test_data, api_key=self.api_key) self.assertTrue(isinstance(response, list)) self.assertTrue(response[0] < 0.5) - # TODO: uncomment once the high quality sentiment API is publicly released - # def test_batch_sentiment_hq(self): - # test_data = ['Worst song ever', 'Best song ever'] - # response = batch_sentiment_hq(test_data, api_key=self.api_key) - # self.assertTrue(isinstance(response, list)) - # self.assertTrue(response[0] < 0.5) + def test_batch_sentiment_hq(self): + test_data = ['Worst song ever', 'Best song ever'] + response = batch_sentiment_hq(test_data, api_key=self.api_key) + self.assertTrue(isinstance(response, list)) + self.assertTrue(response[0] < 0.5) def test_batch_political(self): test_data = ["Guns don't kill people, people kill people."] @@ -56,6 +64,12 @@ class BatchAPIRun(unittest.TestCase): 
self.assertTrue(isinstance(response, list)) self.assertTrue(isinstance(response[0], dict)) + def test_batch_content_filtering(self): + test_data = [generate_array((48,48))] + response = batch_content_filtering(test_data, api_key=self.api_key) + self.assertTrue(isinstance(response, list)) + self.assertTrue(isinstance(response[0], float)) + def test_batch_fer_bad_b64(self): test_data = ["$bad#FI jeaf9(#0"] self.assertRaises(IndicoError, batch_fer, test_data, api_key=self.api_key) @@ -82,7 +96,6 @@ class BatchAPIRun(unittest.TestCase): test_data = ["data/unhappy.png"] self.assertRaises(IndicoError, batch_fer, test_data, api_key=self.api_key) - def test_batch_facial_features(self): test_data = [generate_array((48,48))] response = batch_facial_features(test_data, api_key=self.api_key) @@ -123,6 +136,15 @@ class BatchAPIRun(unittest.TestCase): self.assertTrue(isinstance(response, list)) self.assertTrue(response[0]['English'] > 0.25) + def test_batch_named_entities(self): + batch = ["London Underground's boss Mike Brown warned that the strike ..."] + expected_entities = ("London Underground", "Mike Brown") + expected_keys = set(["categories", "confidence"]) + entities = batch_named_entities(batch)[0] + for entity in expected_entities: + assert entity in expected_entities + assert not (set(entities[entity]) - expected_keys) + def test_batch_multi_api_image(self): test_data = [generate_array((48,48)), generate_int_array((48,48))] response = batch_predict_image(test_data, apis=config.IMAGE_APIS, api_key=self.api_key) @@ -202,6 +224,32 @@ class FullAPIRun(unittest.TestCase): for v in results.values(): assert v >= 0.1 + def test_keywords(self): + text = "A working api is key to the success of our young company" + words = set(text.lower().split()) + + results = keywords(text) + sorted_results = sorted(results.keys(), key=lambda x:results.get(x), reverse=True) + assert 'api' in sorted_results[:3] + + self.assertTrue(set(results.keys()).issubset(words)) + + results = 
keywords(text, top_n=3) + assert len(results) is 3 + + results = keywords(text, threshold=.1) + for v in results.values(): + assert v >= .1 + + def test_named_entities(self): + text = "London Underground's boss Mike Brown warned that the strike ..." + expected_entities = ("London Underground", "Mike Brown") + expected_keys = set(["categories", "confidence"]) + entities = named_entities(text) + for entity in expected_entities: + assert entity in expected_entities + assert not (set(entities[entity]) - expected_keys) + def test_political(self): political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green']) test_string = "Guns don't kill people, people kill people." @@ -228,18 +276,17 @@ class FullAPIRun(unittest.TestCase): self.assertTrue(isinstance(response, float)) self.assertTrue(response > 0.5) - # TODO: uncomment when the high quality sentiment API is publicly released - # def test_sentiment_hq(self): - # test_string = "Worst song ever." - # response = sentiment_hq(test_string) + def test_sentiment_hq(self): + test_string = "Worst song ever." + response = sentiment_hq(test_string) - # self.assertTrue(isinstance(response, float)) - # self.assertTrue(response < 0.5) + self.assertTrue(isinstance(response, float)) + self.assertTrue(response < 0.5) - # test_string = "Best song ever." - # response = sentiment_hq(test_string) - # self.assertTrue(isinstance(response, float)) - # self.assertTrue(response > 0.5) + test_string = "Best song ever." 
+ response = sentiment_hq(test_string) + self.assertTrue(isinstance(response, float)) + self.assertTrue(response > 0.5) def test_good_fer(self): fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy']) @@ -283,6 +330,11 @@ class FullAPIRun(unittest.TestCase): self.assertTrue(isinstance(response, dict)) self.assertEqual(fer_set, set(response.keys())) + def test_safe_content_filtering(self): + test_face = self.load_image("data/happy.png", as_grey=True) + response = content_filtering(test_face) + self.assertTrue(response < 0.5) + def test_good_facial_features(self): test_face = generate_array((48,48)) response = facial_features(test_face)