From 6c23d0d7ee8c173e1e44b28429a508273626c397 Mon Sep 17 00:00:00 2001 From: Madison May Date: Thu, 18 Dec 2014 16:10:36 -0500 Subject: [PATCH 1/2] Add support for optional arguments to API --- CHANGES.txt | 3 ++- indicoio/__init__.py | 2 +- indicoio/images/features.py | 8 ++++---- indicoio/images/fer.py | 4 ++-- indicoio/text/lang.py | 4 ++-- indicoio/text/sentiment.py | 8 ++++---- indicoio/text/tagging.py | 4 ++-- indicoio/utils/__init__.py | 10 ++++++---- setup.py | 2 +- tests/local/test_local.py | 19 ++++++------------- tests/remote/test_remote.py | 5 +++++ 11 files changed, 35 insertions(+), 34 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 67a8c7d..48f6da9 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -14,4 +14,5 @@ v0.4.5, Thu Sep 25 -- Added interface to local indico server v0.4.6, Fri Oct 27 -- Updated to point to new indico api servers, cleaner REST API v0.4.8, Fri Nov 7 -- Updated API interface to include new text tags API v0.4.11, Wed Dec 18 -- Updated tests for text tags -v0.4.12, Thu Dec 19 -- Added batch support interface \ No newline at end of file +v0.4.12, Thu Dec 19 -- Added batch support interface +v0.4.13, Thu Dec 19 -- Added optional arguments to text tags API \ No newline at end of file diff --git a/indicoio/__init__.py b/indicoio/__init__.py index c8c3548..17b4702 100644 --- a/indicoio/__init__.py +++ b/indicoio/__init__.py @@ -3,7 +3,7 @@ import indicoio.config as config JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain'} -Version, version, __version__, VERSION = ('0.4.12',) * 4 +Version, version, __version__, VERSION = ('0.4.13',) * 4 from indicoio.text.sentiment import political, posneg from indicoio.text.sentiment import posneg as sentiment diff --git a/indicoio/images/features.py b/indicoio/images/features.py index 776ef8e..f404994 100644 --- a/indicoio/images/features.py +++ b/indicoio/images/features.py @@ -5,7 +5,7 @@ import numpy as np from indicoio.utils import image_preprocess, api_handler -def 
facial_features(api_root, image, batch=False, auth=None): +def facial_features(api_root, image, batch=False, auth=None, **kwargs): """ Given an grayscale input image of a face, returns a 48 dimensional feature vector explaining that face. Useful as a form of feature engineering for face oriented tasks. @@ -27,9 +27,9 @@ def facial_features(api_root, image, batch=False, auth=None): :type image: list of lists :rtype: List containing feature responses """ - return api_handler(image, api_root + "facialfeatures", batch=batch, auth=auth) + return api_handler(image, api_root + "facialfeatures", batch=batch, auth=auth, **kwargs) -def image_features(api_root, image, batch=False, auth=None): +def image_features(api_root, image, batch=False, auth=None, **kwargs): """ Given an input image, returns a 2048 dimensional sparse feature vector explaining that image. Useful as a form of feature engineering for image oriented tasks. @@ -60,4 +60,4 @@ def image_features(api_root, image, batch=False, auth=None): :rtype: List containing features """ image = image_preprocess(image) - return api_handler(image, api_root + "imagefeatures", batch=batch, auth=auth) + return api_handler(image, api_root + "imagefeatures", batch=batch, auth=auth, **kwargs) diff --git a/indicoio/images/fer.py b/indicoio/images/fer.py index ecbddd6..2248fe8 100644 --- a/indicoio/images/fer.py +++ b/indicoio/images/fer.py @@ -4,7 +4,7 @@ import requests import numpy as np from indicoio.utils import api_handler -def fer(api_root, image, batch=False, auth=None): +def fer(api_root, image, batch=False, auth=None, **kwargs): """ Given a grayscale input image of a face, returns a probability distribution over emotional state. 
Input should be in a list of list format, resizing will be attempted internally but for best @@ -28,4 +28,4 @@ def fer(api_root, image, batch=False, auth=None): :rtype: Dictionary containing emotion probability pairs """ - return api_handler(image, api_root + "fer", batch=batch, auth=auth) + return api_handler(image, api_root + "fer", batch=batch, auth=auth, **kwargs) diff --git a/indicoio/text/lang.py b/indicoio/text/lang.py index 2e0e65f..51bd339 100644 --- a/indicoio/text/lang.py +++ b/indicoio/text/lang.py @@ -1,6 +1,6 @@ from indicoio.utils import api_handler -def language(api_root, text, batch=False, auth=None): +def language(api_root, text, batch=False, auth=None, **kwargs): """ Given input text, returns a probability distribution over 33 possible languages of what language the text was written in. @@ -23,4 +23,4 @@ def language(api_root, text, batch=False, auth=None): :rtype: Dictionary of language probability pairs """ - return api_handler(text, api_root + "language", batch=batch, auth=auth) + return api_handler(text, api_root + "language", batch=batch, auth=auth, **kwargs) diff --git a/indicoio/text/sentiment.py b/indicoio/text/sentiment.py index acb88ac..387bb46 100644 --- a/indicoio/text/sentiment.py +++ b/indicoio/text/sentiment.py @@ -1,7 +1,7 @@ from indicoio import JSON_HEADERS from indicoio.utils import api_handler -def political(api_root, text, batch=False, auth=None): +def political(api_root, text, batch=False, auth=None, **kwargs): """ Given input text, returns a probability distribution over the political alignment of the speaker. 
@@ -27,9 +27,9 @@ def political(api_root, text, batch=False, auth=None): :rtype: Dictionary of party probability pairs """ - return api_handler(text, api_root + "political", batch=batch, auth=auth) + return api_handler(text, api_root + "political", batch=batch, auth=auth, **kwargs) -def posneg(api_root, text, batch=False, auth=None): +def posneg(api_root, text, batch=False, auth=None, **kwargs): """ Given input text, returns a scalar estimate of the sentiment of that text. Values are roughly in the range 0 to 1 with 0.5 indicating neutral sentiment. @@ -50,4 +50,4 @@ def posneg(api_root, text, batch=False, auth=None): :rtype: Float """ - return api_handler(text, api_root + "sentiment", batch=batch, auth=auth) + return api_handler(text, api_root + "sentiment", batch=batch, auth=auth, **kwargs) diff --git a/indicoio/text/tagging.py b/indicoio/text/tagging.py index a2d8ad6..b0e8750 100644 --- a/indicoio/text/tagging.py +++ b/indicoio/text/tagging.py @@ -1,6 +1,6 @@ from indicoio.utils import api_handler -def text_tags(api_root, text, batch=False, auth=None): +def text_tags(api_root, text, batch=False, auth=None, **kwargs): """ Given input text, returns a probability distribution over 100 document categories @@ -22,4 +22,4 @@ def text_tags(api_root, text, batch=False, auth=None): :rtype: Dictionary of class probability pairs """ - return api_handler(text, api_root + "texttags", batch=batch, auth=None) + return api_handler(text, api_root + "texttags", batch=batch, auth=None, **kwargs) diff --git a/indicoio/utils/__init__.py b/indicoio/utils/__init__.py index 0369dc5..c04e378 100644 --- a/indicoio/utils/__init__.py +++ b/indicoio/utils/__init__.py @@ -20,15 +20,17 @@ def auth_query(): return (email, password) -def api_handler(arg, url, batch=False, auth=None): - data_dict = json.dumps({'data': arg}) +def api_handler(arg, url, batch=False, auth=None, **kwargs): + data = {'data': arg} + data.update(**kwargs) + json_data = json.dumps(data) if batch: url += "/batch" if not 
auth: auth = auth_query() - response = requests.post(url, data=data_dict, headers=JSON_HEADERS, auth=auth).json() + response = requests.post(url, data=json_data, headers=JSON_HEADERS, auth=auth).json() results = response.get('results', False) - if not results: + if results is False: error = response.get('error') raise ValueError(error) return results diff --git a/setup.py b/setup.py index 2edb68a..fc4825b 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ except ImportError: setup( name="IndicoIo", - version='0.4.12', + version='0.4.13', packages=[ "indicoio", "indicoio.text", diff --git a/tests/local/test_local.py b/tests/local/test_local.py index 2f66ce6..9d8f1ba 100644 --- a/tests/local/test_local.py +++ b/tests/local/test_local.py @@ -22,22 +22,15 @@ class FullAPIRun(unittest.TestCase): self.assertTrue(np.ptp(vector) > span) def test_text_tags(self): - expected_keys = set(['fashion', 'art', 'energy', 'economics', 'entreprener', - 'books', 'politics', 'gardening', 'nba', 'conservative', - 'technology', 'startps', 'relationships', 'edcation', - 'hmor', 'psychology', 'bicycling', 'investing', 'travel', - 'cooking', 'christianity', 'environment', 'religion', 'health', - 'hockey', 'pets', 'msic', 'soccer', 'gns', 'gaming', 'jobs', - 'bsiness', 'natre', 'food', 'cars', 'photography', 'philosophy', - 'geek', 'sports', 'baseball', 'news', 'television', 'entertainment', - 'parenting', 'comics', 'science', 'nfl','programming', - 'personalfinance', 'atheism', 'movies', 'anime', 'fitness', - 'military', 'realestate', 'history']) text = "On Monday, president Barack Obama will be..." 
results = text_tags(text) max_keys = sorted(results.keys(), key=lambda x:results.get(x), reverse=True) - assert 'politics' in max_keys[:3] - self.assertTrue(expected_keys == set(results.keys())) + assert 'political_discussion' in max_keys[:5] + results = text_tags(text, top_n=5) + assert len(results) is 5 + results = text_tags(text, threshold=0.1) + for v in results.values(): + assert v >= 0.1 def test_political(self): political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green']) diff --git a/tests/remote/test_remote.py b/tests/remote/test_remote.py index 3427d7e..43f97f7 100644 --- a/tests/remote/test_remote.py +++ b/tests/remote/test_remote.py @@ -27,6 +27,11 @@ class FullAPIRun(unittest.TestCase): results = text_tags(text) max_keys = sorted(results.keys(), key=lambda x:results.get(x), reverse=True) assert 'political_discussion' in max_keys[:5] + results = text_tags(text, top_n=5) + assert len(results) is 5 + results = text_tags(text, threshold=0.1) + for v in results.values(): + assert v >= 0.1 def test_political(self): political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green']) From 8e5d7977b8a0b98e5f2b9a4c9ae9b797f3bf7130 Mon Sep 17 00:00:00 2001 From: Madison May Date: Thu, 18 Dec 2014 16:18:53 -0500 Subject: [PATCH 2/2] Documentation for optional arguments --- README | 22 +++++++++++++++++++--- README.md | 15 ++++++++++----- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/README b/README index 76622a3..9cd838f 100644 --- a/README +++ b/README @@ -42,10 +42,18 @@ Examples >>> sentiment('Really enjoyed the movie.') {u'Sentiment': 0.8105182526856075} ->>> tag_dict = text_tags("Facebook blog posts about Android tech make better journalism than most news outlets.") +>>> test_text = "Facebook blog posts about Android tech make better journalism than most news outlets." 
->>> sorted(tag_dict.keys(), key=lambda x: tag_dict[x], reverse=True)[:5] -[u'investing', u'startups', u'business', u'entrepreneur', u'humor'] +>>> tag_dict = text_tags(test_text) + +>>> sorted(tag_dict.keys(), key=lambda x: tag_dict[x], reverse=True)[:3] +[u'startups_and_entrepreneurship', u'investment', u'business'] + +>>> text_tags(test_text, threshold=0.1) # return only keys with value > 0.1 +{u'startups_and_entrepreneurship': 0.21888586688354486} + +>>> text_tags(test_text, top_n=1) # return only keys with top_n values +{u'startups_and_entrepreneurship': 0.21888586688354486} >>> tag_dict {u'fashion': 0.011450126534350728, u'art': 0.00358698972755963, u'energy': 0.005537894035625527, ...} @@ -74,6 +82,14 @@ If you have a local indico server running, simply import from `indicoio.local`. >>> from indicoio.local import political, sentiment, fer, facial_features, language ``` +If you'd like to use our batch api interface, please send an email to contact@indico.io. + +``` +>>> from indicoio import batch_sentiment +>>> batch_sentiment(['Text to analyze', 'More text'], auth=("example@example.com", "********")) +``` + + Installation ------------ ``` diff --git a/README.md b/README.md index 38cecde..08039a1 100644 --- a/README.md +++ b/README.md @@ -46,13 +46,18 @@ Examples >>> sentiment('Really enjoyed the movie.') {u'Sentiment': 0.8105182526856075} ->>> tag_dict = text_tags("Facebook blog posts about Android tech make better journalism than most news outlets.") +>>> test_text = "Facebook blog posts about Android tech make better journalism than most news outlets." 
->>> sorted(tag_dict.keys(), key=lambda x: tag_dict[x], reverse=True)[:5] -[u'investing', u'startups', u'business', u'entrepreneur', u'humor'] +>>> tag_dict = text_tags(test_text) ->>> tag_dict -{u'fashion': 0.011450126534350728, u'art': 0.00358698972755963, u'energy': 0.005537894035625527, ...} +>>> sorted(tag_dict.keys(), key=lambda x: tag_dict[x], reverse=True)[:3] +[u'startups_and_entrepreneurship', u'investment', u'business'] + +>>> text_tags(test_text, threshold=0.1) # return only keys with value > 0.1 +{u'startups_and_entrepreneurship': 0.21888586688354486} + +>>> text_tags(test_text, top_n=1) # return only keys with top_n values +{u'startups_and_entrepreneurship': 0.21888586688354486} >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()