diff --git a/CHANGES.txt b/CHANGES.txt index c3f8f44..0e02727 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -12,3 +12,4 @@ v0.4.3, Thu Sep 11 -- Added image features api and sphinx compliant documentatio v0.4.4, Thu Sep 25 -- Added dependencies installation to setup.py v0.4.5, Thu Sep 25 -- Added interface to local indico server v0.4.6, Fri Oct 27 -- Updated to point to new indico api servers, cleaner REST API +v0.4.8, Fri Nov 7 -- Updated API interface to include new text tags API diff --git a/README b/README index 95cec92..4b76b3a 100644 --- a/README +++ b/README @@ -41,6 +41,9 @@ Examples >>> sentiment('Really enjoyed the movie.') {u'Sentiment': 0.8105182526856075} +>>> text_tags("On Monday, president Barack Obama will be...") +{u'fashion': 0.024739582352183764, u'art': 0.008637280256320275, u'energy': 0.013183388999943419, ...} + >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist() >>> fer(test_face) diff --git a/README.md b/README.md index 76c1f1e..05aaa39 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,9 @@ Examples >>> sentiment('Really enjoyed the movie.') {u'Sentiment': 0.8105182526856075} +>>> text_tags("On Monday, president Barack Obama will be...") +{u'fashion': 0.024739582352183764, u'art': 0.008637280256320275, u'energy': 0.013183388999943419, ...} + >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist() >>> fer(test_face) diff --git a/indicoio/__init__.py b/indicoio/__init__.py index d92b8ba..2309126 100644 --- a/indicoio/__init__.py +++ b/indicoio/__init__.py @@ -8,6 +8,7 @@ Version, version, __version__, VERSION = ('0.4.5',) * 4 from indicoio.text.sentiment import political, posneg from indicoio.text.sentiment import posneg as sentiment from indicoio.text.lang import language +from indicoio.text.tagging import text_tags from indicoio.images.fer import fer from indicoio.images.features import facial_features from indicoio.images.features import image_features @@ -20,3 +21,4 @@ language = partial(language, config.api_root) fer 
= partial(fer, config.api_root) facial_features = partial(facial_features, config.api_root) image_features = partial(image_features, config.api_root) +text_tags = partial(text_tags, config.api_root) diff --git a/indicoio/local/__init__.py b/indicoio/local/__init__.py index 9d44439..428186e 100644 --- a/indicoio/local/__init__.py +++ b/indicoio/local/__init__.py @@ -6,6 +6,7 @@ JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain'} from indicoio.text.sentiment import political, posneg from indicoio.text.sentiment import posneg as sentiment from indicoio.text.lang import language +from indicoio.text.tagging import text_tags from indicoio.images.fer import fer from indicoio.images.features import facial_features from indicoio.images.features import image_features @@ -18,3 +19,4 @@ language = partial(language, config.local_api_root) fer = partial(fer, config.local_api_root) facial_features = partial(facial_features, config.local_api_root) image_features = partial(image_features, config.local_api_root) +text_tags = partial(text_tags, config.local_api_root) diff --git a/indicoio/text/lang.py b/indicoio/text/lang.py index 169b415..068d45d 100644 --- a/indicoio/text/lang.py +++ b/indicoio/text/lang.py @@ -1,6 +1,3 @@ -import requests -import json - from indicoio.utils import api_handler def language(api_root, text): diff --git a/indicoio/text/sentiment.py b/indicoio/text/sentiment.py index e60cd42..81f0363 100644 --- a/indicoio/text/sentiment.py +++ b/indicoio/text/sentiment.py @@ -1,6 +1,3 @@ -import requests -import json - from indicoio import JSON_HEADERS from indicoio.utils import api_handler diff --git a/indicoio/text/tagging.py b/indicoio/text/tagging.py new file mode 100644 index 0000000..3f182ad --- /dev/null +++ b/indicoio/text/tagging.py @@ -0,0 +1,25 @@ +from indicoio.utils import api_handler + +def text_tags(api_root, text): + """ + Given input text, returns a probability distribution over 100 document categories + + Example usage: + + .. 
code-block:: python + + >>> import indicoio + >>> import numpy as np + >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.' + >>> possible = indicoio.text_tags(text) + >>> category = possible.keys()[np.argmax(possible.values())] + >>> probability = np.max(possible.values()) + >>> "Predicted category '%s' with probability %.4f"%(category,probability) + u"Predicted category 'Weather' with probability 0.8548" + + :param text: The text to be analyzed. + :type text: str or unicode + :rtype: Dictionary of class probability pairs + """ + + return api_handler(text, api_root + "texttags") diff --git a/setup.py b/setup.py index d61d160..cc6e82f 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ except ImportError: setup( name="IndicoIo", - version='0.4.7', + version='0.4.8', packages=[ "indicoio", "indicoio.text", diff --git a/tests/local/test_local.py b/tests/local/test_local.py index f8bc5dd..48cc0f5 100644 --- a/tests/local/test_local.py +++ b/tests/local/test_local.py @@ -2,7 +2,7 @@ import unittest import numpy as np -from indicoio.local import political, sentiment, fer, facial_features, language, image_features +from indicoio.local import political, sentiment, fer, facial_features, language, image_features, text_tags DIR = os.path.dirname(os.path.realpath(__file__)) @@ -20,6 +20,24 @@ class FullAPIRun(unittest.TestCase): self.assertTrue(vector.min() < minimum) self.assertTrue(np.ptp(vector) > span) + def test_text_tags(self): + expected_keys = set(['fashion', 'art', 'energy', 'economics', 'entrepreneur', + 'books', 'politics', 'gardening', 'nba', 'conservative', + 'technology', 'startups', 'relationships', 'education', + 'humor', 'psychology', 'bicycling', 'investing', 'travel', + 'cooking', 'christianity', 'environment', 'religion', 'health', + 'hockey', 'pets', 'music', 'soccer', 'guns', 'gaming', 'jobs', + 'business', 'nature', 'food', 'cars', 'photography', 'philosophy', + 'geek', 'sports', 'baseball', 'news', 'television', 'entertainment', 
'parenting', 'comics', 'science', 'nfl','programming', + 'personalfinance', 'atheism', 'movies', 'anime', 'fitness', + 'military', 'realestate', 'history']) + text = "On Monday, president Barack Obama will be..." + results = text_tags(text) + max_keys = sorted(results.keys(), key=lambda x:results.get(x), reverse=True) + assert 'politics' in max_keys[:3] + self.assertTrue(expected_keys == set(results.keys())) + def test_political(self): political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green']) test_string = "Guns don't kill people, people kill people." diff --git a/tests/remote/test_remote.py b/tests/remote/test_remote.py index 9c75c3b..1477822 100644 --- a/tests/remote/test_remote.py +++ b/tests/remote/test_remote.py @@ -4,7 +4,7 @@ import os import numpy as np import skimage.io -from indicoio import political, sentiment, fer, facial_features, language, image_features +from indicoio import political, sentiment, fer, facial_features, language, image_features, text_tags DIR = os.path.dirname(os.path.realpath(__file__)) @@ -22,6 +22,24 @@ class FullAPIRun(unittest.TestCase): self.assertTrue(vector.min() < minimum) self.assertTrue(np.ptp(vector) > span) + def test_text_tags(self): + expected_keys = set(['fashion', 'art', 'energy', 'economics', 'entrepreneur', + 'books', 'politics', 'gardening', 'nba', 'conservative', + 'technology', 'startups', 'relationships', 'education', + 'humor', 'psychology', 'bicycling', 'investing', 'travel', + 'cooking', 'christianity', 'environment', 'religion', 'health', + 'hockey', 'pets', 'music', 'soccer', 'guns', 'gaming', 'jobs', + 'business', 'nature', 'food', 'cars', 'photography', 'philosophy', + 'geek', 'sports', 'baseball', 'news', 'television', 'entertainment', + 'parenting', 'comics', 'science', 'nfl','programming', + 'personalfinance', 'atheism', 'movies', 'anime', 'fitness', + 'military', 'realestate', 'history']) + text = "On Monday, president Barack Obama will be..." 
+ results = text_tags(text) + max_keys = sorted(results.keys(), key=lambda x:results.get(x), reverse=True) + assert 'politics' in max_keys[:3] + self.assertFalse(set(results.keys()) - expected_keys) + def test_political(self): political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green']) test_string = "Guns don't kill people, people kill people."