Merge pull request #12 from IndicoDataSolutions/nlp-sprint

Updated API to include text tags API
2026-06-27 16:10:34 +08:00 · 2014-11-07 17:24:58 -05:00
parent ae6ff8b62e 92da96e6c8
commit 2534161617
11 changed files with 75 additions and 9 deletions
@@ -12,3 +12,4 @@ v0.4.3, Thu Sep 11 -- Added image features api and sphinx compliant documentatio
 v0.4.4, Thu Sep 25 -- Added dependencies installation to setup.py
 v0.4.5, Thu Sep 25 -- Added interface to local indico server
 v0.4.6, Fri Oct 27 -- Updated to point to new indico api servers, cleaner REST API
+v0.4.8, Fri Nov 7 -- Updated API interface to include new text tags API
@@ -41,6 +41,9 @@ Examples
 >>> sentiment('Really enjoyed the movie.')
 {u'Sentiment': 0.8105182526856075}

+>>> text_tags("On Monday, president Barack Obama will be...")
+{u'fashion': 0.024739582352183764, u'art': 0.008637280256320275, u'energy': 0.013183388999943419, ...}
+
 >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()

 >>> fer(test_face)
@@ -51,6 +51,9 @@ Examples
 >>> sentiment('Really enjoyed the movie.')
 {u'Sentiment': 0.8105182526856075}

+>>> text_tags("On Monday, president Barack Obama will be...")
+{u'fashion': 0.024739582352183764, u'art': 0.008637280256320275, u'energy': 0.013183388999943419, ...}
+
 >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()

 >>> fer(test_face)
@@ -8,6 +8,7 @@ Version, version, __version__, VERSION = ('0.4.5',) * 4
 from indicoio.text.sentiment import political, posneg
 from indicoio.text.sentiment import posneg as sentiment
 from indicoio.text.lang import language
+from indicoio.text.tagging import text_tags
 from indicoio.images.fer import fer
 from indicoio.images.features import facial_features
 from indicoio.images.features import image_features
@@ -20,3 +21,4 @@ language = partial(language, config.api_root)
 fer = partial(fer, config.api_root)
 facial_features = partial(facial_features, config.api_root)
 image_features = partial(image_features, config.api_root)
+text_tags = partial(text_tags, config.api_root)
@@ -6,6 +6,7 @@ JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain'}
 from indicoio.text.sentiment import political, posneg
 from indicoio.text.sentiment import posneg as sentiment
 from indicoio.text.lang import language
+from indicoio.text.tagging import text_tags
 from indicoio.images.fer import fer
 from indicoio.images.features import facial_features
 from indicoio.images.features import image_features
@@ -18,3 +19,4 @@ language = partial(language, config.local_api_root)
 fer = partial(fer, config.local_api_root)
 facial_features = partial(facial_features, config.local_api_root)
 image_features = partial(image_features, config.local_api_root)
+text_tags = partial(text_tags, config.local_api_root)
@@ -1,6 +1,3 @@
-import requests
-import json
-
 from indicoio.utils import api_handler

 def language(api_root, text):
@@ -1,6 +1,3 @@
-import requests
-import json
-
 from indicoio import JSON_HEADERS
 from indicoio.utils import api_handler

@@ -0,0 +1,25 @@
+from indicoio.utils import api_handler
+
+def text_tags(api_root, text):
+    """
+    Given input text, returns a probability distribution over the supported document categories.
+
+    Example usage:
+
+    .. code-block:: python
+
+       >>> import indicoio
+       >>> import numpy as np
+       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
+       >>> possible = indicoio.text_tags(text)
+       >>> category = possible.keys()[np.argmax(possible.values())]
+       >>> probability = np.max(possible.values())
+       >>> "Predicted category '%s' with probability %.4f"%(category,probability)
+       u"Predicted category 'Weather' with probability 0.8548"
+
+    :param text: The text to be analyzed.
+    :type text: str or unicode
+    :rtype: Dictionary of class probability pairs
+    """
+    
+    return api_handler(text, api_root + "texttags")
@@ -8,7 +8,7 @@ except ImportError:

 setup(
    name="IndicoIo",
-    version='0.4.7',
+    version='0.4.8',
    packages=[
        "indicoio",
        "indicoio.text",
@@ -2,7 +2,7 @@ import unittest

 import numpy as np

-from indicoio.local import political, sentiment, fer, facial_features, language, image_features
+from indicoio.local import political, sentiment, fer, facial_features, language, image_features, text_tags

 DIR = os.path.dirname(os.path.realpath(__file__))

@@ -20,6 +20,24 @@ class FullAPIRun(unittest.TestCase):
        self.assertTrue(vector.min() < minimum)
        self.assertTrue(np.ptp(vector) > span)

+    def test_text_tags(self):
+        expected_keys = set(['fashion', 'art', 'energy', 'economics', 'entreprener', 
+                             'books', 'politics', 'gardening', 'nba', 'conservative', 
+                             'technology', 'startps', 'relationships', 'edcation',
+                             'hmor', 'psychology', 'bicycling', 'investing', 'travel',
+                             'cooking', 'christianity', 'environment', 'religion', 'health', 
+                             'hockey', 'pets', 'msic', 'soccer', 'gns', 'gaming', 'jobs',
+                             'bsiness', 'natre', 'food', 'cars', 'photography', 'philosophy',
+                             'geek', 'sports', 'baseball', 'news', 'television', 'entertainment',
+                             'parenting', 'comics', 'science', 'nfl','programming',
+                             'personalfinance', 'atheism', 'movies', 'anime', 'fitness',
+                             'military', 'realestate', 'history'])
+        text = "On Monday, president Barack Obama will be..."
+        results = text_tags(text)
+        max_keys = sorted(results.keys(), key=lambda x:results.get(x), reverse=True)
+        assert 'politics' in max_keys[:3]
+        self.assertTrue(expected_keys == set(results.keys()))
+
    def test_political(self):
        political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
        test_string = "Guns don't kill people, people kill people."
@@ -4,7 +4,7 @@ import os
 import numpy as np
 import skimage.io

-from indicoio import political, sentiment, fer, facial_features, language, image_features
+from indicoio import political, sentiment, fer, facial_features, language, image_features, text_tags

 DIR = os.path.dirname(os.path.realpath(__file__))

@@ -22,6 +22,24 @@ class FullAPIRun(unittest.TestCase):
        self.assertTrue(vector.min() < minimum)
        self.assertTrue(np.ptp(vector) > span)

+    def test_text_tags(self):
+        expected_keys = set(['fashion', 'art', 'energy', 'economics', 'entrepreneur', 
+                             'books', 'politics', 'gardening', 'nba', 'conservative', 
+                             'technology', 'startups', 'relationships', 'education',
+                             'humor', 'psychology', 'bicycling', 'investing', 'travel',
+                             'cooking', 'christianity', 'environment', 'religion', 'health', 
+                             'hockey', 'pets', 'music', 'soccer', 'guns', 'gaming', 'jobs',
+                             'business', 'nature', 'food', 'cars', 'photography', 'philosophy',
+                             'geek', 'sports', 'baseball', 'news', 'television', 'entertainment',
+                             'parenting', 'comics', 'science', 'nfl','programming',
+                             'personalfinance', 'atheism', 'movies', 'anime', 'fitness',
+                             'military', 'realestate', 'history'])
+        text = "On Monday, president Barack Obama will be..."
+        results = text_tags(text)
+        max_keys = sorted(results.keys(), key=lambda x:results.get(x), reverse=True)
+        assert 'politics' in max_keys[:3]
+        self.assertFalse(set(results.keys()) - expected_keys)
+
    def test_political(self):
        political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
        test_string = "Guns don't kill people, people kill people."