Merge pull request #12 from IndicoDataSolutions/nlp-sprint

Updated API to include text tags API
This commit is contained in:
Madison May
2014-11-07 17:24:58 -05:00
11 changed files with 75 additions and 9 deletions
+1
View File
@@ -12,3 +12,4 @@ v0.4.3, Thu Sep 11 -- Added image features api and sphinx compliant documentatio
v0.4.4, Thu Sep 25 -- Added dependencies installation to setup.py
v0.4.5, Thu Sep 25 -- Added interface to local indico server
v0.4.6, Fri Oct 27 -- Updated to point to new indico api servers, cleaner REST API
v0.4.8, Fri Nov 7 -- Updated API interface to include new text tags API
+3
View File
@@ -41,6 +41,9 @@ Examples
>>> sentiment('Really enjoyed the movie.')
{u'Sentiment': 0.8105182526856075}
>>> text_tags("On Monday, president Barack Obama will be...")
{u'fashion': 0.024739582352183764, u'art': 0.008637280256320275, u'energy': 0.013183388999943419, ...}
>>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()
>>> fer(test_face)
+3
View File
@@ -51,6 +51,9 @@ Examples
>>> sentiment('Really enjoyed the movie.')
{u'Sentiment': 0.8105182526856075}
>>> text_tags("On Monday, president Barack Obama will be...")
{u'fashion': 0.024739582352183764, u'art': 0.008637280256320275, u'energy': 0.013183388999943419, ...}
>>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()
>>> fer(test_face)
+2
View File
@@ -8,6 +8,7 @@ Version, version, __version__, VERSION = ('0.4.5',) * 4
from indicoio.text.sentiment import political, posneg
from indicoio.text.sentiment import posneg as sentiment
from indicoio.text.lang import language
from indicoio.text.tagging import text_tags
from indicoio.images.fer import fer
from indicoio.images.features import facial_features
from indicoio.images.features import image_features
@@ -20,3 +21,4 @@ language = partial(language, config.api_root)
fer = partial(fer, config.api_root)
facial_features = partial(facial_features, config.api_root)
image_features = partial(image_features, config.api_root)
text_tags = partial(text_tags, config.api_root)
+2
View File
@@ -6,6 +6,7 @@ JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain'}
from indicoio.text.sentiment import political, posneg
from indicoio.text.sentiment import posneg as sentiment
from indicoio.text.lang import language
from indicoio.text.tagging import text_tags
from indicoio.images.fer import fer
from indicoio.images.features import facial_features
from indicoio.images.features import image_features
@@ -18,3 +19,4 @@ language = partial(language, config.local_api_root)
fer = partial(fer, config.local_api_root)
facial_features = partial(facial_features, config.local_api_root)
image_features = partial(image_features, config.local_api_root)
text_tags = partial(text_tags, config.local_api_root)
-3
View File
@@ -1,6 +1,3 @@
import requests
import json
from indicoio.utils import api_handler
def language(api_root, text):
-3
View File
@@ -1,6 +1,3 @@
import requests
import json
from indicoio import JSON_HEADERS
from indicoio.utils import api_handler
+25
View File
@@ -0,0 +1,25 @@
from indicoio.utils import api_handler
def text_tags(api_root, text):
    """
    Given input text, returns a score for each document category (tag).

    The text is handed to ``api_handler`` together with the ``texttags``
    endpoint URL, and whatever ``api_handler`` returns is returned unchanged.

    Example usage (through the package-level ``indicoio.text_tags``, which is
    already bound to an API root, so only ``text`` is passed):

    .. code-block:: python

       >>> import indicoio
       >>> text = "On Monday, president Barack Obama will be..."
       >>> tags = indicoio.text_tags(text)
       >>> tags
       {u'fashion': 0.024739582352183764, u'art': 0.008637280256320275, u'energy': 0.013183388999943419, ...}
       >>> best = max(tags, key=tags.get)  # name of the highest-scoring category
       >>> "Predicted category '%s' with probability %.4f" % (best, tags[best])

    :param api_root: Base URL of the API. The endpoint name is appended
        directly with no separator added, so it presumably must already end
        with ``/`` -- confirm against ``config``.
    :type api_root: str
    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of category name -> probability pairs
    """
    # NOTE(review): an earlier version of this docstring promised 100
    # categories; the tests shipped with this function enumerate 56 tag
    # names. Confirm the real count before documenting a number.
    return api_handler(text, api_root + "texttags")
+1 -1
View File
@@ -8,7 +8,7 @@ except ImportError:
setup(
name="IndicoIo",
version='0.4.7',
version='0.4.8',
packages=[
"indicoio",
"indicoio.text",
+19 -1
View File
@@ -2,7 +2,7 @@ import unittest
import numpy as np
from indicoio.local import political, sentiment, fer, facial_features, language, image_features
from indicoio.local import political, sentiment, fer, facial_features, language, image_features, text_tags
DIR = os.path.dirname(os.path.realpath(__file__))
@@ -20,6 +20,24 @@ class FullAPIRun(unittest.TestCase):
self.assertTrue(vector.min() < minimum)
self.assertTrue(np.ptp(vector) > span)
def test_text_tags(self):
    """
    The local text tags API should return exactly the expected set of tag
    names and rank 'politics' among the three highest-scoring tags for a
    sentence about the president.
    """
    # NOTE(review): tag names are spelled out in full here, matching the
    # remote API test. The previous list had every "u" dropped
    # ('entreprener', 'startps', 'edcation', 'hmor', 'msic', 'gns',
    # 'bsiness', 'natre'), which looks like a botched removal of u''
    # prefixes -- confirm against the local server's actual output.
    expected_keys = set(['fashion', 'art', 'energy', 'economics', 'entrepreneur',
                         'books', 'politics', 'gardening', 'nba', 'conservative',
                         'technology', 'startups', 'relationships', 'education',
                         'humor', 'psychology', 'bicycling', 'investing', 'travel',
                         'cooking', 'christianity', 'environment', 'religion', 'health',
                         'hockey', 'pets', 'music', 'soccer', 'guns', 'gaming', 'jobs',
                         'business', 'nature', 'food', 'cars', 'photography', 'philosophy',
                         'geek', 'sports', 'baseball', 'news', 'television', 'entertainment',
                         'parenting', 'comics', 'science', 'nfl', 'programming',
                         'personalfinance', 'atheism', 'movies', 'anime', 'fitness',
                         'military', 'realestate', 'history'])

    text = "On Monday, president Barack Obama will be..."
    results = text_tags(text)

    # Tag names ordered from highest to lowest score.
    max_keys = sorted(results, key=results.get, reverse=True)

    # Use TestCase assertions rather than a bare ``assert``: they are not
    # stripped under ``python -O`` and they report through the test runner.
    self.assertTrue('politics' in max_keys[:3])
    # assertEqual shows which tags differ when the sets do not match.
    self.assertEqual(expected_keys, set(results.keys()))
def test_political(self):
political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
test_string = "Guns don't kill people, people kill people."
+19 -1
View File
@@ -4,7 +4,7 @@ import os
import numpy as np
import skimage.io
from indicoio import political, sentiment, fer, facial_features, language, image_features
from indicoio import political, sentiment, fer, facial_features, language, image_features, text_tags
DIR = os.path.dirname(os.path.realpath(__file__))
@@ -22,6 +22,24 @@ class FullAPIRun(unittest.TestCase):
self.assertTrue(vector.min() < minimum)
self.assertTrue(np.ptp(vector) > span)
def test_text_tags(self):
    """
    The remote text tags API should return only known tag names and rank
    'politics' among the three highest-scoring tags for a sentence about
    the president.
    """
    expected_keys = set(['fashion', 'art', 'energy', 'economics', 'entrepreneur',
                         'books', 'politics', 'gardening', 'nba', 'conservative',
                         'technology', 'startups', 'relationships', 'education',
                         'humor', 'psychology', 'bicycling', 'investing', 'travel',
                         'cooking', 'christianity', 'environment', 'religion', 'health',
                         'hockey', 'pets', 'music', 'soccer', 'guns', 'gaming', 'jobs',
                         'business', 'nature', 'food', 'cars', 'photography', 'philosophy',
                         'geek', 'sports', 'baseball', 'news', 'television', 'entertainment',
                         'parenting', 'comics', 'science', 'nfl', 'programming',
                         'personalfinance', 'atheism', 'movies', 'anime', 'fitness',
                         'military', 'realestate', 'history'])

    text = "On Monday, president Barack Obama will be..."
    results = text_tags(text)

    # Tag names ordered from highest to lowest score.
    max_keys = sorted(results, key=results.get, reverse=True)

    # Use a TestCase assertion rather than a bare ``assert``: it is not
    # stripped under ``python -O`` and it reports through the test runner.
    self.assertTrue('politics' in max_keys[:3])

    # Only a subset relationship is required: the API may return fewer tags
    # than listed above, but never a tag that is not listed.
    unexpected = set(results.keys()) - expected_keys
    self.assertFalse(unexpected, "unexpected tags returned: %s" % sorted(unexpected))
def test_political(self):
political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
test_string = "Guns don't kill people, people kill people."