Merge pull request #18 from IndicoDataSolutions/texttagging-batch

Batch image preprocessing and text tagging tweak
This commit is contained in:
Madison May
2014-12-20 17:22:27 -05:00
7 changed files with 76 additions and 10 deletions
+4 -3
View File
@@ -13,6 +13,7 @@ v0.4.4, Thu Sep 25 -- Added dependencies installation to setup.py
v0.4.5, Thu Sep 25 -- Added interface to local indico server
v0.4.6, Fri Oct 27 -- Updated to point to new indico api servers, cleaner REST API
v0.4.8, Fri Nov 7 -- Updated API interface to include new text tags API
v0.4.11, Wed Dec 18 -- Updated tests for text tags
v0.4.12, Thu Dec 19 -- Added batch support interface
v0.4.13, Thu Dec 19 -- Added optional arguments to text tags API
v0.4.11, Thu Dec 18 -- Updated tests for text tags
v0.4.12, Fri Dec 19 -- Added batch support interface
v0.4.13, Fri Dec 19 -- Added optional arguments to text tags API
v0.4.14, Sat Dec 20 -- Fix for batch image features preprocessing, increased test coverage
+1 -1
View File
@@ -3,7 +3,7 @@ import indicoio.config as config
JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain'}
Version, version, __version__, VERSION = ('0.4.13',) * 4
Version, version, __version__, VERSION = ('0.4.14',) * 4
from indicoio.text.sentiment import political, posneg
from indicoio.text.sentiment import posneg as sentiment
+1 -1
View File
@@ -59,5 +59,5 @@ def image_features(api_root, image, batch=False, auth=None, **kwargs):
:type image: numpy.ndarray
:rtype: List containing features
"""
image = image_preprocess(image)
image = image_preprocess(image, batch=batch)
return api_handler(image, api_root + "imagefeatures", batch=batch, auth=auth, **kwargs)
+1 -1
View File
@@ -22,4 +22,4 @@ def text_tags(api_root, text, batch=False, auth=None, **kwargs):
:rtype: Dictionary of class probability pairs
"""
return api_handler(text, api_root + "texttags", batch=batch, auth=None, **kwargs)
return api_handler(text, api_root + "texttags", batch=batch, auth=auth, **kwargs)
+6 -3
View File
@@ -26,8 +26,9 @@ def api_handler(arg, url, batch=False, auth=None, **kwargs):
json_data = json.dumps(data)
if batch:
url += "/batch"
if not auth:
auth = auth_query()
# if not auth:
# auth = auth_query()
print auth
response = requests.post(url, data=json_data, headers=JSON_HEADERS, auth=auth).json()
results = response.get('results', False)
if results is False:
@@ -119,11 +120,13 @@ def normalize(array, distribution=1, norm_range=(0, 1), **kwargs):
return dict(zip(keys, norm_array))
return norm_array
def image_preprocess(image):
def image_preprocess(image, batch=False):
"""
Takes an image and prepares it for sending to the api including
resizing and image data/structure standardizing.
"""
if batch:
return [image_preprocess(img, batch=False) for img in image]
if isinstance(image,list):
image = np.asarray(image)
if type(image).__module__ != np.__name__:
+1 -1
View File
@@ -8,7 +8,7 @@ except ImportError:
setup(
name="IndicoIo",
version='0.4.13',
version='0.4.14',
packages=[
"indicoio",
"indicoio.text",
+62
View File
@@ -3,11 +3,73 @@ import os
import numpy as np
import skimage.io
from nose.plugins.skip import Skip, SkipTest
from indicoio import political, sentiment, fer, facial_features, language, image_features, text_tags
from indicoio import batch_political, batch_sentiment, batch_fer, batch_facial_features
from indicoio import batch_language, batch_image_features, batch_text_tags
DIR = os.path.dirname(os.path.realpath(__file__))
class BatchAPIRun(unittest.TestCase):
    """Smoke tests for the ``batch_*`` entry points of ``indicoio``.

    Every test sends a small list of inputs to one batch function together
    with the credentials read in ``setUp`` and checks the coarse shape of
    the response (a list, and where asserted, the type/length/value of its
    first element).
    """

    def setUp(self):
        """Read credentials from the environment; skip when they are missing.

        ``INDICO_USERNAME`` and ``INDICO_PASSWORD`` are combined into the
        ``(username, password)`` tuple passed as ``auth`` to each batch call.
        """
        self.username = os.getenv("INDICO_USERNAME")
        self.password = os.getenv("INDICO_PASSWORD")
        self.auth = (self.username, self.password)
        if not self.username or not self.password:
            # Skip (rather than fail) so the suite stays green on machines
            # that have no credentials configured.
            self.skipTest(
                "INDICO_USERNAME and INDICO_PASSWORD must both be set"
            )

    def _assert_image_features(self, test_data):
        """Check that ``batch_image_features`` yields 2048 features per image."""
        response = batch_image_features(test_data, auth=self.auth)
        self.assertIsInstance(response, list)
        self.assertIsInstance(response[0], list)
        self.assertEqual(len(response[0]), 2048)

    def test_batch_texttags(self):
        """``batch_text_tags`` returns a list for a one-document batch."""
        test_data = ["On Monday, president Barack Obama will be..."]
        response = batch_text_tags(test_data, auth=self.auth)
        self.assertIsInstance(response, list)

    def test_batch_posneg(self):
        """``batch_sentiment`` scores the negative sample below 0.5."""
        test_data = ['Worst song ever', 'Best song ever']
        response = batch_sentiment(test_data, auth=self.auth)
        self.assertIsInstance(response, list)
        self.assertLess(response[0], 0.5)

    def test_batch_political(self):
        """``batch_political`` returns a list for a one-document batch."""
        test_data = ["Guns don't kill people, people kill people."]
        response = batch_political(test_data, auth=self.auth)
        self.assertIsInstance(response, list)

    def test_batch_fer(self):
        """``batch_fer`` returns one dict per 48x48 input image."""
        test_data = [np.random.rand(48, 48).tolist()]
        response = batch_fer(test_data, auth=self.auth)
        self.assertIsInstance(response, list)
        self.assertIsInstance(response[0], dict)

    def test_batch_facial_features(self):
        """``batch_facial_features`` returns a 48-element list per image."""
        test_data = [np.random.rand(48, 48).tolist()]
        response = batch_facial_features(test_data, auth=self.auth)
        self.assertIsInstance(response, list)
        self.assertIsInstance(response[0], list)
        self.assertEqual(len(response[0]), 48)

    def test_batch_image_features_greyscale(self):
        """A single-channel 64x64 image is accepted in batch mode."""
        self._assert_image_features([np.random.rand(64, 64).tolist()])

    def test_batch_image_features_rgb(self):
        """A three-channel 64x64 image is accepted in batch mode."""
        self._assert_image_features([np.random.rand(64, 64, 3).tolist()])

    def test_batch_language(self):
        """``batch_language`` gives 'English' a probability above 0.25."""
        test_data = ['clearly an english sentence']
        response = batch_language(test_data, auth=self.auth)
        self.assertIsInstance(response, list)
        self.assertGreater(response[0]['English'], 0.25)
class FullAPIRun(unittest.TestCase):