Added normalization, updated docs. Aidan is now officially an author.

This commit is contained in:
Slater-Victoroff
2014-06-05 14:22:20 -04:00
parent 97ab57fbfe
commit 8d34b05d9b
10 changed files with 114 additions and 33 deletions
+1
View File
@@ -1,2 +1,3 @@
Slater Victoroff <slater@indicodatasolutions.com>
Alec Radford <alec@indicodatasolutions.com>
Aidan McLaughlin <aidan@indicodatasolutions.com>
+1
View File
@@ -1 +1,2 @@
v0.2.8, Tue May 13 -- Added Description, Authors file, changelog. Cleaned up import paths and modified corresponding examples and tests
v0.2.9, Mon Jun 2 -- API now supports normalization, updating documentation to reflect this.
+5 -7
View File
@@ -5,12 +5,10 @@ import numpy as np
from IndicoIo import JSON_HEADERS
base_url = lambda c: "http://indico.io/api/features/%s" % c
base_url = lambda c: "http://indico.io/api/%s" % c
def facial_features(face, full_return=False):
data_dict = json.dumps({"datums": face})
response = requests.post(base_url("facial"), data=data_dict, headers=JSON_HEADERS)
def facial_features(face):
data_dict = json.dumps({"face": face})
response = requests.post(base_url("facialfeatures"), data=data_dict, headers=JSON_HEADERS)
response_dict = json.loads(response.content)
if full_return:
return response_dict
return json.loads(response_dict['feature_vector'])
return response_dict['response']
+2 -2
View File
@@ -4,9 +4,9 @@ import requests
import numpy as np
from IndicoIo import JSON_HEADERS
base_url = "http://indico.io/api/fer/classify"
base_url = "http://indico.io/api/fer"
def fer(face):
data_dict = json.dumps({"image": face})
data_dict = json.dumps({"face": face})
response = requests.post(base_url, data=data_dict, headers=JSON_HEADERS)
return json.loads(response.content)
+4 -2
View File
@@ -1,8 +1,10 @@
import requests
import json
from IndicoIo import JSON_HEADERS
base_url = lambda c: "http://indico.io/api/sentiment/%s/classify" % c
from IndicoIo import JSON_HEADERS
from IndicoIo.utils import normalize
base_url = lambda c: "http://indico.io/api/%s" % c
def political(test_text):
data_dict = json.dumps({'text': test_text})
+86
View File
@@ -0,0 +1,86 @@
import inspect
import numpy as np
class TypeCheck(object):
    """
    Decorator that performs a typecheck on one named argument of a function.

    Usage::

        @TypeCheck((list, dict), 'array')
        def fn(array, ...): ...

    If the named argument is supplied (positionally or by keyword) and is
    not an instance of one of the accepted types, a DataStructureException
    is raised before the wrapped function runs.
    """

    def __init__(self, accepted_structures, arg_name):
        """
        When initialized, include the collection of accepted datatypes and
        the arg_name to enforce the check on. Can be daisy-chained.
        """
        self.accepted_structures = accepted_structures
        # isinstance needs a tuple; it also accepts subclasses of the listed
        # types, which an exact `type(x) in ...` comparison would reject.
        accepted = tuple(accepted_structures)
        self.is_accepted = lambda x: isinstance(x, accepted)
        self.arg_name = arg_name

    def __call__(self, fn):
        """
        Wrap fn so that the configured argument is type-checked on each call.
        """
        import functools

        # When decorators are stacked, fn is another wrapper whose own
        # signature is (*args, **kwargs); walk back to the innermost function
        # so positional arguments can still be matched to their names.
        target = fn
        while hasattr(target, '__wrapped__'):
            target = target.__wrapped__

        # getargspec no longer exists on recent Python 3 releases; prefer
        # getfullargspec when the interpreter provides it.
        get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
        arg_names = get_spec(target).args

        @functools.wraps(fn)
        def check_args(*args, **kwargs):
            full_args = dict(zip(arg_names, args))
            full_args.update(kwargs)
            # An omitted argument falls back to fn's own default (or fn raises
            # its usual TypeError); there is nothing caller-supplied to check.
            if self.arg_name in full_args:
                value = full_args[self.arg_name]
                if not self.is_accepted(value):
                    raise DataStructureException(
                        fn,
                        value,
                        self.accepted_structures
                    )
            return fn(*args, **kwargs)

        # Python 2's functools.wraps does not record this; set it explicitly
        # so an outer TypeCheck can find the real signature.
        check_args.__wrapped__ = fn
        return check_args
class DataStructureException(Exception):
    """
    Raised when a non-accepted datastructure is passed to a checked function.

    Attributes:
        callback: name of the function that rejected the argument
        structure: string form of the type of the rejected value
        accepted: list of string forms of the accepted types
    """

    def __init__(self, callback, passed_structure, accepted_structures):
        # Forward the original arguments so that e.args is populated and the
        # exception can be re-created from them (copy / pickle).
        super(DataStructureException, self).__init__(
            callback, passed_structure, accepted_structures
        )
        # Not every callable carries a __name__; fall back to its repr
        # rather than failing while building the error itself.
        self.callback = getattr(callback, '__name__', repr(callback))
        self.structure = str(type(passed_structure))
        self.accepted = [str(structure) for structure in accepted_structures]

    def __str__(self):
        return "\nfunction %s does not accept %s, accepted types are: %s\n" % (
            self.callback, self.structure, str(self.accepted)
        )
@TypeCheck((list, dict, np.ndarray), 'array')
def normalize(array, distribution=1, norm_range=(0, 1), **kwargs):
    """
    Rescale the values of `array` linearly onto `norm_range`.

    array: a list, a numpy array, or a dictionary that contains the data
        in its values. All values are converted to floats.
    distribution: transformation applied before normalization. A number
        raises every value to that power (the default of 1 leaves the data
        unchanged, i.e. linear). A string is looked up as a function on
        numpy and called on the data, e.g. 'exp' or 'log'; any extra
        keyword arguments are passed through to that function.
    norm_range: (low, high) range across which the data is normalized.

    Returns a numpy array of the normalized values, or, for dictionary
    input, a dictionary mapping the original keys to their normalized
    values. Empty input yields an empty result, and input whose values are
    all equal maps every value to the low end of `norm_range`.
    """
    import numbers

    # Handling dictionary array input
    # Note: lists and numpy arrays behave the same in this program
    dict_array = isinstance(array, dict)
    if dict_array:
        # Materialize the views: on Python 3 np.array() of a dict view
        # produces a 0-d object array instead of the values.
        keys = list(array.keys())
        array = np.array(list(array.values())).astype('float')
    else:  # Decorator errors if this isn't a list or a numpy array
        array = np.array(array).astype('float')

    # Handling various distributions
    # numbers.Number also covers numpy scalar types, not only float/int
    if isinstance(distribution, numbers.Number):
        array = np.power(array, distribution)
    else:
        array = getattr(np, distribution)(array, **kwargs)

    low, high = norm_range[0], norm_range[-1]
    if array.size == 0:
        # Nothing to scale; np.max/np.min would raise on empty input
        norm_array = array
    else:
        x_max, x_min = (np.max(array), np.min(array))
        span = x_max - x_min
        if span == 0:
            # All values identical: avoid dividing by zero
            norm_array = np.zeros(array.shape) + low
        else:
            # Plain array arithmetic; same operation order as the
            # per-element formula, without a Python-level loop
            norm_array = low + (array - x_min) * (high - low) / span

    if dict_array:
        return dict(zip(keys, norm_array))
    return norm_array
+5 -5
View File
@@ -27,14 +27,14 @@ Examples
>>> from IndicoIo import political, spam, posneg, fer, facial_features
>>> political("Guns don't kill people, people kill people")
{u'Libertarian': 1.000094905588269, u'Liberal': 1.000194776694221, u'Green': 1.0000989185747784, u'Conservative': 1.000114308739228}
>>> political("Guns don't kill people. People kill people.")
{u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0}
>>> spam("Buy a new car!!")
{u'Ham': 1.0001470818000544, u'Spam': 1.0003137966593707}
>>> spam("Free car!")
{u'Ham': 0.0, u'Spam': 1.0}
>>> posneg("Would not stay in this hotel ever again.")
{u'Positive': 1.0002370406887562, u'Negative': 1.0002938352112363}
{u'Positive': 0.0, u'Negative': 1.0}
>>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()
+5 -5
View File
@@ -27,14 +27,14 @@ Examples
>>> from IndicoIo import political, spam, posneg, fer, facial_features
>>> political("Guns don't kill people, people kill people")
{u'Libertarian': 1.000094905588269, u'Liberal': 1.000194776694221, u'Green': 1.0000989185747784, u'Conservative': 1.000114308739228}
>>> political("Guns don't kill people. People kill people.")
{u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0}
>>> spam("Buy a new car!!")
{u'Ham': 1.0001470818000544, u'Spam': 1.0003137966593707}
>>> spam("Free car!")
{u'Ham': 0.0, u'Spam': 1.0}
>>> posneg("Would not stay in this hotel ever again.")
{u'Positive': 1.0002370406887562, u'Negative': 1.0002938352112363}
{u'Positive': 0.0, u'Negative': 1.0}
>>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()
+4 -3
View File
@@ -5,17 +5,18 @@ except ImportError:
setup(
name = "IndicoIo",
version = '0.2.8',
version = '0.2.10',
packages = [
"IndicoIo",
"IndicoIo.text",
"IndicoIo.images",
"IndicoIo.utils",
"tests",
],
description = "A Python Wrapper for IndicoIo. Use pre-built state of the art machine learning algorithms with a single line of code.",
license = "MIT License (See LICENSE)",
long_description = open("README").read(),
url = "https://github.com/IndicoDataSolutions/IndicoIo-python",
author = "Alec Radford, Slater Victoroff",
author_email = "Alec Radford <alec@indicodatasolutions.com>, Slater Victoroff <slater@indicodatasolutions.com>",
author = "Alec Radford, Slater Victoroff, Aidan McLaughlin",
author_email = "Alec Radford <alec@indicodatasolutions.com>, Slater Victoroff <slater@indicodatasolutions.com>, Aidan McLaughlin <aidan@indicodatasolutions.com>",
)
+1 -9
View File
@@ -43,7 +43,7 @@ class FullAPIRun(unittest.TestCase):
fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy'])
test_face = np.linspace(0,50,56*56).reshape(56,56).tolist()
response = fer(test_face)
self.assertTrue(isinstance(response, dict))
self.assertEqual(fer_set, set(response.keys()))
@@ -54,14 +54,6 @@ class FullAPIRun(unittest.TestCase):
self.assertTrue(isinstance(response, list))
self.assertEqual(len(response), 48)
def test_full_facial_features(self):
features_set = set(['feature_vector', 'warnings'])
test_face = np.linspace(0,50,56*56).reshape(56,56).tolist()
response = facial_features(test_face, True)
self.assertEqual(set(response.keys()), features_set)
self.assertEqual(response['warnings'], 'Using a 48x48 array will produce the best results')
if __name__ == "__main__":
unittest.main()