From 8d34b05d9b780b8dc377012c59d65d490e927860 Mon Sep 17 00:00:00 2001 From: Slater-Victoroff Date: Thu, 5 Jun 2014 14:22:20 -0400 Subject: [PATCH] Added normalization, update docs. Aidan is now officially an author --- AUTHORS | 1 + CHANGES.txt | 1 + IndicoIo/images/features.py | 12 +++--- IndicoIo/images/fer.py | 4 +- IndicoIo/text/sentiment.py | 6 ++- IndicoIo/utils/__init__.py | 86 +++++++++++++++++++++++++++++++++++++ README | 10 ++--- README.md | 10 ++--- setup.py | 7 +-- tests/test_run.py | 10 +---- 10 files changed, 114 insertions(+), 33 deletions(-) create mode 100644 IndicoIo/utils/__init__.py diff --git a/AUTHORS b/AUTHORS index 4b733af..65ce50e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,2 +1,3 @@ Slater Victoroff Alec Radford +Aidan McLaughlin diff --git a/CHANGES.txt b/CHANGES.txt index 1f50ed3..2a75d3f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1 +1,2 @@ v0.2.8, Tue May 13 -- Added Description, Authors file, changelog. Cleaned up import paths and modified corresponding examples and tests +v0.2.9, Mon Jun 2 -- API now supports normalization, updating documentation to reflect this. 
diff --git a/IndicoIo/images/features.py b/IndicoIo/images/features.py index 0300678..f91aaca 100644 --- a/IndicoIo/images/features.py +++ b/IndicoIo/images/features.py @@ -5,12 +5,10 @@ import numpy as np from IndicoIo import JSON_HEADERS -base_url = lambda c: "http://indico.io/api/features/%s" % c +base_url = lambda c: "http://indico.io/api/%s" % c -def facial_features(face, full_return=False): - data_dict = json.dumps({"datums": face}) - response = requests.post(base_url("facial"), data=data_dict, headers=JSON_HEADERS) +def facial_features(face): + data_dict = json.dumps({"face": face}) + response = requests.post(base_url("facialfeatures"), data=data_dict, headers=JSON_HEADERS) response_dict = json.loads(response.content) - if full_return: - return response_dict - return json.loads(response_dict['feature_vector']) + return response_dict['response'] diff --git a/IndicoIo/images/fer.py b/IndicoIo/images/fer.py index 4d633d9..c974357 100644 --- a/IndicoIo/images/fer.py +++ b/IndicoIo/images/fer.py @@ -4,9 +4,9 @@ import requests import numpy as np from IndicoIo import JSON_HEADERS -base_url = "http://indico.io/api/fer/classify" +base_url = "http://indico.io/api/fer" def fer(face): - data_dict = json.dumps({"image": face}) + data_dict = json.dumps({"face": face}) response = requests.post(base_url, data=data_dict, headers=JSON_HEADERS) return json.loads(response.content) diff --git a/IndicoIo/text/sentiment.py b/IndicoIo/text/sentiment.py index c5682c4..991f156 100644 --- a/IndicoIo/text/sentiment.py +++ b/IndicoIo/text/sentiment.py @@ -1,8 +1,10 @@ import requests import json -from IndicoIo import JSON_HEADERS -base_url = lambda c: "http://indico.io/api/sentiment/%s/classify" % c +from IndicoIo import JSON_HEADERS +from IndicoIo.utils import normalize + +base_url = lambda c: "http://indico.io/api/%s" % c def political(test_text): data_dict = json.dumps({'text': test_text}) diff --git a/IndicoIo/utils/__init__.py b/IndicoIo/utils/__init__.py new file mode 100644 
import functools
import inspect
import numbers

import numpy as np


def _positional_arg_names(fn):
    """Return the positional parameter names of ``fn``.

    Decorated functions are unwrapped through their ``__wrapped__`` chain
    first, so stacked decorators still see the innermost signature.
    """
    while hasattr(fn, '__wrapped__'):
        fn = fn.__wrapped__
    # getargspec was removed in Python 3.11; getfullargspec is absent on 2.x.
    get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    return get_spec(fn).args


class TypeCheck(object):
    """
    Decorator that performs a typecheck on one named argument of a function.

    Example::

        @TypeCheck((list, dict), 'data')
        def process(data, scale=1):
            ...
    """
    def __init__(self, accepted_structures, arg_name):
        """
        accepted_structures -- iterable of types the argument may be an
                               instance of (subclasses are accepted too).
        arg_name            -- name of the argument to enforce the check on.

        Several TypeCheck decorators can be stacked on one function.
        """
        self.accepted_structures = accepted_structures
        accepted = tuple(accepted_structures)
        self.is_accepted = lambda x: isinstance(x, accepted)
        self.arg_name = arg_name

    def __call__(self, fn):
        """
        Wrap ``fn`` so that it raises DataStructureException when the
        checked argument is supplied with a non-accepted type.
        """
        arg_names = _positional_arg_names(fn)

        @functools.wraps(fn)
        def check_args(*args, **kwargs):
            supplied = dict(zip(arg_names, args))
            supplied.update(kwargs)
            # An argument left to its default is not supplied by the caller,
            # so there is nothing to check for it.
            if self.arg_name in supplied:
                value = supplied[self.arg_name]
                if not self.is_accepted(value):
                    raise DataStructureException(
                        fn,
                        value,
                        self.accepted_structures
                    )
            return fn(*args, **kwargs)

        # functools.wraps only records __wrapped__ on Python 3; set it
        # explicitly so stacked decorators can find the real signature.
        check_args.__wrapped__ = fn
        return check_args


class DataStructureException(Exception):
    """
    Raised when a non-accepted datastructure is passed to a checked function.

    Attributes:
        callback  -- name of the function that rejected the value
        structure -- string form of the rejected value's type
        accepted  -- string forms of the accepted types
    """
    def __init__(self, callback, passed_structure, accepted_structures):
        self.callback = getattr(callback, '__name__', repr(callback))
        self.structure = str(type(passed_structure))
        self.accepted = [str(structure) for structure in accepted_structures]
        # Initialise the base class so that ``args`` carries the message.
        super(DataStructureException, self).__init__(str(self))

    def __str__(self):
        return """
        function %s does not accept %s, accepted types are: %s
        """ % (self.callback, self.structure, str(self.accepted))


@TypeCheck((list, dict, np.ndarray), 'array')
def normalize(array, distribution=1, norm_range=(0, 1), **kwargs):
    """
    Rescale numeric data linearly onto ``norm_range``.

    array        -- a list, a numpy array, or a dictionary that holds the
                    data in its values (the keys are preserved).
    distribution -- transformation applied before normalization. A number
                    raises every element to that power (1 leaves the data
                    linear). A string names a numpy function that is called
                    on the data, e.g. 'exp', 'log' or 'sqrt'.
    norm_range   -- (low, high) range the result is mapped onto.
    **kwargs     -- forwarded to the numpy function when ``distribution``
                    is a string; ignored otherwise.

    Returns a numpy array of floats, or a dictionary mapping the original
    keys to the normalized values when a dictionary was passed in.

    Empty input yields an empty result. If every value is identical there
    is no spread to scale, so every element maps to ``norm_range[0]``.

    Raises DataStructureException if ``array`` is not a list, dict or numpy
    array, and AttributeError if ``distribution`` names no numpy function.
    """
    # Note: lists and numpy arrays behave the same from here on.
    dict_array = isinstance(array, dict)

    if dict_array:
        # list(...) is required on Python 3, where keys()/values() are views.
        keys = list(array.keys())
        values = np.array(list(array.values())).astype('float')
    else:
        values = np.array(array).astype('float')

    # Handling various distributions
    if isinstance(distribution, numbers.Real):
        values = np.power(values, distribution)
    else:
        values = getattr(np, distribution)(values, **kwargs)

    low, high = norm_range[0], norm_range[-1]

    if values.size == 0:
        norm_array = values
    else:
        x_min, x_max = np.min(values), np.max(values)
        spread = x_max - x_min
        if spread == 0:
            # Constant data: avoid 0/0 and pin everything to the lower bound.
            norm_array = np.zeros_like(values) + low
        else:
            norm_array = low + (values - x_min) * (high - low) / spread

    if dict_array:
        return dict(zip(keys, norm_array))
    return norm_array
People kill people.") +{u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0} ->>> spam("Buy a new car!!") -{u'Ham': 1.0001470818000544, u'Spam': 1.0003137966593707} +>>> spam("Free car!") +{u'Ham': 0.0, u'Spam': 1.0} >>> posneg("Would not stay in this hotel ever again.") -{u'Positive': 1.0002370406887562, u'Negative': 1.0002938352112363} +{u'Positive': 0.0, u'Negative': 1.0} >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist() diff --git a/README.md b/README.md index 45a3d9a..41c6290 100644 --- a/README.md +++ b/README.md @@ -27,14 +27,14 @@ Examples >>> from IndicoIo import political, spam, posneg, fer, facial_features ->>> political("Guns don't kill people, people kill people") -{u'Libertarian': 1.000094905588269, u'Liberal': 1.000194776694221, u'Green': 1.0000989185747784, u'Conservative': 1.000114308739228} +>>> political("Guns don't kill people. People kill people.") +{u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0} ->>> spam("Buy a new car!!") -{u'Ham': 1.0001470818000544, u'Spam': 1.0003137966593707} +>>> spam("Free car!") +{u'Ham': 0.0, u'Spam': 1.0} >>> posneg("Would not stay in this hotel ever again.") -{u'Positive': 1.0002370406887562, u'Negative': 1.0002938352112363} +{u'Positive': 0.0, u'Negative': 1.0} >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist() diff --git a/setup.py b/setup.py index 3ab3959..771eb91 100644 --- a/setup.py +++ b/setup.py @@ -5,17 +5,18 @@ except ImportError: setup( name = "IndicoIo", - version = '0.2.8', + version = '0.2.10', packages = [ "IndicoIo", "IndicoIo.text", "IndicoIo.images", + "IndicoIo.utils", "tests", ], description = "A Python Wrapper for IndicoIo. 
Use pre-built state of the art machine learning algorithms with a single line of code.", license = "MIT License (See LICENSE)", long_description = open("README").read(), url = "https://github.com/IndicoDataSolutions/IndicoIo-python", - author = "Alec Radford, Slater Victoroff", - author_email = "Alec Radford , Slater Victoroff ", + author = "Alec Radford, Slater Victoroff, Aidan McLaughlin", + author_email = "Alec Radford , Slater Victoroff , Aidan McLaughlin ", ) diff --git a/tests/test_run.py b/tests/test_run.py index 5d4714c..47d624a 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -43,7 +43,7 @@ class FullAPIRun(unittest.TestCase): fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy']) test_face = np.linspace(0,50,56*56).reshape(56,56).tolist() response = fer(test_face) - + self.assertTrue(isinstance(response, dict)) self.assertEqual(fer_set, set(response.keys())) @@ -54,14 +54,6 @@ class FullAPIRun(unittest.TestCase): self.assertTrue(isinstance(response, list)) self.assertEqual(len(response), 48) - def test_full_facial_features(self): - features_set = set(['feature_vector', 'warnings']) - test_face = np.linspace(0,50,56*56).reshape(56,56).tolist() - response = facial_features(test_face, True) - - self.assertEqual(set(response.keys()), features_set) - self.assertEqual(response['warnings'], 'Using a 48x48 array will produce the best results') - if __name__ == "__main__": unittest.main()