From 8d34b05d9b780b8dc377012c59d65d490e927860 Mon Sep 17 00:00:00 2001
From: Slater-Victoroff <slater.r.victoroff@gmail.com>
Date: Thu, 5 Jun 2014 14:22:20 -0400
Subject: [PATCH] Added normalization, update docs. Aidan is now officially an
 author

---
 AUTHORS                     |  1 +
 CHANGES.txt                 |  1 +
 IndicoIo/images/features.py | 12 +++---
 IndicoIo/images/fer.py      |  4 +-
 IndicoIo/text/sentiment.py  |  6 ++-
 IndicoIo/utils/__init__.py  | 86 +++++++++++++++++++++++++++++++++++++
 README                      | 10 ++---
 README.md                   | 10 ++---
 setup.py                    |  7 +--
 tests/test_run.py           | 10 +----
 10 files changed, 114 insertions(+), 33 deletions(-)
 create mode 100644 IndicoIo/utils/__init__.py

diff --git a/AUTHORS b/AUTHORS
index 4b733af..65ce50e 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,2 +1,3 @@
 Slater Victoroff <slater@indicodatasolutions.com>
 Alec Radford <alec@indicodatasolutions.com>
+Aidan McLaughlin <aidan@indicodatasolutions.com>
diff --git a/CHANGES.txt b/CHANGES.txt
index 1f50ed3..2a75d3f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1 +1,2 @@
 v0.2.8, Tue May 13 -- Added Description, Authors file, changelog. Cleaned up import paths and modified corresponding examples and tests
+v0.2.9, Mon Jun 2 -- API now supports normalization, updating documentation to reflect this.
diff --git a/IndicoIo/images/features.py b/IndicoIo/images/features.py
index 0300678..f91aaca 100644
--- a/IndicoIo/images/features.py
+++ b/IndicoIo/images/features.py
@@ -5,12 +5,10 @@ import numpy as np
 
 from IndicoIo import JSON_HEADERS
 
-base_url = lambda c: "http://indico.io/api/features/%s" % c
+base_url = lambda c: "http://indico.io/api/%s" % c
 
-def facial_features(face, full_return=False):
-    data_dict = json.dumps({"datums": face})
-    response = requests.post(base_url("facial"), data=data_dict, headers=JSON_HEADERS)
+def facial_features(face):
+    data_dict = json.dumps({"face": face})
+    response = requests.post(base_url("facialfeatures"), data=data_dict, headers=JSON_HEADERS)
     response_dict = json.loads(response.content)
-    if full_return:
-        return response_dict
-    return json.loads(response_dict['feature_vector'])
+    return response_dict['response']
diff --git a/IndicoIo/images/fer.py b/IndicoIo/images/fer.py
index 4d633d9..c974357 100644
--- a/IndicoIo/images/fer.py
+++ b/IndicoIo/images/fer.py
@@ -4,9 +4,9 @@ import requests
 import numpy as np
 from IndicoIo import JSON_HEADERS
 
-base_url = "http://indico.io/api/fer/classify"
+base_url = "http://indico.io/api/fer"
 
 def fer(face):
-    data_dict = json.dumps({"image": face})
+    data_dict = json.dumps({"face": face})
     response = requests.post(base_url, data=data_dict, headers=JSON_HEADERS)
     return json.loads(response.content)
diff --git a/IndicoIo/text/sentiment.py b/IndicoIo/text/sentiment.py
index c5682c4..991f156 100644
--- a/IndicoIo/text/sentiment.py
+++ b/IndicoIo/text/sentiment.py
@@ -1,8 +1,10 @@
 import requests
 import json
-from IndicoIo import JSON_HEADERS
 
-base_url = lambda c: "http://indico.io/api/sentiment/%s/classify" % c
+from IndicoIo import JSON_HEADERS
+from IndicoIo.utils import normalize
+
+base_url = lambda c: "http://indico.io/api/%s" % c
 
 def political(test_text):
     data_dict = json.dumps({'text': test_text})
diff --git a/IndicoIo/utils/__init__.py b/IndicoIo/utils/__init__.py
new file mode 100644
index 0000000..7272f87
--- /dev/null
+++ b/IndicoIo/utils/__init__.py
@@ -0,0 +1,86 @@
+import inspect
+import numpy as np
+
+class TypeCheck(object):
+    """
+    Decorator that type-checks one named argument of the wrapped function
+    """
+    def __init__(self, accepted_structures, arg_name):
+        """
+        accepted_structures: the types the argument may be an instance of
+        (subclasses count). arg_name: name of the wrapped function's
+        parameter to enforce the check on.
+        """
+        self.accepted_structures = accepted_structures
+        self.is_accepted = lambda x: isinstance(x, tuple(accepted_structures))
+        self.arg_name = arg_name
+
+    def __call__(self, fn):
+        # NOTE: when stacked on another TypeCheck, only keyword arguments are
+        # visible, since the wrapper below has no named parameters to inspect
+        def check_args(*args, **kwargs):
+            # Bind positional args to fn's parameter names; kwargs win on clash
+            full_args = dict(zip(inspect.getargspec(fn).args, args), **kwargs)
+            if not self.is_accepted(full_args[self.arg_name]):
+                raise DataStructureException(
+                    fn, full_args[self.arg_name], self.accepted_structures)
+            return fn(*args, **kwargs)
+        return check_args
+
+
+class DataStructureException(Exception):
+    """
+    Raised by TypeCheck when an argument's type is not an accepted one
+    """
+    def __init__(self, callback, passed_structure, accepted_structures):
+        self.callback = callback.__name__  # only the function's name is kept
+        self.structure = str(type(passed_structure))  # offending type, as text
+        self.accepted = [str(structure) for structure in accepted_structures]
+
+    def __str__(self):  # message keeps the literal's newlines and indentation
+        return """
+        function %s does not accept %s, accepted types are: %s
+        """ % (self.callback, self.structure, str(self.accepted))
+
+
+@TypeCheck((list, dict, np.ndarray), 'array')
+def normalize(array, distribution=1, norm_range=(0, 1), **kwargs):
+    """
+    Rescale the values of `array` so that they span `norm_range`.
+
+    array: a list, a numpy array, or a dict holding the data in its
+        values. A dict input returns a dict with the same keys; any
+        other input returns a numpy array.
+    distribution: transform applied before rescaling. A number raises
+        every value to that power; a string names a numpy function
+        (e.g. 'exp' or 'log') called with the array and **kwargs.
+    norm_range: (low, high) bounds that the rescaled data should span.
+        When all values are equal, every output is norm_range[0].
+    """
+    # Handling dictionary array input
+    # Note: lists and numpy arrays behave the same in this program
+    dict_array = isinstance(array, dict)
+
+    if dict_array:
+        # list(): numpy needs real sequences, not Python 3 dict views
+        keys = list(array.keys())
+        array = np.array(list(array.values())).astype('float')
+    else:  # Decorator errors if this isn't a list or a numpy array
+        array = np.array(array).astype('float')
+
+    # Handling various distributions
+    if type(distribution) in [float, int]:
+        array = np.power(array, distribution)
+    else:
+        array = getattr(np, distribution)(array, **kwargs)
+
+    # Prep for normalization
+    x_max, x_min = (np.max(array), np.min(array))
+    low, high = norm_range[0], norm_range[-1]
+    # All values equal -> zero span; dividing by 1 maps them all to `low`
+    span = (x_max - x_min) or 1.0
+    norm_array = low + (array - x_min) * (high - low) / span
+
+    if dict_array:
+        return dict(zip(keys, norm_array))
+    return norm_array
diff --git a/README b/README
index 45a3d9a..41c6290 100644
--- a/README
+++ b/README
@@ -27,14 +27,14 @@ Examples
 
 >>> from IndicoIo import political, spam, posneg, fer, facial_features
 
->>> political("Guns don't kill people, people kill people")
-{u'Libertarian': 1.000094905588269, u'Liberal': 1.000194776694221, u'Green': 1.0000989185747784, u'Conservative': 1.000114308739228}
+>>> political("Guns don't kill people. People kill people.")
+{u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0}
 
->>> spam("Buy a new car!!")
-{u'Ham': 1.0001470818000544, u'Spam': 1.0003137966593707}
+>>> spam("Free car!")
+{u'Ham': 0.0, u'Spam': 1.0}
 
 >>> posneg("Would not stay in this hotel ever again.")
-{u'Positive': 1.0002370406887562, u'Negative': 1.0002938352112363}
+{u'Positive': 0.0, u'Negative': 1.0}
 
 >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()
 
diff --git a/README.md b/README.md
index 45a3d9a..41c6290 100644
--- a/README.md
+++ b/README.md
@@ -27,14 +27,14 @@ Examples
 
 >>> from IndicoIo import political, spam, posneg, fer, facial_features
 
->>> political("Guns don't kill people, people kill people")
-{u'Libertarian': 1.000094905588269, u'Liberal': 1.000194776694221, u'Green': 1.0000989185747784, u'Conservative': 1.000114308739228}
+>>> political("Guns don't kill people. People kill people.")
+{u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0}
 
->>> spam("Buy a new car!!")
-{u'Ham': 1.0001470818000544, u'Spam': 1.0003137966593707}
+>>> spam("Free car!")
+{u'Ham': 0.0, u'Spam': 1.0}
 
 >>> posneg("Would not stay in this hotel ever again.")
-{u'Positive': 1.0002370406887562, u'Negative': 1.0002938352112363}
+{u'Positive': 0.0, u'Negative': 1.0}
 
 >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()
 
diff --git a/setup.py b/setup.py
index 3ab3959..771eb91 100644
--- a/setup.py
+++ b/setup.py
@@ -5,17 +5,18 @@ except ImportError:
 
 setup(
 	name = "IndicoIo",
-	version = '0.2.8',
+	version = '0.2.10',
 	packages = [
             "IndicoIo",
             "IndicoIo.text",
             "IndicoIo.images",
+            "IndicoIo.utils",
             "tests",
         ],
         description = "A Python Wrapper for IndicoIo. Use pre-built state of the art machine learning algorithms with a single line of code.",
 	license = "MIT License (See LICENSE)",
 	long_description = open("README").read(),
 	url = "https://github.com/IndicoDataSolutions/IndicoIo-python",
-	author = "Alec Radford, Slater Victoroff",
-	author_email = "Alec Radford <alec@indicodatasolutions.com>, Slater Victoroff <slater@indicodatasolutions.com>",
+	author = "Alec Radford, Slater Victoroff, Aidan McLaughlin",
+	author_email = "Alec Radford <alec@indicodatasolutions.com>, Slater Victoroff <slater@indicodatasolutions.com>, Aidan McLaughlin <aidan@indicodatasolutions.com>",
 )
diff --git a/tests/test_run.py b/tests/test_run.py
index 5d4714c..47d624a 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -43,7 +43,7 @@ class FullAPIRun(unittest.TestCase):
         fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy'])
         test_face = np.linspace(0,50,56*56).reshape(56,56).tolist()
         response = fer(test_face)
-        
+
         self.assertTrue(isinstance(response, dict))
         self.assertEqual(fer_set, set(response.keys()))
 
@@ -54,14 +54,6 @@ class FullAPIRun(unittest.TestCase):
         self.assertTrue(isinstance(response, list))
         self.assertEqual(len(response), 48)
 
-    def test_full_facial_features(self):
-        features_set = set(['feature_vector', 'warnings'])
-        test_face = np.linspace(0,50,56*56).reshape(56,56).tolist()
-        response = facial_features(test_face, True)
-        
-        self.assertEqual(set(response.keys()), features_set)
-        self.assertEqual(response['warnings'], 'Using a 48x48 array will produce the best results')
-
 
 if __name__ == "__main__":
     unittest.main()