Added normalization, update docs. Aidan is now officially an author

2026-06-27 16:10:34 +08:00 · 2014-06-05 14:22:20 -04:00
parent 97ab57fbfe
commit 8d34b05d9b
10 changed files with 114 additions and 33 deletions
@@ -1,2 +1,3 @@
 Slater Victoroff <slater@indicodatasolutions.com>
 Alec Radford <alec@indicodatasolutions.com>
+Aidan McLaughlin <aidan@indicodatasolutions.com>
@@ -1 +1,2 @@
 v0.2.8, Tue May 13 -- Added Description, Authors file, changelog. Cleaned up import paths and modified corresponding examples and tests
+v0.2.9, Mon Jun 2 -- API now supports normalization; updated documentation to reflect this.
@@ -5,12 +5,10 @@ import numpy as np

 from IndicoIo import JSON_HEADERS

-base_url = lambda c: "http://indico.io/api/features/%s" % c
+base_url = lambda c: "http://indico.io/api/%s" % c

-def facial_features(face, full_return=False):
-    data_dict = json.dumps({"datums": face})
-    response = requests.post(base_url("facial"), data=data_dict, headers=JSON_HEADERS)
+def facial_features(face):
+    data_dict = json.dumps({"face": face})
+    response = requests.post(base_url("facialfeatures"), data=data_dict, headers=JSON_HEADERS)
    response_dict = json.loads(response.content)
-    if full_return:
-        return response_dict
-    return json.loads(response_dict['feature_vector'])
+    return response_dict['response']
@@ -4,9 +4,9 @@ import requests
 import numpy as np
 from IndicoIo import JSON_HEADERS

-base_url = "http://indico.io/api/fer/classify"
+base_url = "http://indico.io/api/fer"

 def fer(face):
-    data_dict = json.dumps({"image": face})
+    data_dict = json.dumps({"face": face})
    response = requests.post(base_url, data=data_dict, headers=JSON_HEADERS)
    return json.loads(response.content)
@@ -1,8 +1,10 @@
 import requests
 import json
-from IndicoIo import JSON_HEADERS

-base_url = lambda c: "http://indico.io/api/sentiment/%s/classify" % c
+from IndicoIo import JSON_HEADERS
+from IndicoIo.utils import normalize
+
+base_url = lambda c: "http://indico.io/api/%s" % c

 def political(test_text):
    data_dict = json.dumps({'text': test_text})
@@ -0,0 +1,86 @@
+import functools
+import inspect
+
+import numpy as np
+
+class TypeCheck(object):
+    """
+    Decorator that type-checks one named argument of the wrapped function.
+
+    Usage: ``@TypeCheck((list, dict), 'array')``. Several TypeCheck
+    decorators can be stacked to check different arguments of one function.
+    """
+    def __init__(self, accepted_structures, arg_name):
+        """
+        accepted_structures: iterable of types the argument may be an
+            instance of (subclasses are accepted as well).
+        arg_name: name of the wrapped function's parameter to check.
+        """
+        self.accepted_structures = accepted_structures
+        # isinstance rather than an exact type() match, so subclasses such
+        # as OrderedDict pass the check too.
+        accepted = tuple(accepted_structures)
+        self.is_accepted = lambda x: isinstance(x, accepted)
+        self.arg_name = arg_name
+
+    def __call__(self, fn):
+        """
+        Return a wrapper around fn that validates the checked argument.
+
+        The wrapper raises DataStructureException when the argument is
+        supplied and is not an instance of one of the accepted types.
+        """
+        # Look through wrappers applied earlier so that stacked decorators
+        # see the real parameter names instead of (*args, **kwargs).
+        target = fn
+        while hasattr(target, '__wrapped__'):
+            target = target.__wrapped__
+        # getargspec was removed in Python 3.11; prefer getfullargspec.
+        get_spec = (getattr(inspect, 'getfullargspec', None)
+                    or inspect.getargspec)
+        arg_names = get_spec(target).args
+
+        @functools.wraps(fn)
+        def check_args(*args, **kwargs):
+            # Map positional values onto parameter names, then add kwargs.
+            full_args = dict(zip(arg_names, args), **kwargs)
+            # If the argument was omitted, leave it to fn (default value or
+            # its own TypeError) instead of failing with a KeyError here.
+            if self.arg_name in full_args:
+                value = full_args[self.arg_name]
+                if not self.is_accepted(value):
+                    raise DataStructureException(
+                        fn,
+                        value,
+                        self.accepted_structures
+                    )
+            return fn(*args, **kwargs)
+        # functools.wraps only sets __wrapped__ on Python 3; set it
+        # explicitly so the unwrapping loop above works everywhere.
+        check_args.__wrapped__ = fn
+        return check_args
+
+
+class DataStructureException(Exception):
+    """
+    Raised when a function is handed a data structure of a type it does
+    not accept.
+    """
+    def __init__(self, callback, passed_structure, accepted_structures):
+        """
+        callback: the function that rejected the input; its name is kept.
+        passed_structure: the offending value; its type is kept as a string.
+        accepted_structures: iterable of accepted types, kept as strings.
+        """
+        self.callback = callback.__name__
+        self.structure = str(type(passed_structure))
+        self.accepted = list(map(str, accepted_structures))
+
+    def __str__(self):
+        # The message keeps the surrounding newlines and indentation of the
+        # triple-quoted template.
+        template = """
+        function %s does not accept %s, accepted types are: %s
+        """
+        details = (self.callback, self.structure, str(self.accepted))
+        return template % details
+
+
+@TypeCheck((list, dict, np.ndarray), 'array')
+def normalize(array, distribution=1, norm_range=(0, 1), **kwargs):
+    """
+    Rescale numeric data so that it spans norm_range.
+
+    array: the data, given as a list, a numpy array, or a dictionary
+        whose values hold the data.
+    distribution: transformation applied before rescaling. A number
+        raises every element to that power (the default, 1, leaves the
+        data unchanged); a string is looked up as the name of a numpy
+        function (e.g. 'exp', 'log', 'sqrt') and applied to the data.
+    norm_range: (low, high) bounds the transformed data is mapped onto.
+    kwargs: forwarded to the numpy function named by distribution.
+
+    Returns a numpy array, or a dictionary with the original keys when a
+    dictionary was passed in. Empty input is returned empty. If every
+    value is equal after the transformation, all elements map to the low
+    end of norm_range.
+    """
+    # Handling dictionary array input
+    # Note: lists and numpy arrays behave the same in this program
+    dict_array = isinstance(array, dict)
+
+    if dict_array:
+        # Materialize keys and values: on Python 3 they are views, which
+        # numpy cannot turn into a numeric array directly.
+        keys = list(array.keys())
+        array = np.array(list(array.values())).astype('float')
+    else:  # Decorator errors if this isn't a list or a numpy array
+        array = np.array(array).astype('float')
+
+    # Handling various distributions; numpy scalars count as numbers too
+    if isinstance(distribution, (int, float, np.number)):
+        array = np.power(array, distribution)
+    else:
+        array = getattr(np, distribution)(array, **kwargs)
+
+    low, high = norm_range[0], norm_range[-1]
+
+    if array.size == 0:
+        # Nothing to rescale; np.max/np.min would raise on empty input.
+        norm_array = array
+    else:
+        x_max, x_min = (np.max(array), np.min(array))
+        if x_max == x_min:
+            # Zero spread would divide by zero; pin everything to low.
+            norm_array = np.zeros_like(array) + low
+        else:
+            # Same per-element arithmetic as before, as one array
+            # expression instead of a np.vectorize'd Python function.
+            base_span = (array - x_min) * (high - low)
+            norm_array = low + base_span / (x_max - x_min)
+
+    if dict_array:
+        return dict(zip(keys, norm_array))
+    return norm_array
@@ -27,14 +27,14 @@ Examples

 >>> from IndicoIo import political, spam, posneg, fer, facial_features

->>> political("Guns don't kill people, people kill people")
-{u'Libertarian': 1.000094905588269, u'Liberal': 1.000194776694221, u'Green': 1.0000989185747784, u'Conservative': 1.000114308739228}
+>>> political("Guns don't kill people. People kill people.")
+{u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0}

->>> spam("Buy a new car!!")
-{u'Ham': 1.0001470818000544, u'Spam': 1.0003137966593707}
+>>> spam("Free car!")
+{u'Ham': 0.0, u'Spam': 1.0}

 >>> posneg("Would not stay in this hotel ever again.")
-{u'Positive': 1.0002370406887562, u'Negative': 1.0002938352112363}
+{u'Positive': 0.0, u'Negative': 1.0}

 >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()

@@ -27,14 +27,14 @@ Examples

 >>> from IndicoIo import political, spam, posneg, fer, facial_features

->>> political("Guns don't kill people, people kill people")
-{u'Libertarian': 1.000094905588269, u'Liberal': 1.000194776694221, u'Green': 1.0000989185747784, u'Conservative': 1.000114308739228}
+>>> political("Guns don't kill people. People kill people.")
+{u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0}

->>> spam("Buy a new car!!")
-{u'Ham': 1.0001470818000544, u'Spam': 1.0003137966593707}
+>>> spam("Free car!")
+{u'Ham': 0.0, u'Spam': 1.0}

 >>> posneg("Would not stay in this hotel ever again.")
-{u'Positive': 1.0002370406887562, u'Negative': 1.0002938352112363}
+{u'Positive': 0.0, u'Negative': 1.0}

 >>> test_face = np.linspace(0,50,48*48).reshape(48,48).tolist()

@@ -5,17 +5,18 @@ except ImportError:

 setup(
 	name = "IndicoIo",
-	version = '0.2.8',
+	version = '0.2.10',
 	packages = [
            "IndicoIo",
            "IndicoIo.text",
            "IndicoIo.images",
+            "IndicoIo.utils",
            "tests",
        ],
        description = "A Python Wrapper for IndicoIo. Use pre-built state of the art machine learning algorithms with a single line of code.",
 	license = "MIT License (See LICENSE)",
 	long_description = open("README").read(),
 	url = "https://github.com/IndicoDataSolutions/IndicoIo-python",
-	author = "Alec Radford, Slater Victoroff",
-	author_email = "Alec Radford <alec@indicodatasolutions.com>, Slater Victoroff <slater@indicodatasolutions.com>",
+	author = "Alec Radford, Slater Victoroff, Aidan McLaughlin",
+	author_email = "Alec Radford <alec@indicodatasolutions.com>, Slater Victoroff <slater@indicodatasolutions.com>, Aidan McLaughlin <aidan@indicodatasolutions.com>",
 )
@@ -43,7 +43,7 @@ class FullAPIRun(unittest.TestCase):
        fer_set = set(['Angry', 'Sad', 'Neutral', 'Surprise', 'Fear', 'Happy'])
        test_face = np.linspace(0,50,56*56).reshape(56,56).tolist()
        response = fer(test_face)
-        
+
        self.assertTrue(isinstance(response, dict))
        self.assertEqual(fer_set, set(response.keys()))

@@ -54,14 +54,6 @@ class FullAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, list))
        self.assertEqual(len(response), 48)

-    def test_full_facial_features(self):
-        features_set = set(['feature_vector', 'warnings'])
-        test_face = np.linspace(0,50,56*56).reshape(56,56).tolist()
-        response = facial_features(test_face, True)
-        
-        self.assertEqual(set(response.keys()), features_set)
-        self.assertEqual(response['warnings'], 'Using a 48x48 array will produce the best results')
-

 if __name__ == "__main__":
    unittest.main()