From d5087188fc5177a185663f3fa2386e5b0a9ae346 Mon Sep 17 00:00:00 2001 From: Slater-Victoroff Date: Fri, 1 Aug 2014 02:30:22 -0400 Subject: [PATCH] language detection added --- CHANGES.txt | 4 +++ IndicoIo/__init__.py | 4 ++- IndicoIo/images/features.py | 2 +- IndicoIo/images/fer.py | 2 +- IndicoIo/text/lang.py | 11 ++++++++ IndicoIo/text/sentiment.py | 7 +----- README | 9 +++---- README.md | 9 +++---- setup.py | 40 ++++++++++++++++++----------- tests/test_run.py | 50 ++++++++++++++++++++++++++++++------- 10 files changed, 95 insertions(+), 43 deletions(-) create mode 100644 IndicoIo/text/lang.py diff --git a/CHANGES.txt b/CHANGES.txt index e7ef582..1954ddf 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,7 @@ v0.2.8, Tue May 13 -- Added Description, Authors file, changelog. Cleaned up import paths and modified corresponding examples and tests v0.2.10, Mon Jun 2 -- API now supports normalization, updating documentation to reflect this. v0.2.11, Fri Jun 6 -- Switched sentiment api to more general version with much higher quality. updated docs to reflect this. Also changed unintuitive posneg to more intuitive Sentiment. Kept old posneg for backward compatibility. +v0.3.0, Mon Jul 21 -- Switched api over to new high-volume version, using api.indico.io instead +v0.3.1, Mon Jul 21 -- Added __version__ to module +v0.3.2, Tue Jul 22 -- Removed spam detection due to instability +v0.3.3, Fri Aug 1 -- Added language detection api \ No newline at end of file diff --git a/IndicoIo/__init__.py b/IndicoIo/__init__.py index 5977d83..1a0470c 100644 --- a/IndicoIo/__init__.py +++ b/IndicoIo/__init__.py @@ -1,6 +1,8 @@ JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain'} +__version__ = '0.3.3' -from text.sentiment import political, spam, posneg +from text.sentiment import political, posneg from text.sentiment import posneg as sentiment +from text.lang import language from images.fer import fer from images.features import facial_features diff --git a/IndicoIo/images/features.py b/IndicoIo/images/features.py index f91aaca..cd5269f 100644 --- a/IndicoIo/images/features.py +++ b/IndicoIo/images/features.py @@ -5,7 +5,7 @@ import numpy as np from IndicoIo import JSON_HEADERS -base_url = lambda c: "http://indico.io/api/%s" % c +base_url = lambda c: "http://api.indico.io/%s" % c def facial_features(face): data_dict = json.dumps({"face": face}) diff --git a/IndicoIo/images/fer.py b/IndicoIo/images/fer.py index c974357..2279755 100644 --- a/IndicoIo/images/fer.py +++ b/IndicoIo/images/fer.py @@ -4,7 +4,7 @@ import requests import numpy as np from IndicoIo import JSON_HEADERS -base_url = "http://indico.io/api/fer" +base_url = "http://api.indico.io/fer" def fer(face): data_dict = json.dumps({"face": face}) diff --git a/IndicoIo/text/lang.py b/IndicoIo/text/lang.py new file mode 100644 index 0000000..3d6add4 --- /dev/null +++ b/IndicoIo/text/lang.py @@ -0,0 +1,11 @@ +import requests +import json + +from IndicoIo import JSON_HEADERS + +base_url = lambda c: "http://api.indico.io/%s" % c + +def language(test_text): + data_dict = json.dumps({'text': test_text}) + response = requests.post(base_url("language"), data=data_dict, headers=JSON_HEADERS) + return json.loads(response.content) diff --git a/IndicoIo/text/sentiment.py b/IndicoIo/text/sentiment.py index 991f156..cbed1f7 100644 --- a/IndicoIo/text/sentiment.py +++ b/IndicoIo/text/sentiment.py @@ -4,18 +4,13 @@ import json from IndicoIo import JSON_HEADERS from IndicoIo.utils import normalize -base_url = lambda c: "http://indico.io/api/%s" % c +base_url = lambda c: "http://api.indico.io/%s" % c def political(test_text): data_dict = json.dumps({'text': test_text}) response = requests.post(base_url("political"), data=data_dict, headers=JSON_HEADERS) return json.loads(response.content) -def spam(test_text): - data_dict = json.dumps({'text': test_text}) - response = requests.post(base_url("spam"), data=data_dict, headers=JSON_HEADERS) - return json.loads(response.content) - def posneg(test_text): data_dict = json.dumps({'text': test_text}) response = requests.post(base_url("sentiment"), data=data_dict, headers=JSON_HEADERS) diff --git a/README b/README index 902c0c0..0165d1d 100644 --- a/README +++ b/README @@ -15,24 +15,21 @@ Current APIs Right now this wrapper supports the following apps: - Political Sentiment Analysis -- Spam Detection - Positive/Negative Sentiment Analysis - Facial Emotion Recognition - Facial Feature Extraction +- Language Detection Examples -------- ``` >>> import numpy as np ->>> from IndicoIo import political, spam, sentiment, fer, facial_features +>>> from IndicoIo import political, sentiment, fer, facial_features, language >>> political("Guns don't kill people. People kill people.") {u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0} ->>> spam("Free car!") -{u'Ham': 0.0, u'Spam': 1.0} - >>> sentiment('Worst movie ever.') {u'Sentiment': 0.07062467665597527} @@ -47,6 +44,8 @@ Examples >>> facial_features(test_face) [0.0, -0.02568680526917187, 0.21645604230056517, -0.1519435786033145, -0.5648621854611555, 3.0607368045577226, 0.11434321880792693, -0.02163810928547493, -0.44224330594186484, 0.3024315632285246, -2.6068048934495276, 2.497798330306638, 3.040558335205844, 0.741045340525325, 0.37198135618478817, -0.33132377802172325, -0.9804190889833034, 0.5046575784709395, -0.5609132323152847, 1.679107064439151, 0.6825037853544341, -1.5977176226648016, 1.8959464303080562, -0.7812860715595836, -2.998394007543733, -0.22637273967347724, -0.9642457010679496, 1.4557274834236749, 2.412244419186633, 2.3151771738421965, 0.7881483386786367, 1.6622850935863422, 0.1304768990234367, 1.9344501393866649, 3.1271558035162914, -0.10250886439220543, 1.4921395116492966, 2.761645355670677, 1.6903473594991179, 1.009209807271491, 0.07273926986120445, -1.4941708135718021, -2.082786362439631, 1.0160924044870847, 2.5326580674673895, -0.8328208491083264, 2.0390177029762935, 3.0342637531932777] +>>> language('Clearly an english phrase') + ``` Installation diff --git a/README.md b/README.md index 902c0c0..0165d1d 100644 --- a/README.md +++ b/README.md @@ -15,24 +15,21 @@ Current APIs Right now this wrapper supports the following apps: - Political Sentiment Analysis -- Spam Detection - Positive/Negative Sentiment Analysis - Facial Emotion Recognition - Facial Feature Extraction +- Language Detection Examples -------- ``` >>> import numpy as np ->>> from IndicoIo import political, spam, sentiment, fer, facial_features +>>> from IndicoIo import political, sentiment, fer, facial_features, language >>> political("Guns don't kill people. People kill people.") {u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0} ->>> spam("Free car!") -{u'Ham': 0.0, u'Spam': 1.0} - >>> sentiment('Worst movie ever.') {u'Sentiment': 0.07062467665597527} @@ -47,6 +44,8 @@ Examples >>> facial_features(test_face) [0.0, -0.02568680526917187, 0.21645604230056517, -0.1519435786033145, -0.5648621854611555, 3.0607368045577226, 0.11434321880792693, -0.02163810928547493, -0.44224330594186484, 0.3024315632285246, -2.6068048934495276, 2.497798330306638, 3.040558335205844, 0.741045340525325, 0.37198135618478817, -0.33132377802172325, -0.9804190889833034, 0.5046575784709395, -0.5609132323152847, 1.679107064439151, 0.6825037853544341, -1.5977176226648016, 1.8959464303080562, -0.7812860715595836, -2.998394007543733, -0.22637273967347724, -0.9642457010679496, 1.4557274834236749, 2.412244419186633, 2.3151771738421965, 0.7881483386786367, 1.6622850935863422, 0.1304768990234367, 1.9344501393866649, 3.1271558035162914, -0.10250886439220543, 1.4921395116492966, 2.761645355670677, 1.6903473594991179, 1.009209807271491, 0.07273926986120445, -1.4941708135718021, -2.082786362439631, 1.0160924044870847, 2.5326580674673895, -0.8328208491083264, 2.0390177029762935, 3.0342637531932777] +>>> language('Clearly an english phrase') + ``` Installation diff --git a/setup.py b/setup.py index f5a2bfa..92a2a09 100644 --- a/setup.py +++ b/setup.py @@ -1,22 +1,32 @@ +""" +Setup for indicoio apis +""" try: from setuptools import setup except ImportError: from distutils.core import setup setup( - name = "IndicoIo", - version = '0.2.11', - packages = [ - "IndicoIo", - "IndicoIo.text", - "IndicoIo.images", - "IndicoIo.utils", - "tests", - ], - description = "A Python Wrapper for IndicoIo. Use pre-built state of the art machine learning algorithms with a single line of code.", - license = "MIT License (See LICENSE)", - long_description = open("README").read(), - url = "https://github.com/IndicoDataSolutions/IndicoIo-python", - author = "Alec Radford, Slater Victoroff, Aidan McLaughlin", - author_email = "Alec Radford , Slater Victoroff , Aidan McLaughlin ", + name="IndicoIo", + version='0.3.3', + packages=[ + "IndicoIo", + "IndicoIo.text", + "IndicoIo.images", + "IndicoIo.utils", + "tests", + ], + description=""" + A Python Wrapper for IndicoIo. + Use pre-built state of the art machine learning algorithms with a single line of code. + """, + license="MIT License (See LICENSE)", + long_description=open("README").read(), + url="https://github.com/IndicoDataSolutions/IndicoIo-python", + author="Alec Radford, Slater Victoroff, Aidan McLaughlin", + author_email=""" + Alec Radford , + Slater Victoroff , + Aidan McLaughlin + """, ) diff --git a/tests/test_run.py b/tests/test_run.py index ce7be9c..8a0b5d1 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -2,7 +2,7 @@ import unittest import numpy as np -from IndicoIo import political, spam, sentiment, fer, facial_features +from IndicoIo import political, sentiment, fer, facial_features, language class FullAPIRun(unittest.TestCase): @@ -15,14 +15,6 @@ class FullAPIRun(unittest.TestCase): self.assertTrue(isinstance(response, dict)) self.assertEqual(political_set, set(response.keys())) - def test_spam(self): - spam_set = set(['Spam', 'Ham']) - test_string = "Buy a new car!!" - response = spam(test_string) - - self.assertTrue(isinstance(response, dict)) - self.assertEqual(spam_set, set(response.keys())) - def test_posneg(self): posneg_set = set(['Sentiment']) test_string = "Worst song ever." @@ -54,6 +46,46 @@ class FullAPIRun(unittest.TestCase): self.assertTrue(isinstance(response, list)) self.assertEqual(len(response), 48) + def test_language(self): + language_set = set([ + 'English', + 'Spanish', + 'Tagalog', + 'Esperanto', + 'French', + 'Chinese', + 'French', + 'Bulgarian', + 'Latin', + 'Slovak', + 'Hebrew', + 'Russian', + 'German', + 'Japanese', + 'Korean', + 'Portuguese', + 'Italian', + 'Polish', + 'Turkish', + 'Dutch', + 'Arabic', + 'Persian (Farsi)', + 'Czech', + 'Swedish', + 'Indonesian', + 'Vietnamese', + 'Romanian', + 'Greek', + 'Danish', + 'Hungarian', + 'Thai', + 'Finnish', + 'Norwegian', + 'Lithuanian' + ]) + language_dict = language('clearly an english sentence') + self.assertEqual(language_set, set(language_dict.keys())) + if __name__ == "__main__": unittest.main()