language detection added

2026-06-27 16:10:34 +08:00 · 2014-08-01 02:30:22 -04:00
parent b5ece3d51b
commit d5087188fc
10 changed files with 95 additions and 43 deletions
@@ -1,3 +1,7 @@
 v0.2.8, Tue May 13 -- Added Description, Authors file, changelog. Cleaned up import paths and modified corresponding examples and tests
 v0.2.10, Mon Jun 2 -- API now supports normalization, updating documentation to reflect this.
 v0.2.11, Fri Jun 6 -- Switched sentiment api to a more general version with much higher quality. Updated docs to reflect this. Also changed unintuitive posneg to more intuitive Sentiment. Kept old posneg for backward compatibility.
+v0.3.0, Mon Jul 21 -- Switched api over to new high-volume version, using api.indico.io instead
+v0.3.1, Mon Jul 21 -- Added __version__ to module
+v0.3.2, Tue Jul 22 -- Removed spam detection due to instability
+v0.3.3, Fri Aug 1 -- Added language detection api
@@ -1,6 +1,8 @@
 JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain'}
+__version__ = '0.3.3'

-from text.sentiment import political, spam, posneg
+from text.sentiment import political, posneg
 from text.sentiment import posneg as sentiment
+from text.lang import language
 from images.fer import fer
 from images.features import facial_features
@@ -5,7 +5,7 @@ import numpy as np

 from IndicoIo import JSON_HEADERS

-base_url = lambda c: "http://indico.io/api/%s" % c
+base_url = lambda c: "http://api.indico.io/%s" % c

 def facial_features(face):
    data_dict = json.dumps({"face": face})
@@ -4,7 +4,7 @@ import requests
 import numpy as np
 from IndicoIo import JSON_HEADERS

-base_url = "http://indico.io/api/fer"
+base_url = "http://api.indico.io/fer"

 def fer(face):
    data_dict = json.dumps({"face": face})
@@ -0,0 +1,11 @@
+import requests
+import json
+
+from IndicoIo import JSON_HEADERS
+
+# Build the full URL of the API endpoint named `c`.
+base_url = lambda c: "http://api.indico.io/%s" % c
+
+def language(test_text):
+    """Query the `language` endpoint for `test_text`.
+
+    The text is wrapped as ``{'text': test_text}``, serialized to JSON and
+    POSTed with the shared JSON_HEADERS. The response body is decoded from
+    JSON and returned as-is; no HTTP status check is performed here.
+
+    NOTE(review): the result is presumably a dict keyed by language name
+    (the test suite compares its keys to a set of language names) --
+    confirm against the API.
+    """
+    data_dict = json.dumps({'text': test_text})
+    response = requests.post(base_url("language"), data=data_dict, headers=JSON_HEADERS)
+    return json.loads(response.content)
@@ -4,18 +4,13 @@ import json
 from IndicoIo import JSON_HEADERS
 from IndicoIo.utils import normalize

-base_url = lambda c: "http://indico.io/api/%s" % c
+base_url = lambda c: "http://api.indico.io/%s" % c

 def political(test_text):
    """Query the `political` endpoint for `test_text`.

    The text is wrapped as ``{'text': test_text}``, serialized to JSON and
    POSTed with the shared JSON_HEADERS. The response body is decoded from
    JSON and returned as-is; no HTTP status check is performed here.

    NOTE(review): the result is presumably a dict mapping political
    leanings to scores, as in the README example -- confirm against the API.
    """
    data_dict = json.dumps({'text': test_text})
    response = requests.post(base_url("political"), data=data_dict, headers=JSON_HEADERS)
    return json.loads(response.content)

-def spam(test_text):
-    data_dict = json.dumps({'text': test_text})
-    response = requests.post(base_url("spam"), data=data_dict, headers=JSON_HEADERS)
-    return json.loads(response.content)
-
 def posneg(test_text):
    data_dict = json.dumps({'text': test_text})
    response = requests.post(base_url("sentiment"), data=data_dict, headers=JSON_HEADERS)
@@ -15,24 +15,21 @@ Current APIs
 Right now this wrapper supports the following apps:

 - Political Sentiment Analysis
- Spam Detection
 - Positive/Negative Sentiment Analysis
 - Facial Emotion Recognition
 - Facial Feature Extraction
+- Language Detection

 Examples
 --------
 ```
 >>> import numpy as np

->>> from IndicoIo import political, spam, sentiment, fer, facial_features
+>>> from IndicoIo import political, sentiment, fer, facial_features, language

 >>> political("Guns don't kill people. People kill people.")
 {u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0}

->>> spam("Free car!")
-{u'Ham': 0.0, u'Spam': 1.0}
-
 >>> sentiment('Worst movie ever.')
 {u'Sentiment': 0.07062467665597527}

@@ -47,6 +44,8 @@ Examples
 >>> facial_features(test_face)
 [0.0, -0.02568680526917187, 0.21645604230056517, -0.1519435786033145, -0.5648621854611555, 3.0607368045577226, 0.11434321880792693, -0.02163810928547493, -0.44224330594186484, 0.3024315632285246, -2.6068048934495276, 2.497798330306638, 3.040558335205844, 0.741045340525325, 0.37198135618478817, -0.33132377802172325, -0.9804190889833034, 0.5046575784709395, -0.5609132323152847, 1.679107064439151, 0.6825037853544341, -1.5977176226648016, 1.8959464303080562, -0.7812860715595836, -2.998394007543733, -0.22637273967347724, -0.9642457010679496, 1.4557274834236749, 2.412244419186633, 2.3151771738421965, 0.7881483386786367, 1.6622850935863422, 0.1304768990234367, 1.9344501393866649, 3.1271558035162914, -0.10250886439220543, 1.4921395116492966, 2.761645355670677, 1.6903473594991179, 1.009209807271491, 0.07273926986120445, -1.4941708135718021, -2.082786362439631, 1.0160924044870847, 2.5326580674673895, -0.8328208491083264, 2.0390177029762935, 3.0342637531932777]

+>>> language('Clearly an english phrase')
+
 ```

 Installation
@@ -15,24 +15,21 @@ Current APIs
 Right now this wrapper supports the following apps:

 - Political Sentiment Analysis
- Spam Detection
 - Positive/Negative Sentiment Analysis
 - Facial Emotion Recognition
 - Facial Feature Extraction
+- Language Detection

 Examples
 --------
 ```
 >>> import numpy as np

->>> from IndicoIo import political, spam, sentiment, fer, facial_features
+>>> from IndicoIo import political, sentiment, fer, facial_features, language

 >>> political("Guns don't kill people. People kill people.")
 {u'Libertarian': 0.22934946808893228, u'Liberal': 0.2025395008382684, u'Green': 0.0, u'Conservative': 1.0}

->>> spam("Free car!")
-{u'Ham': 0.0, u'Spam': 1.0}
-
 >>> sentiment('Worst movie ever.')
 {u'Sentiment': 0.07062467665597527}

@@ -47,6 +44,8 @@ Examples
 >>> facial_features(test_face)
 [0.0, -0.02568680526917187, 0.21645604230056517, -0.1519435786033145, -0.5648621854611555, 3.0607368045577226, 0.11434321880792693, -0.02163810928547493, -0.44224330594186484, 0.3024315632285246, -2.6068048934495276, 2.497798330306638, 3.040558335205844, 0.741045340525325, 0.37198135618478817, -0.33132377802172325, -0.9804190889833034, 0.5046575784709395, -0.5609132323152847, 1.679107064439151, 0.6825037853544341, -1.5977176226648016, 1.8959464303080562, -0.7812860715595836, -2.998394007543733, -0.22637273967347724, -0.9642457010679496, 1.4557274834236749, 2.412244419186633, 2.3151771738421965, 0.7881483386786367, 1.6622850935863422, 0.1304768990234367, 1.9344501393866649, 3.1271558035162914, -0.10250886439220543, 1.4921395116492966, 2.761645355670677, 1.6903473594991179, 1.009209807271491, 0.07273926986120445, -1.4941708135718021, -2.082786362439631, 1.0160924044870847, 2.5326580674673895, -0.8328208491083264, 2.0390177029762935, 3.0342637531932777]

+>>> language('Clearly an english phrase')
+
 ```

 Installation
@@ -1,22 +1,32 @@
+"""
+Setup for indicoio apis
+"""
 try:
    from setuptools import setup
 except ImportError:
    from distutils.core import setup

 setup(
-	name = "IndicoIo",
-	version = '0.2.11',
-	packages = [
-            "IndicoIo",
-            "IndicoIo.text",
-            "IndicoIo.images",
-            "IndicoIo.utils",
-            "tests",
-        ],
-        description = "A Python Wrapper for IndicoIo. Use pre-built state of the art machine learning algorithms with a single line of code.",
-	license = "MIT License (See LICENSE)",
-	long_description = open("README").read(),
-	url = "https://github.com/IndicoDataSolutions/IndicoIo-python",
-	author = "Alec Radford, Slater Victoroff, Aidan McLaughlin",
-	author_email = "Alec Radford <alec@indicodatasolutions.com>, Slater Victoroff <slater@indicodatasolutions.com>, Aidan McLaughlin <aidan@indicodatasolutions.com>",
+    name="IndicoIo",
+    version='0.3.3',
+    packages=[
+        "IndicoIo",
+        "IndicoIo.text",
+        "IndicoIo.images",
+        "IndicoIo.utils",
+        "tests",
+    ],
+    # The summary must stay a single line: adjacent string literals wrap the
+    # source without embedding newlines or indentation in the metadata value
+    # (a triple-quoted string would include both).
+    description=(
+        "A Python Wrapper for IndicoIo. "
+        "Use pre-built state of the art machine learning algorithms with a single line of code."
+    ),
+    license="MIT License (See LICENSE)",
+    long_description=open("README").read(),
+    url="https://github.com/IndicoDataSolutions/IndicoIo-python",
+    author="Alec Radford, Slater Victoroff, Aidan McLaughlin",
+    # Same single-line requirement as `description`: the value is a
+    # comma-separated list of addresses with no embedded line breaks.
+    author_email=(
+        "Alec Radford <alec@indicodatasolutions.com>, "
+        "Slater Victoroff <slater@indicodatasolutions.com>, "
+        "Aidan McLaughlin <aidan@indicodatasolutions.com>"
+    ),
 )
@@ -2,7 +2,7 @@ import unittest

 import numpy as np

-from IndicoIo import political, spam, sentiment, fer, facial_features
+from IndicoIo import political, sentiment, fer, facial_features, language


 class FullAPIRun(unittest.TestCase):
@@ -15,14 +15,6 @@ class FullAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, dict))
        self.assertEqual(political_set, set(response.keys()))

-    def test_spam(self):
-        spam_set = set(['Spam', 'Ham'])
-        test_string = "Buy a new car!!"
-        response = spam(test_string)
-
-        self.assertTrue(isinstance(response, dict))
-        self.assertEqual(spam_set, set(response.keys()))
-
    def test_posneg(self):
        posneg_set = set(['Sentiment'])
        test_string = "Worst song ever."
@@ -54,6 +46,46 @@ class FullAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, list))
        self.assertEqual(len(response), 48)

+    def test_language(self):
+        """The language API returns a dict keyed by exactly the expected language names."""
+        # Each language is listed once; the comparison below is set-based.
+        language_set = set([
+            'English',
+            'Spanish',
+            'Tagalog',
+            'Esperanto',
+            'French',
+            'Chinese',
+            'Bulgarian',
+            'Latin',
+            'Slovak',
+            'Hebrew',
+            'Russian',
+            'German',
+            'Japanese',
+            'Korean',
+            'Portuguese',
+            'Italian',
+            'Polish',
+            'Turkish',
+            'Dutch',
+            'Arabic',
+            'Persian (Farsi)',
+            'Czech',
+            'Swedish',
+            'Indonesian',
+            'Vietnamese',
+            'Romanian',
+            'Greek',
+            'Danish',
+            'Hungarian',
+            'Thai',
+            'Finnish',
+            'Norwegian',
+            'Lithuanian'
+        ])
+        language_dict = language('clearly an english sentence')
+
+        # Same type check the other API tests in this class perform.
+        self.assertTrue(isinstance(language_dict, dict))
+        self.assertEqual(language_set, set(language_dict.keys()))
+

 if __name__ == "__main__":
    unittest.main()