Merge pull request #84 from IndicoDataSolutions/Chris/ner

[ADD] NER
2026-06-27 16:10:34 +08:00 · 2015-07-10 02:41:59 -04:00
parent 1fe04a1c60 3051aaeaab
commit 024e4a0432
5 changed files with 53 additions and 3 deletions
@@ -14,6 +14,7 @@ from indicoio.text.sentiment import posneg as sentiment
 from indicoio.text.lang import language
 from indicoio.text.tagging import text_tags
 from indicoio.text.keywords import keywords
+from indicoio.text.ner import named_entities
 from indicoio.images.fer import fer
 from indicoio.images.features import facial_features
 from indicoio.images.features import image_features
@@ -51,7 +51,8 @@ TEXT_APIS = [
    'sentiment',
    'language',
    'sentiment_hq',
-    'keywords'
+    'keywords',
+    'named_entities'
 ]

 IMAGE_APIS = [
@@ -0,0 +1,30 @@
+from indicoio.utils.api import api_handler
+import indicoio.config as config
+
+def named_entities(text, cloud=None, batch=False, api_key=None, **kwargs):
+    """
+    Given input text, returns named entities (proper nouns) found in the text
+
+    Example usage:
+
+    .. code-block:: python
+
+       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
+       >>> indicoio.named_entities(text)
+       {u'London Underground': {u'categories': {u'location': 0.583755654607989,
+          u'organization': 0.07460487821791033,
+          u'person': 0.07304850776658672,
+          u'unknown': 0.2685909594075139},
+         u'confidence': 0.846188063604044},
+        u'Mike Brown': {u'categories': {u'location': 0.025813884950623898,
+          u'organization': 0.06661470013014613,
+          u'person': 0.08723850624560824,
+          u'unknown': 0.8203329086736217},
+         u'confidence': 0.8951793008234012}}
+
+    :param text: The text to be analyzed.
+    :type text: str or unicode
+    :rtype: Dictionary mapping each entity to its category probabilities and confidence
+    """
+    url_params = {"batch": batch, "api_key": api_key}  # handed to api_handler as url_params
+    return api_handler(text, cloud=cloud, api="namedentities", url_params=url_params, **kwargs)
@@ -137,7 +137,7 @@ def predict_image(image, apis=IMAGE_APIS, **kwargs):

 def parsed_response(api, response):
    result = response.get('results', False)
-    if result:
+    if result != False:
        return result
    raise IndicoError(
        "Sorry, the %s API returned an unexpected response.\n\t%s"
@@ -11,6 +11,7 @@ from indicoio import batch_political, batch_sentiment, batch_fer, batch_facial_f
 from indicoio import batch_language, batch_image_features, batch_text_tags
 from indicoio import keywords, batch_keywords
 from indicoio import sentiment_hq, batch_sentiment_hq
+from indicoio import named_entities, batch_named_entities
 from indicoio import predict_image, predict_text, batch_predict_image, batch_predict_text
 from indicoio.utils.errors import IndicoError

@@ -89,7 +90,6 @@ class BatchAPIRun(unittest.TestCase):
        test_data = ["data/unhappy.png"]
        self.assertRaises(IndicoError, batch_fer, test_data, api_key=self.api_key)

-
    def test_batch_facial_features(self):
        test_data = [generate_array((48,48))]
        response = batch_facial_features(test_data, api_key=self.api_key)
@@ -130,6 +130,15 @@ class BatchAPIRun(unittest.TestCase):
        self.assertTrue(isinstance(response, list))
        self.assertTrue(response[0]['English'] > 0.25)

+    def test_batch_named_entities(self):  # batch call: first result must hold both entities
+        batch = ["London Underground's boss Mike Brown warned that the strike ..."]
+        expected_entities = ("London Underground", "Mike Brown")
+        expected_keys = set(["categories", "confidence"])
+        entities = batch_named_entities(batch)[0]
+        for entity in expected_entities:
+            assert entity in entities  # check the API response, not the expected tuple itself
+            assert not (set(entities[entity]) - expected_keys)  # no keys beyond the expected ones
+
    def test_batch_multi_api_image(self):
        test_data = [generate_array((48,48)), generate_int_array((48,48))]
        response = batch_predict_image(test_data, apis=config.IMAGE_APIS, api_key=self.api_key)
@@ -226,6 +235,15 @@ class FullAPIRun(unittest.TestCase):
        for v in results.values():
            assert v >= .1

+    def test_named_entities(self):  # single-text call: result must hold both entities
+        text = "London Underground's boss Mike Brown warned that the strike ..."
+        expected_entities = ("London Underground", "Mike Brown")
+        expected_keys = set(["categories", "confidence"])
+        entities = named_entities(text)
+        for entity in expected_entities:
+            assert entity in entities  # check the API response, not the expected tuple itself
+            assert not (set(entities[entity]) - expected_keys)  # no keys beyond the expected ones
+
    def test_political(self):
        political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
        test_string = "Guns don't kill people, people kill people."