diff --git a/indicoio/__init__.py b/indicoio/__init__.py
index 452f4bf..6ba08cf 100644
--- a/indicoio/__init__.py
+++ b/indicoio/__init__.py
@@ -14,6 +14,7 @@ from indicoio.text.sentiment import posneg as sentiment
 from indicoio.text.lang import language
 from indicoio.text.tagging import text_tags
 from indicoio.text.keywords import keywords
+from indicoio.text.ner import named_entities
 from indicoio.images.fer import fer
 from indicoio.images.features import facial_features
 from indicoio.images.features import image_features
diff --git a/indicoio/config.py b/indicoio/config.py
index 756f4b0..e308319 100644
--- a/indicoio/config.py
+++ b/indicoio/config.py
@@ -51,7 +51,8 @@ TEXT_APIS = [
     'sentiment',
     'language',
     'sentiment_hq',
-    'keywords'
+    'keywords',
+    'named_entities'
 ]
 
 IMAGE_APIS = [
diff --git a/indicoio/text/ner.py b/indicoio/text/ner.py
new file mode 100644
index 0000000..8f2985b
--- /dev/null
+++ b/indicoio/text/ner.py
@@ -0,0 +1,30 @@
+from indicoio.utils.api import api_handler
+import indicoio.config as config
+
+def named_entities(text, cloud=None, batch=False, api_key=None, **kwargs):
+    """
+    Given input text, returns named entities (proper nouns) found in the text
+
+    Example usage:
+
+    .. code-block:: python
+
+        >>> text = "London Underground's boss Mike Brown warned that the strike ..."
+        >>> entities = indicoio.named_entities(text)
+        {u'London Underground': {u'categories': {u'location': 0.583755654607989,
+                                                 u'organization': 0.07460487821791033,
+                                                 u'person': 0.07304850776658672,
+                                                 u'unknown': 0.2685909594075139},
+                                 u'confidence': 0.846188063604044},
+         u'Mike Brown': {u'categories': {u'location': 0.025813884950623898,
+                                         u'organization': 0.06661470013014613,
+                                         u'person': 0.08723850624560824,
+                                         u'unknown': 0.8203329086736217},
+                         u'confidence': 0.8951793008234012}}
+
+    :param text: The text to be analyzed.
+    :type text: str or unicode
+    :rtype: Dictionary mapping each named entity to its category probabilities and confidence
+    """
+    url_params = {"batch": batch, "api_key": api_key}
+    return api_handler(text, cloud=cloud, api="namedentities", url_params=url_params, **kwargs)
diff --git a/indicoio/utils/multi.py b/indicoio/utils/multi.py
index 220f569..f037642 100644
--- a/indicoio/utils/multi.py
+++ b/indicoio/utils/multi.py
@@ -137,7 +137,7 @@ def predict_image(image, apis=IMAGE_APIS, **kwargs):
 
 def parsed_response(api, response):
     result = response.get('results', False)
-    if result:
+    if result is not False:
         return result
     raise IndicoError(
         "Sorry, the %s API returned an unexpected response.\n\t%s"
diff --git a/tests/test_remote.py b/tests/test_remote.py
index 218f878..1a3f30b 100644
--- a/tests/test_remote.py
+++ b/tests/test_remote.py
@@ -11,6 +11,7 @@ from indicoio import batch_political, batch_sentiment, batch_fer, batch_facial_f
 from indicoio import batch_language, batch_image_features, batch_text_tags
 from indicoio import keywords, batch_keywords
 from indicoio import sentiment_hq, batch_sentiment_hq
+from indicoio import named_entities, batch_named_entities
 from indicoio import predict_image, predict_text, batch_predict_image, batch_predict_text
 from indicoio.utils.errors import IndicoError
 
@@ -89,7 +90,6 @@ class BatchAPIRun(unittest.TestCase):
         test_data = ["data/unhappy.png"]
         self.assertRaises(IndicoError, batch_fer, test_data, api_key=self.api_key)
 
-
     def test_batch_facial_features(self):
         test_data = [generate_array((48,48))]
         response = batch_facial_features(test_data, api_key=self.api_key)
@@ -130,6 +130,15 @@ class BatchAPIRun(unittest.TestCase):
         self.assertTrue(isinstance(response, list))
         self.assertTrue(response[0]['English'] > 0.25)
 
+    def test_batch_named_entities(self):
+        batch = ["London Underground's boss Mike Brown warned that the strike ..."]
+        expected_entities = ("London Underground", "Mike Brown")
+        expected_keys = set(["categories", "confidence"])
+        entities = batch_named_entities(batch)[0]
+        for entity in expected_entities:
+            assert entity in entities
+            assert not (set(entities[entity]) - expected_keys)
+
     def test_batch_multi_api_image(self):
         test_data = [generate_array((48,48)), generate_int_array((48,48))]
         response = batch_predict_image(test_data, apis=config.IMAGE_APIS, api_key=self.api_key)
@@ -226,6 +235,15 @@ class FullAPIRun(unittest.TestCase):
         for v in results.values():
             assert v >= .1
 
+    def test_named_entities(self):
+        text = "London Underground's boss Mike Brown warned that the strike ..."
+        expected_entities = ("London Underground", "Mike Brown")
+        expected_keys = set(["categories", "confidence"])
+        entities = named_entities(text)
+        for entity in expected_entities:
+            assert entity in entities
+            assert not (set(entities[entity]) - expected_keys)
+
     def test_political(self):
         political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
         test_string = "Guns don't kill people, people kill people."