Merge pull request #84 from IndicoDataSolutions/Chris/ner

[ADD] NER
This commit is contained in:
Madison May
2015-07-10 02:41:59 -04:00
5 changed files with 53 additions and 3 deletions
+1
View File
@@ -14,6 +14,7 @@ from indicoio.text.sentiment import posneg as sentiment
from indicoio.text.lang import language
from indicoio.text.tagging import text_tags
from indicoio.text.keywords import keywords
from indicoio.text.ner import named_entities
from indicoio.images.fer import fer
from indicoio.images.features import facial_features
from indicoio.images.features import image_features
+2 -1
View File
@@ -51,7 +51,8 @@ TEXT_APIS = [
'sentiment',
'language',
'sentiment_hq',
'keywords'
'keywords',
'named_entities'
]
IMAGE_APIS = [
+30
View File
@@ -0,0 +1,30 @@
from indicoio.utils.api import api_handler
import indicoio.config as config
def named_entities(text, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Given input text, returns named entities (proper nouns) found in the text.

    Example usage:

    .. code-block:: python

       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
       >>> entities = indicoio.named_entities(text)
       >>> entities
       {u'London Underground': {u'categories': {u'location': 0.583755654607989,
                                                u'organization': 0.07460487821791033,
                                                u'person': 0.07304850776658672,
                                                u'unknown': 0.2685909594075139},
                                u'confidence': 0.846188063604044},
        u'Mike Brown': {u'categories': {u'location': 0.025813884950623898,
                                        u'organization': 0.06661470013014613,
                                        u'person': 0.08723850624560824,
                                        u'unknown': 0.8203329086736217},
                        u'confidence': 0.8951793008234012}}

    :param text: The text to be analyzed.
    :type text: str or unicode
    :param cloud: Passed through unchanged to ``api_handler``.
    :param batch: Passed to ``api_handler`` inside ``url_params``.
    :param api_key: Passed to ``api_handler`` inside ``url_params``.
    :param kwargs: Extra keyword arguments, passed through to ``api_handler``.
    :rtype: Whatever ``api_handler`` returns for the ``namedentities`` API;
        per the example above, a dictionary keyed by entity text whose values
        hold ``categories`` (category/probability pairs) and ``confidence``.
    """
    # The endpoint name has no underscore, unlike this function's name.
    return api_handler(
        text,
        cloud=cloud,
        api="namedentities",
        url_params={"batch": batch, "api_key": api_key},
        **kwargs
    )
+1 -1
View File
@@ -137,7 +137,7 @@ def predict_image(image, apis=IMAGE_APIS, **kwargs):
def parsed_response(api, response):
result = response.get('results', False)
if result:
if result != False:
return result
raise IndicoError(
"Sorry, the %s API returned an unexpected response.\n\t%s"
+19 -1
View File
@@ -11,6 +11,7 @@ from indicoio import batch_political, batch_sentiment, batch_fer, batch_facial_f
from indicoio import batch_language, batch_image_features, batch_text_tags
from indicoio import keywords, batch_keywords
from indicoio import sentiment_hq, batch_sentiment_hq
from indicoio import named_entities, batch_named_entities
from indicoio import predict_image, predict_text, batch_predict_image, batch_predict_text
from indicoio.utils.errors import IndicoError
@@ -89,7 +90,6 @@ class BatchAPIRun(unittest.TestCase):
test_data = ["data/unhappy.png"]
self.assertRaises(IndicoError, batch_fer, test_data, api_key=self.api_key)
def test_batch_facial_features(self):
test_data = [generate_array((48,48))]
response = batch_facial_features(test_data, api_key=self.api_key)
@@ -130,6 +130,15 @@ class BatchAPIRun(unittest.TestCase):
self.assertTrue(isinstance(response, list))
self.assertTrue(response[0]['English'] > 0.25)
def test_batch_named_entities(self):
    """Batch NER returns the expected entities, each with only known keys."""
    batch = ["London Underground's boss Mike Brown warned that the strike ..."]
    expected_entities = ("London Underground", "Mike Brown")
    expected_keys = set(["categories", "confidence"])

    # One result per input document; inspect the result for the only input.
    entities = batch_named_entities(batch)[0]

    for entity in expected_entities:
        # Check membership in the API response. Testing against
        # `expected_entities` itself would be trivially true.
        self.assertIn(entity, entities)
        # No keys beyond the expected ones may appear for an entity.
        self.assertFalse(set(entities[entity]) - expected_keys)
def test_batch_multi_api_image(self):
test_data = [generate_array((48,48)), generate_int_array((48,48))]
response = batch_predict_image(test_data, apis=config.IMAGE_APIS, api_key=self.api_key)
@@ -226,6 +235,15 @@ class FullAPIRun(unittest.TestCase):
for v in results.values():
assert v >= .1
def test_named_entities(self):
    """NER returns the expected entities, each with only known keys."""
    text = "London Underground's boss Mike Brown warned that the strike ..."
    expected_entities = ("London Underground", "Mike Brown")
    expected_keys = set(["categories", "confidence"])

    entities = named_entities(text)

    for entity in expected_entities:
        # Check membership in the API response. Testing against
        # `expected_entities` itself would be trivially true.
        self.assertIn(entity, entities)
        # No keys beyond the expected ones may appear for an entity.
        self.assertFalse(set(entities[entity]) - expected_keys)
def test_political(self):
political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
test_string = "Guns don't kill people, people kill people."