NER + document classification

This commit is contained in:
Madison May
2014-11-03 15:42:23 -05:00
parent 7feab56a9d
commit ea29884396
8 changed files with 82 additions and 7 deletions
+4
View File
@@ -8,6 +8,8 @@ Version, version, __version__, VERSION = ('0.4.5',) * 4
from indicoio.text.sentiment import political, posneg
from indicoio.text.sentiment import posneg as sentiment
from indicoio.text.lang import language
from indicoio.text.classification import classification
from indicoio.text.ner import named_entities
from indicoio.images.fer import fer
from indicoio.images.features import facial_features
from indicoio.images.features import image_features
@@ -20,3 +22,5 @@ language = partial(language, config.api_root)
# Rebind each public endpoint function with `config.api_root` pre-filled as
# its first positional argument (via `partial`), so the module-level names
# shadow the raw imports with versions that no longer need the API root.
fer = partial(fer, config.api_root)
facial_features = partial(facial_features, config.api_root)
image_features = partial(image_features, config.api_root)
classification = partial(classification, config.api_root)
named_entities = partial(named_entities, config.api_root)
+4
View File
@@ -6,6 +6,8 @@ JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain'}
from indicoio.text.sentiment import political, posneg
from indicoio.text.sentiment import posneg as sentiment
from indicoio.text.lang import language
from indicoio.text.classification import classification
from indicoio.text.ner import named_entities
from indicoio.images.fer import fer
from indicoio.images.features import facial_features
from indicoio.images.features import image_features
@@ -18,3 +20,5 @@ language = partial(language, config.local_api_root)
# Rebind each public endpoint function with `config.local_api_root` pre-filled
# as its first positional argument (via `partial`), so the module-level names
# shadow the raw imports with versions that no longer need the API root.
fer = partial(fer, config.local_api_root)
facial_features = partial(facial_features, config.local_api_root)
image_features = partial(image_features, config.local_api_root)
classification = partial(classification, config.local_api_root)
named_entities = partial(named_entities, config.local_api_root)
+25
View File
@@ -0,0 +1,25 @@
from indicoio.utils import api_handler
def classification(api_root, text):
    """
    Classify a piece of text into document categories.

    Delegates to ``api_handler`` with the text and the endpoint formed by
    appending ``documentclassification`` to ``api_root``. The result is
    documented as a probability distribution over 100 document categories.

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
       >>> possible = indicoio.classification(text)
       >>> category = max(possible, key=possible.get)
       >>> probability = possible[category]
       >>> print("Predicted category '%s' with probability %.4f" % (category, probability))
       Predicted category 'Weather' with probability 0.8548

    :param api_root: API root string; the endpoint name is appended to it
        directly, with no separator added.
    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of class probability pairs
    """
    endpoint = api_root + "documentclassification"
    return api_handler(text, endpoint)
-3
View File
@@ -1,6 +1,3 @@
import requests
import json
from indicoio.utils import api_handler
def language(api_root, text):
+23
View File
@@ -0,0 +1,23 @@
from indicoio.utils import api_handler
def named_entities(api_root, text):
    """
    Extract named entities from a piece of text.

    Delegates to ``api_handler`` with the text and the endpoint formed by
    appending ``ner`` to ``api_root``. The result is documented as a mapping
    from named entities to named entity categories.

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> text = 'On Monday, president Barack Obama will be...'
       >>> indicoio.named_entities(text)
       {'Monday': 'Time', 'Barack Obama': 'Person'}

    :param api_root: API root string; the endpoint name is appended to it
        directly, with no separator added.
    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of named entity, category pairs
    """
    endpoint = api_root + "ner"
    return api_handler(text, endpoint)
-3
View File
@@ -1,6 +1,3 @@
import requests
import json
from indicoio import JSON_HEADERS
from indicoio.utils import api_handler
+12
View File
@@ -20,6 +20,18 @@ class FullAPIRun(unittest.TestCase):
self.assertTrue(vector.min() < minimum)
self.assertTrue(np.ptp(vector) > span)
def test_document_classification(self):
    """The classification result must contain 'arts' plus at least one other key."""
    sample = "On Monday, president Barack Obama will be..."
    expected = set(['arts'])
    returned = set(classification(sample).keys())
    # `<` is the strict-subset test: 'arts' must be present and must not be
    # the only key returned.
    self.assertTrue(expected < returned)
def test_named_entity_recognition(self):
    """The named-entity result must expose a 'named entity' key."""
    text = "On Monday, president Barack Obama will be..."
    results = named_entities(text)
    # NOTE(review): the `named_entities` docstring shows entity strings
    # (e.g. 'Monday') as keys; confirm the API really returns a literal
    # 'named entity' key before relying on this assertion.
    self.assertTrue('named entity' in results.keys())
def test_political(self):
political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
test_string = "Guns don't kill people, people kill people."
+14 -1
View File
@@ -4,7 +4,8 @@ import os
import numpy as np
import skimage.io
from indicoio import political, sentiment, fer, facial_features, language, image_features
from indicoio import political, sentiment, fer, facial_features, language, image_features, \
classification, named_entities
DIR = os.path.dirname(os.path.realpath(__file__))
@@ -22,6 +23,18 @@ class FullAPIRun(unittest.TestCase):
self.assertTrue(vector.min() < minimum)
self.assertTrue(np.ptp(vector) > span)
def test_document_classification(self):
    """The classification result must contain 'arts' plus at least one other key."""
    sample = "On Monday, president Barack Obama will be..."
    expected = set(['arts'])
    returned = set(classification(sample).keys())
    # `<` is the strict-subset test: 'arts' must be present and must not be
    # the only key returned.
    self.assertTrue(expected < returned)
def test_named_entity_recognition(self):
    """The named-entity result must expose a 'named entity' key."""
    text = "On Monday, president Barack Obama will be..."
    results = named_entities(text)
    # NOTE(review): the `named_entities` docstring shows entity strings
    # (e.g. 'Monday') as keys; confirm the API really returns a literal
    # 'named entity' key before relying on this assertion.
    self.assertTrue('named entity' in results.keys())
def test_political(self):
political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
test_string = "Guns don't kill people, people kill people."