mirror of
https://github.com/wassname/IndicoIo-python.git
synced 2026-06-27 16:10:34 +08:00
NER + document classification
This commit is contained in:
@@ -8,6 +8,8 @@ Version, version, __version__, VERSION = ('0.4.5',) * 4
|
||||
from indicoio.text.sentiment import political, posneg
|
||||
from indicoio.text.sentiment import posneg as sentiment
|
||||
from indicoio.text.lang import language
|
||||
from indicoio.text.classification import classification
|
||||
from indicoio.text.ner import named_entities
|
||||
from indicoio.images.fer import fer
|
||||
from indicoio.images.features import facial_features
|
||||
from indicoio.images.features import image_features
|
||||
@@ -20,3 +22,5 @@ language = partial(language, config.api_root)
|
||||
fer = partial(fer, config.api_root)
|
||||
facial_features = partial(facial_features, config.api_root)
|
||||
image_features = partial(image_features, config.api_root)
|
||||
classification = partial(classification, config.api_root)
|
||||
named_entities = partial(named_entities, config.api_root)
|
||||
|
||||
@@ -6,6 +6,8 @@ JSON_HEADERS = {'Content-type': 'application/json', 'Accept': 'text/plain'}
|
||||
from indicoio.text.sentiment import political, posneg
|
||||
from indicoio.text.sentiment import posneg as sentiment
|
||||
from indicoio.text.lang import language
|
||||
from indicoio.text.classification import classification
|
||||
from indicoio.text.ner import named_entities
|
||||
from indicoio.images.fer import fer
|
||||
from indicoio.images.features import facial_features
|
||||
from indicoio.images.features import image_features
|
||||
@@ -18,3 +20,5 @@ language = partial(language, config.local_api_root)
|
||||
fer = partial(fer, config.local_api_root)
|
||||
facial_features = partial(facial_features, config.local_api_root)
|
||||
image_features = partial(image_features, config.local_api_root)
|
||||
classification = partial(classification, config.local_api_root)
|
||||
named_entities = partial(named_entities, config.local_api_root)
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
from indicoio.utils import api_handler
|
||||
|
||||
def classification(api_root, text):
    """
    Given input text, returns a probability distribution over 100 document categories

    Example usage:

    .. code-block:: python

        >>> import indicoio
        >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
        >>> possible = indicoio.classification(text)
        >>> category = max(possible, key=possible.get)
        >>> probability = possible[category]
        >>> "Predicted category '%s' with probability %.4f" % (category, probability)
        "Predicted category 'Weather' with probability 0.8548"

    :param api_root: Prefix to which the endpoint name
        ``"documentclassification"`` is appended. The package-level
        ``indicoio.classification`` pre-binds this argument with
        ``functools.partial``, so callers normally pass only ``text``.
    :type api_root: str
    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of class probability pairs
    """

    return api_handler(text, api_root + "documentclassification")
|
||||
@@ -1,6 +1,3 @@
|
||||
import requests
|
||||
import json
|
||||
|
||||
from indicoio.utils import api_handler
|
||||
|
||||
def language(api_root, text):
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
from indicoio.utils import api_handler
|
||||
|
||||
def named_entities(api_root, text):
    """
    Given input text, returns a mapping from named entities to
    named entity categories.

    Example usage:

    .. code-block:: python

        >>> import indicoio
        >>> text = 'On Monday, president Barack Obama will be...'
        >>> indicoio.named_entities(text)
        {'Monday': 'Time', 'Barack Obama': 'Person'}

    :param api_root: Prefix to which the endpoint name ``"ner"`` is
        appended. The package-level ``indicoio.named_entities`` pre-binds
        this argument with ``functools.partial``, so callers normally pass
        only ``text``.
    :type api_root: str
    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of named entity, category pairs
    """

    return api_handler(text, api_root + "ner")
|
||||
@@ -1,6 +1,3 @@
|
||||
import requests
|
||||
import json
|
||||
|
||||
from indicoio import JSON_HEADERS
|
||||
from indicoio.utils import api_handler
|
||||
|
||||
|
||||
@@ -20,6 +20,18 @@ class FullAPIRun(unittest.TestCase):
|
||||
self.assertTrue(vector.min() < minimum)
|
||||
self.assertTrue(np.ptp(vector) > span)
|
||||
|
||||
def test_document_classification(self):
    """The classifier must report 'arts' alongside at least one other category."""
    sample = "On Monday, president Barack Obama will be..."
    expected_labels = set(['arts'])
    returned_labels = set(classification(sample).keys())
    # Strict-subset comparison: every expected label has to be present and
    # the response has to contain at least one additional category.
    self.assertTrue(expected_labels < returned_labels)
|
||||
|
||||
def test_named_entity_recognition(self):
    """Check that the NER response contains the key 'named entity'.

    NOTE(review): the ``named_entities`` docstring describes a mapping from
    entity text to category (e.g. ``{'Barack Obama': 'Person'}``), which
    would not contain a literal 'named entity' key -- confirm the real
    response shape against the API before relying on this assertion.
    """
    text = "On Monday, president Barack Obama will be..."
    results = named_entities(text)
    # assertIn reports the actual keys on failure, unlike assertTrue(x in y).
    self.assertIn('named entity', set(results.keys()))
|
||||
|
||||
def test_political(self):
|
||||
political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
|
||||
test_string = "Guns don't kill people, people kill people."
|
||||
|
||||
@@ -4,7 +4,8 @@ import os
|
||||
import numpy as np
|
||||
import skimage.io
|
||||
|
||||
from indicoio import political, sentiment, fer, facial_features, language, image_features
|
||||
from indicoio import political, sentiment, fer, facial_features, language, image_features, \
|
||||
classification, named_entities
|
||||
|
||||
DIR = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
@@ -22,6 +23,18 @@ class FullAPIRun(unittest.TestCase):
|
||||
self.assertTrue(vector.min() < minimum)
|
||||
self.assertTrue(np.ptp(vector) > span)
|
||||
|
||||
def test_document_classification(self):
    """The classifier must report 'arts' alongside at least one other category."""
    sample = "On Monday, president Barack Obama will be..."
    expected_labels = set(['arts'])
    returned_labels = set(classification(sample).keys())
    # Strict-subset comparison: every expected label has to be present and
    # the response has to contain at least one additional category.
    self.assertTrue(expected_labels < returned_labels)
|
||||
|
||||
def test_named_entity_recognition(self):
    """Check that the NER response contains the key 'named entity'.

    NOTE(review): the ``named_entities`` docstring describes a mapping from
    entity text to category (e.g. ``{'Barack Obama': 'Person'}``), which
    would not contain a literal 'named entity' key -- confirm the real
    response shape against the API before relying on this assertion.
    """
    text = "On Monday, president Barack Obama will be..."
    results = named_entities(text)
    # assertIn reports the actual keys on failure, unlike assertTrue(x in y).
    self.assertIn('named entity', set(results.keys()))
|
||||
|
||||
def test_political(self):
|
||||
political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
|
||||
test_string = "Guns don't kill people, people kill people."
|
||||
|
||||
Reference in New Issue
Block a user