You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

113 lines
3.5 KiB

"""
Unit tests for Senna
"""
import unittest
from os import environ, path, sep
from nltk.classify import Senna
from nltk.tag import SennaChunkTagger, SennaNERTagger, SennaTagger
# Decide where the tests look for Senna: a SENNA environment variable, when set,
# is normalised and given a trailing separator; otherwise the hard-coded default
# directory is used as written.
SENNA_EXECUTABLE_PATH = (
    path.normpath(environ["SENNA"]) + sep
    if "SENNA" in environ
    else "/usr/share/senna-v3.0"
)

# The test classes below are skipped unless this path exists on disk.
senna_is_installed = path.exists(SENNA_EXECUTABLE_PATH)
@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaPipeline(unittest.TestCase):
    """Tests for the ``Senna`` pipeline class imported from nltk.classify."""

    def test_senna_pipeline(self):
        """Tag one sentence with pos, chk and ner and check every token's annotations."""
        annotator = Senna(SENNA_EXECUTABLE_PATH, ["pos", "chk", "ner"])
        words = "Dusseldorf is an international business center".split()

        # Order of the fields pulled out of each per-token annotation mapping;
        # the expected tuples below follow the same order.
        fields = ("word", "chk", "ner", "pos")
        actual = [
            tuple(annotation[field] for field in fields)
            for annotation in annotator.tag(words)
        ]

        self.assertEqual(
            actual,
            [
                ("Dusseldorf", "B-NP", "B-LOC", "NNP"),
                ("is", "B-VP", "O", "VBZ"),
                ("an", "B-NP", "O", "DT"),
                ("international", "I-NP", "O", "JJ"),
                ("business", "I-NP", "O", "NN"),
                ("center", "I-NP", "O", "NN"),
            ],
        )
@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaTagger(unittest.TestCase):
    """Tests for the POS, chunk and NER tagger classes imported from nltk.tag."""

    def test_senna_tagger(self):
        """POS-tag one sentence and compare with the expected (word, tag) pairs."""
        pos_tagger = SennaTagger(SENNA_EXECUTABLE_PATH)
        words = "What is the airspeed of an unladen swallow ?".split()
        tagged = pos_tagger.tag(words)

        # One expected POS tag per token, in sentence order.
        pos_tags = ["WP", "VBZ", "DT", "NN", "IN", "DT", "NN", "NN", "."]
        self.assertEqual(tagged, list(zip(words, pos_tags)))

    def test_senna_chunk_tagger(self):
        """Chunk-tag one sentence, then extract its NP chunks from the BIO tags."""
        chunker = SennaChunkTagger(SENNA_EXECUTABLE_PATH)
        words = "What is the airspeed of an unladen swallow ?".split()
        bio_tagged = chunker.tag(words)

        # One expected BIO chunk tag per token, in sentence order.
        bio_tags = [
            "B-NP",
            "B-VP",
            "B-NP",
            "I-NP",
            "B-PP",
            "B-NP",
            "I-NP",
            "I-NP",
            "O",
        ]
        expected_bio = list(zip(words, bio_tags))

        # The chunks are derived from the tagger's actual output, before any
        # assertion runs, so both results exist when the checks are made.
        np_chunks = list(chunker.bio_to_chunks(bio_tagged, chunk_type="NP"))
        expected_chunks = [
            ("What", "0"),
            ("the airspeed", "2-3"),
            ("an unladen swallow", "5-6-7"),
        ]

        self.assertEqual(bio_tagged, expected_bio)
        self.assertEqual(np_chunks, expected_chunks)

    def test_senna_ner_tagger(self):
        """NER-tag two sentences and compare each with its expected (word, tag) pairs."""
        ner = SennaNERTagger(SENNA_EXECUTABLE_PATH)

        first_words = "Shakespeare theatre was in London .".split()
        first_tagged = ner.tag(first_words)
        first_expected = list(
            zip(first_words, ["B-PER", "O", "O", "O", "B-LOC", "O"])
        )

        second_words = "UN headquarters are in NY , USA .".split()
        second_tagged = ner.tag(second_words)
        second_expected = list(
            zip(
                second_words,
                ["B-ORG", "O", "O", "O", "B-LOC", "O", "B-LOC", "O"],
            )
        )

        self.assertEqual(first_tagged, first_expected)
        self.assertEqual(second_tagged, second_expected)