You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

15 lines
514 B

import pytest
@pytest.mark.parametrize("text", ["αριθ.", "τρισ.", "δισ.", "σελ."])
def test_el_tokenizer_handles_abbr(el_tokenizer, text):
    """Check that each parametrized abbreviation is kept as a single token.

    Every parametrized input ends with a period; the test passes only when
    calling ``el_tokenizer`` on it yields exactly one token.

    Args:
        el_tokenizer: Callable under test, injected by name.
            NOTE(review): presumably a pytest fixture defined outside this
            file (e.g. in a conftest) -- confirm.
        text: One abbreviation string supplied by ``parametrize``.
    """
    tokens = el_tokenizer(text)
    # Exactly one token means the trailing period was not split off.
    assert len(tokens) == 1
def test_el_tokenizer_handles_exc_in_text(el_tokenizer):
    """Check that an abbreviation inside a full sentence stays one token.

    The sentence contains "τρισ." as its third whitespace-separated word.
    The test requires 14 tokens in total and that the token at index 2
    still carries the abbreviation's trailing period.

    Args:
        el_tokenizer: Callable under test, injected by name.
            NOTE(review): presumably a pytest fixture defined outside this
            file (e.g. in a conftest) -- confirm.
    """
    text = "Στα 14 τρισ. δολάρια το κόστος από την άνοδο της στάθμης της θάλασσας."
    tokens = el_tokenizer(text)
    # 13 whitespace-separated words plus the sentence-final period.
    assert len(tokens) == 14
    # The abbreviation must keep its period instead of being split in two.
    assert tokens[2].text == "τρισ."