diff --git a/pkgs/development/python-modules/spacy-transformers/annotation-test/annotate.py b/pkgs/development/python-modules/spacy-transformers/annotation-test/annotate.py
new file mode 100644
index 00000000000..d0be2d1c335
--- /dev/null
+++ b/pkgs/development/python-modules/spacy-transformers/annotation-test/annotate.py
@@ -0,0 +1,90 @@
+"""Regression test for the annotations produced by ``en_core_web_trf``.
+
+Runs the pipeline over a short English passage and compares its named
+entities, noun chunks and verb lemmas against fixed expected lists.
+"""
+
+import pytest
+import spacy
+
+en_text = (
+    "When Sebastian Thrun started working on self-driving cars at "
+    "Google in 2007, few people outside of the company took him "
+    "seriously. “I can tell you very senior CEOs of major American "
+    "car companies would shake my hand and turn away because I wasn’t "
+    "worth talking to,” said Thrun, in an interview with Recode earlier "
+    "this week.")
+
+
+# Module scope: every test only reads from the pipeline, so it is loaded once
+# for the whole module instead of once per test function.
+@pytest.fixture(scope="module")
+def en_core_web_trf():
+    """Return the loaded ``en_core_web_trf`` spaCy pipeline."""
+    return spacy.load("en_core_web_trf")
+
+
+# Module scope: the annotated document is never mutated by the tests, so the
+# sample text is run through the pipeline a single time.
+@pytest.fixture(scope="module")
+def doc_en_core_web_trf(en_core_web_trf):
+    """Return ``en_text`` processed by the ``en_core_web_trf`` pipeline."""
+    return en_core_web_trf(en_text)
+
+
+def test_entities(doc_en_core_web_trf):
+    """Compare each entity's (text, label) pair with the expected list."""
+    entities = [(ent.text, ent.label_) for ent in doc_en_core_web_trf.ents]
+
+    assert entities == [
+        ('Sebastian Thrun', 'PERSON'),
+        ('Google', 'ORG'),
+        ('2007', 'DATE'),
+        ('American', 'NORP'),
+        ('Thrun', 'PERSON'),
+        ('Recode', 'ORG'),
+        ('earlier this week', 'DATE'),
+    ]
+
+
+def test_nouns(doc_en_core_web_trf):
+    """Compare the text of each noun chunk with the expected list."""
+    noun_chunks = [chunk.text for chunk in doc_en_core_web_trf.noun_chunks]
+
+    assert noun_chunks == [
+        'Sebastian Thrun',
+        'self-driving cars',
+        'Google',
+        'few people',
+        'the company',
+        'him',
+        'I',
+        'you',
+        'very senior CEOs',
+        'major American car companies',
+        'my hand',
+        'I',
+        'Thrun',
+        'an interview',
+        'Recode',
+    ]
+
+
+def test_verbs(doc_en_core_web_trf):
+    """Compare the lemma of each token tagged ``VERB`` with the expected list."""
+    verb_lemmas = [
+        token.lemma_
+        for token in doc_en_core_web_trf
+        if token.pos_ == "VERB"
+    ]
+
+    assert verb_lemmas == [
+        'start',
+        'take',
+        'tell',
+        'shake',
+        'turn',
+        'be',
+        'talk',
+        'say',
+    ]
diff --git a/pkgs/development/python-modules/spacy-transformers/annotation-test/default.nix b/pkgs/development/python-modules/spacy-transformers/annotation-test/default.nix
new file mode
100644
index 00000000000..11a927a0a62
--- /dev/null
+++ b/pkgs/development/python-modules/spacy-transformers/annotation-test/default.nix
@@ -0,0 +1,23 @@
+{ stdenv, pytest, spacy_models }:
+
+stdenv.mkDerivation {  # check-only derivation: runs annotate.py under pytest
+  name = "spacy-transformers-annotation-test";
+
+  src = ./.;  # this directory, which also holds annotate.py
+
+  dontConfigure = true;  # no configure step
+  dontBuild = true;  # nothing to build; only the check below matters
+  doCheck = true;  # run checkPhase
+
+  checkInputs = [ pytest spacy_models.en_core_web_trf ];  # test runner plus the model annotate.py loads by name
+
+  checkPhase = ''
+    pytest annotate.py
+  '';
+
+  installPhase = ''
+    touch $out
+  '';  # the output is just an empty file; the check is the only real work
+
+  meta.timeout = 60;  # NOTE(review): presumably seconds — confirm against the nixpkgs meta.timeout docs
+}
diff --git a/pkgs/development/python-modules/spacy-transformers/default.nix b/pkgs/development/python-modules/spacy-transformers/default.nix
index 0e8669d98ff..1d02460167b 100644
--- a/pkgs/development/python-modules/spacy-transformers/default.nix
+++ b/pkgs/development/python-modules/spacy-transformers/default.nix
@@ -1,4 +1,5 @@
 { lib
+, callPackage
 , fetchPypi
 , buildPythonPackage
 , pytorch
@@ -30,6 +31,8 @@ buildPythonPackage rec {

   pythonImportsCheck = [ "spacy_transformers" ];

+  passthru.tests.annotation = callPackage ./annotation-test { };
+
   meta = with lib; {
     description = "spaCy pipelines for pretrained BERT, XLNet and GPT-2";
     homepage = "https://github.com/explosion/spacy-transformers";