about summary refs log tree commit diff
path: root/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py')
-rw-r--r-- nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py | 69
1 files changed, 69 insertions, 0 deletions
diff --git a/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py b/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py
new file mode 100644
index 00000000000..822eb8ac074
--- /dev/null
+++ b/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py
@@ -0,0 +1,69 @@
+import pytest
+import spacy
+
+# Sample English passage that the fixtures below feed to the pipeline.
+# Adjacent string literals are concatenated into one single-line string.
+en_text = (
+    "When Sebastian Thrun started working on self-driving cars at Google "
+    "in 2007, few people outside of the company took him seriously. "
+    "“I can tell you very senior CEOs of major American car companies "
+    "would shake my hand and turn away because I wasn’t worth talking "
+    "to,” said Thrun, in an interview with Recode earlier this week."
+)
+
+
+@pytest.fixture(scope="module")
+def en_core_web_sm():
+    """Load the ``en_core_web_sm`` spaCy pipeline once per test module.
+
+    Module scope keeps the loaded pipeline around for every test in this
+    file instead of calling ``spacy.load`` again for each test that
+    requests it (directly or through another fixture).
+    """
+    return spacy.load("en_core_web_sm")
+
+
+@pytest.fixture
+def doc_en_core_web_sm(en_core_web_sm):
+    """Return the result of running the loaded pipeline on ``en_text``."""
+    nlp = en_core_web_sm
+    return nlp(en_text)
+
+
+def test_entities(doc_en_core_web_sm):
+    """Compare the document's entities, as (text, label) pairs, to a fixed list."""
+    # NOTE(review): the second 'Thrun' is expected as ORG, presumably pinning
+    # what the model actually outputs rather than the ideal label - confirm.
+    expected = [
+        ('Sebastian Thrun', 'PERSON'),
+        ('Google', 'ORG'),
+        ('2007', 'DATE'),
+        ('American', 'NORP'),
+        ('Thrun', 'ORG'),
+        ('earlier this week', 'DATE'),
+    ]
+
+    found = [(ent.text, ent.label_) for ent in doc_en_core_web_sm.ents]
+
+    assert found == expected
+
+
+def test_nouns(doc_en_core_web_sm):
+    """Compare the text of the document's noun chunks, in order, to a fixed list."""
+    expected = [
+        'Sebastian Thrun',
+        'self-driving cars',
+        'Google',
+        'few people',
+        'the company',
+        'him',
+        'I',
+        'you',
+        'very senior CEOs',
+        'major American car companies',
+        'my hand',
+        'I',
+        'Thrun',
+        'an interview',
+        'Recode',
+    ]
+
+    chunk_texts = [span.text for span in doc_en_core_web_sm.noun_chunks]
+
+    assert chunk_texts == expected
+
+
+def test_verbs(doc_en_core_web_sm):
+    """Compare the lemmas of all tokens tagged ``VERB``, in order, to a fixed list."""
+    expected = [
+        'start',
+        'work',
+        'drive',
+        'take',
+        'can',
+        'tell',
+        'would',
+        'shake',
+        'turn',
+        'talk',
+        'say',
+    ]
+
+    verb_lemmas = [
+        tok.lemma_
+        for tok in doc_en_core_web_sm
+        if tok.pos_ == "VERB"
+    ]
+
+    assert verb_lemmas == expected