import pytest
import spacy
# Sample passage (from a Recode interview with Sebastian Thrun) used as the
# shared input for all entity / noun-chunk / verb tests below.  NOTE: it
# deliberately contains curly quotes and an apostrophe (’) so the pipeline is
# exercised on real-world punctuation — do not "normalize" these characters.
en_text = (
    "When Sebastian Thrun started working on self-driving cars at "
    "Google in 2007, few people outside of the company took him "
    "seriously. “I can tell you very senior CEOs of major American "
    "car companies would shake my hand and turn away because I wasn’t "
    "worth talking to,” said Thrun, in an interview with Recode earlier "
    "this week.")
@pytest.fixture(scope="session")
def en_core_web_sm():
    """Load the small English spaCy pipeline.

    Returns:
        The loaded ``en_core_web_sm`` Language object.

    ``scope="session"`` makes pytest load the model once for the whole
    test run instead of once per test — model loading is by far the most
    expensive step here, and the pipeline is only read, never mutated,
    by the tests that use it.
    """
    return spacy.load("en_core_web_sm")
@pytest.fixture
def doc_en_core_web_sm(en_core_web_sm):
    """Parse the shared sample text with the loaded pipeline.

    Depends on the ``en_core_web_sm`` fixture; returns the resulting
    processed document, which the tests below inspect for entities,
    noun chunks, and verb lemmas.
    """
    return en_core_web_sm(en_text)
def test_entities(doc_en_core_web_sm):
entities = list(map(lambda e: (e.text, e.label_),
doc_en_core_web_sm.ents))
assert entities == [
('Sebastian Thrun', 'PERSON'),
('Google', 'ORG'), ('2007', 'DATE'),
('American', 'NORP'),
('Thrun', 'ORG'),
('earlier this week', 'DATE')
]
def test_nouns(doc_en_core_web_sm):
assert [
chunk.text for chunk in doc_en_core_web_sm.noun_chunks] == [
'Sebastian Thrun',
'self-driving cars',
'Google',
'few people',
'the company',
'him',
'I',
'you',
'very senior CEOs',
'major American car companies',
'my hand',
'I',
'Thrun',
'an interview',
'Recode']
def test_verbs(doc_en_core_web_sm):
assert [
token.lemma_ for token in doc_en_core_web_sm if token.pos_ == "VERB"] == [
'start',
'work',
'drive',
'take',
'can',
'tell',
'would',
'shake',
'turn',
'talk',
'say']