Dependency Parsing

Note

This tutorial is available as an IPython notebook here.

%%time
import malaya
CPU times: user 12.2 s, sys: 1.49 s, total: 13.7 s
Wall time: 17.3 s

List available deep learning Dependency models

malaya.dependency.available_deep_model()
['concat', 'bahdanau', 'luong', 'attention-is-all-you-need']

Describe supported dependencies

malaya.describe_dependency()
acl - clausal modifier of noun
advcl - adverbial clause modifier
advmod - adverbial modifier
amod - adjectival modifier
appos - appositional modifier
aux - auxiliary
case - case marking
ccomp - clausal complement
compound - compound
compound:plur - plural compound
conj - conjunct
cop - cop
csubj - clausal subject
dep - dependent
det - determiner
fixed - multi-word expression
flat - name
iobj - indirect object
mark - marker
nmod - nominal modifier
nsubj - nominal subject
obj - direct object
parataxis - parataxis
root - root
xcomp - open clausal complement
you can read more from https://universaldependencies.org/en/dep/xcomp.html
string = 'Dr Mahathir menasihati mereka supaya berhenti berehat dan tidur sebentar sekiranya mengantuk ketika memandu.'

Load CRF model

crf = malaya.dependency.crf()
d_object, tagging, indexing = crf.predict(string)
tagging, indexing
([('Dr', 'case'),
  ('Mahathir', 'obl'),
  ('menasihati', 'acl'),
  ('mereka', 'obj'),
  ('supaya', 'case'),
  ('berhenti', 'xcomp'),
  ('berehat', 'advcl'),
  ('dan', 'cc'),
  ('tidur', 'conj'),
  ('sebentar', 'advmod'),
  ('sekiranya', 'advmod'),
  ('mengantuk', 'UNK'),
  ('ketika', 'case'),
  ('memandu', 'xcomp')],
 [('Dr', 3),
  ('Mahathir', 6),
  ('menasihati', 4),
  ('mereka', 4),
  ('supaya', 8),
  ('berhenti', 10),
  ('berehat', 10),
  ('dan', 14),
  ('tidur', 4),
  ('sebentar', 12),
  ('sekiranya', 9),
  ('mengantuk', 1),
  ('ketika', 9),
  ('memandu', 7)])

Visualize graph for dependency output

Make sure you have already installed Graphviz.

d_object.to_graphvis()
_images/load-dependency_10_0.svg

Load deep learning models

for i in malaya.dependency.available_deep_model():
    print('Testing %s model'%(i))
    model = malaya.dependency.deep_model(i)
    print(model.predict(string))
    print()
Testing concat model
(<malaya._utils._parse_dependency.DependencyGraph object at 0x13f5ea9e8>, [('Dr', 'nsubj'), ('Mahathir', 'nsubj'), ('menasihati', 'root'), ('mereka', 'obj'), ('supaya', 'case'), ('berhenti', 'xcomp'), ('berehat', 'xcomp'), ('dan', 'cc'), ('tidur', 'conj'), ('sebentar', 'det'), ('sekiranya', 'mark'), ('mengantuk', 'amod'), ('ketika', 'case'), ('memandu', 'xcomp')], [('Dr', 2), ('Mahathir', 3), ('menasihati', 0), ('mereka', 3), ('supaya', 6), ('berhenti', 3), ('berehat', 6), ('dan', 9), ('tidur', 7), ('sebentar', 9), ('sekiranya', 13), ('mengantuk', 9), ('ketika', 13), ('memandu', 12)])

Testing bahdanau model
(<malaya._utils._parse_dependency.DependencyGraph object at 0x15329d518>, [('Dr', 'nsubj'), ('Mahathir', 'compound'), ('menasihati', 'root'), ('mereka', 'nsubj'), ('supaya', 'case'), ('berhenti', 'obl'), ('berehat', 'ccomp'), ('dan', 'cc'), ('tidur', 'conj'), ('sebentar', 'advmod'), ('sekiranya', 'mark'), ('mengantuk', 'amod'), ('ketika', 'case'), ('memandu', 'xcomp')], [('Dr', 2), ('Mahathir', 2), ('menasihati', 0), ('mereka', 3), ('supaya', 6), ('berhenti', 3), ('berehat', 6), ('dan', 9), ('tidur', 3), ('sebentar', 9), ('sekiranya', 10), ('mengantuk', 10), ('ketika', 13), ('memandu', 11)])

Testing luong model
(<malaya._utils._parse_dependency.DependencyGraph object at 0x13f5eae48>, [('Dr', 'nmod'), ('Mahathir', 'nsubj'), ('menasihati', 'UNK'), ('mereka', 'det'), ('supaya', 'mark'), ('berhenti', 'advcl'), ('berehat', 'fixed'), ('dan', 'cc'), ('tidur', 'conj'), ('sebentar', 'advmod'), ('sekiranya', 'mark'), ('mengantuk', 'advcl'), ('ketika', 'mark'), ('memandu', 'advcl')], [('Dr', 4), ('Mahathir', 2), ('menasihati', 0), ('mereka', 6), ('supaya', 6), ('berhenti', 4), ('berehat', 6), ('dan', 9), ('tidur', 2), ('sebentar', 9), ('sekiranya', 10), ('mengantuk', 2), ('ketika', 12), ('memandu', 11)])

Testing attention-is-all-you-need model
(<malaya._utils._parse_dependency.DependencyGraph object at 0x15c449518>, [('Dr', 'nsubj'), ('Mahathir', 'UNK'), ('menasihati', 'nsubj'), ('mereka', 'advmod'), ('supaya', 'nsubj'), ('berhenti', 'advmod'), ('berehat', 'root'), ('dan', 'mark'), ('tidur', 'nsubj'), ('sebentar', 'advmod'), ('sekiranya', 'nsubj'), ('mengantuk', 'advmod'), ('ketika', 'UNK'), ('memandu', 'advmod')], [('Dr', 3), ('Mahathir', 0), ('menasihati', 3), ('mereka', 3), ('supaya', 3), ('berhenti', 3), ('berehat', 0), ('dan', 3), ('tidur', 3), ('sebentar', 3), ('sekiranya', 3), ('mengantuk', 3), ('ketika', 0), ('memandu', 3)])

Voting stack model

concat = malaya.dependency.deep_model('concat')
bahdanau = malaya.dependency.deep_model('bahdanau')
luong = malaya.dependency.deep_model('luong')
tagging, indexing = malaya.stack.voting_stack([concat, bahdanau, luong], string)
malaya.dependency.dependency_graph(tagging, indexing).to_graphvis()
_images/load-dependency_27_0.svg

Dependency graph object

To create a dependency graph from the tagging and indexing output of dependency models, call malaya.dependency.dependency_graph.

graph = malaya.dependency.dependency_graph(tagging, indexing)
graph
<malaya._utils._parse_dependency.DependencyGraph at 0x10e9cf400>

Generate a Graphviz graph

graph.to_graphvis()
_images/load-dependency_31_0.svg

Get nodes

graph.nodes
defaultdict(<function malaya._utils._parse_dependency.DependencyGraph.__init__.<locals>.<lambda>()>,
            {0: {'address': 0,
              'word': None,
              'lemma': None,
              'ctag': 'TOP',
              'tag': 'TOP',
              'feats': None,
              'head': None,
              'deps': defaultdict(list, {'root': [3]}),
              'rel': None},
             1: {'address': 1,
              'word': 'Dr',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 2,
              'deps': defaultdict(list, {}),
              'rel': 'nsubj'},
             2: {'address': 2,
              'word': 'Mahathir',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 3,
              'deps': defaultdict(list, {'nsubj': [1]}),
              'rel': 'nmod'},
             3: {'address': 3,
              'word': 'menasihati',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 0,
              'deps': defaultdict(list,
                          {'nmod': [2], 'obj': [4], 'xcomp': [6]}),
              'rel': 'root'},
             4: {'address': 4,
              'word': 'mereka',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 3,
              'deps': defaultdict(list, {}),
              'rel': 'obj'},
             5: {'address': 5,
              'word': 'supaya',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 6,
              'deps': defaultdict(list, {'conj': [9]}),
              'rel': 'case'},
             6: {'address': 6,
              'word': 'berhenti',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 3,
              'deps': defaultdict(list, {'case': [5], 'ccomp': [7]}),
              'rel': 'xcomp'},
             7: {'address': 7,
              'word': 'berehat',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 6,
              'deps': defaultdict(list, {}),
              'rel': 'ccomp'},
             8: {'address': 8,
              'word': 'dan',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 9,
              'deps': defaultdict(list, {}),
              'rel': 'cc'},
             9: {'address': 9,
              'word': 'tidur',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 5,
              'deps': defaultdict(list,
                          {'cc': [8], 'det': [10], 'amod': [12]}),
              'rel': 'conj'},
             10: {'address': 10,
              'word': 'sebentar',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 9,
              'deps': defaultdict(list, {}),
              'rel': 'det'},
             11: {'address': 11,
              'word': 'sekiranya',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 12,
              'deps': defaultdict(list, {}),
              'rel': 'mark'},
             12: {'address': 12,
              'word': 'mengantuk',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 9,
              'deps': defaultdict(list, {'mark': [11, 13], 'advcl': [14]}),
              'rel': 'amod'},
             13: {'address': 13,
              'word': 'ketika',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 12,
              'deps': defaultdict(list, {}),
              'rel': 'mark'},
             14: {'address': 14,
              'word': 'memandu',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 12,
              'deps': defaultdict(list, {}),
              'rel': 'advcl'}})

Flatten the graph

list(graph.triples())
[(('menasihati', '_'), 'nmod', ('Mahathir', '_')),
 (('Mahathir', '_'), 'nsubj', ('Dr', '_')),
 (('menasihati', '_'), 'obj', ('mereka', '_')),
 (('menasihati', '_'), 'xcomp', ('berhenti', '_')),
 (('berhenti', '_'), 'case', ('supaya', '_')),
 (('supaya', '_'), 'conj', ('tidur', '_')),
 (('tidur', '_'), 'cc', ('dan', '_')),
 (('tidur', '_'), 'det', ('sebentar', '_')),
 (('tidur', '_'), 'amod', ('mengantuk', '_')),
 (('mengantuk', '_'), 'mark', ('sekiranya', '_')),
 (('mengantuk', '_'), 'mark', ('ketika', '_')),
 (('mengantuk', '_'), 'advcl', ('memandu', '_')),
 (('berhenti', '_'), 'ccomp', ('berehat', '_'))]

Check whether the graph contains cycles

graph.contains_cycle()
False

Generate a networkx graph

Make sure you have already installed networkx (pip install networkx).

digraph = graph.to_networkx()
digraph
<networkx.classes.multidigraph.MultiDiGraph at 0x122004240>
import networkx as nx
import matplotlib.pyplot as plt
nx.draw_networkx(digraph)
plt.show()
<Figure size 640x480 with 1 Axes>
digraph.edges()
OutMultiEdgeDataView([(1, 2), (2, 3), (4, 3), (5, 6), (6, 3), (7, 6), (8, 9), (9, 5), (10, 9), (11, 12), (12, 9), (13, 12), (14, 12)])
digraph.nodes()
NodeView((1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14))
labels = {i:graph.get_by_address(i)['word'] for i in digraph.nodes()}
labels
{1: 'Dr',
 2: 'Mahathir',
 3: 'menasihati',
 4: 'mereka',
 5: 'supaya',
 6: 'berhenti',
 7: 'berehat',
 8: 'dan',
 9: 'tidur',
 10: 'sebentar',
 11: 'sekiranya',
 12: 'mengantuk',
 13: 'ketika',
 14: 'memandu'}
plt.figure(figsize=(15,5))
nx.draw_networkx(digraph,labels=labels)
plt.show()
_images/load-dependency_44_0.png