Dependency Parsing#

This tutorial is available as an IPython notebook at Malaya/example/dependency.

This module was only trained on standard language structure, so it is not safe to use it for local language structure.

[1]:
import os

# Hide all CUDA devices from this process so the tutorial runs on CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = ''
# Set tokenizer parallelism explicitly, before anything uses `tokenizers`;
# otherwise it prints a "The current process just got forked" warning under
# the later cells that call `predict` / `to_graphvis`.
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
[2]:
%%time
import malaya
/home/husein/.local/lib/python3.8/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
  warn("The installed version of bitsandbytes was compiled without GPU support. "
/home/husein/.local/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32
CPU times: user 2.85 s, sys: 3.65 s, total: 6.5 s
Wall time: 2.11 s
/home/husein/dev/malaya/malaya/tokenizer.py:214: FutureWarning: Possible nested set at position 3397
  self.tok = re.compile(r'({})'.format('|'.join(pipeline)))
/home/husein/dev/malaya/malaya/tokenizer.py:214: FutureWarning: Possible nested set at position 3927
  self.tok = re.compile(r'({})'.format('|'.join(pipeline)))

Describe supported dependencies#

[3]:
# NOTE(review): the parse output later in this notebook also contains the
# relations 'cc', 'nummod', 'obl' and 'punct', which this list does not describe.
malaya.dependency.describe
[3]:
[{'Tag': 'acl', 'Description': 'clausal modifier of noun'},
 {'Tag': 'advcl', 'Description': 'adverbial clause modifier'},
 {'Tag': 'advmod', 'Description': 'adverbial modifier'},
 {'Tag': 'amod', 'Description': 'adjectival modifier'},
 {'Tag': 'appos', 'Description': 'appositional modifier'},
 {'Tag': 'aux', 'Description': 'auxiliary'},
 {'Tag': 'case', 'Description': 'case marking'},
 {'Tag': 'ccomp', 'Description': 'clausal complement'},
 {'Tag': 'compound', 'Description': 'compound'},
 {'Tag': 'compound:plur', 'Description': 'plural compound'},
 {'Tag': 'conj', 'Description': 'conjunct'},
 {'Tag': 'cop', 'Description': 'cop'},
 {'Tag': 'csubj', 'Description': 'clausal subject'},
 {'Tag': 'dep', 'Description': 'dependent'},
 {'Tag': 'det', 'Description': 'determiner'},
 {'Tag': 'fixed', 'Description': 'multi-word expression'},
 {'Tag': 'flat', 'Description': 'name'},
 {'Tag': 'iobj', 'Description': 'indirect object'},
 {'Tag': 'mark', 'Description': 'marker'},
 {'Tag': 'nmod', 'Description': 'nominal modifier'},
 {'Tag': 'nsubj', 'Description': 'nominal subject'},
 {'Tag': 'obj', 'Description': 'direct object'},
 {'Tag': 'parataxis', 'Description': 'parataxis'},
 {'Tag': 'root', 'Description': 'root'},
 {'Tag': 'xcomp', 'Description': 'open clausal complement'}]

List available HuggingFace Dependency models#

[4]:
malaya.dependency.available_huggingface
[4]:
{'mesolitica/finetune-dependency-t5-tiny-standard-bahasa-cased': {'Size (MB)': 143,
  'Arc Accuracy': 0.8506069089930276,
  'Types Accuracy': 0.7831641780774206,
  'Root Accuracy': 0.8723021582733813},
 'mesolitica/finetune-dependency-t5-small-standard-bahasa-cased': {'Size (MB)': 247,
  'Arc Accuracy': 0.8494045261191319,
  'Types Accuracy': 0.783103051811978,
  'Root Accuracy': 0.8669064748201439},
 'mesolitica/finetune-dependency-t5-base-standard-bahasa-cased': {'Size (MB)': 898,
  'Arc Accuracy': 0.8528921010932324,
  'Types Accuracy': 0.7840908663367674,
  'Root Accuracy': 0.8597122302158273}}

Load HuggingFace dependency model#

def huggingface(
    model: str = 'mesolitica/finetune-dependency-t5-small-standard-bahasa-cased',
    force_check: bool = True,
    **kwargs,
):
    """
    Load a HuggingFace model for dependency parsing.

    Parameters
    ----------
    model: str, optional (default='mesolitica/finetune-dependency-t5-small-standard-bahasa-cased')
        Name of the model to load.
        Check available models at `malaya.dependency.available_huggingface`.
    force_check: bool, optional (default=True)
        Force a check that `model` is one of the malaya models.
        Set to False if you have your own HuggingFace model.
    **kwargs:
        Extra keyword arguments (how they are used is not visible in this excerpt).

    Returns
    -------
    result: malaya.torch_model.huggingface.Dependency
    """
[5]:
# No `model` argument is passed, so the default from the signature is used:
# 'mesolitica/finetune-dependency-t5-small-standard-bahasa-cased'.
model = malaya.dependency.huggingface()
`malaya.dependency.huggingface` trained on indonesian dataset and augmented dataset, not an actual malay dataset.
Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`,  it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. You will see the new `added_tokens_decoder` attribute that will store the relevant information.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565

Predict#

def predict(
    self,
    string: str,
    validate_tree: bool = False,
    f_tree: Callable = eisner,
):
    """
    Tag a string. We assume the input string has been properly tokenized.

    Parameters
    ----------
    string: str
        Input text to parse.
    validate_tree: bool, optional (default=False)
        Validate that the arcs form a valid tree using `malaya.parser.conll.CoNLL.istree`.
        Originally from https://github.com/Unipisa/diaparser
    f_tree: Callable, optional (default=malaya.parser.alg.eisner)
        If the arcs are not a tree, use this approximate function to fix the arcs.
        Originally from https://github.com/Unipisa/diaparser

    Returns
    -------
    result: Tuple
        Unpacked in this tutorial as `(d_object, tagging, indexing)`.
    """
[6]:
string = 'Dr Mahathir menasihati mereka supaya berhenti berehat dan tidur sebentar sekiranya mengantuk ketika memandu.'
[7]:
d_object, tagging, indexing = model.predict(string)
d_object.to_graphvis()
You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
        - Avoid using `tokenizers` before the fork if possible
        - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[7]:
_images/load-dependency_13_1.svg

Harder example#

[8]:
# https://www.astroawani.com/berita-malaysia/terbaik-tun-kita-geng-najib-razak-puji-tun-m-297884

s = """
KUALA LUMPUR: Dalam hal politik, jarang sekali untuk melihat dua figura ini - bekas Perdana Menteri, Datuk Seri Najib Razak dan Tun Dr Mahathir Mohamad mempunyai 'pandangan yang sama' atau sekapal. Namun, situasi itu berbeza apabila melibatkan isu ketidakpatuhan terhadap prosedur operasi standard (SOP). Najib, yang juga Ahli Parlimen Pekan memuji sikap Ahli Parlimen Langkawi itu yang mengaku bersalah selepas melanggar SOP kerana tidak mengambil suhu badan ketika masuk ke sebuah surau di Langkawi pada Sabtu lalu.
"""
[9]:
d_object, tagging, indexing = model.predict(s)
d_object.to_graphvis()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
        - Avoid using `tokenizers` before the fork if possible
        - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[9]:
_images/load-dependency_16_1.svg

Dependency graph object#

To create a dependency graph from a dependency model's output, call malaya.dependency.dependency_graph with the tagging and indexing returned by predict.

[10]:
# `tagging` and `indexing` are the values returned by `model.predict(s)` in the
# previous code cell (the "Harder example" text), so run that cell first.
graph = malaya.dependency.dependency_graph(tagging, indexing)
graph
[10]:
<malaya.function.parse_dependency.DependencyGraph at 0x7f3680991be0>

Generate graphvis#

[11]:
graph.to_graphvis()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
        - Avoid using `tokenizers` before the fork if possible
        - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
[11]:
_images/load-dependency_20_1.svg

Get nodes#

[12]:
graph.nodes
[12]:
defaultdict(<function malaya.function.parse_dependency.DependencyGraph.__init__.<locals>.<lambda>()>,
            {0: {'address': 0,
              'word': None,
              'lemma': None,
              'ctag': 'TOP',
              'tag': 'TOP',
              'feats': None,
              'head': None,
              'deps': defaultdict(list, {'root': [26]}),
              'rel': None},
             1: {'address': 1,
              'word': 'KUALA',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 26,
              'deps': defaultdict(list, {'flat': [2], 'nmod': [4]}),
              'rel': 'nsubj'},
             26: {'address': 26,
              'word': 'mempunyai',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 0,
              'deps': defaultdict(list,
                          {'nsubj': [1], 'xcomp': [9], 'obj': [27]}),
              'rel': 'root'},
             2: {'address': 2,
              'word': 'LUMPUR:',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 1,
              'deps': defaultdict(list, {}),
              'rel': 'flat'},
             3: {'address': 3,
              'word': 'Dalam',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 4,
              'deps': defaultdict(list, {}),
              'rel': 'case'},
             4: {'address': 4,
              'word': 'hal',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 1,
              'deps': defaultdict(list, {'case': [3], 'compound': [5]}),
              'rel': 'nmod'},
             5: {'address': 5,
              'word': 'politik,',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 4,
              'deps': defaultdict(list, {}),
              'rel': 'compound'},
             6: {'address': 6,
              'word': 'jarang',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 9,
              'deps': defaultdict(list, {'advmod': [7]}),
              'rel': 'advmod'},
             9: {'address': 9,
              'word': 'melihat',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 26,
              'deps': defaultdict(list,
                          {'advmod': [6], 'case': [8], 'obj': [11]}),
              'rel': 'xcomp'},
             7: {'address': 7,
              'word': 'sekali',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 6,
              'deps': defaultdict(list, {}),
              'rel': 'advmod'},
             8: {'address': 8,
              'word': 'untuk',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 9,
              'deps': defaultdict(list, {}),
              'rel': 'case'},
             10: {'address': 10,
              'word': 'dua',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 11,
              'deps': defaultdict(list, {}),
              'rel': 'nummod'},
             11: {'address': 11,
              'word': 'figura',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 9,
              'deps': defaultdict(list,
                          {'nummod': [10],
                           'det': [12],
                           'punct': [13],
                           'compound': [14]}),
              'rel': 'obj'},
             12: {'address': 12,
              'word': 'ini',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 11,
              'deps': defaultdict(list, {}),
              'rel': 'det'},
             13: {'address': 13,
              'word': '-',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 11,
              'deps': defaultdict(list, {}),
              'rel': 'punct'},
             14: {'address': 14,
              'word': 'bekas',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 11,
              'deps': defaultdict(list, {'flat': [15]}),
              'rel': 'compound'},
             15: {'address': 15,
              'word': 'Perdana',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 14,
              'deps': defaultdict(list, {'flat': [16], 'conj': [22]}),
              'rel': 'flat'},
             16: {'address': 16,
              'word': 'Menteri,',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 15,
              'deps': defaultdict(list, {'flat': [17]}),
              'rel': 'flat'},
             17: {'address': 17,
              'word': 'Datuk',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 16,
              'deps': defaultdict(list, {'flat': [18]}),
              'rel': 'flat'},
             18: {'address': 18,
              'word': 'Seri',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 17,
              'deps': defaultdict(list, {'flat': [19]}),
              'rel': 'flat'},
             19: {'address': 19,
              'word': 'Najib',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 18,
              'deps': defaultdict(list, {'flat': [20]}),
              'rel': 'flat'},
             20: {'address': 20,
              'word': 'Razak',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 19,
              'deps': defaultdict(list, {}),
              'rel': 'flat'},
             21: {'address': 21,
              'word': 'dan',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 22,
              'deps': defaultdict(list, {}),
              'rel': 'cc'},
             22: {'address': 22,
              'word': 'Tun',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 15,
              'deps': defaultdict(list, {'cc': [21], 'flat': [23]}),
              'rel': 'conj'},
             23: {'address': 23,
              'word': 'Dr',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 22,
              'deps': defaultdict(list, {'flat': [24]}),
              'rel': 'flat'},
             24: {'address': 24,
              'word': 'Mahathir',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 23,
              'deps': defaultdict(list, {'flat': [25]}),
              'rel': 'flat'},
             25: {'address': 25,
              'word': 'Mohamad',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 24,
              'deps': defaultdict(list, {}),
              'rel': 'flat'},
             27: {'address': 27,
              'word': "'pandangan",
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 26,
              'deps': defaultdict(list, {'acl': [35]}),
              'rel': 'obj'},
             28: {'address': 28,
              'word': 'yang',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 35,
              'deps': defaultdict(list, {}),
              'rel': 'nsubj'},
             35: {'address': 35,
              'word': 'berbeza',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 27,
              'deps': defaultdict(list,
                          {'nsubj': [28, 33],
                           'punct': [29],
                           'mark': [32],
                           'advcl': [37]}),
              'rel': 'acl'},
             29: {'address': 29,
              'word': "sama'",
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 35,
              'deps': defaultdict(list, {'conj': [31]}),
              'rel': 'punct'},
             30: {'address': 30,
              'word': 'atau',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 31,
              'deps': defaultdict(list, {}),
              'rel': 'cc'},
             31: {'address': 31,
              'word': 'sekapal.',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 29,
              'deps': defaultdict(list, {'cc': [30]}),
              'rel': 'conj'},
             32: {'address': 32,
              'word': 'Namun,',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 35,
              'deps': defaultdict(list, {}),
              'rel': 'mark'},
             33: {'address': 33,
              'word': 'situasi',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 35,
              'deps': defaultdict(list, {'det': [34]}),
              'rel': 'nsubj'},
             34: {'address': 34,
              'word': 'itu',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 33,
              'deps': defaultdict(list, {}),
              'rel': 'det'},
             36: {'address': 36,
              'word': 'apabila',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 37,
              'deps': defaultdict(list, {}),
              'rel': 'mark'},
             37: {'address': 37,
              'word': 'melibatkan',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 35,
              'deps': defaultdict(list, {'mark': [36], 'obj': [38]}),
              'rel': 'advcl'},
             38: {'address': 38,
              'word': 'isu',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 37,
              'deps': defaultdict(list, {'compound': [39], 'nmod': [41]}),
              'rel': 'obj'},
             39: {'address': 39,
              'word': 'ketidakpatuhan',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 38,
              'deps': defaultdict(list, {}),
              'rel': 'compound'},
             40: {'address': 40,
              'word': 'terhadap',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 41,
              'deps': defaultdict(list, {}),
              'rel': 'case'},
             41: {'address': 41,
              'word': 'prosedur',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 38,
              'deps': defaultdict(list,
                          {'case': [40], 'compound': [42], 'acl': [51]}),
              'rel': 'nmod'},
             42: {'address': 42,
              'word': 'operasi',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 41,
              'deps': defaultdict(list, {'flat': [43]}),
              'rel': 'compound'},
             43: {'address': 43,
              'word': 'standard',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 42,
              'deps': defaultdict(list, {'flat': [44]}),
              'rel': 'flat'},
             44: {'address': 44,
              'word': '(SOP).',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 43,
              'deps': defaultdict(list, {'flat': [45]}),
              'rel': 'flat'},
             45: {'address': 45,
              'word': 'Najib,',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 44,
              'deps': defaultdict(list, {}),
              'rel': 'flat'},
             46: {'address': 46,
              'word': 'yang',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 51,
              'deps': defaultdict(list, {}),
              'rel': 'obj'},
             51: {'address': 51,
              'word': 'memuji',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 41,
              'deps': defaultdict(list, {'obj': [46, 52], 'nsubj': [48]}),
              'rel': 'acl'},
             47: {'address': 47,
              'word': 'juga',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 48,
              'deps': defaultdict(list, {}),
              'rel': 'advmod'},
             48: {'address': 48,
              'word': 'Ahli',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 51,
              'deps': defaultdict(list, {'advmod': [47], 'flat': [49]}),
              'rel': 'nsubj'},
             49: {'address': 49,
              'word': 'Parlimen',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 48,
              'deps': defaultdict(list, {'flat': [50]}),
              'rel': 'flat'},
             50: {'address': 50,
              'word': 'Pekan',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 49,
              'deps': defaultdict(list, {}),
              'rel': 'flat'},
             52: {'address': 52,
              'word': 'sikap',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 51,
              'deps': defaultdict(list, {'flat': [53], 'acl': [58]}),
              'rel': 'obj'},
             53: {'address': 53,
              'word': 'Ahli',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 52,
              'deps': defaultdict(list, {'flat': [54]}),
              'rel': 'flat'},
             54: {'address': 54,
              'word': 'Parlimen',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 53,
              'deps': defaultdict(list, {'flat': [55]}),
              'rel': 'flat'},
             55: {'address': 55,
              'word': 'Langkawi',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 54,
              'deps': defaultdict(list, {'det': [56]}),
              'rel': 'flat'},
             56: {'address': 56,
              'word': 'itu',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 55,
              'deps': defaultdict(list, {}),
              'rel': 'det'},
             57: {'address': 57,
              'word': 'yang',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 58,
              'deps': defaultdict(list, {}),
              'rel': 'nsubj'},
             58: {'address': 58,
              'word': 'mengaku',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 52,
              'deps': defaultdict(list, {'nsubj': [57], 'xcomp': [59]}),
              'rel': 'acl'},
             59: {'address': 59,
              'word': 'bersalah',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 58,
              'deps': defaultdict(list, {'xcomp': [61]}),
              'rel': 'xcomp'},
             60: {'address': 60,
              'word': 'selepas',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 61,
              'deps': defaultdict(list, {}),
              'rel': 'det'},
             61: {'address': 61,
              'word': 'melanggar',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 59,
              'deps': defaultdict(list,
                          {'det': [60], 'obj': [62], 'advcl': [65]}),
              'rel': 'xcomp'},
             62: {'address': 62,
              'word': 'SOP',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 61,
              'deps': defaultdict(list, {}),
              'rel': 'obj'},
             63: {'address': 63,
              'word': 'kerana',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 65,
              'deps': defaultdict(list, {}),
              'rel': 'mark'},
             65: {'address': 65,
              'word': 'mengambil',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 61,
              'deps': defaultdict(list,
                          {'mark': [63],
                           'advmod': [64],
                           'obj': [66],
                           'advcl': [69]}),
              'rel': 'advcl'},
             64: {'address': 64,
              'word': 'tidak',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 65,
              'deps': defaultdict(list, {}),
              'rel': 'advmod'},
             66: {'address': 66,
              'word': 'suhu',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 65,
              'deps': defaultdict(list, {'compound': [67]}),
              'rel': 'obj'},
             67: {'address': 67,
              'word': 'badan',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 66,
              'deps': defaultdict(list, {}),
              'rel': 'compound'},
             68: {'address': 68,
              'word': 'ketika',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 69,
              'deps': defaultdict(list, {}),
              'rel': 'mark'},
             69: {'address': 69,
              'word': 'masuk',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 65,
              'deps': defaultdict(list, {'mark': [68], 'obl': [72, 74, 76]}),
              'rel': 'advcl'},
             70: {'address': 70,
              'word': 'ke',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 72,
              'deps': defaultdict(list, {}),
              'rel': 'case'},
             72: {'address': 72,
              'word': 'surau',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 69,
              'deps': defaultdict(list, {'case': [70], 'det': [71]}),
              'rel': 'obl'},
             71: {'address': 71,
              'word': 'sebuah',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 72,
              'deps': defaultdict(list, {}),
              'rel': 'det'},
             73: {'address': 73,
              'word': 'di',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 74,
              'deps': defaultdict(list, {}),
              'rel': 'case'},
             74: {'address': 74,
              'word': 'Langkawi',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 69,
              'deps': defaultdict(list, {'case': [73]}),
              'rel': 'obl'},
             75: {'address': 75,
              'word': 'pada',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 76,
              'deps': defaultdict(list, {}),
              'rel': 'case'},
             76: {'address': 76,
              'word': 'Sabtu',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 69,
              'deps': defaultdict(list, {'case': [75], 'punct': [77]}),
              'rel': 'obl'},
             77: {'address': 77,
              'word': 'lalu.',
              'lemma': '_',
              'ctag': '_',
              'tag': '_',
              'feats': '_',
              'head': 76,
              'deps': defaultdict(list, {}),
              'rel': 'punct'}})

Flatten the graph#

[13]:
list(graph.triples())
[13]:
[(('mempunyai', '_'), 'nsubj', ('KUALA', '_')),
 (('KUALA', '_'), 'flat', ('LUMPUR:', '_')),
 (('KUALA', '_'), 'nmod', ('hal', '_')),
 (('hal', '_'), 'case', ('Dalam', '_')),
 (('hal', '_'), 'compound', ('politik,', '_')),
 (('mempunyai', '_'), 'xcomp', ('melihat', '_')),
 (('melihat', '_'), 'advmod', ('jarang', '_')),
 (('jarang', '_'), 'advmod', ('sekali', '_')),
 (('melihat', '_'), 'case', ('untuk', '_')),
 (('melihat', '_'), 'obj', ('figura', '_')),
 (('figura', '_'), 'nummod', ('dua', '_')),
 (('figura', '_'), 'det', ('ini', '_')),
 (('figura', '_'), 'punct', ('-', '_')),
 (('figura', '_'), 'compound', ('bekas', '_')),
 (('bekas', '_'), 'flat', ('Perdana', '_')),
 (('Perdana', '_'), 'flat', ('Menteri,', '_')),
 (('Menteri,', '_'), 'flat', ('Datuk', '_')),
 (('Datuk', '_'), 'flat', ('Seri', '_')),
 (('Seri', '_'), 'flat', ('Najib', '_')),
 (('Najib', '_'), 'flat', ('Razak', '_')),
 (('Perdana', '_'), 'conj', ('Tun', '_')),
 (('Tun', '_'), 'cc', ('dan', '_')),
 (('Tun', '_'), 'flat', ('Dr', '_')),
 (('Dr', '_'), 'flat', ('Mahathir', '_')),
 (('Mahathir', '_'), 'flat', ('Mohamad', '_')),
 (('mempunyai', '_'), 'obj', ("'pandangan", '_')),
 (("'pandangan", '_'), 'acl', ('berbeza', '_')),
 (('berbeza', '_'), 'nsubj', ('yang', '_')),
 (('berbeza', '_'), 'punct', ("sama'", '_')),
 (("sama'", '_'), 'conj', ('sekapal.', '_')),
 (('sekapal.', '_'), 'cc', ('atau', '_')),
 (('berbeza', '_'), 'mark', ('Namun,', '_')),
 (('berbeza', '_'), 'nsubj', ('situasi', '_')),
 (('situasi', '_'), 'det', ('itu', '_')),
 (('berbeza', '_'), 'advcl', ('melibatkan', '_')),
 (('melibatkan', '_'), 'mark', ('apabila', '_')),
 (('melibatkan', '_'), 'obj', ('isu', '_')),
 (('isu', '_'), 'compound', ('ketidakpatuhan', '_')),
 (('isu', '_'), 'nmod', ('prosedur', '_')),
 (('prosedur', '_'), 'case', ('terhadap', '_')),
 (('prosedur', '_'), 'compound', ('operasi', '_')),
 (('operasi', '_'), 'flat', ('standard', '_')),
 (('standard', '_'), 'flat', ('(SOP).', '_')),
 (('(SOP).', '_'), 'flat', ('Najib,', '_')),
 (('prosedur', '_'), 'acl', ('memuji', '_')),
 (('memuji', '_'), 'obj', ('yang', '_')),
 (('memuji', '_'), 'nsubj', ('Ahli', '_')),
 (('Ahli', '_'), 'advmod', ('juga', '_')),
 (('Ahli', '_'), 'flat', ('Parlimen', '_')),
 (('Parlimen', '_'), 'flat', ('Pekan', '_')),
 (('memuji', '_'), 'obj', ('sikap', '_')),
 (('sikap', '_'), 'flat', ('Ahli', '_')),
 (('Ahli', '_'), 'flat', ('Parlimen', '_')),
 (('Parlimen', '_'), 'flat', ('Langkawi', '_')),
 (('Langkawi', '_'), 'det', ('itu', '_')),
 (('sikap', '_'), 'acl', ('mengaku', '_')),
 (('mengaku', '_'), 'nsubj', ('yang', '_')),
 (('mengaku', '_'), 'xcomp', ('bersalah', '_')),
 (('bersalah', '_'), 'xcomp', ('melanggar', '_')),
 (('melanggar', '_'), 'det', ('selepas', '_')),
 (('melanggar', '_'), 'obj', ('SOP', '_')),
 (('melanggar', '_'), 'advcl', ('mengambil', '_')),
 (('mengambil', '_'), 'mark', ('kerana', '_')),
 (('mengambil', '_'), 'advmod', ('tidak', '_')),
 (('mengambil', '_'), 'obj', ('suhu', '_')),
 (('suhu', '_'), 'compound', ('badan', '_')),
 (('mengambil', '_'), 'advcl', ('masuk', '_')),
 (('masuk', '_'), 'mark', ('ketika', '_')),
 (('masuk', '_'), 'obl', ('surau', '_')),
 (('surau', '_'), 'case', ('ke', '_')),
 (('surau', '_'), 'det', ('sebuah', '_')),
 (('masuk', '_'), 'obl', ('Langkawi', '_')),
 (('Langkawi', '_'), 'case', ('di', '_')),
 (('masuk', '_'), 'obl', ('Sabtu', '_')),
 (('Sabtu', '_'), 'case', ('pada', '_')),
 (('Sabtu', '_'), 'punct', ('lalu.', '_'))]

Check whether the graph contains cycles#

[14]:
graph.contains_cycle()
[14]:
False