{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Dependency Parsing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "\n", "This tutorial is available as an IPython notebook at [Malaya/example/dependency](https://github.com/huseinzol05/Malaya/tree/master/example/dependency).\n", " \n", "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "\n", "This module is only trained on standard language structure, so it is not safe to use it for local language structure.\n", " \n", "
" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "os.environ['CUDA_VISIBLE_DEVICES'] = ''" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/husein/.local/lib/python3.8/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.\n", " warn(\"The installed version of bitsandbytes was compiled without GPU support. \"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "/home/husein/.local/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32\n", "CPU times: user 2.85 s, sys: 3.65 s, total: 6.5 s\n", "Wall time: 2.11 s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/husein/dev/malaya/malaya/tokenizer.py:214: FutureWarning: Possible nested set at position 3397\n", " self.tok = re.compile(r'({})'.format('|'.join(pipeline)))\n", "/home/husein/dev/malaya/malaya/tokenizer.py:214: FutureWarning: Possible nested set at position 3927\n", " self.tok = re.compile(r'({})'.format('|'.join(pipeline)))\n" ] } ], "source": [ "%%time\n", "import malaya" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Describe supported dependencies" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'Tag': 'acl', 'Description': 'clausal modifier of noun'},\n", " {'Tag': 'advcl', 'Description': 'adverbial clause modifier'},\n", " {'Tag': 'advmod', 'Description': 'adverbial modifier'},\n", " {'Tag': 'amod', 'Description': 'adjectival modifier'},\n", " {'Tag': 'appos', 'Description': 'appositional modifier'},\n", " {'Tag': 'aux', 'Description': 'auxiliary'},\n", " {'Tag': 'case', 'Description': 'case marking'},\n", " {'Tag': 'ccomp', 
'Description': 'clausal complement'},\n", " {'Tag': 'compound', 'Description': 'compound'},\n", " {'Tag': 'compound:plur', 'Description': 'plural compound'},\n", " {'Tag': 'conj', 'Description': 'conjunct'},\n", " {'Tag': 'cop', 'Description': 'cop'},\n", " {'Tag': 'csubj', 'Description': 'clausal subject'},\n", " {'Tag': 'dep', 'Description': 'dependent'},\n", " {'Tag': 'det', 'Description': 'determiner'},\n", " {'Tag': 'fixed', 'Description': 'multi-word expression'},\n", " {'Tag': 'flat', 'Description': 'name'},\n", " {'Tag': 'iobj', 'Description': 'indirect object'},\n", " {'Tag': 'mark', 'Description': 'marker'},\n", " {'Tag': 'nmod', 'Description': 'nominal modifier'},\n", " {'Tag': 'nsubj', 'Description': 'nominal subject'},\n", " {'Tag': 'obj', 'Description': 'direct object'},\n", " {'Tag': 'parataxis', 'Description': 'parataxis'},\n", " {'Tag': 'root', 'Description': 'root'},\n", " {'Tag': 'xcomp', 'Description': 'open clausal complement'}]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "malaya.dependency.describe" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### List available HuggingFace Dependency models" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'mesolitica/finetune-dependency-t5-tiny-standard-bahasa-cased': {'Size (MB)': 143,\n", " 'Arc Accuracy': 0.8506069089930276,\n", " 'Types Accuracy': 0.7831641780774206,\n", " 'Root Accuracy': 0.8723021582733813},\n", " 'mesolitica/finetune-dependency-t5-small-standard-bahasa-cased': {'Size (MB)': 247,\n", " 'Arc Accuracy': 0.8494045261191319,\n", " 'Types Accuracy': 0.783103051811978,\n", " 'Root Accuracy': 0.8669064748201439},\n", " 'mesolitica/finetune-dependency-t5-base-standard-bahasa-cased': {'Size (MB)': 898,\n", " 'Arc Accuracy': 0.8528921010932324,\n", " 'Types Accuracy': 0.7840908663367674,\n", " 'Root Accuracy': 0.8597122302158273}}" ] }, "execution_count": 4, "metadata": 
{}, "output_type": "execute_result" } ], "source": [ "malaya.dependency.available_huggingface" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load HuggingFace dependency model\n", "\n", "```python\n", "def huggingface(\n", " model: str = 'mesolitica/finetune-dependency-t5-small-standard-bahasa-cased',\n", " force_check: bool = True,\n", " **kwargs,\n", "):\n", " \"\"\"\n", " Load HuggingFace model to dependency parsing.\n", "\n", " Parameters\n", " ----------\n", " model: str, optional (default='mesolitica/finetune-dependency-t5-small-standard-bahasa-cased')\n", " Check available models at `malaya.dependency.available_huggingface()`.\n", " force_check: bool, optional (default=True)\n", " Force check model one of malaya model.\n", " Set to False if you have your own huggingface model.\n", "\n", " Returns\n", " -------\n", " result: malaya.torch_model.huggingface.Dependency\n", " \"\"\"\n", "```" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "`malaya.dependency.huggingface` trained on indonesian dataset and augmented dataset, not an actual malay dataset.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2f73ad53f0904716b53215a1e63b126c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)okenizer_config.json: 0%| | 0.00/2.54k [00:00. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. 
This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "50bb09e167734f92b1282577217cdca1", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)lve/main/config.json: 0%| | 0.00/2.19k [00:00\n", "\n", "\n", "\n", "\n", "\n", "G\n", "\n", "\n", "\n", "0\n", "0 (None)\n", "\n", "\n", "\n", "3\n", "3 (menasihati)\n", "\n", "\n", "\n", "0->3\n", "\n", "\n", "root\n", "\n", "\n", "\n", "1\n", "1 (Dr)\n", "\n", "\n", "\n", "3->1\n", "\n", "\n", "nsubj\n", "\n", "\n", "\n", "4\n", "4 (mereka)\n", "\n", "\n", "\n", "3->4\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "6\n", "6 (berhenti)\n", "\n", "\n", "\n", "3->6\n", "\n", "\n", "conj\n", "\n", "\n", "\n", "2\n", "2 (Mahathir)\n", "\n", "\n", "\n", "1->2\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "5\n", "5 (supaya)\n", "\n", "\n", "\n", "6->5\n", "\n", "\n", "cc\n", "\n", "\n", "\n", "7\n", "7 (berehat)\n", "\n", "\n", "\n", "6->7\n", "\n", "\n", "xcomp\n", "\n", "\n", "\n", "9\n", "9 (tidur)\n", "\n", "\n", "\n", "7->9\n", "\n", "\n", "conj\n", "\n", "\n", "\n", "8\n", "8 (dan)\n", "\n", "\n", "\n", "9->8\n", "\n", "\n", "cc\n", "\n", "\n", "\n", "12\n", "12 (mengantuk)\n", "\n", "\n", "\n", "9->12\n", "\n", "\n", "xcomp\n", "\n", "\n", "\n", "10\n", "10 (sebentar)\n", "\n", "\n", "\n", "12->10\n", "\n", "\n", "case\n", "\n", "\n", "\n", "11\n", "11 (sekiranya)\n", "\n", "\n", "\n", "12->11\n", "\n", "\n", "advmod\n", "\n", "\n", "\n", "14\n", "14 (memandu.)\n", "\n", "\n", "\n", "12->14\n", "\n", "\n", "advcl\n", "\n", "\n", "\n", "13\n", "13 (ketika)\n", "\n", "\n", "\n", "14->13\n", "\n", "\n", "mark\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_object, tagging, indexing = model.predict(string)\n", 
"d_object.to_graphvis()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Harder example" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# https://www.astroawani.com/berita-malaysia/terbaik-tun-kita-geng-najib-razak-puji-tun-m-297884\n", "\n", "s = \"\"\"\n", "KUALA LUMPUR: Dalam hal politik, jarang sekali untuk melihat dua figura ini - bekas Perdana Menteri, Datuk Seri Najib Razak dan Tun Dr Mahathir Mohamad mempunyai 'pandangan yang sama' atau sekapal. Namun, situasi itu berbeza apabila melibatkan isu ketidakpatuhan terhadap prosedur operasi standard (SOP). Najib, yang juga Ahli Parlimen Pekan memuji sikap Ahli Parlimen Langkawi itu yang mengaku bersalah selepas melanggar SOP kerana tidak mengambil suhu badan ketika masuk ke sebuah surau di Langkawi pada Sabtu lalu.\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "G\n", "\n", "\n", "\n", "0\n", "0 (None)\n", "\n", "\n", "\n", "26\n", "26 (mempunyai)\n", "\n", "\n", "\n", "0->26\n", "\n", "\n", "root\n", "\n", "\n", "\n", "1\n", "1 (KUALA)\n", "\n", "\n", "\n", "26->1\n", "\n", "\n", "nsubj\n", "\n", "\n", "\n", "9\n", "9 (melihat)\n", "\n", "\n", "\n", "26->9\n", "\n", "\n", "xcomp\n", "\n", "\n", "\n", "27\n", "27 ('pandangan)\n", "\n", "\n", "\n", "26->27\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "2\n", "2 (LUMPUR:)\n", "\n", "\n", "\n", "1->2\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "4\n", "4 (hal)\n", "\n", "\n", "\n", "1->4\n", "\n", "\n", "nmod\n", "\n", "\n", "\n", "3\n", "3 (Dalam)\n", "\n", "\n", "\n", "4->3\n", "\n", "\n", "case\n", "\n", "\n", "\n", "5\n", "5 (politik,)\n", "\n", "\n", "\n", "4->5\n", "\n", "\n", "compound\n", "\n", "\n", "\n", "6\n", "6 (jarang)\n", "\n", "\n", "\n", "7\n", "7 (sekali)\n", "\n", "\n", "\n", "6->7\n", "\n", "\n", "advmod\n", "\n", "\n", "\n", "8\n", "8 (untuk)\n", "\n", "\n", "\n", "9->6\n", "\n", "\n", "advmod\n", "\n", "\n", "\n", "9->8\n", "\n", "\n", "case\n", "\n", "\n", "\n", "11\n", "11 (figura)\n", "\n", "\n", "\n", "9->11\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "10\n", "10 (dua)\n", "\n", "\n", "\n", "11->10\n", "\n", "\n", "nummod\n", "\n", "\n", "\n", "12\n", "12 (ini)\n", "\n", "\n", "\n", "11->12\n", "\n", "\n", "det\n", "\n", "\n", "\n", "13\n", "13 (-)\n", "\n", "\n", "\n", "11->13\n", "\n", "\n", "punct\n", "\n", "\n", "\n", "14\n", "14 (bekas)\n", "\n", "\n", "\n", "11->14\n", "\n", "\n", "compound\n", "\n", "\n", "\n", "15\n", "15 (Perdana)\n", "\n", "\n", "\n", "14->15\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "16\n", "16 (Menteri,)\n", "\n", 
"\n", "\n", "15->16\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "22\n", "22 (Tun)\n", "\n", "\n", "\n", "15->22\n", "\n", "\n", "conj\n", "\n", "\n", "\n", "17\n", "17 (Datuk)\n", "\n", "\n", "\n", "16->17\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "21\n", "21 (dan)\n", "\n", "\n", "\n", "22->21\n", "\n", "\n", "cc\n", "\n", "\n", "\n", "23\n", "23 (Dr)\n", "\n", "\n", "\n", "22->23\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "18\n", "18 (Seri)\n", "\n", "\n", "\n", "17->18\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "19\n", "19 (Najib)\n", "\n", "\n", "\n", "18->19\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "20\n", "20 (Razak)\n", "\n", "\n", "\n", "19->20\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "24\n", "24 (Mahathir)\n", "\n", "\n", "\n", "23->24\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "25\n", "25 (Mohamad)\n", "\n", "\n", "\n", "24->25\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "35\n", "35 (berbeza)\n", "\n", "\n", "\n", "27->35\n", "\n", "\n", "acl\n", "\n", "\n", "\n", "28\n", "28 (yang)\n", "\n", "\n", "\n", "35->28\n", "\n", "\n", "nsubj\n", "\n", "\n", "\n", "29\n", "29 (sama')\n", "\n", "\n", "\n", "35->29\n", "\n", "\n", "punct\n", "\n", "\n", "\n", "32\n", "32 (Namun,)\n", "\n", "\n", "\n", "35->32\n", "\n", "\n", "mark\n", "\n", "\n", "\n", "33\n", "33 (situasi)\n", "\n", "\n", "\n", "35->33\n", "\n", "\n", "nsubj\n", "\n", "\n", "\n", "37\n", "37 (melibatkan)\n", "\n", "\n", "\n", "35->37\n", "\n", "\n", "advcl\n", "\n", "\n", "\n", "31\n", "31 (sekapal.)\n", "\n", "\n", "\n", "29->31\n", "\n", "\n", "conj\n", "\n", "\n", "\n", "30\n", "30 (atau)\n", "\n", "\n", "\n", "31->30\n", "\n", "\n", "cc\n", "\n", "\n", "\n", "34\n", "34 (itu)\n", "\n", "\n", "\n", "33->34\n", "\n", "\n", "det\n", "\n", "\n", "\n", "36\n", "36 (apabila)\n", "\n", "\n", "\n", "37->36\n", "\n", "\n", "mark\n", "\n", "\n", "\n", "38\n", "38 (isu)\n", "\n", "\n", "\n", "37->38\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "39\n", "39 (ketidakpatuhan)\n", "\n", "\n", "\n", 
"38->39\n", "\n", "\n", "compound\n", "\n", "\n", "\n", "41\n", "41 (prosedur)\n", "\n", "\n", "\n", "38->41\n", "\n", "\n", "nmod\n", "\n", "\n", "\n", "40\n", "40 (terhadap)\n", "\n", "\n", "\n", "41->40\n", "\n", "\n", "case\n", "\n", "\n", "\n", "42\n", "42 (operasi)\n", "\n", "\n", "\n", "41->42\n", "\n", "\n", "compound\n", "\n", "\n", "\n", "51\n", "51 (memuji)\n", "\n", "\n", "\n", "41->51\n", "\n", "\n", "acl\n", "\n", "\n", "\n", "43\n", "43 (standard)\n", "\n", "\n", "\n", "42->43\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "46\n", "46 (yang)\n", "\n", "\n", "\n", "51->46\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "48\n", "48 (Ahli)\n", "\n", "\n", "\n", "51->48\n", "\n", "\n", "nsubj\n", "\n", "\n", "\n", "52\n", "52 (sikap)\n", "\n", "\n", "\n", "51->52\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "44\n", "44 ((SOP).)\n", "\n", "\n", "\n", "43->44\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "45\n", "45 (Najib,)\n", "\n", "\n", "\n", "44->45\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "47\n", "47 (juga)\n", "\n", "\n", "\n", "48->47\n", "\n", "\n", "advmod\n", "\n", "\n", "\n", "49\n", "49 (Parlimen)\n", "\n", "\n", "\n", "48->49\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "50\n", "50 (Pekan)\n", "\n", "\n", "\n", "49->50\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "53\n", "53 (Ahli)\n", "\n", "\n", "\n", "52->53\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "58\n", "58 (mengaku)\n", "\n", "\n", "\n", "52->58\n", "\n", "\n", "acl\n", "\n", "\n", "\n", "54\n", "54 (Parlimen)\n", "\n", "\n", "\n", "53->54\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "57\n", "57 (yang)\n", "\n", "\n", "\n", "58->57\n", "\n", "\n", "nsubj\n", "\n", "\n", "\n", "59\n", "59 (bersalah)\n", "\n", "\n", "\n", "58->59\n", "\n", "\n", "xcomp\n", "\n", "\n", "\n", "55\n", "55 (Langkawi)\n", "\n", "\n", "\n", "54->55\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "56\n", "56 (itu)\n", "\n", "\n", "\n", "55->56\n", "\n", "\n", "det\n", "\n", "\n", "\n", "61\n", "61 (melanggar)\n", "\n", 
"\n", "\n", "59->61\n", "\n", "\n", "xcomp\n", "\n", "\n", "\n", "60\n", "60 (selepas)\n", "\n", "\n", "\n", "61->60\n", "\n", "\n", "det\n", "\n", "\n", "\n", "62\n", "62 (SOP)\n", "\n", "\n", "\n", "61->62\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "65\n", "65 (mengambil)\n", "\n", "\n", "\n", "61->65\n", "\n", "\n", "advcl\n", "\n", "\n", "\n", "63\n", "63 (kerana)\n", "\n", "\n", "\n", "65->63\n", "\n", "\n", "mark\n", "\n", "\n", "\n", "64\n", "64 (tidak)\n", "\n", "\n", "\n", "65->64\n", "\n", "\n", "advmod\n", "\n", "\n", "\n", "66\n", "66 (suhu)\n", "\n", "\n", "\n", "65->66\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "69\n", "69 (masuk)\n", "\n", "\n", "\n", "65->69\n", "\n", "\n", "advcl\n", "\n", "\n", "\n", "67\n", "67 (badan)\n", "\n", "\n", "\n", "66->67\n", "\n", "\n", "compound\n", "\n", "\n", "\n", "68\n", "68 (ketika)\n", "\n", "\n", "\n", "69->68\n", "\n", "\n", "mark\n", "\n", "\n", "\n", "72\n", "72 (surau)\n", "\n", "\n", "\n", "69->72\n", "\n", "\n", "obl\n", "\n", "\n", "\n", "74\n", "74 (Langkawi)\n", "\n", "\n", "\n", "69->74\n", "\n", "\n", "obl\n", "\n", "\n", "\n", "76\n", "76 (Sabtu)\n", "\n", "\n", "\n", "69->76\n", "\n", "\n", "obl\n", "\n", "\n", "\n", "70\n", "70 (ke)\n", "\n", "\n", "\n", "72->70\n", "\n", "\n", "case\n", "\n", "\n", "\n", "71\n", "71 (sebuah)\n", "\n", "\n", "\n", "72->71\n", "\n", "\n", "det\n", "\n", "\n", "\n", "73\n", "73 (di)\n", "\n", "\n", "\n", "74->73\n", "\n", "\n", "case\n", "\n", "\n", "\n", "75\n", "75 (pada)\n", "\n", "\n", "\n", "76->75\n", "\n", "\n", "case\n", "\n", "\n", "\n", "77\n", "77 (lalu.)\n", "\n", "\n", "\n", "76->77\n", "\n", "\n", "punct\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d_object, tagging, indexing = model.predict(s)\n", "d_object.to_graphvis()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dependency graph object\n", "\n", "To initiate a dependency graph from 
dependency models, you need to call `malaya.dependency.dependency_graph`." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "graph = malaya.dependency.dependency_graph(tagging, indexing)\n", "graph" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### generate graphvis" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "G\n", "\n", "\n", "\n", "0\n", "0 (None)\n", "\n", "\n", "\n", "26\n", "26 (mempunyai)\n", "\n", "\n", "\n", "0->26\n", "\n", "\n", "root\n", "\n", "\n", "\n", "1\n", "1 (KUALA)\n", "\n", "\n", "\n", "26->1\n", "\n", "\n", "nsubj\n", "\n", "\n", "\n", "9\n", "9 (melihat)\n", "\n", "\n", "\n", "26->9\n", "\n", "\n", "xcomp\n", "\n", "\n", "\n", "27\n", "27 ('pandangan)\n", "\n", "\n", "\n", "26->27\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "2\n", "2 (LUMPUR:)\n", "\n", "\n", "\n", "1->2\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "4\n", "4 (hal)\n", "\n", "\n", "\n", "1->4\n", "\n", "\n", "nmod\n", "\n", "\n", "\n", "3\n", "3 (Dalam)\n", "\n", "\n", "\n", "4->3\n", "\n", "\n", "case\n", "\n", "\n", "\n", "5\n", "5 (politik,)\n", "\n", "\n", "\n", "4->5\n", "\n", "\n", "compound\n", "\n", "\n", "\n", "6\n", "6 (jarang)\n", "\n", "\n", "\n", "7\n", "7 (sekali)\n", "\n", "\n", "\n", "6->7\n", "\n", "\n", "advmod\n", "\n", "\n", "\n", "8\n", "8 (untuk)\n", "\n", "\n", "\n", 
"9->6\n", "\n", "\n", "advmod\n", "\n", "\n", "\n", "9->8\n", "\n", "\n", "case\n", "\n", "\n", "\n", "11\n", "11 (figura)\n", "\n", "\n", "\n", "9->11\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "10\n", "10 (dua)\n", "\n", "\n", "\n", "11->10\n", "\n", "\n", "nummod\n", "\n", "\n", "\n", "12\n", "12 (ini)\n", "\n", "\n", "\n", "11->12\n", "\n", "\n", "det\n", "\n", "\n", "\n", "13\n", "13 (-)\n", "\n", "\n", "\n", "11->13\n", "\n", "\n", "punct\n", "\n", "\n", "\n", "14\n", "14 (bekas)\n", "\n", "\n", "\n", "11->14\n", "\n", "\n", "compound\n", "\n", "\n", "\n", "15\n", "15 (Perdana)\n", "\n", "\n", "\n", "14->15\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "16\n", "16 (Menteri,)\n", "\n", "\n", "\n", "15->16\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "22\n", "22 (Tun)\n", "\n", "\n", "\n", "15->22\n", "\n", "\n", "conj\n", "\n", "\n", "\n", "17\n", "17 (Datuk)\n", "\n", "\n", "\n", "16->17\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "21\n", "21 (dan)\n", "\n", "\n", "\n", "22->21\n", "\n", "\n", "cc\n", "\n", "\n", "\n", "23\n", "23 (Dr)\n", "\n", "\n", "\n", "22->23\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "18\n", "18 (Seri)\n", "\n", "\n", "\n", "17->18\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "19\n", "19 (Najib)\n", "\n", "\n", "\n", "18->19\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "20\n", "20 (Razak)\n", "\n", "\n", "\n", "19->20\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "24\n", "24 (Mahathir)\n", "\n", "\n", "\n", "23->24\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "25\n", "25 (Mohamad)\n", "\n", "\n", "\n", "24->25\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "35\n", "35 (berbeza)\n", "\n", "\n", "\n", "27->35\n", "\n", "\n", "acl\n", "\n", "\n", "\n", "28\n", "28 (yang)\n", "\n", "\n", "\n", "35->28\n", "\n", "\n", "nsubj\n", "\n", "\n", "\n", "29\n", "29 (sama')\n", "\n", "\n", "\n", "35->29\n", "\n", "\n", "punct\n", "\n", "\n", "\n", "32\n", "32 (Namun,)\n", "\n", "\n", "\n", "35->32\n", "\n", "\n", "mark\n", "\n", "\n", "\n", "33\n", "33 
(situasi)\n", "\n", "\n", "\n", "35->33\n", "\n", "\n", "nsubj\n", "\n", "\n", "\n", "37\n", "37 (melibatkan)\n", "\n", "\n", "\n", "35->37\n", "\n", "\n", "advcl\n", "\n", "\n", "\n", "31\n", "31 (sekapal.)\n", "\n", "\n", "\n", "29->31\n", "\n", "\n", "conj\n", "\n", "\n", "\n", "30\n", "30 (atau)\n", "\n", "\n", "\n", "31->30\n", "\n", "\n", "cc\n", "\n", "\n", "\n", "34\n", "34 (itu)\n", "\n", "\n", "\n", "33->34\n", "\n", "\n", "det\n", "\n", "\n", "\n", "36\n", "36 (apabila)\n", "\n", "\n", "\n", "37->36\n", "\n", "\n", "mark\n", "\n", "\n", "\n", "38\n", "38 (isu)\n", "\n", "\n", "\n", "37->38\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "39\n", "39 (ketidakpatuhan)\n", "\n", "\n", "\n", "38->39\n", "\n", "\n", "compound\n", "\n", "\n", "\n", "41\n", "41 (prosedur)\n", "\n", "\n", "\n", "38->41\n", "\n", "\n", "nmod\n", "\n", "\n", "\n", "40\n", "40 (terhadap)\n", "\n", "\n", "\n", "41->40\n", "\n", "\n", "case\n", "\n", "\n", "\n", "42\n", "42 (operasi)\n", "\n", "\n", "\n", "41->42\n", "\n", "\n", "compound\n", "\n", "\n", "\n", "51\n", "51 (memuji)\n", "\n", "\n", "\n", "41->51\n", "\n", "\n", "acl\n", "\n", "\n", "\n", "43\n", "43 (standard)\n", "\n", "\n", "\n", "42->43\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "46\n", "46 (yang)\n", "\n", "\n", "\n", "51->46\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "48\n", "48 (Ahli)\n", "\n", "\n", "\n", "51->48\n", "\n", "\n", "nsubj\n", "\n", "\n", "\n", "52\n", "52 (sikap)\n", "\n", "\n", "\n", "51->52\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "44\n", "44 ((SOP).)\n", "\n", "\n", "\n", "43->44\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "45\n", "45 (Najib,)\n", "\n", "\n", "\n", "44->45\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "47\n", "47 (juga)\n", "\n", "\n", "\n", "48->47\n", "\n", "\n", "advmod\n", "\n", "\n", "\n", "49\n", "49 (Parlimen)\n", "\n", "\n", "\n", "48->49\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "50\n", "50 (Pekan)\n", "\n", "\n", "\n", "49->50\n", "\n", "\n", "flat\n", "\n", "\n", "\n", 
"53\n", "53 (Ahli)\n", "\n", "\n", "\n", "52->53\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "58\n", "58 (mengaku)\n", "\n", "\n", "\n", "52->58\n", "\n", "\n", "acl\n", "\n", "\n", "\n", "54\n", "54 (Parlimen)\n", "\n", "\n", "\n", "53->54\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "57\n", "57 (yang)\n", "\n", "\n", "\n", "58->57\n", "\n", "\n", "nsubj\n", "\n", "\n", "\n", "59\n", "59 (bersalah)\n", "\n", "\n", "\n", "58->59\n", "\n", "\n", "xcomp\n", "\n", "\n", "\n", "55\n", "55 (Langkawi)\n", "\n", "\n", "\n", "54->55\n", "\n", "\n", "flat\n", "\n", "\n", "\n", "56\n", "56 (itu)\n", "\n", "\n", "\n", "55->56\n", "\n", "\n", "det\n", "\n", "\n", "\n", "61\n", "61 (melanggar)\n", "\n", "\n", "\n", "59->61\n", "\n", "\n", "xcomp\n", "\n", "\n", "\n", "60\n", "60 (selepas)\n", "\n", "\n", "\n", "61->60\n", "\n", "\n", "det\n", "\n", "\n", "\n", "62\n", "62 (SOP)\n", "\n", "\n", "\n", "61->62\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "65\n", "65 (mengambil)\n", "\n", "\n", "\n", "61->65\n", "\n", "\n", "advcl\n", "\n", "\n", "\n", "63\n", "63 (kerana)\n", "\n", "\n", "\n", "65->63\n", "\n", "\n", "mark\n", "\n", "\n", "\n", "64\n", "64 (tidak)\n", "\n", "\n", "\n", "65->64\n", "\n", "\n", "advmod\n", "\n", "\n", "\n", "66\n", "66 (suhu)\n", "\n", "\n", "\n", "65->66\n", "\n", "\n", "obj\n", "\n", "\n", "\n", "69\n", "69 (masuk)\n", "\n", "\n", "\n", "65->69\n", "\n", "\n", "advcl\n", "\n", "\n", "\n", "67\n", "67 (badan)\n", "\n", "\n", "\n", "66->67\n", "\n", "\n", "compound\n", "\n", "\n", "\n", "68\n", "68 (ketika)\n", "\n", "\n", "\n", "69->68\n", "\n", "\n", "mark\n", "\n", "\n", "\n", "72\n", "72 (surau)\n", "\n", "\n", "\n", "69->72\n", "\n", "\n", "obl\n", "\n", "\n", "\n", "74\n", "74 (Langkawi)\n", "\n", "\n", "\n", "69->74\n", "\n", "\n", "obl\n", "\n", "\n", "\n", "76\n", "76 (Sabtu)\n", "\n", "\n", "\n", "69->76\n", "\n", "\n", "obl\n", "\n", "\n", "\n", "70\n", "70 (ke)\n", "\n", "\n", "\n", "72->70\n", "\n", "\n", "case\n", "\n", "\n", "\n", 
"71\n", "71 (sebuah)\n", "\n", "\n", "\n", "72->71\n", "\n", "\n", "det\n", "\n", "\n", "\n", "73\n", "73 (di)\n", "\n", "\n", "\n", "74->73\n", "\n", "\n", "case\n", "\n", "\n", "\n", "75\n", "75 (pada)\n", "\n", "\n", "\n", "76->75\n", "\n", "\n", "case\n", "\n", "\n", "\n", "77\n", "77 (lalu.)\n", "\n", "\n", "\n", "76->77\n", "\n", "\n", "punct\n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "graph.to_graphvis()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Get nodes" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "defaultdict(.()>,\n", " {0: {'address': 0,\n", " 'word': None,\n", " 'lemma': None,\n", " 'ctag': 'TOP',\n", " 'tag': 'TOP',\n", " 'feats': None,\n", " 'head': None,\n", " 'deps': defaultdict(list, {'root': [26]}),\n", " 'rel': None},\n", " 1: {'address': 1,\n", " 'word': 'KUALA',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 26,\n", " 'deps': defaultdict(list, {'flat': [2], 'nmod': [4]}),\n", " 'rel': 'nsubj'},\n", " 26: {'address': 26,\n", " 'word': 'mempunyai',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 0,\n", " 'deps': defaultdict(list,\n", " {'nsubj': [1], 'xcomp': [9], 'obj': [27]}),\n", " 'rel': 'root'},\n", " 2: {'address': 2,\n", " 'word': 'LUMPUR:',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 1,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'flat'},\n", " 3: {'address': 3,\n", " 'word': 'Dalam',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 4,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'case'},\n", " 4: {'address': 4,\n", " 'word': 'hal',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 1,\n", " 'deps': defaultdict(list, {'case': [3], 
'compound': [5]}),\n", " 'rel': 'nmod'},\n", " 5: {'address': 5,\n", " 'word': 'politik,',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 4,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'compound'},\n", " 6: {'address': 6,\n", " 'word': 'jarang',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 9,\n", " 'deps': defaultdict(list, {'advmod': [7]}),\n", " 'rel': 'advmod'},\n", " 9: {'address': 9,\n", " 'word': 'melihat',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 26,\n", " 'deps': defaultdict(list,\n", " {'advmod': [6], 'case': [8], 'obj': [11]}),\n", " 'rel': 'xcomp'},\n", " 7: {'address': 7,\n", " 'word': 'sekali',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 6,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'advmod'},\n", " 8: {'address': 8,\n", " 'word': 'untuk',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 9,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'case'},\n", " 10: {'address': 10,\n", " 'word': 'dua',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 11,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'nummod'},\n", " 11: {'address': 11,\n", " 'word': 'figura',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 9,\n", " 'deps': defaultdict(list,\n", " {'nummod': [10],\n", " 'det': [12],\n", " 'punct': [13],\n", " 'compound': [14]}),\n", " 'rel': 'obj'},\n", " 12: {'address': 12,\n", " 'word': 'ini',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 11,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'det'},\n", " 13: {'address': 13,\n", " 'word': '-',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 11,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 
'punct'},\n", " 14: {'address': 14,\n", " 'word': 'bekas',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 11,\n", " 'deps': defaultdict(list, {'flat': [15]}),\n", " 'rel': 'compound'},\n", " 15: {'address': 15,\n", " 'word': 'Perdana',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 14,\n", " 'deps': defaultdict(list, {'flat': [16], 'conj': [22]}),\n", " 'rel': 'flat'},\n", " 16: {'address': 16,\n", " 'word': 'Menteri,',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 15,\n", " 'deps': defaultdict(list, {'flat': [17]}),\n", " 'rel': 'flat'},\n", " 17: {'address': 17,\n", " 'word': 'Datuk',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 16,\n", " 'deps': defaultdict(list, {'flat': [18]}),\n", " 'rel': 'flat'},\n", " 18: {'address': 18,\n", " 'word': 'Seri',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 17,\n", " 'deps': defaultdict(list, {'flat': [19]}),\n", " 'rel': 'flat'},\n", " 19: {'address': 19,\n", " 'word': 'Najib',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 18,\n", " 'deps': defaultdict(list, {'flat': [20]}),\n", " 'rel': 'flat'},\n", " 20: {'address': 20,\n", " 'word': 'Razak',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 19,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'flat'},\n", " 21: {'address': 21,\n", " 'word': 'dan',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 22,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'cc'},\n", " 22: {'address': 22,\n", " 'word': 'Tun',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 15,\n", " 'deps': defaultdict(list, {'cc': [21], 'flat': [23]}),\n", " 'rel': 'conj'},\n", " 23: {'address': 23,\n", " 'word': 'Dr',\n", 
" 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 22,\n", " 'deps': defaultdict(list, {'flat': [24]}),\n", " 'rel': 'flat'},\n", " 24: {'address': 24,\n", " 'word': 'Mahathir',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 23,\n", " 'deps': defaultdict(list, {'flat': [25]}),\n", " 'rel': 'flat'},\n", " 25: {'address': 25,\n", " 'word': 'Mohamad',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 24,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'flat'},\n", " 27: {'address': 27,\n", " 'word': \"'pandangan\",\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 26,\n", " 'deps': defaultdict(list, {'acl': [35]}),\n", " 'rel': 'obj'},\n", " 28: {'address': 28,\n", " 'word': 'yang',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 35,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'nsubj'},\n", " 35: {'address': 35,\n", " 'word': 'berbeza',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 27,\n", " 'deps': defaultdict(list,\n", " {'nsubj': [28, 33],\n", " 'punct': [29],\n", " 'mark': [32],\n", " 'advcl': [37]}),\n", " 'rel': 'acl'},\n", " 29: {'address': 29,\n", " 'word': \"sama'\",\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 35,\n", " 'deps': defaultdict(list, {'conj': [31]}),\n", " 'rel': 'punct'},\n", " 30: {'address': 30,\n", " 'word': 'atau',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 31,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'cc'},\n", " 31: {'address': 31,\n", " 'word': 'sekapal.',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 29,\n", " 'deps': defaultdict(list, {'cc': [30]}),\n", " 'rel': 'conj'},\n", " 32: {'address': 32,\n", " 'word': 'Namun,',\n", " 'lemma': 
'_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 35,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'mark'},\n", " 33: {'address': 33,\n", " 'word': 'situasi',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 35,\n", " 'deps': defaultdict(list, {'det': [34]}),\n", " 'rel': 'nsubj'},\n", " 34: {'address': 34,\n", " 'word': 'itu',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 33,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'det'},\n", " 36: {'address': 36,\n", " 'word': 'apabila',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 37,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'mark'},\n", " 37: {'address': 37,\n", " 'word': 'melibatkan',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 35,\n", " 'deps': defaultdict(list, {'mark': [36], 'obj': [38]}),\n", " 'rel': 'advcl'},\n", " 38: {'address': 38,\n", " 'word': 'isu',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 37,\n", " 'deps': defaultdict(list, {'compound': [39], 'nmod': [41]}),\n", " 'rel': 'obj'},\n", " 39: {'address': 39,\n", " 'word': 'ketidakpatuhan',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 38,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'compound'},\n", " 40: {'address': 40,\n", " 'word': 'terhadap',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 41,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'case'},\n", " 41: {'address': 41,\n", " 'word': 'prosedur',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 38,\n", " 'deps': defaultdict(list,\n", " {'case': [40], 'compound': [42], 'acl': [51]}),\n", " 'rel': 'nmod'},\n", " 42: {'address': 42,\n", " 'word': 'operasi',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 
'tag': '_',\n", " 'feats': '_',\n", " 'head': 41,\n", " 'deps': defaultdict(list, {'flat': [43]}),\n", " 'rel': 'compound'},\n", " 43: {'address': 43,\n", " 'word': 'standard',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 42,\n", " 'deps': defaultdict(list, {'flat': [44]}),\n", " 'rel': 'flat'},\n", " 44: {'address': 44,\n", " 'word': '(SOP).',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 43,\n", " 'deps': defaultdict(list, {'flat': [45]}),\n", " 'rel': 'flat'},\n", " 45: {'address': 45,\n", " 'word': 'Najib,',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 44,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'flat'},\n", " 46: {'address': 46,\n", " 'word': 'yang',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 51,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'obj'},\n", " 51: {'address': 51,\n", " 'word': 'memuji',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 41,\n", " 'deps': defaultdict(list, {'obj': [46, 52], 'nsubj': [48]}),\n", " 'rel': 'acl'},\n", " 47: {'address': 47,\n", " 'word': 'juga',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 48,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'advmod'},\n", " 48: {'address': 48,\n", " 'word': 'Ahli',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 51,\n", " 'deps': defaultdict(list, {'advmod': [47], 'flat': [49]}),\n", " 'rel': 'nsubj'},\n", " 49: {'address': 49,\n", " 'word': 'Parlimen',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 48,\n", " 'deps': defaultdict(list, {'flat': [50]}),\n", " 'rel': 'flat'},\n", " 50: {'address': 50,\n", " 'word': 'Pekan',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 49,\n", " 
'deps': defaultdict(list, {}),\n", " 'rel': 'flat'},\n", " 52: {'address': 52,\n", " 'word': 'sikap',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 51,\n", " 'deps': defaultdict(list, {'flat': [53], 'acl': [58]}),\n", " 'rel': 'obj'},\n", " 53: {'address': 53,\n", " 'word': 'Ahli',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 52,\n", " 'deps': defaultdict(list, {'flat': [54]}),\n", " 'rel': 'flat'},\n", " 54: {'address': 54,\n", " 'word': 'Parlimen',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 53,\n", " 'deps': defaultdict(list, {'flat': [55]}),\n", " 'rel': 'flat'},\n", " 55: {'address': 55,\n", " 'word': 'Langkawi',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 54,\n", " 'deps': defaultdict(list, {'det': [56]}),\n", " 'rel': 'flat'},\n", " 56: {'address': 56,\n", " 'word': 'itu',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 55,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'det'},\n", " 57: {'address': 57,\n", " 'word': 'yang',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 58,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'nsubj'},\n", " 58: {'address': 58,\n", " 'word': 'mengaku',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 52,\n", " 'deps': defaultdict(list, {'nsubj': [57], 'xcomp': [59]}),\n", " 'rel': 'acl'},\n", " 59: {'address': 59,\n", " 'word': 'bersalah',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 58,\n", " 'deps': defaultdict(list, {'xcomp': [61]}),\n", " 'rel': 'xcomp'},\n", " 60: {'address': 60,\n", " 'word': 'selepas',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 61,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'det'},\n", " 61: 
{'address': 61,\n", " 'word': 'melanggar',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 59,\n", " 'deps': defaultdict(list,\n", " {'det': [60], 'obj': [62], 'advcl': [65]}),\n", " 'rel': 'xcomp'},\n", " 62: {'address': 62,\n", " 'word': 'SOP',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 61,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'obj'},\n", " 63: {'address': 63,\n", " 'word': 'kerana',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 65,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'mark'},\n", " 65: {'address': 65,\n", " 'word': 'mengambil',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 61,\n", " 'deps': defaultdict(list,\n", " {'mark': [63],\n", " 'advmod': [64],\n", " 'obj': [66],\n", " 'advcl': [69]}),\n", " 'rel': 'advcl'},\n", " 64: {'address': 64,\n", " 'word': 'tidak',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 65,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'advmod'},\n", " 66: {'address': 66,\n", " 'word': 'suhu',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 65,\n", " 'deps': defaultdict(list, {'compound': [67]}),\n", " 'rel': 'obj'},\n", " 67: {'address': 67,\n", " 'word': 'badan',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 66,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'compound'},\n", " 68: {'address': 68,\n", " 'word': 'ketika',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 69,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'mark'},\n", " 69: {'address': 69,\n", " 'word': 'masuk',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 65,\n", " 'deps': defaultdict(list, {'mark': [68], 'obl': [72, 74, 76]}),\n", " 'rel': 
'advcl'},\n", " 70: {'address': 70,\n", " 'word': 'ke',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 72,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'case'},\n", " 72: {'address': 72,\n", " 'word': 'surau',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 69,\n", " 'deps': defaultdict(list, {'case': [70], 'det': [71]}),\n", " 'rel': 'obl'},\n", " 71: {'address': 71,\n", " 'word': 'sebuah',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 72,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'det'},\n", " 73: {'address': 73,\n", " 'word': 'di',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 74,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'case'},\n", " 74: {'address': 74,\n", " 'word': 'Langkawi',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 69,\n", " 'deps': defaultdict(list, {'case': [73]}),\n", " 'rel': 'obl'},\n", " 75: {'address': 75,\n", " 'word': 'pada',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 76,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'case'},\n", " 76: {'address': 76,\n", " 'word': 'Sabtu',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 69,\n", " 'deps': defaultdict(list, {'case': [75], 'punct': [77]}),\n", " 'rel': 'obl'},\n", " 77: {'address': 77,\n", " 'word': 'lalu.',\n", " 'lemma': '_',\n", " 'ctag': '_',\n", " 'tag': '_',\n", " 'feats': '_',\n", " 'head': 76,\n", " 'deps': defaultdict(list, {}),\n", " 'rel': 'punct'}})" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "graph.nodes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Flatten the graph" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"[(('mempunyai', '_'), 'nsubj', ('KUALA', '_')),\n", " (('KUALA', '_'), 'flat', ('LUMPUR:', '_')),\n", " (('KUALA', '_'), 'nmod', ('hal', '_')),\n", " (('hal', '_'), 'case', ('Dalam', '_')),\n", " (('hal', '_'), 'compound', ('politik,', '_')),\n", " (('mempunyai', '_'), 'xcomp', ('melihat', '_')),\n", " (('melihat', '_'), 'advmod', ('jarang', '_')),\n", " (('jarang', '_'), 'advmod', ('sekali', '_')),\n", " (('melihat', '_'), 'case', ('untuk', '_')),\n", " (('melihat', '_'), 'obj', ('figura', '_')),\n", " (('figura', '_'), 'nummod', ('dua', '_')),\n", " (('figura', '_'), 'det', ('ini', '_')),\n", " (('figura', '_'), 'punct', ('-', '_')),\n", " (('figura', '_'), 'compound', ('bekas', '_')),\n", " (('bekas', '_'), 'flat', ('Perdana', '_')),\n", " (('Perdana', '_'), 'flat', ('Menteri,', '_')),\n", " (('Menteri,', '_'), 'flat', ('Datuk', '_')),\n", " (('Datuk', '_'), 'flat', ('Seri', '_')),\n", " (('Seri', '_'), 'flat', ('Najib', '_')),\n", " (('Najib', '_'), 'flat', ('Razak', '_')),\n", " (('Perdana', '_'), 'conj', ('Tun', '_')),\n", " (('Tun', '_'), 'cc', ('dan', '_')),\n", " (('Tun', '_'), 'flat', ('Dr', '_')),\n", " (('Dr', '_'), 'flat', ('Mahathir', '_')),\n", " (('Mahathir', '_'), 'flat', ('Mohamad', '_')),\n", " (('mempunyai', '_'), 'obj', (\"'pandangan\", '_')),\n", " ((\"'pandangan\", '_'), 'acl', ('berbeza', '_')),\n", " (('berbeza', '_'), 'nsubj', ('yang', '_')),\n", " (('berbeza', '_'), 'punct', (\"sama'\", '_')),\n", " ((\"sama'\", '_'), 'conj', ('sekapal.', '_')),\n", " (('sekapal.', '_'), 'cc', ('atau', '_')),\n", " (('berbeza', '_'), 'mark', ('Namun,', '_')),\n", " (('berbeza', '_'), 'nsubj', ('situasi', '_')),\n", " (('situasi', '_'), 'det', ('itu', '_')),\n", " (('berbeza', '_'), 'advcl', ('melibatkan', '_')),\n", " (('melibatkan', '_'), 'mark', ('apabila', '_')),\n", " (('melibatkan', '_'), 'obj', ('isu', '_')),\n", " (('isu', '_'), 'compound', ('ketidakpatuhan', '_')),\n", " (('isu', '_'), 'nmod', ('prosedur', '_')),\n", " (('prosedur', '_'), 'case', 
('terhadap', '_')),\n", " (('prosedur', '_'), 'compound', ('operasi', '_')),\n", " (('operasi', '_'), 'flat', ('standard', '_')),\n", " (('standard', '_'), 'flat', ('(SOP).', '_')),\n", " (('(SOP).', '_'), 'flat', ('Najib,', '_')),\n", " (('prosedur', '_'), 'acl', ('memuji', '_')),\n", " (('memuji', '_'), 'obj', ('yang', '_')),\n", " (('memuji', '_'), 'nsubj', ('Ahli', '_')),\n", " (('Ahli', '_'), 'advmod', ('juga', '_')),\n", " (('Ahli', '_'), 'flat', ('Parlimen', '_')),\n", " (('Parlimen', '_'), 'flat', ('Pekan', '_')),\n", " (('memuji', '_'), 'obj', ('sikap', '_')),\n", " (('sikap', '_'), 'flat', ('Ahli', '_')),\n", " (('Ahli', '_'), 'flat', ('Parlimen', '_')),\n", " (('Parlimen', '_'), 'flat', ('Langkawi', '_')),\n", " (('Langkawi', '_'), 'det', ('itu', '_')),\n", " (('sikap', '_'), 'acl', ('mengaku', '_')),\n", " (('mengaku', '_'), 'nsubj', ('yang', '_')),\n", " (('mengaku', '_'), 'xcomp', ('bersalah', '_')),\n", " (('bersalah', '_'), 'xcomp', ('melanggar', '_')),\n", " (('melanggar', '_'), 'det', ('selepas', '_')),\n", " (('melanggar', '_'), 'obj', ('SOP', '_')),\n", " (('melanggar', '_'), 'advcl', ('mengambil', '_')),\n", " (('mengambil', '_'), 'mark', ('kerana', '_')),\n", " (('mengambil', '_'), 'advmod', ('tidak', '_')),\n", " (('mengambil', '_'), 'obj', ('suhu', '_')),\n", " (('suhu', '_'), 'compound', ('badan', '_')),\n", " (('mengambil', '_'), 'advcl', ('masuk', '_')),\n", " (('masuk', '_'), 'mark', ('ketika', '_')),\n", " (('masuk', '_'), 'obl', ('surau', '_')),\n", " (('surau', '_'), 'case', ('ke', '_')),\n", " (('surau', '_'), 'det', ('sebuah', '_')),\n", " (('masuk', '_'), 'obl', ('Langkawi', '_')),\n", " (('Langkawi', '_'), 'case', ('di', '_')),\n", " (('masuk', '_'), 'obl', ('Sabtu', '_')),\n", " (('Sabtu', '_'), 'case', ('pada', '_')),\n", " (('Sabtu', '_'), 'punct', ('lalu.', '_'))]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(graph.triples())" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "#### Check whether the graph contains cycles" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "graph.contains_cycle()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }