{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Models Accuracy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dependency parsing\n", "\n", "Trained on 80% of the dataset and tested on the remaining 20%. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/dependency](https://github.com/huseinzol05/Malaya/tree/master/session/dependency)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### bert-base\n", "\n", "```text\n", "\n", "arc accuracy: 0.8554239102233114\n", "types accuracy: 0.8481064607232274\n", "root accuracy: 0.9203253968253969\n", "\n", " precision recall f1-score support\n", "\n", " PAD 0.99996 1.00000 0.99998 877864\n", " X 1.00000 0.99986 0.99993 145204\n", " acl 0.96111 0.96190 0.96150 6037\n", " advcl 0.94287 0.93895 0.94091 2408\n", " advmod 0.97171 0.96904 0.97037 9464\n", " amod 0.96283 0.94008 0.95132 8128\n", " appos 0.97426 0.95940 0.96677 4852\n", " aux 1.00000 0.50000 0.66667 4\n", " case 0.98907 0.98834 0.98870 21519\n", " cc 0.98089 0.98708 0.98397 6500\n", " ccomp 0.95515 0.92164 0.93810 855\n", " compound 0.95432 0.96565 0.95995 13479\n", "compound:plur 0.96507 0.97778 0.97138 1215\n", " conj 0.96943 0.98036 0.97486 8604\n", " cop 0.96407 0.98531 0.97457 1906\n", " csubj 0.92157 0.85455 0.88679 55\n", " csubj:pass 0.93750 0.78947 0.85714 19\n", " dep 0.95199 0.93574 0.94380 996\n", " det 0.97043 0.96678 0.96860 8248\n", " fixed 0.94176 0.93672 0.93923 1122\n", " flat 0.98010 0.98217 0.98113 20755\n", " iobj 0.87500 0.80000 0.83582 35\n", " mark 0.94507 0.97448 0.95955 2860\n", " nmod 0.96363 0.95912 0.96137 8121\n", " nsubj 0.97076 0.97091 0.97083 12788\n", " nsubj:pass 0.95192 0.96362 0.95774 3986\n", " nummod 0.98563 0.97942 0.98251 7773\n", " obj 0.96915 0.97071 0.96993 10551\n", " obl 0.97549 0.97164 0.97356 11389\n", " parataxis 0.95038 0.90415 0.92669 699\n", " punct 0.99752 0.99773 0.99762 33438\n", " root 0.98046 0.98124 0.98085 10073\n", 
" xcomp 0.95153 0.94749 0.94951 2590\n", "\n", " accuracy 0.99562 1243537\n", " macro avg 0.96396 0.93822 0.94823 1243537\n", " weighted avg 0.99562 0.99562 0.99562 1243537\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### tiny-bert\n", "\n", "```text\n", "\n", "arc accuracy: 0.7189048051328787\n", "types accuracy: 0.6942783162846734\n", "root accuracy: 0.8860992063492065\n", "\n", " precision recall f1-score support\n", "\n", " PAD 0.99996 1.00000 0.99998 943088\n", " X 0.99999 0.99981 0.99990 145797\n", " acl 0.85006 0.80040 0.82448 6042\n", " advcl 0.61783 0.60566 0.61169 2437\n", " advmod 0.86865 0.86755 0.86810 9513\n", " amod 0.82596 0.78837 0.80672 8217\n", " appos 0.84113 0.79100 0.81530 5000\n", " aux 0.80000 0.50000 0.61538 8\n", " case 0.94714 0.95046 0.94879 21376\n", " cc 0.92151 0.94487 0.93304 6349\n", " ccomp 0.59326 0.26201 0.36349 874\n", " compound 0.85764 0.83530 0.84632 13667\n", "compound:plur 0.83743 0.91349 0.87381 1156\n", " conj 0.87306 0.90624 0.88934 8500\n", " cop 0.90592 0.93670 0.92105 1943\n", " csubj 0.75000 0.05263 0.09836 57\n", " csubj:pass 0.00000 0.00000 0.00000 16\n", " dep 0.66704 0.55176 0.60395 1082\n", " det 0.89147 0.84818 0.86929 7970\n", " fixed 0.80819 0.61696 0.69975 1120\n", " flat 0.90396 0.93947 0.92137 21129\n", " iobj 0.00000 0.00000 0.00000 25\n", " mark 0.74718 0.83845 0.79019 2767\n", " nmod 0.86083 0.78159 0.81930 8017\n", " nsubj 0.85174 0.89750 0.87402 12712\n", " nsubj:pass 0.78514 0.82246 0.80337 4061\n", " nummod 0.88943 0.93509 0.91169 8026\n", " obj 0.89982 0.84423 0.87114 10618\n", " obl 0.84081 0.88283 0.86131 11385\n", " parataxis 0.48635 0.26667 0.34446 735\n", " punct 0.98350 0.99126 0.98736 33736\n", " root 0.91085 0.93726 0.92387 10073\n", " xcomp 0.69305 0.76415 0.72686 2544\n", "\n", " accuracy 0.98102 1310040\n", " macro avg 0.77906 0.72946 0.74011 1310040\n", " weighted avg 0.98076 0.98102 0.98073 1310040\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, 
"source": [ "#### albert-base\n", "\n", "```text\n", "\n", "arc accuracy: 0.8118309576064845\n", "types accuracy: 0.7931625589721538\n", "root accuracy: 0.879281746031746\n", "\n", " precision recall f1-score support\n", "\n", " PAD 1.00000 1.00000 1.00000 905035\n", " X 0.99997 0.99998 0.99998 159607\n", " acl 0.89111 0.88994 0.89052 6051\n", " advcl 0.75213 0.78003 0.76583 2373\n", " advmod 0.89975 0.92642 0.91289 9378\n", " amod 0.86607 0.87808 0.87204 8145\n", " appos 0.87914 0.89496 0.88698 4779\n", " aux 1.00000 0.37500 0.54545 8\n", " case 0.96890 0.97142 0.97016 21521\n", " cc 0.96049 0.96393 0.96221 6405\n", " ccomp 0.70574 0.67583 0.69046 873\n", " compound 0.88800 0.89660 0.89228 13530\n", "compound:plur 0.93381 0.93981 0.93680 1246\n", " conj 0.94147 0.93436 0.93790 8608\n", " cop 0.94652 0.96651 0.95641 1941\n", " csubj 0.75000 0.39623 0.51852 53\n", " csubj:pass 0.77778 0.77778 0.77778 9\n", " dep 0.81778 0.72871 0.77068 1010\n", " det 0.91665 0.90606 0.91132 8314\n", " fixed 0.87862 0.80565 0.84055 1168\n", " flat 0.96177 0.93608 0.94875 20400\n", " iobj 0.71429 0.42857 0.53571 35\n", " mark 0.88640 0.88577 0.88608 2854\n", " nmod 0.86857 0.90150 0.88473 8020\n", " nsubj 0.89466 0.93382 0.91382 12633\n", " nsubj:pass 0.91977 0.81904 0.86648 4045\n", " nummod 0.95316 0.95864 0.95589 8003\n", " obj 0.90795 0.92092 0.91439 10357\n", " obl 0.93016 0.90607 0.91796 11466\n", " parataxis 0.72669 0.62953 0.67463 718\n", " punct 0.99482 0.99724 0.99603 33312\n", " root 0.93869 0.94093 0.93981 10073\n", " xcomp 0.85300 0.80468 0.82813 2524\n", "\n", " accuracy 0.98785 1284494\n", " macro avg 0.88860 0.84152 0.85761 1284494\n", " weighted avg 0.98786 0.98785 0.98782 1284494\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-tiny\n", "\n", "```text\n", "\n", "arc accuracy: 0.7087220659183397\n", "types accuracy: 0.6735055899028873\n", "root accuracy: 0.8178452380952382\n", "\n", " precision recall f1-score support\n", "\n", " PAD 
1.00000 1.00000 1.00000 901404\n", " X 0.99997 0.99998 0.99997 158217\n", " acl 0.74523 0.72259 0.73374 6056\n", " advcl 0.44763 0.44416 0.44589 2319\n", " advmod 0.80839 0.80245 0.80541 9537\n", " amod 0.74481 0.69167 0.71726 8144\n", " appos 0.71137 0.68084 0.69577 4963\n", " aux 0.00000 0.00000 0.00000 9\n", " case 0.90625 0.93745 0.92159 21056\n", " cc 0.92435 0.90888 0.91655 6453\n", " ccomp 0.32162 0.13918 0.19429 855\n", " compound 0.76535 0.75323 0.75924 13008\n", "compound:plur 0.76103 0.77066 0.76581 1186\n", " conj 0.79454 0.78507 0.78978 8640\n", " cop 0.87581 0.90736 0.89130 1943\n", " csubj 0.66667 0.04082 0.07692 49\n", " csubj:pass 0.00000 0.00000 0.00000 18\n", " dep 0.41637 0.38321 0.39910 929\n", " det 0.81424 0.77924 0.79636 7909\n", " fixed 0.63932 0.41054 0.50000 1101\n", " flat 0.85963 0.91321 0.88561 20856\n", " iobj 1.00000 0.03333 0.06452 30\n", " mark 0.69997 0.72039 0.71003 2879\n", " nmod 0.71129 0.68985 0.70041 7964\n", " nsubj 0.74144 0.81233 0.77527 12719\n", " nsubj:pass 0.68649 0.56466 0.61964 3905\n", " nummod 0.84427 0.87244 0.85813 7581\n", " obj 0.79591 0.78073 0.78825 10380\n", " obl 0.75820 0.78392 0.77085 11144\n", " parataxis 0.25150 0.06231 0.09988 674\n", " punct 0.98207 0.98323 0.98265 33034\n", " root 0.84186 0.87362 0.85745 10073\n", " xcomp 0.62652 0.63961 0.63300 2489\n", "\n", " accuracy 0.96997 1277524\n", " macro avg 0.70128 0.63294 0.64105 1277524\n", " weighted avg 0.96929 0.96997 0.96946 1277524\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### xlnet-base\n", "\n", "```text\n", "\n", "arc accuracy: 0.9310084738376598\n", "types accuracy: 0.9258795751889828\n", "root accuracy: 0.9474206349206349\n", "\n", " precision recall f1-score support\n", "\n", " PAD 0.99998 1.00000 0.99999 632972\n", " X 1.00000 0.99997 0.99999 143586\n", " acl 0.98091 0.98226 0.98158 5806\n", " advcl 0.97098 0.95161 0.96120 2356\n", " advmod 0.98802 0.97806 0.98302 9527\n", " amod 0.95966 0.97100 0.96530 
8208\n", " appos 0.98846 0.98947 0.98896 4936\n", " aux 1.00000 1.00000 1.00000 10\n", " case 0.99454 0.99110 0.99282 21128\n", " cc 0.98704 0.99518 0.99109 6429\n", " ccomp 0.89091 0.97313 0.93021 856\n", " compound 0.98091 0.96643 0.97362 13079\n", "compound:plur 0.99068 0.98401 0.98733 1188\n", " conj 0.98303 0.99214 0.98756 8524\n", " cop 0.98664 0.99071 0.98867 1938\n", " csubj 0.96000 0.96000 0.96000 50\n", " csubj:pass 0.95652 0.91667 0.93617 24\n", " dep 0.98182 0.96716 0.97444 1005\n", " det 0.98698 0.97756 0.98225 8065\n", " fixed 0.96071 0.97162 0.96613 1057\n", " flat 0.98389 0.99064 0.98726 20411\n", " iobj 0.96154 0.80645 0.87719 31\n", " mark 0.96611 0.98539 0.97565 2806\n", " nmod 0.97956 0.97285 0.97619 8030\n", " nsubj 0.98317 0.98402 0.98359 12701\n", " nsubj:pass 0.96930 0.97858 0.97392 3969\n", " nummod 0.99113 0.99327 0.99220 7879\n", " obj 0.98266 0.98076 0.98171 10342\n", " obl 0.98468 0.98256 0.98362 11183\n", " parataxis 0.95595 0.95455 0.95525 682\n", " punct 0.99952 0.99949 0.99950 33107\n", " root 0.98888 0.98888 0.98888 10073\n", " xcomp 0.95951 0.96027 0.95989 2517\n", "\n", " accuracy 0.99678 994475\n", " macro avg 0.97738 0.97381 0.97531 994475\n", " weighted avg 0.99679 0.99678 0.99678 994475\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### alxlnet-base\n", "\n", "```text\n", "\n", "arc accuracy: 0.8943757029483008\n", "types accuracy: 0.88690168487317\n", "root accuracy: 0.9425595238095238\n", "\n", " precision recall f1-score support\n", "\n", " PAD 0.99999 1.00000 0.99999 644667\n", " X 0.99998 0.99999 0.99998 144988\n", " acl 0.95995 0.96137 0.96066 6058\n", " advcl 0.91687 0.93839 0.92751 2386\n", " advmod 0.97160 0.97620 0.97389 9496\n", " amod 0.95264 0.94761 0.95012 8342\n", " appos 0.97560 0.97638 0.97599 4995\n", " aux 1.00000 1.00000 1.00000 6\n", " case 0.99147 0.98685 0.98916 21680\n", " cc 0.97523 0.99377 0.98441 6418\n", " ccomp 0.95249 0.90112 0.92610 890\n", " compound 0.95478 0.95656 
0.95567 13399\n", "compound:plur 0.97575 0.98067 0.97821 1190\n", " conj 0.96575 0.98929 0.97738 8494\n", " cop 0.98201 0.98708 0.98454 1935\n", " csubj 1.00000 0.90476 0.95000 42\n", " csubj:pass 0.91667 0.91667 0.91667 12\n", " dep 0.96490 0.94781 0.95628 1073\n", " det 0.96461 0.97375 0.96916 8230\n", " fixed 0.95762 0.92188 0.93941 1152\n", " flat 0.98208 0.98030 0.98119 20967\n", " iobj 1.00000 0.82927 0.90667 41\n", " mark 0.96463 0.95609 0.96034 2824\n", " nmod 0.96933 0.95492 0.96207 8207\n", " nsubj 0.97533 0.97086 0.97309 12867\n", " nsubj:pass 0.95811 0.94145 0.94970 3911\n", " nummod 0.98952 0.98590 0.98770 7659\n", " obj 0.97249 0.96839 0.97044 10440\n", " obl 0.97129 0.97222 0.97175 11483\n", " parataxis 0.95691 0.91348 0.93469 705\n", " punct 0.99883 0.99955 0.99919 33252\n", " root 0.98284 0.98372 0.98328 10073\n", " xcomp 0.92520 0.94988 0.93738 2474\n", "\n", " accuracy 0.99475 1010356\n", " macro avg 0.97044 0.95958 0.96462 1010356\n", " weighted avg 0.99476 0.99475 0.99475 1010356\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Emotion Analysis\n", "\n", "Trained on 80% of the dataset and tested on the remaining 20%. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/emotion](https://github.com/huseinzol05/Malaya/tree/master/session/emotion)." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### multinomial\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " anger 0.88832 0.90889 0.89848 5872\n", " fear 0.89515 0.88078 0.88791 4110\n", " happy 0.88992 0.92776 0.90845 6091\n", " love 0.92420 0.90616 0.91509 4252\n", " sadness 0.91943 0.87356 0.89591 5212\n", " surprise 0.92340 0.92838 0.92588 2597\n", "\n", " accuracy 0.90371 28134\n", " macro avg 0.90674 0.90426 0.90529 28134\n", "weighted avg 0.90409 0.90371 0.90366 28134\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### bert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " anger 0.99712 0.99763 0.99737 5895\n", " fear 0.99687 0.99759 0.99723 4150\n", " happy 0.99900 0.99900 0.99900 6017\n", " love 0.99855 0.99615 0.99735 4154\n", " sadness 0.99793 0.99906 0.99849 5307\n", " surprise 0.99770 0.99694 0.99732 2612\n", "\n", " accuracy 0.99790 28135\n", " macro avg 0.99786 0.99773 0.99779 28135\n", "weighted avg 0.99790 0.99790 0.99790 28135\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### tiny-bert\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " anger 0.99765 0.99481 0.99623 5970\n", " fear 0.99607 0.99656 0.99631 4068\n", " happy 0.99671 0.99918 0.99794 6062\n", " love 0.99758 0.99638 0.99698 4145\n", " sadness 0.99736 0.99793 0.99764 5303\n", " surprise 0.99614 0.99691 0.99652 2587\n", "\n", " accuracy 0.99701 28135\n", " macro avg 0.99692 0.99696 0.99694 28135\n", "weighted avg 0.99702 0.99701 0.99701 28135\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " anger 0.99785 0.99472 0.99628 6062\n", " fear 0.99582 0.99926 0.99754 4056\n", " happy 0.99866 0.99866 0.99866 5988\n", " love 0.99712 0.99760 0.99736 4162\n", " sadness 0.99813 0.99813 0.99813 5334\n", " surprise 
0.99685 0.99803 0.99744 2533\n", "\n", " accuracy 0.99758 28135\n", " macro avg 0.99740 0.99773 0.99757 28135\n", "weighted avg 0.99758 0.99758 0.99758 28135\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-tiny\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " anger 0.99396 0.98603 0.98998 6012\n", " fear 0.99390 0.99512 0.99451 4096\n", " happy 0.99652 0.99652 0.99652 6030\n", " love 0.99114 0.99187 0.99150 4059\n", " sadness 0.99121 0.99699 0.99409 5316\n", " surprise 0.99278 0.99619 0.99448 2622\n", "\n", " accuracy 0.99346 28135\n", " macro avg 0.99325 0.99378 0.99351 28135\n", "weighted avg 0.99346 0.99346 0.99346 28135\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### xlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " anger 0.99699 0.99733 0.99716 5983\n", " fear 0.99778 0.99827 0.99802 4045\n", " happy 0.99883 0.99850 0.99867 6005\n", " love 0.99718 0.99625 0.99671 4261\n", " sadness 0.99754 0.99773 0.99764 5288\n", " surprise 0.99804 0.99843 0.99824 2553\n", "\n", " accuracy 0.99773 28135\n", " macro avg 0.99773 0.99775 0.99774 28135\n", "weighted avg 0.99773 0.99773 0.99773 28135\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### alxlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " anger 0.99669 0.99439 0.99554 6065\n", " fear 0.99702 0.99727 0.99714 4027\n", " happy 0.99764 0.99949 0.99857 5918\n", " love 0.99554 0.99694 0.99624 4250\n", " sadness 0.99867 0.99641 0.99754 5286\n", " surprise 0.99422 0.99730 0.99576 2589\n", "\n", " accuracy 0.99691 28135\n", " macro avg 0.99663 0.99697 0.99680 28135\n", "weighted avg 0.99691 0.99691 0.99691 28135\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Entities Recognition\n", "\n", "Trained on 80% of the dataset and tested on the remaining 20%. 
A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/entities](https://github.com/huseinzol05/Malaya/tree/master/session/entities)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### bert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " OTHER 0.99224 0.99931 0.99576 5160854\n", " PAD 1.00000 1.00000 1.00000 877767\n", " X 0.99995 1.00000 0.99998 2921053\n", " event 0.99911 0.88679 0.93961 143787\n", " law 0.99704 0.97040 0.98354 146950\n", " location 0.98677 0.98420 0.98548 428869\n", "organization 0.99335 0.95355 0.97304 694150\n", " person 0.97636 0.99476 0.98547 507960\n", " quantity 0.99965 0.99803 0.99884 88200\n", " time 0.98462 0.99938 0.99194 179880\n", "\n", " accuracy 0.99406 11149470\n", " macro avg 0.99291 0.97864 0.98537 11149470\n", "weighted avg 0.99409 0.99406 0.99400 11149470\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### tiny-bert\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " OTHER 0.98178 0.99946 0.99054 5160854\n", " PAD 1.00000 1.00000 1.00000 1673627\n", " X 1.00000 1.00000 1.00000 2921053\n", " event 0.99666 0.70215 0.82388 143787\n", " law 0.99522 0.94921 0.97167 146950\n", " location 0.96753 0.96547 0.96650 428869\n", "organization 0.99403 0.87009 0.92794 694150\n", " person 0.92771 0.99283 0.95917 507960\n", " quantity 0.99643 0.99762 0.99703 88200\n", " time 0.95574 0.99855 0.97668 179880\n", "\n", " accuracy 0.98642 11945330\n", " macro avg 0.98151 0.94754 0.96134 11945330\n", "weighted avg 0.98675 0.98642 0.98594 11945330\n", "\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " OTHER 0.98087 0.99948 0.99008 5160854\n", " PAD 1.00000 1.00000 1.00000 881183\n", " X 0.99996 1.00000 0.99998 2933007\n", " event 0.99021 0.80012 0.88507 143787\n", " law 0.96373 0.94234 
0.95291 146950\n", " location 0.97388 0.96256 0.96819 428869\n", "organization 0.99506 0.83927 0.91055 694150\n", " person 0.91340 0.99378 0.95189 507960\n", " quantity 0.99636 0.99704 0.99670 88200\n", " time 0.98911 0.99859 0.99383 179880\n", "\n", " accuracy 0.98466 11164840\n", " macro avg 0.98026 0.95332 0.96492 11164840\n", "weighted avg 0.98509 0.98466 0.98421 11164840\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-tiny\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " OTHER 0.96614 0.99651 0.98109 5160854\n", " PAD 1.00000 1.00000 1.00000 881183\n", " X 0.99984 1.00000 0.99992 2933007\n", " event 0.97661 0.52453 0.68250 143787\n", " law 0.97992 0.89007 0.93284 146950\n", " location 0.92117 0.91206 0.91659 428869\n", "organization 0.96821 0.76413 0.85414 694150\n", " person 0.87211 0.97366 0.92009 507960\n", " quantity 0.98545 0.99220 0.98881 88200\n", " time 0.94056 0.98312 0.96137 179880\n", "\n", " accuracy 0.97124 11164840\n", " macro avg 0.96100 0.90363 0.92374 11164840\n", "weighted avg 0.97185 0.97124 0.96965 11164840\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### xlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " OTHER 0.98873 0.99965 0.99416 5160854\n", " PAD 1.00000 1.00000 1.00000 877767\n", " X 0.99999 1.00000 0.99999 2921053\n", " event 0.99404 0.93677 0.96456 143787\n", " law 0.99734 0.98832 0.99281 146950\n", " location 0.99189 0.97927 0.98554 428869\n", "organization 0.99785 0.92433 0.95968 694150\n", " person 0.97446 0.98956 0.98195 507960\n", " quantity 0.99861 0.99875 0.99868 88200\n", " time 0.99153 0.99872 0.99511 179880\n", "\n", " accuracy 0.99285 11149470\n", " macro avg 0.99344 0.98154 0.98725 11149470\n", "weighted avg 0.99291 0.99285 0.99276 11149470\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### alxlnet-base\n", "\n", "```text\n", "\n", " precision recall 
f1-score support\n", "\n", " OTHER 0.99124 0.99962 0.99541 5160854\n", " PAD 1.00000 1.00000 1.00000 877767\n", " X 1.00000 1.00000 1.00000 2921053\n", " event 0.99766 0.86900 0.92890 143787\n", " law 0.99837 0.97023 0.98410 146950\n", " location 0.99004 0.98249 0.98625 428869\n", "organization 0.99584 0.94088 0.96758 694150\n", " person 0.96062 0.99571 0.97785 507960\n", " quantity 0.99920 0.99976 0.99948 88200\n", " time 0.98851 0.99976 0.99410 179880\n", "\n", " accuracy 0.99319 11149470\n", " macro avg 0.99215 0.97575 0.98337 11149470\n", "weighted avg 0.99327 0.99319 0.99309 11149470\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Language Detection\n", "\n", "Trained on 80% of the dataset and tested on the remaining 20%. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/language-detection](https://github.com/huseinzol05/Malaya/tree/master/session/language-detection)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### fast-text\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " eng 0.94014 0.96750 0.95362 553739\n", " ind 0.97290 0.97316 0.97303 576059\n", " malay 0.98674 0.95262 0.96938 1800649\n", " manglish 0.96595 0.98417 0.97498 181442\n", " other 0.98454 0.99698 0.99072 1428083\n", " rojak 0.81149 0.91650 0.86080 189678\n", "\n", " accuracy 0.97002 4729650\n", " macro avg 0.94363 0.96515 0.95375 4729650\n", "weighted avg 0.97111 0.97002 0.97028 4729650\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Deep learning\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " eng 0.96760 0.97401 0.97080 553739\n", " ind 0.97635 0.96131 0.96877 576059\n", " malay 0.96985 0.98498 0.97736 1800649\n", " manglish 0.98036 0.96569 0.97297 181442\n", " other 0.99641 0.99627 0.99634 1428083\n", " rojak 0.94221 0.84302 0.88986 189678\n", "\n", " accuracy 0.97779 4729650\n", " macro avg 0.97213 0.95421 0.96268 
4729650\n", "weighted avg 0.97769 0.97779 0.97760 4729650\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### POS Recognition\n", "\n", "Trained on 80% of the dataset and tested on the remaining 20%. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/pos](https://github.com/huseinzol05/Malaya/tree/master/session/pos)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### bert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " ADJ 0.79261 0.80819 0.80033 45666\n", " ADP 0.95551 0.96155 0.95852 119589\n", " ADV 0.86824 0.83832 0.85302 47760\n", " AUX 0.99362 0.99710 0.99536 10000\n", " CCONJ 0.97639 0.92470 0.94984 37171\n", " DET 0.93663 0.92556 0.93107 38839\n", " NOUN 0.91335 0.89454 0.90385 268329\n", " NUM 0.91883 0.94521 0.93183 41211\n", " PAD 0.98980 1.00000 0.99487 147445\n", " PART 0.91225 0.91291 0.91258 5500\n", " PRON 0.97505 0.94047 0.95745 48835\n", " PROPN 0.91824 0.94054 0.92926 227608\n", " PUNCT 0.99829 0.99853 0.99841 182824\n", " SCONJ 0.76934 0.84297 0.80447 15150\n", " SYM 0.99711 0.95722 0.97676 3600\n", " VERB 0.94284 0.94533 0.94408 124518\n", " X 0.99947 0.99882 0.99914 413549\n", "\n", " accuracy 0.95254 1777594\n", " macro avg 0.93280 0.93129 0.93181 1777594\n", "weighted avg 0.95272 0.95254 0.95254 1777594\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### tiny-bert\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " ADJ 0.78068 0.79622 0.78837 45666\n", " ADP 0.95356 0.96107 0.95730 119589\n", " ADV 0.85048 0.83499 0.84266 47760\n", " AUX 0.99502 0.99850 0.99676 10000\n", " CCONJ 0.96900 0.91986 0.94379 37171\n", " DET 0.93853 0.94263 0.94058 38839\n", " NOUN 0.89955 0.89812 0.89883 268329\n", " NUM 0.93685 0.93740 0.93712 41211\n", " PAD 0.99445 1.00000 0.99722 272341\n", " PART 0.91302 0.91418 0.91360 5500\n", " PRON 0.97478 0.93785 0.95596 48835\n", " PROPN 0.92504 
0.92239 0.92371 227608\n", " PUNCT 0.99776 0.99815 0.99796 182824\n", " SCONJ 0.75747 0.84376 0.79829 15150\n", " SYM 0.95358 0.90167 0.92690 3600\n", " VERB 0.93816 0.94470 0.94142 124518\n", " X 0.99974 0.99879 0.99926 413549\n", "\n", " accuracy 0.95343 1902490\n", " macro avg 0.92810 0.92649 0.92704 1902490\n", "weighted avg 0.95364 0.95343 0.95349 1902490\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-base\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " ADJ 0.81706 0.76324 0.78923 45666\n", " ADP 0.95181 0.96143 0.95660 119589\n", " ADV 0.84898 0.84148 0.84521 47760\n", " AUX 0.99502 1.00000 0.99751 10000\n", " CCONJ 0.93370 0.94071 0.93719 37171\n", " DET 0.93324 0.92824 0.93073 38839\n", " NOUN 0.90102 0.89915 0.90008 268329\n", " NUM 0.93291 0.94002 0.93645 41211\n", " PAD 1.00000 1.00000 1.00000 147215\n", " PART 0.91795 0.89909 0.90842 5500\n", " PRON 0.97728 0.93198 0.95409 48835\n", " PROPN 0.91565 0.93866 0.92701 227608\n", " PUNCT 0.99818 0.99890 0.99854 182824\n", " SCONJ 0.79499 0.74330 0.76828 15150\n", " SYM 0.98485 0.90278 0.94203 3600\n", " VERB 0.94143 0.94251 0.94197 124518\n", " X 0.99972 0.99975 0.99973 414899\n", "\n", " accuracy 0.95105 1778714\n", " macro avg 0.93199 0.91948 0.92547 1778714\n", "weighted avg 0.95085 0.95105 0.95088 1778714\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-tiny\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " ADJ 0.71343 0.69192 0.70251 45666\n", " ADP 0.94552 0.92892 0.93715 119589\n", " ADV 0.82394 0.77969 0.80120 47760\n", " AUX 0.99502 0.99930 0.99716 10000\n", " CCONJ 0.95223 0.92397 0.93789 37171\n", " DET 0.92886 0.89495 0.91159 38839\n", " NOUN 0.85984 0.87755 0.86860 268329\n", " NUM 0.90365 0.90240 0.90303 41211\n", " PAD 1.00000 1.00000 1.00000 147215\n", " PART 0.88633 0.82509 0.85461 5500\n", " PRON 0.94693 0.93722 0.94205 48835\n", " PROPN 0.90464 0.89602 0.90031 
227608\n", " PUNCT 0.98900 0.99757 0.99327 182824\n", " SCONJ 0.70104 0.77234 0.73496 15150\n", " SYM 0.94761 0.86417 0.90397 3600\n", " VERB 0.90093 0.92448 0.91255 124518\n", " X 0.99946 0.99954 0.99950 414899\n", "\n", " accuracy 0.93335 1778714\n", " macro avg 0.90579 0.89501 0.90002 1778714\n", "weighted avg 0.93344 0.93335 0.93331 1778714\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### xlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " ADJ 0.83194 0.77563 0.80280 45666\n", " ADP 0.96501 0.95786 0.96142 119589\n", " ADV 0.85073 0.84144 0.84606 47760\n", " AUX 0.99502 0.99950 0.99726 10000\n", " CCONJ 0.96564 0.92473 0.94474 37171\n", " DET 0.94985 0.93192 0.94080 38839\n", " NOUN 0.89484 0.92123 0.90784 268329\n", " NUM 0.94009 0.94511 0.94260 41211\n", " PAD 0.99816 1.00000 0.99908 146373\n", " PART 0.91259 0.94345 0.92777 5500\n", " PRON 0.96988 0.94223 0.95586 48835\n", " PROPN 0.93581 0.92557 0.93066 227608\n", " PUNCT 0.99831 0.99933 0.99882 182824\n", " SCONJ 0.73907 0.82376 0.77912 15150\n", " SYM 0.96944 0.96917 0.96930 3600\n", " VERB 0.94517 0.94727 0.94622 124518\n", " X 0.99992 0.99957 0.99975 410749\n", "\n", " accuracy 0.95410 1773722\n", " macro avg 0.93303 0.93222 0.93236 1773722\n", "weighted avg 0.95433 0.95410 0.95411 1773722\n", "\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### alxlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " ADJ 0.79153 0.79396 0.79275 45666\n", " ADP 0.95941 0.96102 0.96021 119589\n", " ADV 0.85117 0.82073 0.83567 47760\n", " AUX 0.99641 0.99860 0.99750 10000\n", " CCONJ 0.96687 0.92793 0.94700 37171\n", " DET 0.91526 0.93156 0.92334 38839\n", " NOUN 0.91155 0.89253 0.90194 268329\n", " NUM 0.92871 0.93635 0.93252 41211\n", " PAD 0.99816 1.00000 0.99908 146373\n", " PART 0.91285 0.92364 0.91821 5500\n", " PRON 0.97040 0.94404 0.95704 48835\n", " PROPN 0.90899 0.94301 0.92569 
227608\n", " PUNCT 0.99887 0.99928 0.99908 182824\n", " SCONJ 0.69691 0.86964 0.77375 15150\n", " SYM 0.99941 0.94556 0.97174 3600\n", " VERB 0.95809 0.93052 0.94411 124518\n", " X 0.99985 0.99945 0.99965 410749\n", "\n", " accuracy 0.95109 1773722\n", " macro avg 0.92732 0.93046 0.92819 1773722\n", "weighted avg 0.95168 0.95109 0.95121 1773722\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Relevancy\n", "\n", "Trained on 80% of the dataset and tested on the remaining 20%. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/relevancy](https://github.com/huseinzol05/Malaya/tree/master/session/relevancy)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### bert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", "not relevant 0.87625 0.73478 0.79930 5946\n", " relevant 0.87117 0.94531 0.90673 11281\n", "\n", " accuracy 0.87264 17227\n", " macro avg 0.87371 0.84004 0.85302 17227\n", "weighted avg 0.87293 0.87264 0.86965 17227\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### tiny-bert\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", "not relevant 0.95455 0.00353 0.00704 5946\n", " relevant 0.65562 0.99991 0.79197 11281\n", "\n", " accuracy 0.65601 17227\n", " macro avg 0.80508 0.50172 0.39950 17227\n", "weighted avg 0.75880 0.65601 0.52104 17227\n", "\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", "not relevant 0.81807 0.80844 0.81323 5946\n", " relevant 0.89966 0.90524 0.90244 11281\n", "\n", " accuracy 0.87183 17227\n", " macro avg 0.85886 0.85684 0.85783 17227\n", "weighted avg 0.87150 0.87183 0.87165 17227\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-tiny\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", "not 
relevant 0.84793 0.66768 0.74708 5946\n", " relevant 0.84249 0.93689 0.88718 11281\n", "\n", " accuracy 0.84397 17227\n", " macro avg 0.84521 0.80228 0.81713 17227\n", "weighted avg 0.84437 0.84397 0.83883 17227\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### xlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", "not relevant 0.85676 0.80272 0.82886 5946\n", " relevant 0.89937 0.92926 0.91407 11281\n", "\n", " accuracy 0.88559 17227\n", " macro avg 0.87806 0.86599 0.87147 17227\n", "weighted avg 0.88466 0.88559 0.88466 17227\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### alxlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", "not relevant 0.89878 0.71678 0.79753 5946\n", " relevant 0.86512 0.95745 0.90895 11281\n", "\n", " accuracy 0.87438 17227\n", " macro avg 0.88195 0.83712 0.85324 17227\n", "weighted avg 0.87674 0.87438 0.87049 17227\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sentiment Analysis\n", "\n", "Trained on 80% of the dataset and tested on the remaining 20%. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/sentiment](https://github.com/huseinzol05/Malaya/tree/master/session/sentiment)." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### multinomial\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " negative 0.76305 0.89993 0.82586 15459\n", " neutral 0.81065 0.76562 0.78749 16938\n", " positive 0.76113 0.61208 0.67852 9355\n", "\n", " accuracy 0.78094 41752\n", " macro avg 0.77828 0.75921 0.76396 41752\n", "weighted avg 0.78193 0.78094 0.77728 41752\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### bert-base\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " negative 0.95700 0.94722 0.95208 15459\n", " neutral 0.94767 0.94403 0.94585 16938\n", " positive 0.89079 0.91203 0.90128 9355\n", "\n", " accuracy 0.93804 41752\n", " macro avg 0.93182 0.93442 0.93307 41752\n", "weighted avg 0.93838 0.93804 0.93817 41752\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### tiny-bert\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " negative 0.95214 0.95362 0.95288 15459\n", " neutral 0.93852 0.94728 0.94288 16938\n", " positive 0.91104 0.89332 0.90209 9355\n", "\n", " accuracy 0.93754 41752\n", " macro avg 0.93390 0.93141 0.93262 41752\n", "weighted avg 0.93741 0.93754 0.93744 41752\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-base\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " negative 0.95209 0.93447 0.94320 15459\n", " neutral 0.93541 0.91575 0.92548 16938\n", " positive 0.84935 0.90764 0.87753 9355\n", "\n", " accuracy 0.92087 41752\n", " macro avg 0.91228 0.91929 0.91540 41752\n", "weighted avg 0.92230 0.92087 0.92130 41752\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-tiny\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " negative 0.92378 0.95731 0.94025 15459\n", " neutral 0.94531 0.90825 0.92641 16938\n", " positive 0.87418 0.88381 0.87897 9355\n", "\n", " accuracy 0.92094 41752\n", " macro avg 0.91442 
0.91646 0.91521 41752\n", "weighted avg 0.92140 0.92094 0.92090 41752\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### xlnet-base\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " negative 0.91680 0.97438 0.94471 15459\n", " neutral 0.96408 0.90164 0.93182 16938\n", " positive 0.89083 0.90283 0.89679 9355\n", "\n", " accuracy 0.92884 41752\n", " macro avg 0.92390 0.92629 0.92444 41752\n", "weighted avg 0.93016 0.92884 0.92874 41752\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### alxlnet-base\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " negative 0.93771 0.95336 0.94547 15459\n", " neutral 0.95482 0.90949 0.93160 16938\n", " positive 0.86436 0.91480 0.88887 9355\n", "\n", " accuracy 0.92693 41752\n", " macro avg 0.91896 0.92589 0.92198 41752\n", "weighted avg 0.92821 0.92693 0.92716 41752\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### fastformer-base\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " negative 0.91534 0.92322 0.91926 15459\n", " neutral 0.92113 0.89710 0.90895 16938\n", " positive 0.84189 0.86970 0.85557 9355\n", "\n", " accuracy 0.90063 41752\n", " macro avg 0.89279 0.89667 0.89459 41752\n", "weighted avg 0.90123 0.90063 0.90081 41752\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### fastformer-tiny\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " negative 0.92475 0.92690 0.92583 15459\n", " neutral 0.90404 0.93441 0.91897 16938\n", " positive 0.89086 0.83324 0.86109 9355\n", "\n", " accuracy 0.90896 41752\n", " macro avg 0.90655 0.89819 0.90196 41752\n", "weighted avg 0.90875 0.90896 0.90854 41752\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Similarity\n", "\n", "Trained on 80% of the dataset and tested on the remaining 20%. A link to download the dataset is available inside the notebooks.
All training sessions are stored in [session/similarity](https://github.com/huseinzol05/Malaya/tree/master/session/similarity)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### bert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " not similar 0.91813 0.86843 0.89259 114935\n", " similar 0.84816 0.90468 0.87551 93371\n", "\n", " accuracy 0.88468 208306\n", " macro avg 0.88315 0.88656 0.88405 208306\n", "weighted avg 0.88677 0.88468 0.88493 208306\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### tiny-bert\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " not similar 0.90845 0.85704 0.88200 114843\n", " similar 0.83576 0.89387 0.86384 93463\n", "\n", " accuracy 0.87357 208306\n", " macro avg 0.87210 0.87546 0.87292 208306\n", "weighted avg 0.87583 0.87357 0.87385 208306\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " not similar 0.88351 0.88549 0.88450 114523\n", " similar 0.85978 0.85743 0.85860 93783\n", "\n", " accuracy 0.87286 208306\n", " macro avg 0.87164 0.87146 0.87155 208306\n", "weighted avg 0.87283 0.87286 0.87284 208306\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-tiny\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " not similar 0.84881 0.82946 0.83902 114914\n", " similar 0.79588 0.81821 0.80689 93392\n", "\n", " accuracy 0.82441 208306\n", " macro avg 0.82234 0.82383 0.82295 208306\n", "weighted avg 0.82508 0.82441 0.82461 208306\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### xlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " not similar 0.74384 0.92845 0.82596 114854\n", " similar 0.87347 0.60705 0.71629 93452\n", "\n", " accuracy 0.78426 208306\n", " macro avg 0.80866 0.76775 0.77112
208306\n", "weighted avg 0.80200 0.78426 0.77676 208306\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### alxlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " not similar 0.89614 0.90170 0.89891 114554\n", " similar 0.87897 0.87231 0.87563 93752\n", "\n", " accuracy 0.88847 208306\n", " macro avg 0.88756 0.88700 0.88727 208306\n", "weighted avg 0.88841 0.88847 0.88843 208306\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Subjectivity Analysis\n", "\n", "Trained on 80% of the dataset and tested on the remaining 20%. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/subjectivity](https://github.com/huseinzol05/Malaya/tree/master/session/subjectivity)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### multinomial\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " negative 0.91527 0.87238 0.89331 1003\n", " positive 0.87657 0.91818 0.89689 990\n", "\n", " accuracy 0.89513 1993\n", " macro avg 0.89592 0.89528 0.89510 1993\n", "weighted avg 0.89605 0.89513 0.89509 1993\n", "\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### bert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " negative 0.87825 0.96429 0.91926 980\n", " positive 0.96183 0.87068 0.91399 1013\n", "\n", " accuracy 0.91671 1993\n", " macro avg 0.92004 0.91748 0.91663 1993\n", "weighted avg 0.92073 0.91671 0.91658 1993\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### tiny-bert\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " negative 0.95678 0.84086 0.89508 974\n", " positive 0.86368 0.96369 0.91095 1019\n", "\n", " accuracy 0.90366 1993\n", " macro avg 0.91023 0.90228 0.90301 1993\n", "weighted avg 0.90917 0.90366 0.90319 1993\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [
"#### albert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " negative 0.87616 0.94006 0.90699 1001\n", " positive 0.93471 0.86593 0.89901 992\n", "\n", " accuracy 0.90316 1993\n", " macro avg 0.90544 0.90299 0.90300 1993\n", "weighted avg 0.90531 0.90316 0.90301 1993\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-tiny\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " negative 0.90070 0.89184 0.89625 1017\n", " positive 0.88844 0.89754 0.89297 976\n", "\n", " accuracy 0.89463 1993\n", " macro avg 0.89457 0.89469 0.89461 1993\n", "weighted avg 0.89469 0.89463 0.89464 1993\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### xlnet-base\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " negative 0.89613 0.94616 0.92047 1003\n", " positive 0.94218 0.88889 0.91476 990\n", "\n", " accuracy 0.91771 1993\n", " macro avg 0.91916 0.91753 0.91761 1993\n", "weighted avg 0.91901 0.91771 0.91763 1993\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### alxlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " negative 0.89258 0.92604 0.90900 987\n", " positive 0.92466 0.89066 0.90734 1006\n", "\n", " accuracy 0.90818 1993\n", " macro avg 0.90862 0.90835 0.90817 1993\n", "weighted avg 0.90877 0.90818 0.90816 1993\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Toxicity Analysis\n", "\n", "Trained on 80% of dataset, tested on 20% of dataset. Link to download dataset available inside the notebooks. All training sessions stored in [session/toxic](https://github.com/huseinzol05/Malaya/tree/master/session/toxic)." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### multinomial\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " severe toxic 0.32096 0.99468 0.48532 9955\n", " obscene 0.06031 0.68096 0.11081 2799\n", " identity attack 0.03312 0.60086 0.06277 1393\n", " insult 0.15655 0.69002 0.25519 12575\n", " threat 0.00661 0.11058 0.01247 416\n", " asian 0.00087 0.01799 0.00166 389\n", " atheist 0.00137 0.04494 0.00266 178\n", " bisexual 0.00052 0.08333 0.00104 24\n", " buddhist 0.00000 0.00000 0.00000 45\n", " christian 0.13652 0.86153 0.23570 4622\n", " female 0.12714 0.78073 0.21867 6891\n", " heterosexual 0.00153 0.06299 0.00299 127\n", " indian 0.14732 0.97509 0.25597 4014\n", " homosexual, gay or lesbian 0.04442 0.45581 0.08095 1369\n", "intellectual or learning disability 0.00000 0.00000 0.00000 6\n", " male 0.08106 0.58298 0.14233 4947\n", " muslim 0.07845 0.59531 0.13863 2602\n", " other disability 0.00000 0.00000 0.00000 0\n", " other gender 0.00000 0.00000 0.00000 2\n", " other race or ethnicity 0.00000 0.00000 0.00000 7\n", " other religion 0.00000 0.00000 0.00000 8\n", " other sexual orientation 0.00000 0.00000 0.00000 1\n", " physical disability 0.00000 0.00000 0.00000 2\n", " psychiatric or mental illness 0.00720 0.09651 0.01340 601\n", " transgender 0.00249 0.06608 0.00481 227\n", " malay 0.54919 0.99337 0.70733 17044\n", " chinese 0.29545 0.99079 0.45517 8793\n", "\n", " micro avg 0.14989 0.82799 0.25383 79037\n", " macro avg 0.07597 0.35869 0.11807 79037\n", " weighted avg 0.25444 0.82799 0.37086 79037\n", " samples avg 0.07772 0.16003 0.09295 79037\n", " \n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### bert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " severe toxic 0.85194 0.65179 0.73854 9790\n", " obscene 0.63710 0.41623 0.50351 2847\n", " identity attack 0.63238 0.29603 0.40328 1412\n", " insult 0.71381 0.56111 0.62832 12673\n", " threat 0.56707 
0.22850 0.32574 407\n", " asian 0.54394 0.56965 0.55650 402\n", " atheist 0.80097 0.96491 0.87533 171\n", " bisexual 1.00000 0.51852 0.68293 27\n", " buddhist 0.60938 0.90698 0.72897 43\n", " christian 0.86376 0.86044 0.86210 4679\n", " female 0.88242 0.90816 0.89510 6925\n", " heterosexual 0.67073 0.81481 0.73579 135\n", " indian 0.95325 0.88580 0.91829 4028\n", " homosexual, gay or lesbian 0.88355 0.92161 0.90218 1416\n", "intellectual or learning disability 0.00000 0.00000 0.00000 6\n", " male 0.75975 0.59414 0.66682 5019\n", " muslim 0.87416 0.89385 0.88390 2619\n", " other disability 0.00000 0.00000 0.00000 0\n", " other gender 0.00000 0.00000 0.00000 0\n", " other race or ethnicity 0.00000 0.00000 0.00000 11\n", " other religion 0.14286 0.09091 0.11111 11\n", " other sexual orientation 0.00000 0.00000 0.00000 0\n", " physical disability 0.00000 0.00000 0.00000 6\n", " psychiatric or mental illness 0.60000 0.81588 0.69148 592\n", " transgender 0.79012 0.87671 0.83117 219\n", " malay 0.96219 0.96486 0.96352 16987\n", " chinese 0.94062 0.90214 0.92098 8727\n", "\n", " micro avg 0.86098 0.77313 0.81469 79152\n", " macro avg 0.58074 0.54233 0.54909 79152\n", " weighted avg 0.84966 0.77313 0.80502 79152\n", " samples avg 0.15924 0.15441 0.15445 79152\n", " \n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### tiny-bert\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " severe toxic 0.77495 0.77346 0.77421 9857\n", " obscene 0.62343 0.41033 0.49492 2788\n", " identity attack 0.55057 0.34761 0.42616 1378\n", " insult 0.69412 0.56324 0.62187 12659\n", " threat 0.60825 0.13170 0.21651 448\n", " asian 0.66667 0.47478 0.55459 337\n", " atheist 0.85784 0.92593 0.89059 189\n", " bisexual 1.00000 0.05263 0.10000 19\n", " buddhist 0.63043 0.67442 0.65169 43\n", " christian 0.79541 0.89441 0.84201 4612\n", " female 0.85257 0.92515 0.88738 6907\n", " heterosexual 0.67785 0.78295 0.72662 129\n", " indian 0.94898 0.87673 0.91143 
3967\n", " homosexual, gay or lesbian 0.88188 0.92275 0.90185 1424\n", "intellectual or learning disability 0.00000 0.00000 0.00000 5\n", " male 0.70644 0.64640 0.67509 4918\n", " muslim 0.81178 0.94261 0.87232 2544\n", " other disability 0.00000 0.00000 0.00000 0\n", " other gender 0.00000 0.00000 0.00000 0\n", " other race or ethnicity 0.00000 0.00000 0.00000 7\n", " other religion 0.00000 0.00000 0.00000 9\n", " other sexual orientation 0.00000 0.00000 0.00000 2\n", " physical disability 0.00000 0.00000 0.00000 4\n", " psychiatric or mental illness 0.67727 0.76410 0.71807 585\n", " transgender 0.80090 0.84689 0.82326 209\n", " malay 0.95652 0.97334 0.96486 16839\n", " chinese 0.96350 0.88984 0.92521 8869\n", "\n", " micro avg 0.83535 0.79611 0.81526 78748\n", " macro avg 0.57331 0.51182 0.51773 78748\n", " weighted avg 0.82603 0.79611 0.80692 78748\n", " samples avg 0.15765 0.15682 0.15490 78748\n", " \n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " severe toxic 0.79715 0.71003 0.75107 9863\n", " obscene 0.64770 0.38489 0.48285 2780\n", " identity attack 0.65517 0.27496 0.38736 1382\n", " insult 0.73404 0.49344 0.59016 12652\n", " threat 0.68478 0.14754 0.24277 427\n", " asian 0.67557 0.48361 0.56369 366\n", " atheist 0.85149 0.91489 0.88205 188\n", " bisexual 0.93750 0.62500 0.75000 24\n", " buddhist 0.55556 0.33333 0.41667 45\n", " christian 0.84738 0.87439 0.86068 4737\n", " female 0.88191 0.91253 0.89696 6997\n", " heterosexual 0.76812 0.76812 0.76812 138\n", " indian 0.92663 0.91164 0.91907 4142\n", " homosexual, gay or lesbian 0.89547 0.92446 0.90973 1390\n", "intellectual or learning disability 0.00000 0.00000 0.00000 7\n", " male 0.73157 0.61368 0.66746 5014\n", " muslim 0.86958 0.87620 0.87288 2496\n", " other disability 0.00000 0.00000 0.00000 0\n", " other gender 0.00000 0.00000 0.00000 1\n", " other race or ethnicity 0.00000 0.00000 
0.00000 11\n", " other religion 0.00000 0.00000 0.00000 9\n", " other sexual orientation 0.00000 0.00000 0.00000 1\n", " physical disability 0.00000 0.00000 0.00000 1\n", " psychiatric or mental illness 0.65781 0.72131 0.68810 549\n", " transgender 0.76995 0.84536 0.80590 194\n", " malay 0.98510 0.94072 0.96240 16869\n", " chinese 0.90845 0.95077 0.92913 8694\n", "\n", " micro avg 0.86054 0.76973 0.81261 78977\n", " macro avg 0.58448 0.50766 0.53137 78977\n", " weighted avg 0.84634 0.76973 0.79982 78977\n", " samples avg 0.15569 0.15257 0.15179 78977\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-tiny\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " severe toxic 0.78533 0.72620 0.75460 9788\n", " obscene 0.67641 0.33796 0.45072 2808\n", " identity attack 0.66042 0.22988 0.34104 1379\n", " insult 0.74085 0.47457 0.57854 12662\n", " threat 0.52941 0.02153 0.04138 418\n", " asian 0.65027 0.29975 0.41034 397\n", " atheist 0.85882 0.82022 0.83908 178\n", " bisexual 1.00000 0.03125 0.06061 32\n", " buddhist 0.73333 0.26190 0.38596 42\n", " christian 0.87017 0.84438 0.85708 4723\n", " female 0.85865 0.92302 0.88967 6963\n", " heterosexual 0.76147 0.70339 0.73128 118\n", " indian 0.93209 0.90115 0.91636 4097\n", " homosexual, gay or lesbian 0.89625 0.89690 0.89658 1387\n", "intellectual or learning disability 0.00000 0.00000 0.00000 7\n", " male 0.68679 0.62619 0.65509 4941\n", " muslim 0.86187 0.87102 0.86642 2543\n", " other disability 0.00000 0.00000 0.00000 0\n", " other gender 0.00000 0.00000 0.00000 0\n", " other race or ethnicity 0.00000 0.00000 0.00000 8\n", " other religion 0.00000 0.00000 0.00000 8\n", " other sexual orientation 0.00000 0.00000 0.00000 0\n", " physical disability 0.00000 0.00000 0.00000 1\n", " psychiatric or mental illness 0.74208 0.57243 0.64631 573\n", " transgender 0.79327 0.76037 0.77647 217\n", " malay 0.99392 0.93774 0.96501 16896\n", " chinese 0.89948 0.96317 0.93024 
8770\n", "\n", " micro avg 0.85977 0.76240 0.80816 78956\n", " macro avg 0.59003 0.45196 0.48121 78956\n", " weighted avg 0.84448 0.76240 0.79239 78956\n", " samples avg 0.15465 0.15102 0.15056 78956\n", "\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### xlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score support\n", "\n", " severe toxic 0.76274 0.78363 0.77305 10006\n", " obscene 0.50862 0.52366 0.51603 2874\n", " identity attack 0.40349 0.52707 0.45707 1404\n", " insult 0.58435 0.70709 0.63989 12717\n", " threat 0.29885 0.46547 0.36400 391\n", " asian 0.41160 0.74425 0.53005 391\n", " atheist 0.78571 0.96175 0.86486 183\n", " bisexual 0.54545 0.72000 0.62069 25\n", " buddhist 0.54054 0.80000 0.64516 50\n", " christian 0.73638 0.92561 0.82022 4584\n", " female 0.87304 0.92314 0.89739 6935\n", " heterosexual 0.70130 0.81818 0.75524 132\n", " indian 0.92564 0.91477 0.92018 4001\n", " homosexual, gay or lesbian 0.84066 0.93236 0.88414 1375\n", "intellectual or learning disability 0.10526 0.50000 0.17391 4\n", " male 0.71216 0.65484 0.68230 5044\n", " muslim 0.83993 0.92537 0.88058 2546\n", " other disability 0.00000 0.00000 0.00000 0\n", " other gender 0.00000 0.00000 0.00000 0\n", " other race or ethnicity 0.00000 0.00000 0.00000 9\n", " other religion 0.15625 0.71429 0.25641 7\n", " other sexual orientation 0.00000 0.00000 0.00000 0\n", " physical disability 0.05556 0.33333 0.09524 3\n", " psychiatric or mental illness 0.57323 0.86678 0.69008 578\n", " transgender 0.76557 0.90086 0.82772 232\n", " malay 0.95376 0.97807 0.96576 17103\n", " chinese 0.94832 0.90540 0.92636 8837\n", "\n", " micro avg 0.77904 0.83829 0.80758 79431\n", " macro avg 0.51957 0.64911 0.56246 79431\n", " weighted avg 0.79150 0.83829 0.81220 79431\n", " samples avg 0.16354 0.16681 0.16247 79431\n", " \n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### alxlnet-base\n", "\n", "```text\n", "\n", " precision recall f1-score 
support\n", "\n", " severe toxic 0.81360 0.70005 0.75257 9795\n", " obscene 0.55653 0.49517 0.52406 2694\n", " identity attack 0.50581 0.34938 0.41329 1371\n", " insult 0.68040 0.60330 0.63953 12672\n", " threat 0.40360 0.35123 0.37560 447\n", " asian 0.66192 0.49077 0.56364 379\n", " atheist 0.87151 0.92308 0.89655 169\n", " bisexual 0.87500 0.60870 0.71795 23\n", " buddhist 0.60417 0.63043 0.61702 46\n", " christian 0.88051 0.84715 0.86351 4619\n", " female 0.86958 0.92578 0.89680 6979\n", " heterosexual 0.75000 0.82051 0.78367 117\n", " indian 0.96407 0.87242 0.91596 4029\n", " homosexual, gay or lesbian 0.88719 0.93265 0.90935 1366\n", "intellectual or learning disability 0.21429 0.50000 0.30000 6\n", " male 0.72205 0.65407 0.68638 4897\n", " muslim 0.82114 0.94992 0.88085 2576\n", " other disability 0.00000 0.00000 0.00000 0\n", " other gender 0.00000 0.00000 0.00000 1\n", " other race or ethnicity 0.00000 0.00000 0.00000 9\n", " other religion 0.00000 0.00000 0.00000 6\n", " other sexual orientation 0.00000 0.00000 0.00000 2\n", " physical disability 0.00000 0.00000 0.00000 1\n", " psychiatric or mental illness 0.57044 0.87589 0.69091 564\n", " transgender 0.71756 0.88679 0.79325 212\n", " malay 0.95141 0.97619 0.96364 17051\n", " chinese 0.92615 0.92417 0.92516 8888\n", "\n", " micro avg 0.83376 0.80221 0.81768 78919\n", " macro avg 0.56470 0.56732 0.55962 78919\n", " weighted avg 0.82757 0.80221 0.81282 78919\n", " samples avg 0.16116 0.15980 0.15799 78919\n", " \n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Entities Recognition Ontonotes5\n", "\n", "Trained on 80% of the dataset and tested on the remaining 20%. A link to download the dataset is available inside the notebooks. All training sessions are stored in [session/entities-ontonotes5](https://github.com/huseinzol05/Malaya/tree/master/session/entities-ontonotes5)."
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### bert-base\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " ADDRESS 0.99858 0.99974 0.99916 93446\n", " CARDINAL 0.93840 0.90631 0.92207 48255\n", " DATE 0.95490 0.93656 0.94564 126548\n", " EVENT 0.92876 0.93591 0.93232 5711\n", " FAC 0.93271 0.92658 0.92964 27392\n", " GPE 0.93437 0.94852 0.94139 101357\n", " LANGUAGE 0.93478 0.96389 0.94911 803\n", " LAW 0.94824 0.95744 0.95281 24834\n", " LOC 0.94148 0.93213 0.93678 34538\n", " MONEY 0.87803 0.87563 0.87683 30032\n", " NORP 0.95516 0.90446 0.92912 57014\n", " ORDINAL 0.91510 0.91083 0.91296 6213\n", " ORG 0.92453 0.95354 0.93881 219533\n", " OTHER 0.99135 0.99308 0.99221 3553350\n", " PAD 0.99956 1.00000 0.99978 1292421\n", " PERCENT 0.96287 0.96814 0.96550 21722\n", " PERSON 0.97376 0.93891 0.95602 101981\n", " PRODUCT 0.87537 0.81769 0.84555 11124\n", " QUANTITY 0.94385 0.92483 0.93424 11614\n", " TIME 0.91912 0.90170 0.91033 9502\n", " WORK_OF_ART 0.93126 0.81978 0.87197 13800\n", " X 0.99906 0.99792 0.99849 1350434\n", "\n", " accuracy 0.98821 7141624\n", " macro avg 0.94460 0.93244 0.93822 7141624\n", "weighted avg 0.98821 0.98821 0.98818 7141624\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### tiny-bert\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " ADDRESS 0.99501 0.99981 0.99740 93446\n", " CARDINAL 0.93442 0.87581 0.90416 48255\n", " DATE 0.93723 0.92710 0.93214 126548\n", " EVENT 0.78758 0.93942 0.85682 5711\n", " FAC 0.91859 0.91403 0.91630 27392\n", " GPE 0.92833 0.93455 0.93143 101357\n", " LANGUAGE 0.90220 0.81569 0.85677 803\n", " LAW 0.92771 0.95289 0.94013 24834\n", " LOC 0.92497 0.91983 0.92239 34538\n", " MONEY 0.84986 0.85362 0.85174 30032\n", " NORP 0.93555 0.89741 0.91608 57014\n", " ORDINAL 0.86050 0.92435 0.89129 6213\n", " ORG 0.93290 0.93551 0.93420 219533\n", " OTHER 0.99018 0.99121 0.99070 3553350\n", " PAD 0.99956 1.00000 0.99978 1292421\n", " 
PERCENT 0.95852 0.96165 0.96008 21722\n", " PERSON 0.93958 0.95846 0.94893 101981\n", " PRODUCT 0.86273 0.77742 0.81786 11124\n", " QUANTITY 0.90690 0.90839 0.90764 11614\n", " TIME 0.89077 0.89339 0.89208 9502\n", " WORK_OF_ART 0.83798 0.78145 0.80873 13800\n", " X 0.99872 0.99767 0.99819 1350434\n", "\n", " accuracy 0.98592 7141624\n", " macro avg 0.91908 0.91635 0.91704 7141624\n", "weighted avg 0.98590 0.98592 0.98589 7141624\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-base\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " ADDRESS 0.99832 0.99969 0.99901 93446\n", " CARDINAL 0.93291 0.89046 0.91119 48255\n", " DATE 0.94941 0.93032 0.93977 126548\n", " EVENT 0.89330 0.92506 0.90890 5711\n", " FAC 0.92540 0.91257 0.91894 27392\n", " GPE 0.93484 0.93404 0.93444 101357\n", " LANGUAGE 0.87207 0.92528 0.89789 803\n", " LAW 0.95567 0.95140 0.95353 24834\n", " LOC 0.91754 0.92362 0.92057 34538\n", " MONEY 0.85349 0.87696 0.86507 30032\n", " NORP 0.91698 0.91416 0.91557 57014\n", " ORDINAL 0.89159 0.93320 0.91192 6213\n", " ORG 0.95070 0.92537 0.93786 219533\n", " OTHER 0.99000 0.99315 0.99157 3553350\n", " PAD 1.00000 1.00000 1.00000 1291289\n", " PERCENT 0.96746 0.95953 0.96348 21722\n", " PERSON 0.93748 0.96710 0.95206 101981\n", " PRODUCT 0.84749 0.77832 0.81143 11124\n", " QUANTITY 0.92798 0.92079 0.92437 11614\n", " TIME 0.91058 0.88413 0.89716 9502\n", " WORK_OF_ART 0.88983 0.77196 0.82671 13800\n", " X 0.99917 0.99799 0.99858 1351758\n", "\n", " accuracy 0.98714 7141816\n", " macro avg 0.93010 0.92341 0.92636 7141816\n", "weighted avg 0.98707 0.98714 0.98707 7141816\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### albert-tiny\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " ADDRESS 0.99322 0.99706 0.99513 93446\n", " CARDINAL 0.89708 0.84719 0.87142 48255\n", " DATE 0.92805 0.90984 0.91886 126548\n", " EVENT 0.82915 0.90247 0.86426 5711\n", " FAC 0.93366 
0.84317 0.88611 27392\n", " GPE 0.90096 0.91216 0.90653 101357\n", " LANGUAGE 0.87336 0.74720 0.80537 803\n", " LAW 0.91511 0.91677 0.91594 24834\n", " LOC 0.90699 0.89105 0.89895 34538\n", " MONEY 0.83930 0.84377 0.84152 30032\n", " NORP 0.88694 0.85367 0.86999 57014\n", " ORDINAL 0.82854 0.88041 0.85369 6213\n", " ORG 0.91629 0.89916 0.90764 219533\n", " OTHER 0.98521 0.98937 0.98728 3553350\n", " PAD 1.00000 1.00000 1.00000 1291289\n", " PERCENT 0.95564 0.95898 0.95731 21722\n", " PERSON 0.90445 0.93733 0.92060 101981\n", " PRODUCT 0.79770 0.72456 0.75937 11124\n", " QUANTITY 0.88216 0.86310 0.87252 11614\n", " TIME 0.84045 0.86203 0.85110 9502\n", " WORK_OF_ART 0.85491 0.64130 0.73286 13800\n", " X 0.99631 0.99466 0.99549 1351758\n", "\n", " accuracy 0.98068 7141816\n", " macro avg 0.90298 0.88251 0.89145 7141816\n", "weighted avg 0.98053 0.98068 0.98054 7141816\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### xlnet-base\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " ADDRESS 0.99908 0.99993 0.99950 93446\n", " CARDINAL 0.93228 0.92861 0.93044 48255\n", " DATE 0.95220 0.95546 0.95383 126548\n", " EVENT 0.90646 0.95535 0.93026 5711\n", " FAC 0.94217 0.93432 0.93823 27392\n", " GPE 0.95861 0.94860 0.95358 101357\n", " LANGUAGE 0.91076 0.99128 0.94931 803\n", " LAW 0.93475 0.96392 0.94911 24834\n", " LOC 0.92387 0.94305 0.93336 34538\n", " MONEY 0.85448 0.93027 0.89077 30032\n", " NORP 0.95467 0.92540 0.93981 57014\n", " ORDINAL 0.89995 0.95847 0.92829 6213\n", " ORG 0.94905 0.95571 0.95237 219533\n", " OTHER 0.99394 0.99254 0.99324 3553350\n", " PAD 0.99992 1.00000 0.99996 1292031\n", " PERCENT 0.97215 0.96423 0.96817 21722\n", " PERSON 0.96138 0.97204 0.96668 101981\n", " PRODUCT 0.88197 0.83028 0.85534 11124\n", " QUANTITY 0.93301 0.95695 0.94483 11614\n", " TIME 0.90852 0.91454 0.91152 9502\n", " WORK_OF_ART 0.87106 0.88457 0.87776 13800\n", " X 0.99879 0.99898 0.99889 1349384\n", "\n", " accuracy 0.98994 
7140184\n", " macro avg 0.93814 0.95021 0.94388 7140184\n", "weighted avg 0.99001 0.98994 0.98996 7140184\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### alxlnet-base\n", "\n", "```text\n", " precision recall f1-score support\n", "\n", " ADDRESS 0.99949 0.99981 0.99965 93446\n", " CARDINAL 0.92765 0.91402 0.92078 48255\n", " DATE 0.95309 0.93386 0.94338 126548\n", " EVENT 0.88426 0.93241 0.90770 5711\n", " FAC 0.92367 0.92991 0.92678 27392\n", " GPE 0.93880 0.95315 0.94592 101357\n", " LANGUAGE 0.84296 0.90909 0.87478 803\n", " LAW 0.95472 0.95091 0.95281 24834\n", " LOC 0.92551 0.92953 0.92751 34538\n", " MONEY 0.86719 0.87403 0.87060 30032\n", " NORP 0.95470 0.89518 0.92398 57014\n", " ORDINAL 0.86582 0.94721 0.90469 6213\n", " ORG 0.95300 0.93138 0.94206 219533\n", " OTHER 0.99080 0.99334 0.99206 3553350\n", " PAD 0.99992 1.00000 0.99996 1292031\n", " PERCENT 0.96856 0.96851 0.96853 21722\n", " PERSON 0.94616 0.96716 0.95655 101981\n", " PRODUCT 0.87820 0.79333 0.83361 11124\n", " QUANTITY 0.94752 0.91872 0.93290 11614\n", " TIME 0.90322 0.90949 0.90635 9502\n", " WORK_OF_ART 0.88971 0.79732 0.84098 13800\n", " X 0.99883 0.99891 0.99887 1349384\n", "\n", " accuracy 0.98816 7140184\n", " macro avg 0.93244 0.92942 0.93047 7140184\n", "weighted avg 0.98810 0.98816 0.98810 7140184\n", "```" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 2 }