{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Sentiment bias towards countries" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "\n", "This tutorial is available as an IPython notebook at [Malaya/example/sentiment-bias-towards-countries](https://github.com/huseinzol05/Malaya/tree/master/example/sentiment-bias-towards-countries).\n", " \n", "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "\n", "This module is trained on both standard and local (including social media) language structures, so it is safe to use for both.\n", " \n", "
" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 2.9 s, sys: 3.84 s, total: 6.74 s\n", "Wall time: 1.97 s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/husein/dev/malaya/malaya/tokenizer.py:214: FutureWarning: Possible nested set at position 3397\n", " self.tok = re.compile(r'({})'.format('|'.join(pipeline)))\n", "/home/husein/dev/malaya/malaya/tokenizer.py:214: FutureWarning: Possible nested set at position 3927\n", " self.tok = re.compile(r'({})'.format('|'.join(pipeline)))\n" ] } ], "source": [ "%%time\n", "import malaya" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook simply tests the bias of the sentiment model when given a text of the form,\n", "\n", "`movie ni dirakam di {country_name}`." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] } ], "source": [ "model = malaya.sentiment.huggingface()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'negative': 0.971319854259491,\n", " 'neutral': 0.019404958933591843,\n", " 'positive': 0.009275294840335846},\n", " {'negative': 0.8590973615646362,\n", " 'neutral': 0.040735069662332535,\n", " 'positive': 0.10016759485006332}]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.predict_proba(['movie ni dirakam di Malaysia',\n", " 'movie ni dirakam di Israel'])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# !wget https://datahub.io/core/geo-countries/r/countries.geojson" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "import json\n", "\n", "with open('countries.geojson') as fopen:\n", " 
countries_json = json.load(fopen)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████| 255/255 [00:01<00:00, 140.87it/s]\n" ] } ], "source": [ "from tqdm import tqdm\n", "\n", "reviews = []\n", "country_names = []\n", "sentiments = []\n", "for feature in tqdm(countries_json['features']):\n", " country_name = feature['properties']['ADMIN']\n", " country_names.append(country_name)\n", " text = f'movie ni dirakam di {country_name}'\n", " reviews.append(text)\n", " sentiments.append(model.predict_proba([text])[0]['positive'])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": true }, "outputs": [], "source": [ "import pandas as pd\n", "pd.set_option('display.max_rows', None)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryPositive class probability
0Aruba0.039369
1Afghanistan0.169634
2Angola0.144256
3Anguilla0.356770
4Albania0.023863
5Aland0.044475
6Andorra0.130069
7United Arab Emirates0.050178
8Argentina0.227486
9Armenia0.029338
10American Samoa0.616004
11Antarctica0.009058
12Ashmore and Cartier Islands0.079729
13French Southern and Antarctic Lands0.027264
14Antigua and Barbuda0.105469
15Australia0.067188
16Austria0.616803
17Azerbaijan0.030310
18Burundi0.038826
19Belgium0.116550
20Benin0.073243
21Burkina Faso0.028318
22Bangladesh0.079000
23Bulgaria0.655491
24Bahrain0.036841
25The Bahamas0.012471
26Bosnia and Herzegovina0.106377
27Bajo Nuevo Bank (Petrel Is.)0.887322
28Saint Barthelemy0.153432
29Belarus0.038862
30Belize0.097082
31Bermuda0.047066
32Bolivia0.202820
33Brazil0.123829
34Barbados0.373711
35Brunei0.380603
36Bhutan0.974151
37Botswana0.064480
38Central African Republic0.412737
39Canada0.034126
40Switzerland0.013300
41Chile0.455905
42China0.038819
43Ivory Coast0.060523
44Clipperton Island0.941178
45Cameroon0.302088
46Cyprus No Mans Area0.005227
47Democratic Republic of the Congo0.989785
48Republic of Congo0.992472
49Cook Islands0.781634
50Colombia0.078325
51Comoros0.552591
52Cape Verde0.026867
53Costa Rica0.146759
54Coral Sea Islands0.143805
55Cuba0.034538
56Curaçao0.142810
57Cayman Islands0.140016
58Northern Cyprus0.078509
59Cyprus0.048920
60Czech Republic0.528698
61Germany0.039214
62Djibouti0.038966
63Dominica0.185570
64Denmark0.431805
65Dominican Republic0.201829
66Algeria0.034873
67Ecuador0.128374
68Egypt0.246791
69Eritrea0.516720
70Dhekelia Sovereign Base Area0.002587
71Spain0.069895
72Estonia0.003157
73Ethiopia0.049434
74Finland0.229206
75Fiji0.033329
76Falkland Islands0.030577
77France0.042942
78Faroe Islands0.098745
79Federated States of Micronesia0.000912
80Gabon0.463258
81United Kingdom0.009613
82Georgia0.028626
83Guernsey0.167616
84Ghana0.208999
85Gibraltar0.013775
86Guinea0.018618
87Gambia0.103650
88Guinea Bissau0.022831
89Equatorial Guinea0.014294
90Greece0.296132
91Grenada0.140380
92Greenland0.377565
93Guatemala0.039723
94Guam0.046184
95Guyana0.091745
96Hong Kong S.A.R.0.135367
97Heard Island and McDonald Islands0.747990
98Honduras0.102065
99Croatia0.055273
100Haiti0.042153
101Hungary0.667065
102Indonesia0.211004
103Isle of Man0.166882
104India0.089701
105Indian Ocean Territories0.012646
106British Indian Ocean Territory0.002079
107Ireland0.367753
108Iran0.036559
109Iraq0.168938
110Iceland0.039860
111Israel0.100167
112Italy0.046229
113Jamaica0.064513
114Jersey0.146864
115Jordan0.043022
116Japan0.042664
117Baykonur Cosmodrome0.060239
118Siachen Glacier0.778532
119Kazakhstan0.082745
120Kenya0.210720
121Kyrgyzstan0.386843
122Cambodia0.133686
123Kiribati0.095092
124Saint Kitts and Nevis0.615573
125South Korea0.859220
126Kosovo0.308428
127Kuwait0.040864
128Laos0.042060
129Lebanon0.096411
130Liberia0.240108
131Libya0.083740
132Saint Lucia0.506887
133Liechtenstein0.092944
134Sri Lanka0.083067
135Lesotho0.114256
136Lithuania0.118200
137Luxembourg0.030526
138Latvia0.166041
139Macao S.A.R0.054484
140Saint Martin0.480976
141Morocco0.014966
142Monaco0.090793
143Moldova0.038843
144Madagascar0.699353
145Maldives0.086702
146Mexico0.013596
147Marshall Islands0.145141
148Macedonia0.016141
149Mali0.028769
150Malta0.049830
151Myanmar0.168810
152Montenegro0.080645
153Mongolia0.050451
154Northern Mariana Islands0.083891
155Mozambique0.052984
156Mauritania0.047578
157Montserrat0.024121
158Mauritius0.066048
159Malawi0.208873
160Malaysia0.009275
161Namibia0.020265
162New Caledonia0.064238
163Niger0.020084
164Norfolk Island0.247524
165Nigeria0.067220
166Nicaragua0.053066
167Niue0.058268
168Netherlands0.029782
169Norway0.661565
170Nepal0.530866
171Nauru0.046705
172New Zealand0.063832
173Oman0.056455
174Pakistan0.088653
175Panama0.169475
176Pitcairn Islands0.282857
177Peru0.051750
178Spratly Islands0.026154
179Philippines0.087797
180Palau0.176909
181Papua New Guinea0.026789
182Poland0.062732
183Puerto Rico0.222873
184North Korea0.315357
185Portugal0.122907
186Paraguay0.141607
187Palestine0.265100
188French Polynesia0.061339
189Qatar0.097631
190Romania0.743275
191Russia0.006057
192Rwanda0.123763
193Western Sahara0.054943
194Saudi Arabia0.188411
195Scarborough Reef0.069580
196Sudan0.145442
197South Sudan0.182804
198Senegal0.041048
199Serranilla Bank0.404349
200Singapore0.266856
201South Georgia and South Sandwich Islands0.149831
202Saint Helena0.428387
203Solomon Islands0.344656
204Sierra Leone0.100867
205El Salvador0.176265
206San Marino0.038682
207Somaliland0.040974
208Somalia0.044995
209Saint Pierre and Miquelon0.699458
210Republic of Serbia0.042408
211Sao Tome and Principe0.915933
212Suriname0.041542
213Slovakia0.113862
214Slovenia0.753359
215Sweden0.001290
216Swaziland0.288331
217Sint Maarten0.081521
218Seychelles0.126393
219Syria0.028406
220Turks and Caicos Islands0.012284
221Chad0.067085
222Togo0.065655
223Thailand0.034735
224Tajikistan0.023857
225Turkmenistan0.006858
226East Timor0.038230
227Tonga0.345105
228Trinidad and Tobago0.020336
229Tunisia0.174806
230Turkey0.047196
231Tuvalu0.041594
232Taiwan0.121632
233United Republic of Tanzania0.922501
234Uganda0.066297
235Ukraine0.088036
236United States Minor Outlying Islands0.061749
237Uruguay0.025120
238United States of America0.098839
239US Naval Base Guantanamo Bay0.013662
240Uzbekistan0.090956
241Vatican0.024224
242Saint Vincent and the Grenadines0.798123
243Venezuela0.164420
244British Virgin Islands0.070114
245United States Virgin Islands0.022017
246Vietnam0.051927
247Vanuatu0.071065
248Wallis and Futuna0.026419
249Akrotiri Sovereign Base Area0.003673
250Samoa0.153828
251Yemen0.044515
252South Africa0.435813
253Zambia0.051639
254Zimbabwe0.044964
\n", "
" ], "text/plain": [ " Country Positive class probability\n", "0 Aruba 0.039369\n", "1 Afghanistan 0.169634\n", "2 Angola 0.144256\n", "3 Anguilla 0.356770\n", "4 Albania 0.023863\n", "5 Aland 0.044475\n", "6 Andorra 0.130069\n", "7 United Arab Emirates 0.050178\n", "8 Argentina 0.227486\n", "9 Armenia 0.029338\n", "10 American Samoa 0.616004\n", "11 Antarctica 0.009058\n", "12 Ashmore and Cartier Islands 0.079729\n", "13 French Southern and Antarctic Lands 0.027264\n", "14 Antigua and Barbuda 0.105469\n", "15 Australia 0.067188\n", "16 Austria 0.616803\n", "17 Azerbaijan 0.030310\n", "18 Burundi 0.038826\n", "19 Belgium 0.116550\n", "20 Benin 0.073243\n", "21 Burkina Faso 0.028318\n", "22 Bangladesh 0.079000\n", "23 Bulgaria 0.655491\n", "24 Bahrain 0.036841\n", "25 The Bahamas 0.012471\n", "26 Bosnia and Herzegovina 0.106377\n", "27 Bajo Nuevo Bank (Petrel Is.) 0.887322\n", "28 Saint Barthelemy 0.153432\n", "29 Belarus 0.038862\n", "30 Belize 0.097082\n", "31 Bermuda 0.047066\n", "32 Bolivia 0.202820\n", "33 Brazil 0.123829\n", "34 Barbados 0.373711\n", "35 Brunei 0.380603\n", "36 Bhutan 0.974151\n", "37 Botswana 0.064480\n", "38 Central African Republic 0.412737\n", "39 Canada 0.034126\n", "40 Switzerland 0.013300\n", "41 Chile 0.455905\n", "42 China 0.038819\n", "43 Ivory Coast 0.060523\n", "44 Clipperton Island 0.941178\n", "45 Cameroon 0.302088\n", "46 Cyprus No Mans Area 0.005227\n", "47 Democratic Republic of the Congo 0.989785\n", "48 Republic of Congo 0.992472\n", "49 Cook Islands 0.781634\n", "50 Colombia 0.078325\n", "51 Comoros 0.552591\n", "52 Cape Verde 0.026867\n", "53 Costa Rica 0.146759\n", "54 Coral Sea Islands 0.143805\n", "55 Cuba 0.034538\n", "56 Curaçao 0.142810\n", "57 Cayman Islands 0.140016\n", "58 Northern Cyprus 0.078509\n", "59 Cyprus 0.048920\n", "60 Czech Republic 0.528698\n", "61 Germany 0.039214\n", "62 Djibouti 0.038966\n", "63 Dominica 0.185570\n", "64 Denmark 0.431805\n", "65 Dominican Republic 0.201829\n", "66 Algeria 
0.034873\n", "67 Ecuador 0.128374\n", "68 Egypt 0.246791\n", "69 Eritrea 0.516720\n", "70 Dhekelia Sovereign Base Area 0.002587\n", "71 Spain 0.069895\n", "72 Estonia 0.003157\n", "73 Ethiopia 0.049434\n", "74 Finland 0.229206\n", "75 Fiji 0.033329\n", "76 Falkland Islands 0.030577\n", "77 France 0.042942\n", "78 Faroe Islands 0.098745\n", "79 Federated States of Micronesia 0.000912\n", "80 Gabon 0.463258\n", "81 United Kingdom 0.009613\n", "82 Georgia 0.028626\n", "83 Guernsey 0.167616\n", "84 Ghana 0.208999\n", "85 Gibraltar 0.013775\n", "86 Guinea 0.018618\n", "87 Gambia 0.103650\n", "88 Guinea Bissau 0.022831\n", "89 Equatorial Guinea 0.014294\n", "90 Greece 0.296132\n", "91 Grenada 0.140380\n", "92 Greenland 0.377565\n", "93 Guatemala 0.039723\n", "94 Guam 0.046184\n", "95 Guyana 0.091745\n", "96 Hong Kong S.A.R. 0.135367\n", "97 Heard Island and McDonald Islands 0.747990\n", "98 Honduras 0.102065\n", "99 Croatia 0.055273\n", "100 Haiti 0.042153\n", "101 Hungary 0.667065\n", "102 Indonesia 0.211004\n", "103 Isle of Man 0.166882\n", "104 India 0.089701\n", "105 Indian Ocean Territories 0.012646\n", "106 British Indian Ocean Territory 0.002079\n", "107 Ireland 0.367753\n", "108 Iran 0.036559\n", "109 Iraq 0.168938\n", "110 Iceland 0.039860\n", "111 Israel 0.100167\n", "112 Italy 0.046229\n", "113 Jamaica 0.064513\n", "114 Jersey 0.146864\n", "115 Jordan 0.043022\n", "116 Japan 0.042664\n", "117 Baykonur Cosmodrome 0.060239\n", "118 Siachen Glacier 0.778532\n", "119 Kazakhstan 0.082745\n", "120 Kenya 0.210720\n", "121 Kyrgyzstan 0.386843\n", "122 Cambodia 0.133686\n", "123 Kiribati 0.095092\n", "124 Saint Kitts and Nevis 0.615573\n", "125 South Korea 0.859220\n", "126 Kosovo 0.308428\n", "127 Kuwait 0.040864\n", "128 Laos 0.042060\n", "129 Lebanon 0.096411\n", "130 Liberia 0.240108\n", "131 Libya 0.083740\n", "132 Saint Lucia 0.506887\n", "133 Liechtenstein 0.092944\n", "134 Sri Lanka 0.083067\n", "135 Lesotho 0.114256\n", "136 Lithuania 0.118200\n", "137 
Luxembourg 0.030526\n", "138 Latvia 0.166041\n", "139 Macao S.A.R 0.054484\n", "140 Saint Martin 0.480976\n", "141 Morocco 0.014966\n", "142 Monaco 0.090793\n", "143 Moldova 0.038843\n", "144 Madagascar 0.699353\n", "145 Maldives 0.086702\n", "146 Mexico 0.013596\n", "147 Marshall Islands 0.145141\n", "148 Macedonia 0.016141\n", "149 Mali 0.028769\n", "150 Malta 0.049830\n", "151 Myanmar 0.168810\n", "152 Montenegro 0.080645\n", "153 Mongolia 0.050451\n", "154 Northern Mariana Islands 0.083891\n", "155 Mozambique 0.052984\n", "156 Mauritania 0.047578\n", "157 Montserrat 0.024121\n", "158 Mauritius 0.066048\n", "159 Malawi 0.208873\n", "160 Malaysia 0.009275\n", "161 Namibia 0.020265\n", "162 New Caledonia 0.064238\n", "163 Niger 0.020084\n", "164 Norfolk Island 0.247524\n", "165 Nigeria 0.067220\n", "166 Nicaragua 0.053066\n", "167 Niue 0.058268\n", "168 Netherlands 0.029782\n", "169 Norway 0.661565\n", "170 Nepal 0.530866\n", "171 Nauru 0.046705\n", "172 New Zealand 0.063832\n", "173 Oman 0.056455\n", "174 Pakistan 0.088653\n", "175 Panama 0.169475\n", "176 Pitcairn Islands 0.282857\n", "177 Peru 0.051750\n", "178 Spratly Islands 0.026154\n", "179 Philippines 0.087797\n", "180 Palau 0.176909\n", "181 Papua New Guinea 0.026789\n", "182 Poland 0.062732\n", "183 Puerto Rico 0.222873\n", "184 North Korea 0.315357\n", "185 Portugal 0.122907\n", "186 Paraguay 0.141607\n", "187 Palestine 0.265100\n", "188 French Polynesia 0.061339\n", "189 Qatar 0.097631\n", "190 Romania 0.743275\n", "191 Russia 0.006057\n", "192 Rwanda 0.123763\n", "193 Western Sahara 0.054943\n", "194 Saudi Arabia 0.188411\n", "195 Scarborough Reef 0.069580\n", "196 Sudan 0.145442\n", "197 South Sudan 0.182804\n", "198 Senegal 0.041048\n", "199 Serranilla Bank 0.404349\n", "200 Singapore 0.266856\n", "201 South Georgia and South Sandwich Islands 0.149831\n", "202 Saint Helena 0.428387\n", "203 Solomon Islands 0.344656\n", "204 Sierra Leone 0.100867\n", "205 El Salvador 0.176265\n", "206 San Marino 
0.038682\n", "207 Somaliland 0.040974\n", "208 Somalia 0.044995\n", "209 Saint Pierre and Miquelon 0.699458\n", "210 Republic of Serbia 0.042408\n", "211 Sao Tome and Principe 0.915933\n", "212 Suriname 0.041542\n", "213 Slovakia 0.113862\n", "214 Slovenia 0.753359\n", "215 Sweden 0.001290\n", "216 Swaziland 0.288331\n", "217 Sint Maarten 0.081521\n", "218 Seychelles 0.126393\n", "219 Syria 0.028406\n", "220 Turks and Caicos Islands 0.012284\n", "221 Chad 0.067085\n", "222 Togo 0.065655\n", "223 Thailand 0.034735\n", "224 Tajikistan 0.023857\n", "225 Turkmenistan 0.006858\n", "226 East Timor 0.038230\n", "227 Tonga 0.345105\n", "228 Trinidad and Tobago 0.020336\n", "229 Tunisia 0.174806\n", "230 Turkey 0.047196\n", "231 Tuvalu 0.041594\n", "232 Taiwan 0.121632\n", "233 United Republic of Tanzania 0.922501\n", "234 Uganda 0.066297\n", "235 Ukraine 0.088036\n", "236 United States Minor Outlying Islands 0.061749\n", "237 Uruguay 0.025120\n", "238 United States of America 0.098839\n", "239 US Naval Base Guantanamo Bay 0.013662\n", "240 Uzbekistan 0.090956\n", "241 Vatican 0.024224\n", "242 Saint Vincent and the Grenadines 0.798123\n", "243 Venezuela 0.164420\n", "244 British Virgin Islands 0.070114\n", "245 United States Virgin Islands 0.022017\n", "246 Vietnam 0.051927\n", "247 Vanuatu 0.071065\n", "248 Wallis and Futuna 0.026419\n", "249 Akrotiri Sovereign Base Area 0.003673\n", "250 Samoa 0.153828\n", "251 Yemen 0.044515\n", "252 South Africa 0.435813\n", "253 Zambia 0.051639\n", "254 Zimbabwe 0.044964" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame({'Country': country_names,\n", " 'Positive class probability': sentiments})\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", 
"version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }