{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# GPU Environment" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "\n", "This tutorial is available as an IPython notebook at [Malaya/example/gpu-environment](https://github.com/huseinzol05/Malaya/tree/master/example/gpu-environment).\n", " \n", "
" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 2.9 s, sys: 3.7 s, total: 6.6 s\n", "Wall time: 2.09 s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/husein/dev/malaya/malaya/tokenizer.py:214: FutureWarning: Possible nested set at position 3397\n", " self.tok = re.compile(r'({})'.format('|'.join(pipeline)))\n", "/home/husein/dev/malaya/malaya/tokenizer.py:214: FutureWarning: Possible nested set at position 3927\n", " self.tok = re.compile(r'({})'.format('|'.join(pipeline)))\n" ] } ], "source": [ "%%time\n", "\n", "import malaya\n", "import logging\n", "logging.basicConfig(level = logging.INFO)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### List available GPU\n", "\n", "**You must install the GPU version of PyTorch first to enable GPU hardware acceleration**." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import torch\n", "\n", "torch.cuda.device_count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Run model inside GPU\n", "\n", "Once you call the `cuda` method on a PyTorch object, all inputs will automatically be cast to `cuda`." 
] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'mesolitica/translation-t5-tiny-standard-bahasa-cased': {'Size (MB)': 139,\n", " 'Suggested length': 1536,\n", " 'en-ms chrF2++': 65.91,\n", " 'ms-en chrF2++': 61.3,\n", " 'ind-ms chrF2++': 58.15,\n", " 'jav-ms chrF2++': 49.33,\n", " 'pasar ms-ms chrF2++': 58.46,\n", " 'pasar ms-en chrF2++': 55.76,\n", " 'manglish-ms chrF2++': 51.04,\n", " 'manglish-en chrF2++': 52.2,\n", " 'from lang': ['en', 'ms', 'ind', 'jav', 'bjn', 'manglish', 'pasar ms'],\n", " 'to lang': ['en', 'ms']},\n", " 'mesolitica/translation-t5-small-standard-bahasa-cased': {'Size (MB)': 242,\n", " 'Suggested length': 1536,\n", " 'en-ms chrF2++': 67.37,\n", " 'ms-en chrF2++': 63.79,\n", " 'ind-ms chrF2++': 58.09,\n", " 'jav-ms chrF2++': 52.11,\n", " 'pasar ms-ms chrF2++': 62.49,\n", " 'pasar ms-en chrF2++': 60.77,\n", " 'manglish-ms chrF2++': 52.84,\n", " 'manglish-en chrF2++': 53.65,\n", " 'from lang': ['en', 'ms', 'ind', 'jav', 'bjn', 'manglish', 'pasar ms'],\n", " 'to lang': ['en', 'ms']},\n", " 'mesolitica/translation-t5-base-standard-bahasa-cased': {'Size (MB)': 892,\n", " 'Suggested length': 1536,\n", " 'en-ms chrF2++': 67.62,\n", " 'ms-en chrF2++': 64.41,\n", " 'ind-ms chrF2++': 59.25,\n", " 'jav-ms chrF2++': 52.86,\n", " 'pasar ms-ms chrF2++': 62.99,\n", " 'pasar ms-en chrF2++': 62.06,\n", " 'manglish-ms chrF2++': 54.4,\n", " 'manglish-en chrF2++': 54.14,\n", " 'from lang': ['en', 'ms', 'ind', 'jav', 'bjn', 'manglish', 'pasar ms'],\n", " 'to lang': ['en', 'ms']},\n", " 'mesolitica/translation-nanot5-tiny-malaysian-cased': {'Size (MB)': 205,\n", " 'Suggested length': 2048,\n", " 'en-ms chrF2++': 63.61,\n", " 'ms-en chrF2++': 59.55,\n", " 'ind-ms chrF2++': 56.38,\n", " 'jav-ms chrF2++': 47.68,\n", " 'mandarin-ms chrF2++': 36.61,\n", " 'mandarin-en chrF2++': 39.78,\n", " 'pasar ms-ms chrF2++': 58.74,\n", " 'pasar ms-en chrF2++': 54.87,\n", " 'manglish-ms chrF2++': 50.76,\n", " 'manglish-en 
chrF2++': 53.16,\n", " 'from lang': ['en',\n", " 'ms',\n", " 'ind',\n", " 'jav',\n", " 'bjn',\n", " 'manglish',\n", " 'pasar ms',\n", " 'mandarin',\n", " 'pasar mandarin'],\n", " 'to lang': ['en', 'ms']},\n", " 'mesolitica/translation-nanot5-small-malaysian-cased': {'Size (MB)': 358,\n", " 'Suggested length': 2048,\n", " 'en-ms chrF2++': 66.98,\n", " 'ms-en chrF2++': 63.52,\n", " 'ind-ms chrF2++': 58.1,\n", " 'jav-ms chrF2++': 51.55,\n", " 'mandarin-ms chrF2++': 46.09,\n", " 'mandarin-en chrF2++': 44.13,\n", " 'pasar ms-ms chrF2++': 63.2,\n", " 'pasar ms-en chrF2++': 59.78,\n", " 'manglish-ms chrF2++': 54.09,\n", " 'manglish-en chrF2++': 55.27,\n", " 'from lang': ['en',\n", " 'ms',\n", " 'ind',\n", " 'jav',\n", " 'bjn',\n", " 'manglish',\n", " 'pasar ms',\n", " 'mandarin',\n", " 'pasar mandarin'],\n", " 'to lang': ['en', 'ms']},\n", " 'mesolitica/translation-nanot5-base-malaysian-cased': {'Size (MB)': 990,\n", " 'Suggested length': 2048,\n", " 'en-ms chrF2++': 67.87,\n", " 'ms-en chrF2++': 64.79,\n", " 'ind-ms chrF2++': 56.98,\n", " 'jav-ms chrF2++': 51.21,\n", " 'mandarin-ms chrF2++': 47.39,\n", " 'mandarin-en chrF2++': 48.78,\n", " 'pasar ms-ms chrF2++': 65.06,\n", " 'pasar ms-en chrF2++': 64.03,\n", " 'manglish-ms chrF2++': 57.91,\n", " 'manglish-en chrF2++': 55.66,\n", " 'from lang': ['en',\n", " 'ms',\n", " 'ind',\n", " 'jav',\n", " 'bjn',\n", " 'manglish',\n", " 'pasar ms',\n", " 'mandarin',\n", " 'pasar mandarin'],\n", " 'to lang': ['en', 'ms']}}" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "malaya.translation.available_huggingface" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading the tokenizer from the `special_tokens_map.json` and the `added_tokens.json` will be removed in `transformers 5`, it is kept for forward compatibility, but it is recommended to update your `tokenizer_config.json` by uploading it again. 
You will see the new `added_tokens_decoder` attribute that will store the relevant information.\n", "You are using the default legacy behaviour of the . If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f6a79ba00cb241aca683f668f1f27497", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)neration_config.json: 0%| | 0.00/164 [00:00