Source code for malaya.rumi_jawi
from malaya.supervised import transformer as load_transformer
from malaya.model.tf import RumiJawi
from malaya.function import describe_availability
from herpetologist import check_type
from typing import List
import logging
logger = logging.getLogger(__name__)

_transformer_availability = {
    'small': {
        'Size (MB)': 42.7,
        'Quantized Size (MB)': 13.1,
        'CER': 0.0006167541656054869,
        'WER': 0.0019283112815117458,
        'Suggested length': 256,
    },
    'base': {
        'Size (MB)': 234,
        'Quantized Size (MB)': 63.8,
        'CER': 0.00012427460315431668,
        'WER': 0.0004379943010206167,
        'Suggested length': 256,
    },
}

def available_transformer():
    """
    List available transformer models.
    """
    logger.info('tested on first 10k Rumi-Jawi test set, dataset at https://huggingface.co/datasets/mesolitica/rumi-jawi')
    return describe_availability(_transformer_availability)
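
# A brief usage sketch (not part of the original module): `available_transformer`
# is called through the top-level package, e.g.
#
#     import malaya
#     malaya.rumi_jawi.available_transformer()
#
# which logs the test-set note above and returns the availability table.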


@check_type
def transformer(model: str = 'base', quantized: bool = False, **kwargs):
    """
    Load a transformer encoder-decoder model to convert Rumi to Jawi.

    Parameters
    ----------
    model: str, optional (default='base')
        Check available models at `malaya.rumi_jawi.available_transformer()`.
    quantized: bool, optional (default=False)
        if True, will load an 8-bit quantized model.
        A quantized model is not necessarily faster; it depends on the machine.

    Returns
    -------
    result: malaya.model.tf.RumiJawi class
    """
    model = model.lower()
    if model not in _transformer_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya.rumi_jawi.available_transformer()`.'
        )

    return load_transformer.load(
        module='rumi-jawi',
        model=model,
        encoder='yttm',
        model_class=RumiJawi,
        quantized=quantized,
        **kwargs,
    )
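
# A brief usage sketch (not part of the original module), assuming the package-level
# API shown in the docstrings; the decoding method name (`greedy_decoder`) is an
# assumption and may differ in the actual `malaya.model.tf.RumiJawi` class.
#
#     import malaya
#     model = malaya.rumi_jawi.transformer(model='base', quantized=False)
#     model.greedy_decoder(['saya suka makan ayam'])  # assumed method: Rumi -> Jawi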