# Source code for malaya.stem

from malaya.model.stem import Naive, Sastrawi
from malaya.supervised.rnn import load
from malaya.torch_model.rnn import Stem

# Registry of the pretrained stemmer checkpoints shipped for this module,
# keyed by HuggingFace repository id. Each entry carries the descriptive
# figures reported for that checkpoint; `huggingface()` hands this mapping
# to the loader together with `force_check`.
available_huggingface = {
    'mesolitica/stem-lstm-512': {
        'Size (MB)': 35.2,
        'hidden size': 512,
        'CER': 0.02549779186652238,
        'WER': 0.05448552235248484,
    },
}

# One-line note on how the figures above were obtained (dataset location).
info = (
    'Trained on train set and tested on test set, '
    'https://github.com/huseinzol05/malay-dataset/tree/master/normalization/stemmer'
)


def naive():
    """
    Load stemming model using startswith and endswith naively using regex patterns.

    Returns
    -------
    result : malaya.model.stem.Naive class
    """

    return Naive()


def sastrawi():
    """
    Load stemming model using Sastrawi, this also includes lemmatization.

    Returns
    -------
    result: malaya.model.stem.Sastrawi class

    Raises
    ------
    ModuleNotFoundError
        If the optional PySastrawi package cannot be imported.
    """

    # PySastrawi is an optional dependency, so it is imported lazily here.
    # Only import failures are translated into the install hint; anything
    # else (KeyboardInterrupt, SystemExit, ...) propagates untouched.
    try:
        from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
    except ImportError as e:
        raise ModuleNotFoundError(
            'PySastrawi not installed. Please install it by `pip install PySastrawi` and try again.'
        ) from e
    return Sastrawi(StemmerFactory())


def huggingface(
    model: str = 'mesolitica/stem-lstm-512',
    force_check: bool = True,
    **kwargs,
):
    """
    Load HuggingFace model for stemming and lemmatization.

    Parameters
    ----------
    model: str, optional (default='mesolitica/stem-lstm-512')
        Check available models at `malaya.stem.available_huggingface`.
    force_check: bool, optional (default=True)
        Force check that the model is one of the malaya models.
        Set to False if you have your own huggingface model.
    **kwargs:
        Extra keyword arguments, forwarded unchanged to
        `malaya.supervised.rnn.load`.

    Returns
    -------
    result: malaya.torch_model.rnn.Stem
    """

    # All loading logic lives in the shared RNN loader; this function only
    # binds the stemmer-specific model class and model registry.
    return load(
        model=model,
        class_model=Stem,
        available_huggingface=available_huggingface,
        force_check=force_check,
        path=__name__,
        **kwargs,
    )