Source code for malaya.spelling_correction.spylls

import re
from functools import partial
from malaya.path import PATH_NGRAM, S3_PATH_NGRAM
from malaya.function import check_file
from malaya.dictionary import is_english, is_malay
from malaya.text.rules import rules_normalizer
from malaya.text.tatabahasa import stopword_tatabahasa
from malaya.spelling_correction.probability import Spell
from typing import List


class Spylls(Spell):
    """Spelling corrector that delegates candidate generation to a dictionary object."""

    def __init__(self, dictionary):
        """
        Keep a reference to the dictionary used for suggestions.

        Parameters
        ----------
        dictionary: object
            Must expose ``suggest(word)`` returning an iterable of candidates.
        """
        # NOTE: the parent initializer is not invoked here; only the
        # dictionary handle is stored on the instance.
        self._dictionary = dictionary

    def correct(self, word: str):
        """
        Correct a word within a text, returning the corrected word.

        Parameters
        ----------
        word: str

        Returns
        -------
        result: str
        """
        # Words already recognised (English, Malay, or a grammar stopword)
        # are returned untouched; checks run in this order and short-circuit.
        already_known = (
            is_english(word)
            or is_malay(word)
            or word in stopword_tatabahasa
        )
        if already_known:
            return word

        # A direct normalization rule wins over dictionary suggestions.
        if word in rules_normalizer:
            return rules_normalizer[word]

        # Otherwise take the first suggestion, falling back to the input
        # when the dictionary has nothing to offer.
        best = self.edit_candidates(word=word)[:1]
        return best[0] if len(best) else word

    def edit_candidates(self, word: str):
        """
        Generate candidates given a word.

        Parameters
        ----------
        word: str

        Returns
        -------
        result: List[str]
        """
        suggestions = self._dictionary.suggest(word)
        return list(suggestions)
def load(model: str = 'libreoffice-pejam', **kwargs):
    """
    Load a spylls Spell Corrector for Malay.

    Parameters
    ----------
    model: str, optional (default='libreoffice-pejam')
        Model spelling correction supported. Allowed values:

        * ``'libreoffice-pejam'`` - from LibreOffice pEJAm, https://extensions.libreoffice.org/en/extensions/show/3868
    **kwargs:
        Forwarded unchanged to ``check_file``.

    Returns
    -------
    result: malaya.spelling_correction.spylls.Spylls class

    Raises
    ------
    ModuleNotFoundError
        If ``spylls`` cannot be imported.
    ValueError
        If ``model`` is not one of the supported models.
    Exception
        If the dictionary archive cannot be loaded.
    """
    try:
        from spylls.hunspell import Dictionary
    except ImportError as e:
        # Only import failures mean "not installed"; KeyboardInterrupt and
        # SystemExit must propagate instead of being relabelled.
        raise ModuleNotFoundError(
            'spylls not installed. Please install it and try again.'
        ) from e

    model = model.lower()
    supported_models = ['libreoffice-pejam']
    if model not in supported_models:
        raise ValueError(
            f'model not supported, available models are {str(supported_models)}'
        )

    path = check_file(PATH_NGRAM['spylls'][model], S3_PATH_NGRAM['spylls'][model], **kwargs)

    try:
        dictionary = Dictionary.from_zip(path['model'])
    except Exception as e:
        # Keep the underlying error attached so the real cause stays visible.
        raise Exception(
            'failed to load spylls model, please try clear cache or rerun again.'
        ) from e

    return Spylls(dictionary=dictionary)