# Source code for malaya.entity

from malaya.text.entity import EntityRegex
from malaya.supervised.huggingface import load
from malaya.torch_model.huggingface import Tagging

# Maps every entity tag name to its integer index. The index is simply the
# position of the tag in the ordered tuple below, starting at 0 for 'OTHER'.
label = {
    tag: index
    for index, tag in enumerate(
        (
            'OTHER',
            'law',
            'location',
            'organization',
            'person',
            'quantity',
            'time',
            'event',
        )
    )
}

# Registry of the HuggingFace entity-recognition checkpoints known to this
# module, keyed by model id. `huggingface()` hands this dict to `load` as the
# `available_huggingface` argument.
#
# Every entry stores:
#   - 'Size (MB)': size of the checkpoint in megabytes,
#   - one sub-dict per entity tag with 'precision', 'recall', 'f1', 'number',
#   - 'overall_precision', 'overall_recall', 'overall_f1', 'overall_accuracy'.
#
# NOTE(review): 'number' is presumably the count of gold entities of that tag
# in the evaluation set (it is identical across both models) -- confirm.
available_huggingface = {
    # Tiny T5 variant.
    'mesolitica/ner-t5-tiny-standard-bahasa-cased': {
        'Size (MB)': 84.7,
        'law': {
            'precision': 0.9642625081221572,
            'recall': 0.9598965071151359,
            'f1': 0.9620745542949757,
            'number': 1546
        },
        'person': {
            'precision': 0.9673319980661648,
            'recall': 0.971424608128728,
            'f1': 0.9693739834584906,
            'number': 14418
        },
        'time': {
            'precision': 0.9796992481203007,
            'recall': 0.983148893360161,
            'f1': 0.9814210394175245,
            'number': 3976
        },
        'location': {
            'precision': 0.966455899689208,
            'recall': 0.9753406878650227,
            'f1': 0.970877967379017,
            'number': 9246
        },
        'organization': {
            'precision': 0.9308265342319971,
            'recall': 0.9475204622051036,
            'f1': 0.9390993140471219,
            'number': 8308
        },
        'quantity': {
            'precision': 0.9824689554419284,
            'recall': 0.9853479853479854,
            'f1': 0.9839063643013899,
            'number': 2730
        },
        'event': {
            'precision': 0.8535980148883374,
            'recall': 0.8973913043478261,
            'f1': 0.8749470114455278,
            'number': 1150
        },
        'overall_precision': 0.9585080133195985,
        'overall_recall': 0.9670566055977183,
        'overall_f1': 0.9627633336140621,
        'overall_accuracy': 0.9951433495221682
    },
    # Small T5 variant; this is the default model of `huggingface()`.
    'mesolitica/ner-t5-small-standard-bahasa-cased': {
        'Size (MB)': 141,
        'law': {
            'precision': 0.9320327249842668,
            'recall': 0.9579560155239327,
            'f1': 0.9448165869218501,
            'number': 1546
        },
        'person': {
            'precision': 0.9745341614906833,
            'recall': 0.9794007490636704,
            'f1': 0.976961394769614,
            'number': 14418
        },
        'time': {
            'precision': 0.9583539910758553,
            'recall': 0.9723340040241448,
            'f1': 0.9652933832709114,
            'number': 3976
        },
        'location': {
            'precision': 0.9709677419354839,
            'recall': 0.9766385463984426,
            'f1': 0.9737948883856357,
            'number': 9246
        },
        'organization': {
            'precision': 0.9493625210488333,
            'recall': 0.9500481463649495,
            'f1': 0.9497052099627,
            'number': 8308
        },
        'quantity': {
            'precision': 0.9823008849557522,
            'recall': 0.9758241758241758,
            'f1': 0.9790518191841234,
            'number': 2730
        },
        'event': {
            'precision': 0.8669991687448046,
            'recall': 0.9069565217391304,
            'f1': 0.88652783680408,
            'number': 1150
        },
        'overall_precision': 0.9629220498535133,
        'overall_recall': 0.9691593754531832,
        'overall_f1': 0.9660306446949986,
        'overall_accuracy': 0.9953954840983863
    },
}

# Human-readable description of every entity tag, one
# {'Tag': ..., 'Description': ...} record per tag, built from the
# (tag, description) pairs below in the order they are listed.
describe = [
    {'Tag': tag, 'Description': description}
    for tag, description in (
        ('OTHER', 'other'),
        ('law', 'law, regulation, related law documents, documents, etc'),
        ('location', 'location, place'),
        ('organization', 'organization, company, government, facilities, etc'),
        (
            'person',
            'person, group of people, believes, unique arts (eg; food, drink), etc',
        ),
        ('quantity', 'numbers, quantity'),
        ('time', 'date, day, time, etc'),
        ('event', 'unique event happened, etc'),
    )
]


def huggingface(
    model: str = 'mesolitica/ner-t5-small-standard-bahasa-cased',
    force_check: bool = True,
    **kwargs,
):
    """
    Load HuggingFace model to Entity Recognition.

    Parameters
    ----------
    model: str, optional (default='mesolitica/ner-t5-small-standard-bahasa-cased')
        Check available models at `malaya.entity.available_huggingface`.
    force_check: bool, optional (default=True)
        Force check that the model is one of the malaya models.
        Set to False if you have your own huggingface model.
    **kwargs:
        Extra keyword arguments, forwarded unchanged to
        `malaya.supervised.huggingface.load`.

    Returns
    -------
    result: malaya.torch_model.huggingface.Tagging
    """
    # Thin wrapper: all loading logic lives in `load`; this function only
    # pins the model class and the registry of known checkpoints.
    return load(
        model=model,
        class_model=Tagging,
        available_huggingface=available_huggingface,
        force_check=force_check,
        path=__name__,
        **kwargs,
    )


def general_entity(model=None):
    """
    Load Regex based general entities tagging along with another supervised
    entity tagging model.

    Parameters
    ----------
    model: object, optional (default=None)
        model must have `predict` method.
        Make sure the `predict` method returned [(string, label), (string, label)].
        Passing None skips the check and forwards None to `EntityRegex`.

    Returns
    -------
    result: malaya.text.entity.EntityRegex class

    Raises
    ------
    ValueError
        If `model` is not None and has no `predict` attribute.
    """
    # None is explicitly allowed; only a supplied model is validated.
    if model is not None and not hasattr(model, 'predict'):
        raise ValueError('model must have `predict` method')
    return EntityRegex(model=model)