Malay
Contents
Malay#
This tutorial is available as an IPython notebook at Malaya/example/dictionary-malay.
requirements#
Make sure you have already installed the following packages:
pip3 install requests beautifulsoup4
[2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = ''
[3]:
import malaya
/home/ubuntu/dev/malaya/malaya/tokenizer.py:202: FutureWarning: Possible nested set at position 3361
self.tok = re.compile(r'({})'.format('|'.join(pipeline)))
/home/ubuntu/dev/malaya/malaya/tokenizer.py:202: FutureWarning: Possible nested set at position 3879
self.tok = re.compile(r'({})'.format('|'.join(pipeline)))
DBP#
Query from https://prpm.dbp.gov.my/cari1?keyword= using the following function:
def keyword_dbp(word, parse: bool = False):
"""
Crawl https://prpm.dbp.gov.my/cari1?keyword= to check whether a word is a Malay word.
Parameters
----------
word: str
parse: bool, optional (default=False)
if True, will parse using BeautifulSoup.
Returns
-------
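result: Union[bool, Dict]
False if the word is not found; otherwise True, or a Dict with the parsed entry if `parse` is True.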
"""
[4]:
malaya.dictionary.keyword_dbp('ayam')
[4]:
True
[6]:
malaya.dictionary.keyword_dbp('ayamaaaaa')
[6]:
False
[5]:
malaya.dictionary.keyword_dbp('ayam', parse = True)
[5]:
{'definisi': ['Definisi : sj ikan; ~ hutan a) Euxiphippops sextriatus; b) Pomacanthus annularis; ~ laut, Abalistes spp.\xa0(Kamus Dewan Edisi Keempat)',
'Definisi : beberapa jenis binatang (yg bentuk tubuhnya seakan-akan burung tetapi tidak pandai terbang) yg biasanya dipelihara, Gallus gallus. ~ belanda sj ayam yg besar, Meleagris gallopavo. ~ beroga (denak, hutan) sj ayam liar, Gallus bankiva. ~ biring ayam jantan yg kuning kakinya. ~ bulu balik ayam yg bulunya terbalik. ~ dara ayam betina yg hampir bertelur. ~ katik ayam yg kecil. ~ percik ayam panggang yg disaluti sos atau kuah yg dibuat drpd santan dan rempah-ratus. ~ sabung ayam yg dipelihara utk disabung. ~ serama sj ayam peliharaan yg kecil, jinak, berbulu cantik dan berkaki pendek. ~ tambatan ki orang yg dianggap hebat dan diharapkan dpt membawa kemenangan dlm sesuatu perlawanan, mis bola sepak dan bola jaring.\xa0(Kamus Pelajar Edisi Kedua)'],
'tesaurus': None}
[7]:
malaya.dictionary.keyword_dbp('ayamaaaaa', parse = True)
[7]:
False
Wiktionary#
Query from https://en.wiktionary.org/wiki/ using the following function:
def keyword_wiktionary(
word,
acceptable_lang: List[str] = ['brunei malay', 'malay'],
):
"""
Crawl https://en.wiktionary.org/wiki/ to check whether a word is a Malay word.
Parameters
----------
word: str
acceptable_lang: List[str], optional (default=['brunei malay', 'malay'])
acceptable languages in wiktionary section.
Returns
-------
result: Dict
"""
[8]:
malaya.dictionary.keyword_wiktionary('ayam')
[8]:
{'brunei malay': [{'etymology': 'From Proto-Malayic *hayam, from Proto-Malayo-Polynesian *qayam.\n',
'definitions': [{'partOfSpeech': 'noun',
'text': ['ayam', 'chicken (bird)', 'chicken (meat)'],
'relatedWords': [],
'examples': []}],
'pronunciations': {'text': ['IPA: /ajam/',
'(Kedayan) IPA: /hajam/',
'Hyphenation: a‧yam'],
'audio': []}}],
'malay': [{'etymology': 'From hayam, from Proto-Malayic *hayam, from Proto-Malayo-Polynesian *qayam.\n',
'definitions': [{'partOfSpeech': 'noun',
'text': ['ayam (Jawi spelling ايم\u200e, plural ayam-ayam, informal 1st possessive ayamku, 2nd possessive ayammu, 3rd possessive ayamnya)',
'chicken (bird)',
'chicken (meat)'],
'relatedWords': [{'relationshipType': 'synonyms',
'words': ['manuk / مانوق\u200e']}],
'examples': []}],
'pronunciations': {'text': ['IPA: /ajam/', 'Rhymes: -ajam, -jam, -am'],
'audio': []}}]}
[9]:
malaya.dictionary.keyword_wiktionary('ayamaaaa')
[9]:
{'brunei malay': [{'etymology': '',
'definitions': [],
'pronunciations': {'text': [], 'audio': []}}],
'malay': [{'etymology': '',
'definitions': [],
'pronunciations': {'text': [], 'audio': []}}]}
Check whether a word is a Malay word#
def is_malay(word, stemmer=None):
"""
Check whether a word is a Malay word.
Parameters
----------
word: str
stemmer: Callable, optional (default=None)
a Callable object, must have `stem_word` method.
Returns
-------
result: bool
"""
[10]:
malaya.dictionary.is_malay('ayam')
[10]:
True
[13]:
malaya.dictionary.is_malay('sakitkan')
[13]:
True
[12]:
malaya.dictionary.is_malay('tersakitkan')
[12]:
False
[14]:
stemmer = malaya.stem.sastrawi()
[15]:
malaya.dictionary.is_malay('tersakitkan', stemmer = stemmer)
[15]:
True