# Source code for malaya.word2num

from __future__ import print_function, unicode_literals

# Lookup table from a Malay number word to the value the parser works with.
# It is read by _get_decimal_sum, _number_formation and word2num; any word
# that is not a key here is ignored by word2num.
malaysian_number_system = {
    # Single digits (also the only words accepted in a decimal part).
    'kosong': 0,
    'satu': 1,
    'dua': 2,
    'tiga': 3,
    'empat': 4,
    'lima': 5,
    'enam': 6,
    'tujuh': 7,
    'lapan': 8,
    'sembilan': 9,
    # 'se-' forms: a scale word with an implicit leading one.
    'sepuluh': 10,
    'seribu': 1000,
    'sejuta': 1000000,
    'seratus': 100,
    'sebelas': 11,
    # Bare scale words. word2num splits the word list on 'ribu', 'juta' and
    # 'bilion'; 'ratus' is combined inside _number_formation.
    'ratus': 100,
    'ribu': 1000,
    'juta': 1000000,
    'bilion': 1000000000,
    # Decimal-point marker: the only non-numeric value. word2num treats the
    # words after it as the decimal part.
    'perpuluhan': '.',
    # Sign marker: word2num strips it when it is the first word and negates
    # the final result.
    'negatif': -1,
    # Both map to 10. _number_formation sets a flag on 'belas' / 'sebelas'
    # that switches some of its formulas from multiplying to adding.
    'belas': 10,
    'puluh': 10,
    # Ordinal "first", mapped to the same value as 'satu'.
    'pertama': 1,
}

# Digit words that may follow 'perpuluhan'. _get_decimal_sum returns 0 as soon
# as it meets a word that is not in this list.
decimal_words = [
    'kosong',
    'satu',
    'dua',
    'tiga',
    'empat',
    'lima',
    'enam',
    'tujuh',
    'lapan',
    'sembilan',
]


def _get_decimal_sum(decimal_digit_words):
    """
    Convert the digit words of a decimal part into a fraction.

    Parameters
    ----------
    decimal_digit_words: list of str
        Words that followed 'perpuluhan', most significant digit first.

    Returns
    -------
    result: float / int
        ``float('0.<digits>')`` when every word is in ``decimal_words``;
        the int ``0`` as soon as any other word is present.
    """
    words = list(decimal_digit_words)

    # A single non-digit word invalidates the whole decimal part.
    if any(word not in decimal_words for word in words):
        return 0

    digits = [str(malaysian_number_system[word]) for word in words]
    return float('0.' + ''.join(digits))


def _number_formation(number_words):
    """
    Combine the words of one group (the part between scale words such as
    'ribu' / 'juta' / 'bilion') into a single integer.

    The words are read left to right:

    - 'ratus' and 'puluh' multiply the value that precedes them
      ('dua ratus' -> 200); with nothing before them they count once.
    - 'belas' adds ten to the value that precedes it ('dua belas' -> 12).
    - every other word contributes its own value ('seratus' -> 100,
      'sebelas' -> 11, 'satu' -> 1).

    Reading word by word, instead of picking a formula from the number of
    words, gives the right value for phrases such as 'dua ratus dua puluh'
    (220), 'seratus satu' (101) and 'dua ratus sebelas' (211), and works
    for groups of any length.

    Parameters
    ----------
    number_words: list of str
        Keys of ``malaysian_number_system``.

    Returns
    -------
    result: int

    Raises
    ------
    ValueError
        If ``number_words`` is empty.
    KeyError
        If a word is not a key of ``malaysian_number_system``.
    """
    number_words = list(number_words)
    if not number_words:
        # An empty group means a scale word had nothing to scale.
        raise ValueError('Malformed number! Please enter a valid number word')

    total = 0
    # Value of the last plain word, held back until we know whether the next
    # word multiplies it ('ratus' / 'puluh') or adds ten to it ('belas').
    pending = None
    for number_word in number_words:
        value = malaysian_number_system[number_word]
        if number_word in ('ratus', 'puluh'):
            total += (1 if pending is None else pending) * value
            pending = None
        elif number_word == 'belas':
            total += (0 if pending is None else pending) + value
            pending = None
        else:
            if pending is not None:
                total += pending
            pending = value

    if pending is not None:
        total += pending
    return total


def word2num(string):
    """
    Translate from string to number, eg 'kesepuluh' -> 10.

    The input is lower-cased, hyphens become spaces, and the substrings
    'ke' and 'dan' are removed. Words that are not keys of
    ``malaysian_number_system`` are ignored. The remaining words are read as
    ``[negatif] <integer part> [perpuluhan <digit words>]``.

    Parameters
    ----------
    string: str

    Returns
    -------
    result: int / float
        A float only when a decimal part made purely of digit words follows
        'perpuluhan'; otherwise an int.

    Raises
    ------
    ValueError
        If ``string`` is not a str, contains no known number word, repeats
        a scale / sign / decimal word, lists the scale words out of order,
        or has a scale word with nothing in front of it.
    """
    if not isinstance(string, str):
        raise ValueError('input must be a string')

    # Lower-case before stripping 'ke' / 'dan' so capitalised input such as
    # 'Kesepuluh' is normalised the same way as 'kesepuluh'.
    string = string.lower()
    string = string.replace('-', ' ')
    string = string.replace('ke', '')
    string = string.replace('dan', '')

    if string.isdigit():
        return int(string)

    # 'seribu' / 'sejuta' are a scale word with an implicit 'satu'. Expanding
    # them lets the scale handling below see the 'ribu' / 'juta' marker, so
    # 'seribu dua ratus' is read as 1000 + 200.
    expansions = {'seribu': ['satu', 'ribu'], 'sejuta': ['satu', 'juta']}

    clean_numbers = []
    for word in string.split():
        if word in expansions:
            clean_numbers.extend(expansions[word])
        elif word in malaysian_number_system:
            clean_numbers.append(word)

    if not clean_numbers:
        raise ValueError(
            'No valid number words found! Please enter a valid number word'
        )

    # Each of these may appear at most once. Because of the expansion above
    # this also covers a repeated 'seribu' / 'sejuta'.
    unique_words = ('ribu', 'juta', 'bilion', 'perpuluhan', 'negatif')
    if any(clean_numbers.count(word) > 1 for word in unique_words):
        raise ValueError(
            'Redundant number word! Please enter a valid number word'
        )

    negative = clean_numbers[0] == 'negatif'
    if negative:
        clean_numbers = clean_numbers[1:]

    clean_decimal_numbers = []
    if 'perpuluhan' in clean_numbers:
        point_index = clean_numbers.index('perpuluhan')
        clean_decimal_numbers = clean_numbers[point_index + 1:]
        clean_numbers = clean_numbers[:point_index]

    billion_index = (
        clean_numbers.index('bilion') if 'bilion' in clean_numbers else -1
    )
    million_index = (
        clean_numbers.index('juta') if 'juta' in clean_numbers else -1
    )
    thousand_index = (
        clean_numbers.index('ribu') if 'ribu' in clean_numbers else -1
    )

    # Scale words must run from largest to smallest: bilion, juta, ribu.
    if (
        thousand_index > -1
        and (thousand_index < million_index or thousand_index < billion_index)
    ) or (million_index > -1 and million_index < billion_index):
        raise ValueError('Malformed number! Please enter a valid number word')

    total_sum = 0

    if len(clean_numbers) == 1:
        # A lone word (including a bare 'ribu' / 'juta' / 'bilion') is just
        # its table value.
        total_sum += malaysian_number_system[clean_numbers[0]]
    elif clean_numbers:
        # Walk the scale words from largest to smallest. Each one multiplies
        # the group of words between the previous scale word and itself.
        # Advancing a cursor guarantees every word is consumed exactly once,
        # including when the number ends on a scale word.
        cursor = 0
        scales = (
            ('bilion', billion_index),
            ('juta', million_index),
            ('ribu', thousand_index),
        )
        for scale_word, scale_index in scales:
            if scale_index == -1:
                continue
            group = clean_numbers[cursor:scale_index]
            if not group:
                raise ValueError(
                    'Malformed number! Please enter a valid number word'
                )
            total_sum += (
                _number_formation(group) * malaysian_number_system[scale_word]
            )
            cursor = scale_index + 1

        # Whatever follows the last scale word is the sub-thousand remainder.
        remainder = clean_numbers[cursor:]
        if remainder:
            total_sum += _number_formation(remainder)

    if clean_decimal_numbers:
        total_sum += _get_decimal_sum(clean_decimal_numbers)

    return -total_sum if negative else total_sum