Skip to content

Commit c0fb56e

Browse files
authored
Allow alternative space characters as group separator when parsing numbers (#1007)
The French group separator is `"\u202f"` (narrow non-breaking space), but when parsing numbers in the real world, you will most often encounter either a regular space character (`" "`) or a non-breaking space character (`"\xa0"`). The issue was partially addressed earlier in #637, but only to allow regular spaces instead of non-breaking spaces `"\xa0"` in `parse_decimal`. This commit goes further by changing both `parse_number` and `parse_decimal` to allow certain other space characters when the group character is itself a space character, but is not present in the string to parse. Unit tests are included.
1 parent fe82fbc commit c0fb56e

File tree

2 files changed

+41
-4
lines changed

2 files changed

+41
-4
lines changed

babel/numbers.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -998,6 +998,15 @@ def __init__(self, message: str, suggestions: list[str] | None = None) -> None:
998998
self.suggestions = suggestions
999999

10001000

1001+
SPACE_CHARS = {
1002+
' ', # space
1003+
'\xa0', # no-break space
1004+
'\u202f', # narrow no-break space
1005+
}
1006+
1007+
SPACE_CHARS_RE = re.compile('|'.join(SPACE_CHARS))
1008+
1009+
10011010
def parse_number(
10021011
string: str,
10031012
locale: Locale | str | None = LC_NUMERIC,
@@ -1026,8 +1035,18 @@ def parse_number(
10261035
:raise `NumberFormatError`: if the string can not be converted to a number
10271036
:raise `UnsupportedNumberingSystemError`: if the numbering system is not supported by the locale.
10281037
"""
1038+
group_symbol = get_group_symbol(locale, numbering_system=numbering_system)
1039+
1040+
if (
1041+
group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space,
1042+
group_symbol not in string and # and the string to be parsed does not contain it,
1043+
SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead,
1044+
):
1045+
# ... it's reasonable to assume it is taking the place of the grouping symbol.
1046+
string = SPACE_CHARS_RE.sub(group_symbol, string)
1047+
10291048
try:
1030-
return int(string.replace(get_group_symbol(locale, numbering_system=numbering_system), ''))
1049+
return int(string.replace(group_symbol, ''))
10311050
except ValueError as ve:
10321051
raise NumberFormatError(f"{string!r} is not a valid number") from ve
10331052

@@ -1085,12 +1104,12 @@ def parse_decimal(
10851104
decimal_symbol = get_decimal_symbol(locale, numbering_system=numbering_system)
10861105

10871106
if not strict and (
1088-
group_symbol == '\xa0' and # if the grouping symbol is U+00A0 NO-BREAK SPACE,
1107+
group_symbol in SPACE_CHARS and # if the grouping symbol is a kind of space,
10891108
group_symbol not in string and # and the string to be parsed does not contain it,
1090-
' ' in string # but it does contain a space instead,
1109+
SPACE_CHARS_RE.search(string) # but it does contain any other kind of space instead,
10911110
):
10921111
# ... it's reasonable to assume it is taking the place of the grouping symbol.
1093-
string = string.replace(' ', group_symbol)
1112+
string = SPACE_CHARS_RE.sub(group_symbol, string)
10941113

10951114
try:
10961115
parsed = decimal.Decimal(string.replace(group_symbol, '')

tests/test_numbers.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,15 @@ def test_parse_number():
751751
with pytest.raises(numbers.UnsupportedNumberingSystemError):
752752
numbers.parse_number('1.099,98', locale='en', numbering_system="unsupported")
753753

754+
@pytest.mark.parametrize('string', [
755+
'1 099',
756+
'1\xa0099',
757+
'1\u202f099',
758+
])
759+
def test_parse_number_group_separator_can_be_any_space(string):
760+
assert numbers.parse_number(string, locale='fr') == 1099
761+
762+
754763
def test_parse_decimal():
755764
assert (numbers.parse_decimal('1,099.98', locale='en_US')
756765
== decimal.Decimal('1099.98'))
@@ -761,6 +770,15 @@ def test_parse_decimal():
761770
assert excinfo.value.args[0] == "'2,109,998' is not a valid decimal number"
762771

763772

773+
@pytest.mark.parametrize('string', [
774+
'1 099,98',
775+
'1\xa0099,98',
776+
'1\u202f099,98',
777+
])
778+
def test_parse_decimal_group_separator_can_be_any_space(string):
779+
assert decimal.Decimal('1099.98') == numbers.parse_decimal(string, locale='fr')
780+
781+
764782
def test_parse_grouping():
765783
assert numbers.parse_grouping('##') == (1000, 1000)
766784
assert numbers.parse_grouping('#,###') == (3, 3)

0 commit comments

Comments
 (0)