From 2810e39e568abcd094752ca5b37dc393503b2cf9 Mon Sep 17 00:00:00 2001 From: Sisyphus Date: Sat, 27 Jun 2026 11:40:16 +0800 Subject: [PATCH] fix: prevent Hebrew locale from corrupting subsequent locale formatting Fixes #1234 The bug: After using the Hebrew locale ('he'), subsequent calls to format_date() with other locales (like 'no', 'fr', 'es') returned Hebrew-formatted text instead of the requested locale. Root cause: LocaleDataDict.__getitem__ was mutating the cached locale data when resolving aliases. The Hebrew locale's months.stand-alone dict was the same object as root's months.stand-alone dict (due to shallow copying in merge()). When the alias at months.stand-alone.wide was resolved for Hebrew, the resolved Hebrew month names were written back into the shared dict, corrupting the root locale data that all other locales inherit from. Fix: Remove the write-back in LocaleDataDict.__getitem__. The resolved values are no longer stored back into the original data dict, preventing mutation of shared/cached locale data. Trade-off: resolved aliases are no longer memoized in the dict, so each lookup of an aliased key resolves the alias again. 
Added regression tests: - test_locale_data_isolation_hebrew: Verifies month names are not corrupted - test_locale_data_isolation_format_date: Verifies format_date output - test_locale_data_cache_not_mutated: Verifies root data integrity --- babel/localedata.py | 4 +-- tests/test_localedata.py | 78 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/babel/localedata.py b/babel/localedata.py index 4648e6626..0600a8174 100644 --- a/babel/localedata.py +++ b/babel/localedata.py @@ -262,7 +262,7 @@ def __iter__(self) -> Iterator[str | int | None]: return iter(self._data) def __getitem__(self, key: str | int | None) -> Any: - orig = val = self._data[key] + val = self._data[key] if isinstance(val, Alias): # resolve an alias val = val.resolve(self.base) if isinstance(val, tuple): # Merge a partial dict with an alias @@ -271,8 +271,6 @@ def __getitem__(self, key: str | int | None) -> Any: merge(val, others) if isinstance(val, dict): # Return a nested alias-resolving dict val = LocaleDataDict(val, base=self.base) - if val is not orig: - self._data[key] = val return val def __setitem__(self, key: str | int | None, value: Any) -> None: diff --git a/tests/test_localedata.py b/tests/test_localedata.py index 42810b992..06230f49c 100644 --- a/tests/test_localedata.py +++ b/tests/test_localedata.py @@ -165,3 +165,81 @@ def test_reserved_locale_names(): localedata.load(name) with pytest.raises(ValueError): Locale(name) + + +def test_locale_data_isolation_hebrew(): + """Regression test for https://github.com/python-babel/babel/issues/1234 + + Using Hebrew locale should not corrupt subsequent locale formatting calls. + The bug was that LocaleDataDict.__getitem__ was mutating the cached data + when resolving aliases, which corrupted shared parent locale data. 
+ """ + from babel.dates import get_month_names + from datetime import datetime + + date_obj = datetime(2025, 10, 15) + localedata._cache.clear() + + no_months_before = get_month_names('wide', 'stand-alone', 'no') + fr_months_before = get_month_names('wide', 'stand-alone', 'fr') + es_months_before = get_month_names('wide', 'stand-alone', 'es') + + he_months = get_month_names('wide', 'stand-alone', 'he') + assert he_months[10] != 'oktober' + + no_months_after = get_month_names('wide', 'stand-alone', 'no') + fr_months_after = get_month_names('wide', 'stand-alone', 'fr') + es_months_after = get_month_names('wide', 'stand-alone', 'es') + + assert no_months_after[10] == 'oktober', f"Norwegian corrupted after Hebrew: got '{no_months_after[10]}'" + assert fr_months_after[10] == 'octobre', f"French corrupted after Hebrew: got '{fr_months_after[10]}'" + assert es_months_after[10] == 'octubre', f"Spanish corrupted after Hebrew: got '{es_months_after[10]}'" + + assert dict(no_months_before) == dict(no_months_after) + assert dict(fr_months_before) == dict(fr_months_after) + assert dict(es_months_before) == dict(es_months_after) + + +def test_locale_data_isolation_format_date(): + """Regression test for https://github.com/python-babel/babel/issues/1234 + + format_date with Hebrew locale should not corrupt format_date with other locales. 
+ """ + from babel.dates import format_date + from datetime import datetime + + date_obj = datetime(2025, 10, 15) + localedata._cache.clear() + + he_result = format_date(date_obj, 'LLLL', 'he') + assert 'אוקטובר' in he_result + + no_result = format_date(date_obj, 'LLLL', 'no') + fr_result = format_date(date_obj, 'LLLL', 'fr') + es_result = format_date(date_obj, 'LLLL', 'es') + de_result = format_date(date_obj, 'LLLL', 'de') + + assert no_result == 'oktober', f"Norwegian corrupted after Hebrew: got '{no_result}'" + assert fr_result == 'octobre', f"French corrupted after Hebrew: got '{fr_result}'" + assert es_result == 'octubre', f"Spanish corrupted after Hebrew: got '{es_result}'" + assert de_result == 'Oktober', f"German corrupted after Hebrew: got '{de_result}'" + + +def test_locale_data_cache_not_mutated(): + """Test that accessing locale data through LocaleDataDict doesn't mutate the cache.""" + from babel.localedata import Alias, LocaleDataDict + + localedata._cache.clear() + + root_data = localedata.load('root', merge_inherited=False) + root_sa_wide = root_data['months']['stand-alone']['wide'] + assert isinstance(root_sa_wide, Alias), "Expected root stand-alone wide to be an Alias" + + he_data = localedata.load('he') + he_locale = LocaleDataDict(he_data) + he_locale['months']['stand-alone']['wide'] + + root_data_after = localedata.load('root', merge_inherited=False) + root_sa_wide_after = root_data_after['months']['stand-alone']['wide'] + assert isinstance(root_sa_wide_after, Alias), \ + f"Root data was mutated by alias resolution: got {type(root_sa_wide_after).__name__}"