summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Edward Welbourne <[email protected]> 2023-08-01 11:48:37 +0200
committer: Edward Welbourne <[email protected]> 2023-08-09 17:53:20 +0200
commit: e212b3633cbfe15947e0e8059fc82c279867828a (patch)
tree: 57383cd2c9a43ed1f9e942fc4080f9b653d114e4
parent: 4f686b7b78552282e47fce7640f0154efe091077 (diff)
Break clashing-names test function out of CldrAccess.__checkEnum()
Moving it makes it easier to document what it's up to and why, while leaving __checkEnum() easier to read; and I'm going to need it elsewhere anyway. This makes no difference to generated data.

Task-number: QTBUG-94460
Change-Id: I684375bc926d5d54928fbf5b5e08978528aef487
Reviewed-by: Ievgenii Meshcheriakov <[email protected]>
-rw-r--r-- util/locale_database/cldr.py 22
-rw-r--r-- util/locale_database/localetools.py 35
2 files changed, 40 insertions, 17 deletions
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py
index 0bbdad39f9b..d1ad90295ef 100644
--- a/util/locale_database/cldr.py
+++ b/util/locale_database/cldr.py
@@ -16,6 +16,7 @@ from weakref import WeakValueDictionary as CacheDict
from pathlib import Path
from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
+from localetools import names_clash
from qlocalexml import Locale
class CldrReader (object):
@@ -353,10 +354,7 @@ class CldrAccess (object):
language, script, territory, variant)
@staticmethod
- def __checkEnum(given, proper, scraps,
- remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
- prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
- skip = '\u02bc'):
+ def __checkEnum(given, proper, scraps):
# Each is a { code: full name } mapping
for code, name in given.items():
try: right = proper[code]
@@ -366,19 +364,9 @@ class CldrAccess (object):
if code not in scraps:
yield name, f'[Found no CLDR name for code {code}]'
continue
- if name == right: continue
- ok = right.replace('&', 'And')
- for k, v in prefix.items():
- if ok.startswith(k + ' '):
- ok = v + ok[len(k):]
- while '(' in ok:
- try: f, t = ok.index('('), ok.index(')')
- except ValueError: break
- ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
- if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
- remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
- continue
- yield name, ok
+ cleaned = names_clash(right, name)
+ if cleaned:
+ yield name, cleaned
def checkEnumData(self, grumble):
scraps = set()
diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py
index ae4a50c09ab..a0ad5f397d7 100644
--- a/util/locale_database/localetools.py
+++ b/util/locale_database/localetools.py
@@ -48,6 +48,41 @@ def wrap_list(lst, perline=20):
yield head
return ",\n".join(", ".join(x) for x in split(lst, perline))
+def names_clash(cldr, enum):
+    """Check whether CLDR's name for something clashes with enumdata.py's.
+
+    First argument, cldr, is the name CLDR gives for some language,
+    script or territory; second, enum, is the name enumdata.py gives
+    for it. If these are enough alike, returns None; otherwise, the
+    string that results from adapting cldr to be more like how
+    enumdata.py would express it.
+
+    The names count as alike when they are equal or when enum, with
+    its whitespace removed and case ignored, appears somewhere within
+    the adapted cldr name after that has been reduced to its letters
+    (lower-cased, with some accented letters mapped to plain ones)."""
+    if cldr == enum:
+        return None
+
+    # Some common substitutions:
+    cldr = cldr.replace('&', 'And')
+    prefix = { 'St.': 'Saint', 'U.S.': 'United States' }
+    for k, v in prefix.items():
+        if cldr.startswith(k + ' '):
+            cldr = v + cldr[len(k):]
+
+    # Chop out any parenthesised part, e.g. (Burma):
+    while '(' in cldr:
+        try:
+            # From the first '(' to the last ')', so that nested
+            # parentheses are removed along with their container.
+            f, t = cldr.index('('), cldr.rindex(')')
+        except ValueError:
+            break
+        if t < f:
+            # Every ')' precedes the first '(', so there is no
+            # parenthesised part to chop. Rebuilding the string would
+            # leave it unchanged and this loop would never end.
+            break
+        cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()
+
+    # Various accented letters:
+    remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
+    skip = '\u02bc' # Punctuation for which .isalpha() is true.
+    # Let cldr match (ignoring non-letters and case) any substring as enum:
+    if ''.join(enum.lower().split()) in ''.join(
+            remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip):
+        return None
+    return cldr
+
@contextmanager
def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):