diff options
author | Edward Welbourne <[email protected]> | 2023-08-01 11:48:37 +0200 |
---|---|---|
committer | Edward Welbourne <[email protected]> | 2023-08-09 17:53:20 +0200 |
commit | e212b3633cbfe15947e0e8059fc82c279867828a (patch) | |
tree | 57383cd2c9a43ed1f9e942fc4080f9b653d114e4 | |
parent | 4f686b7b78552282e47fce7640f0154efe091077 (diff) |
Break clashing-names test function out of CldrAccess.__checkEnum()
Moving it makes it easier to document what it's up to and why, while
leaving __checkEnum() easier to read; and I'm going to need it
elsewhere anyway. This makes no difference to generated data.
Task-number: QTBUG-94460
Change-Id: I684375bc926d5d54928fbf5b5e08978528aef487
Reviewed-by: Ievgenii Meshcheriakov <[email protected]>
-rw-r--r-- | util/locale_database/cldr.py | 22 | ||||
-rw-r--r-- | util/locale_database/localetools.py | 35 |
2 files changed, 40 insertions, 17 deletions
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py
index 0bbdad39f9b..d1ad90295ef 100644
--- a/util/locale_database/cldr.py
+++ b/util/locale_database/cldr.py
@@ -16,6 +16,7 @@ from weakref import WeakValueDictionary as CacheDict
 from pathlib import Path
 
 from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
+from localetools import names_clash
 from qlocalexml import Locale
 
 class CldrReader (object):
@@ -353,10 +354,7 @@ class CldrAccess (object):
             language, script, territory, variant)
 
     @staticmethod
-    def __checkEnum(given, proper, scraps,
-                    remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ü': 'u'},
-                    prefix = { 'St.': 'Saint', 'U.S.': 'United States' },
-                    skip = '\u02bc'):
+    def __checkEnum(given, proper, scraps):
         # Each is a { code: full name } mapping
         for code, name in given.items():
             try: right = proper[code]
@@ -366,19 +364,9 @@ class CldrAccess (object):
                 if code not in scraps:
                     yield name, f'[Found no CLDR name for code {code}]'
                 continue
-            if name == right: continue
-            ok = right.replace('&', 'And')
-            for k, v in prefix.items():
-                if ok.startswith(k + ' '):
-                    ok = v + ok[len(k):]
-            while '(' in ok:
-                try: f, t = ok.index('('), ok.index(')')
-                except ValueError: break
-                ok = ok[:f].rstrip() + ' ' + ok[t:].lstrip()
-            if ''.join(ch for ch in name.lower() if not ch.isspace()) in ''.join(
-                    remap.get(ch, ch) for ch in ok.lower() if ch.isalpha() and ch not in skip):
-                continue
-            yield name, ok
+            cleaned = names_clash(right, name)
+            if cleaned:
+                yield name, cleaned
 
     def checkEnumData(self, grumble):
         scraps = set()
diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py
index ae4a50c09ab..a0ad5f397d7 100644
--- a/util/locale_database/localetools.py
+++ b/util/locale_database/localetools.py
@@ -48,6 +48,41 @@ def wrap_list(lst, perline=20):
             yield head
     return ",\n".join(", ".join(x) for x in split(lst, perline))
 
+def names_clash(cldr, enum):
+    """True if the reader might not recognize cldr as the name of enum
+
+    First argument, cldr, is the name CLDR gives for some language,
+    script or territory; second, enum, is the name enumdata.py gives
+    for it. If these are enough alike, returns None; otherwise, a
+    non-empty string that results from adapting cldr to be more like
+    how enumdata.py would express it."""
+    if cldr == enum:
+        return None
+
+    # Some common substitutions:
+    cldr = cldr.replace('&', 'And')
+    prefix = { 'St.': 'Saint', 'U.S.': 'United States' }
+    for k, v in prefix.items():
+        if cldr.startswith(k + ' '):
+            cldr = v + cldr[len(k):]
+
+    # Chop out any parenthesised part, e.g. (Burma):
+    while '(' in cldr:
+        try:
+            f, t = cldr.index('('), cldr.rindex(')')
+        except ValueError:
+            break
+        cldr = cldr[:f].rstrip() + ' ' + cldr[t + 1:].lstrip()
+
+    # Various accented letters:
+    remap = { 'å': 'a', 'ã': 'a', 'ç': 'c', 'é': 'e', 'í': 'i', 'ô': 'o', 'ü': 'u'}
+    skip = '\u02bc' # Punctuation for which .isalpha() is true.
+    # Let cldr match (ignoring non-letters and case) any substring as enum:
+    if ''.join(enum.lower().split()) in ''.join(
+            remap.get(ch, ch) for ch in cldr.lower() if ch.isalpha() and ch not in skip):
+        return None
+    return cldr
+
 @contextmanager
 def AtomicRenameTemporaryFile(originalLocation: Path, *, prefix: str, dir: Path):