diff options
author | Jeff Davis | 2023-12-07 23:44:03 +0000 |
---|---|---|
committer | Jeff Davis | 2023-12-07 23:44:03 +0000 |
commit | 719b342d36ce9a049137817e93e6a18a711a40e4 (patch) | |
tree | d64ff006485ee3194304aaf0cb299d061a2285f9 /src/common | |
parent | d16a0c1e2e3874cd5adfa9ee968008b6c4b1ae01 (diff) |
Shrink Unicode category table.
Missing entries can implicitly be considered "unassigned".
Discussion: https://postgr.es/m/[email protected]
Diffstat (limited to 'src/common')
-rw-r--r-- | src/common/unicode/generate-unicode_category_table.pl | 21 | ||||
-rw-r--r-- | src/common/unicode_category.c | 6 |
2 files changed, 13 insertions, 14 deletions
```diff
diff --git a/src/common/unicode/generate-unicode_category_table.pl b/src/common/unicode/generate-unicode_category_table.pl
index 8f03425e0bf..992b877eded 100644
--- a/src/common/unicode/generate-unicode_category_table.pl
+++ b/src/common/unicode/generate-unicode_category_table.pl
@@ -72,7 +72,10 @@ while (my $line = <$FH>)
 	# the current range, emit the current range and initialize a new
 	# range representing the gap.
 	if ($range_end + 1 != $code && $range_category ne $gap_category) {
-		push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
+		if ($range_category ne $CATEGORY_UNASSIGNED) {
+			push(@category_ranges, {start => $range_start, end => $range_end,
+									category => $range_category});
+		}
 		$range_start = $range_end + 1;
 		$range_end = $code - 1;
 		$range_category = $gap_category;
@@ -80,7 +83,10 @@ while (my $line = <$FH>)
 
 	# different category; new range
 	if ($range_category ne $category) {
-		push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
+		if ($range_category ne $CATEGORY_UNASSIGNED) {
+			push(@category_ranges, {start => $range_start, end => $range_end,
+									category => $range_category});
+		}
 		$range_start = $code;
 		$range_end = $code;
 		$range_category = $category;
@@ -109,14 +115,9 @@ die "<..., First> entry with no corresponding <..., Last> entry"
   if $gap_category ne $CATEGORY_UNASSIGNED;
 
 # emit final range
-push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
-
-# emit range for any unassigned code points after last entry
-if ($range_end < 0x10FFFF) {
-	$range_start = $range_end + 1;
-	$range_end = 0x10FFFF;
-	$range_category = $CATEGORY_UNASSIGNED;
-	push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
+if ($range_category ne $CATEGORY_UNASSIGNED) {
+	push(@category_ranges, {start => $range_start, end => $range_end,
+							category => $range_category});
 }
 
 my $num_ranges = scalar @category_ranges;
diff --git a/src/common/unicode_category.c b/src/common/unicode_category.c
index cec9c0d998f..189cd6eca3c 100644
--- a/src/common/unicode_category.c
+++ b/src/common/unicode_category.c
@@ -28,8 +28,7 @@ unicode_category(pg_wchar ucs)
 	int			mid;
 	int			max = lengthof(unicode_categories) - 1;
 
-	Assert(ucs >= unicode_categories[0].first &&
-		   ucs <= unicode_categories[max].last);
+	Assert(ucs <= 0x10ffff);
 
 	while (max >= min)
 	{
@@ -42,8 +41,7 @@ unicode_category(pg_wchar ucs)
 			return unicode_categories[mid].category;
 	}
 
-	Assert(false);
-	return (pg_unicode_category) - 1;
+	return PG_U_UNASSIGNED;
 }
 
 /*
```