summaryrefslogtreecommitdiff
path: root/src/common
diff options
context:
space:
mode:
author: Jeff Davis, 2023-12-07 23:44:03 +0000
committer: Jeff Davis, 2023-12-07 23:44:03 +0000
commit: 719b342d36ce9a049137817e93e6a18a711a40e4 (patch)
tree: d64ff006485ee3194304aaf0cb299d061a2285f9 /src/common
parent: d16a0c1e2e3874cd5adfa9ee968008b6c4b1ae01 (diff)
Shrink Unicode category table.
Missing entries can implicitly be considered "unassigned". Discussion: https://postgr.es/m/[email protected]
Diffstat (limited to 'src/common')
-rw-r--r-- src/common/unicode/generate-unicode_category_table.pl 21
-rw-r--r-- src/common/unicode_category.c 6
2 files changed, 13 insertions, 14 deletions
diff --git a/src/common/unicode/generate-unicode_category_table.pl b/src/common/unicode/generate-unicode_category_table.pl
index 8f03425e0bf..992b877eded 100644
--- a/src/common/unicode/generate-unicode_category_table.pl
+++ b/src/common/unicode/generate-unicode_category_table.pl
@@ -72,7 +72,10 @@ while (my $line = <$FH>)
# the current range, emit the current range and initialize a new
# range representing the gap.
if ($range_end + 1 != $code && $range_category ne $gap_category) {
- push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
+ if ($range_category ne $CATEGORY_UNASSIGNED) {
+ push(@category_ranges, {start => $range_start, end => $range_end,
+ category => $range_category});
+ }
$range_start = $range_end + 1;
$range_end = $code - 1;
$range_category = $gap_category;
@@ -80,7 +83,10 @@ while (my $line = <$FH>)
# different category; new range
if ($range_category ne $category) {
- push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
+ if ($range_category ne $CATEGORY_UNASSIGNED) {
+ push(@category_ranges, {start => $range_start, end => $range_end,
+ category => $range_category});
+ }
$range_start = $code;
$range_end = $code;
$range_category = $category;
@@ -109,14 +115,9 @@ die "<..., First> entry with no corresponding <..., Last> entry"
if $gap_category ne $CATEGORY_UNASSIGNED;
# emit final range
-push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
-
-# emit range for any unassigned code points after last entry
-if ($range_end < 0x10FFFF) {
- $range_start = $range_end + 1;
- $range_end = 0x10FFFF;
- $range_category = $CATEGORY_UNASSIGNED;
- push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
+if ($range_category ne $CATEGORY_UNASSIGNED) {
+ push(@category_ranges, {start => $range_start, end => $range_end,
+ category => $range_category});
}
my $num_ranges = scalar @category_ranges;
diff --git a/src/common/unicode_category.c b/src/common/unicode_category.c
index cec9c0d998f..189cd6eca3c 100644
--- a/src/common/unicode_category.c
+++ b/src/common/unicode_category.c
@@ -28,8 +28,7 @@ unicode_category(pg_wchar ucs)
int mid;
int max = lengthof(unicode_categories) - 1;
- Assert(ucs >= unicode_categories[0].first &&
- ucs <= unicode_categories[max].last);
+ Assert(ucs <= 0x10ffff);
while (max >= min)
{
@@ -42,8 +41,7 @@ unicode_category(pg_wchar ucs)
return unicode_categories[mid].category;
}
- Assert(false);
- return (pg_unicode_category) - 1;
+ return PG_U_UNASSIGNED;
}
/*