From eb0d0d2c7300c9c5c22b35975c11265aa4becc84 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Fri, 20 Aug 2021 10:34:26 -0400 Subject: Rename unicode_combining_table to unicode_width_table No functional changes. A future commit will use this table for other purposes besides combining characters. --- src/common/unicode/Makefile | 4 +- .../unicode/generate-unicode_combining_table.pl | 53 ------ src/common/unicode/generate-unicode_width_table.pl | 53 ++++++ src/common/wchar.c | 2 +- src/include/common/unicode_combining_table.h | 196 --------------------- src/include/common/unicode_width_table.h | 196 +++++++++++++++++++++ 6 files changed, 252 insertions(+), 252 deletions(-) delete mode 100644 src/common/unicode/generate-unicode_combining_table.pl create mode 100644 src/common/unicode/generate-unicode_width_table.pl delete mode 100644 src/include/common/unicode_combining_table.h create mode 100644 src/include/common/unicode_width_table.h diff --git a/src/common/unicode/Makefile b/src/common/unicode/Makefile index eb14add28ad..499e31d59f0 100644 --- a/src/common/unicode/Makefile +++ b/src/common/unicode/Makefile @@ -18,7 +18,7 @@ LIBS += $(PTHREAD_LIBS) # By default, do nothing. all: -update-unicode: unicode_norm_table.h unicode_combining_table.h unicode_normprops_table.h unicode_norm_hashfunc.h +update-unicode: unicode_norm_table.h unicode_width_table.h unicode_normprops_table.h unicode_norm_hashfunc.h mv $^ ../../../src/include/common/ $(MAKE) normalization-check @@ -35,7 +35,7 @@ unicode_norm_hashfunc.h: unicode_norm_table.h unicode_norm_table.h: generate-unicode_norm_table.pl UnicodeData.txt CompositionExclusions.txt $(PERL) generate-unicode_norm_table.pl -unicode_combining_table.h: generate-unicode_combining_table.pl UnicodeData.txt +unicode_width_table.h: generate-unicode_width_table.pl UnicodeData.txt $(PERL) $^ >$@ unicode_normprops_table.h: generate-unicode_normprops_table.pl DerivedNormalizationProps.txt diff --git a/src/common/unicode/generate-unicode_combining_table.pl b/src/common/unicode/generate-unicode_combining_table.pl deleted file mode 100644 index 86aed789070..00000000000 --- a/src/common/unicode/generate-unicode_combining_table.pl +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/perl -# -# Generate sorted list of non-overlapping intervals of non-spacing -# characters, using Unicode data files as input. Pass UnicodeData.txt -# as argument. The output is on stdout. -# -# Copyright (c) 2019-2021, PostgreSQL Global Development Group - -use strict; -use warnings; - -my $range_start = undef; -my $codepoint; -my $prev_codepoint; -my $count = 0; - -print - "/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */\n\n"; - -print "static const struct mbinterval combining[] = {\n"; - -foreach my $line () -{ - chomp $line; - my @fields = split ';', $line; - $codepoint = hex $fields[0]; - - next if $codepoint > 0xFFFF; - - if ($fields[2] eq 'Me' || $fields[2] eq 'Mn') - { - # combining character, save for start of range - if (!defined($range_start)) - { - $range_start = $codepoint; - } - } - else - { - # not a combining character, print out previous range if any - if (defined($range_start)) - { - printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint; - $range_start = undef; - } - } -} -continue -{ - $prev_codepoint = $codepoint; -} - -print "};\n"; diff --git a/src/common/unicode/generate-unicode_width_table.pl b/src/common/unicode/generate-unicode_width_table.pl new file mode 100644 index 00000000000..90b57af075e --- /dev/null +++ b/src/common/unicode/generate-unicode_width_table.pl @@ -0,0 +1,53 @@ +#!/usr/bin/perl +# +# Generate sorted list of non-overlapping intervals of non-spacing +# characters, using Unicode data files as input. Pass UnicodeData.txt +# as argument. The output is on stdout. +# +# Copyright (c) 2019-2021, PostgreSQL Global Development Group + +use strict; +use warnings; + +my $range_start = undef; +my $codepoint; +my $prev_codepoint; +my $count = 0; + +print + "/* generated by src/common/unicode/generate-unicode_width_table.pl, do not edit */\n\n"; + +print "static const struct mbinterval combining[] = {\n"; + +foreach my $line () +{ + chomp $line; + my @fields = split ';', $line; + $codepoint = hex $fields[0]; + + next if $codepoint > 0xFFFF; + + if ($fields[2] eq 'Me' || $fields[2] eq 'Mn') + { + # combining character, save for start of range + if (!defined($range_start)) + { + $range_start = $codepoint; + } + } + else + { + # not a combining character, print out previous range if any + if (defined($range_start)) + { + printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint; + $range_start = undef; + } + } +} +continue +{ + $prev_codepoint = $codepoint; +} + +print "};\n"; diff --git a/src/common/wchar.c b/src/common/wchar.c index 0636b8765ba..bb97b5f54f4 100644 --- a/src/common/wchar.c +++ b/src/common/wchar.c @@ -644,7 +644,7 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max) static int ucs_wcwidth(pg_wchar ucs) { -#include "common/unicode_combining_table.h" +#include "common/unicode_width_table.h" /* test for 8-bit control characters */ if (ucs == 0) diff --git a/src/include/common/unicode_combining_table.h b/src/include/common/unicode_combining_table.h deleted file mode 100644 index a9f10c31bc8..00000000000 --- a/src/include/common/unicode_combining_table.h +++ /dev/null @@ -1,196 +0,0 @@ -/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */ - -static const struct mbinterval combining[] = { - {0x0300, 0x036F}, - {0x0483, 0x0489}, - {0x0591, 0x05BD}, - {0x05BF, 0x05BF}, - {0x05C1, 0x05C2}, - {0x05C4, 0x05C5}, - {0x05C7, 0x05C7}, - {0x0610, 0x061A}, - {0x064B, 0x065F}, - {0x0670, 0x0670}, - {0x06D6, 0x06DC}, - {0x06DF, 0x06E4}, - {0x06E7, 0x06E8}, - {0x06EA, 0x06ED}, - {0x0711, 0x0711}, - {0x0730, 0x074A}, - {0x07A6, 0x07B0}, - {0x07EB, 0x07F3}, - {0x07FD, 0x07FD}, - {0x0816, 0x0819}, - {0x081B, 0x0823}, - {0x0825, 0x0827}, - {0x0829, 0x082D}, - {0x0859, 0x085B}, - {0x08D3, 0x08E1}, - {0x08E3, 0x0902}, - {0x093A, 0x093A}, - {0x093C, 0x093C}, - {0x0941, 0x0948}, - {0x094D, 0x094D}, - {0x0951, 0x0957}, - {0x0962, 0x0963}, - {0x0981, 0x0981}, - {0x09BC, 0x09BC}, - {0x09C1, 0x09C4}, - {0x09CD, 0x09CD}, - {0x09E2, 0x09E3}, - {0x09FE, 0x0A02}, - {0x0A3C, 0x0A3C}, - {0x0A41, 0x0A51}, - {0x0A70, 0x0A71}, - {0x0A75, 0x0A75}, - {0x0A81, 0x0A82}, - {0x0ABC, 0x0ABC}, - {0x0AC1, 0x0AC8}, - {0x0ACD, 0x0ACD}, - {0x0AE2, 0x0AE3}, - {0x0AFA, 0x0B01}, - {0x0B3C, 0x0B3C}, - {0x0B3F, 0x0B3F}, - {0x0B41, 0x0B44}, - {0x0B4D, 0x0B56}, - {0x0B62, 0x0B63}, - {0x0B82, 0x0B82}, - {0x0BC0, 0x0BC0}, - {0x0BCD, 0x0BCD}, - {0x0C00, 0x0C00}, - {0x0C04, 0x0C04}, - {0x0C3E, 0x0C40}, - {0x0C46, 0x0C56}, - {0x0C62, 0x0C63}, - {0x0C81, 0x0C81}, - {0x0CBC, 0x0CBC}, - {0x0CBF, 0x0CBF}, - {0x0CC6, 0x0CC6}, - {0x0CCC, 0x0CCD}, - {0x0CE2, 0x0CE3}, - {0x0D00, 0x0D01}, - {0x0D3B, 0x0D3C}, - {0x0D41, 0x0D44}, - {0x0D4D, 0x0D4D}, - {0x0D62, 0x0D63}, - {0x0D81, 0x0D81}, - {0x0DCA, 0x0DCA}, - {0x0DD2, 0x0DD6}, - {0x0E31, 0x0E31}, - {0x0E34, 0x0E3A}, - {0x0E47, 0x0E4E}, - {0x0EB1, 0x0EB1}, - {0x0EB4, 0x0EBC}, - {0x0EC8, 0x0ECD}, - {0x0F18, 0x0F19}, - {0x0F35, 0x0F35}, - {0x0F37, 0x0F37}, - {0x0F39, 0x0F39}, - {0x0F71, 0x0F7E}, - {0x0F80, 0x0F84}, - {0x0F86, 0x0F87}, - {0x0F8D, 0x0FBC}, - {0x0FC6, 0x0FC6}, - {0x102D, 0x1030}, - {0x1032, 0x1037}, - {0x1039, 0x103A}, - {0x103D, 0x103E}, - {0x1058, 0x1059}, - {0x105E, 0x1060}, - {0x1071, 0x1074}, - {0x1082, 0x1082}, - {0x1085, 0x1086}, - {0x108D, 0x108D}, - {0x109D, 0x109D}, - {0x135D, 0x135F}, - {0x1712, 0x1714}, - {0x1732, 0x1734}, - {0x1752, 0x1753}, - {0x1772, 0x1773}, - {0x17B4, 0x17B5}, - {0x17B7, 0x17BD}, - {0x17C6, 0x17C6}, - {0x17C9, 0x17D3}, - {0x17DD, 0x17DD}, - {0x180B, 0x180D}, - {0x1885, 0x1886}, - {0x18A9, 0x18A9}, - {0x1920, 0x1922}, - {0x1927, 0x1928}, - {0x1932, 0x1932}, - {0x1939, 0x193B}, - {0x1A17, 0x1A18}, - {0x1A1B, 0x1A1B}, - {0x1A56, 0x1A56}, - {0x1A58, 0x1A60}, - {0x1A62, 0x1A62}, - {0x1A65, 0x1A6C}, - {0x1A73, 0x1A7F}, - {0x1AB0, 0x1B03}, - {0x1B34, 0x1B34}, - {0x1B36, 0x1B3A}, - {0x1B3C, 0x1B3C}, - {0x1B42, 0x1B42}, - {0x1B6B, 0x1B73}, - {0x1B80, 0x1B81}, - {0x1BA2, 0x1BA5}, - {0x1BA8, 0x1BA9}, - {0x1BAB, 0x1BAD}, - {0x1BE6, 0x1BE6}, - {0x1BE8, 0x1BE9}, - {0x1BED, 0x1BED}, - {0x1BEF, 0x1BF1}, - {0x1C2C, 0x1C33}, - {0x1C36, 0x1C37}, - {0x1CD0, 0x1CD2}, - {0x1CD4, 0x1CE0}, - {0x1CE2, 0x1CE8}, - {0x1CED, 0x1CED}, - {0x1CF4, 0x1CF4}, - {0x1CF8, 0x1CF9}, - {0x1DC0, 0x1DFF}, - {0x20D0, 0x20F0}, - {0x2CEF, 0x2CF1}, - {0x2D7F, 0x2D7F}, - {0x2DE0, 0x2DFF}, - {0x302A, 0x302D}, - {0x3099, 0x309A}, - {0xA66F, 0xA672}, - {0xA674, 0xA67D}, - {0xA69E, 0xA69F}, - {0xA6F0, 0xA6F1}, - {0xA802, 0xA802}, - {0xA806, 0xA806}, - {0xA80B, 0xA80B}, - {0xA825, 0xA826}, - {0xA82C, 0xA82C}, - {0xA8C4, 0xA8C5}, - {0xA8E0, 0xA8F1}, - {0xA8FF, 0xA8FF}, - {0xA926, 0xA92D}, - {0xA947, 0xA951}, - {0xA980, 0xA982}, - {0xA9B3, 0xA9B3}, - {0xA9B6, 0xA9B9}, - {0xA9BC, 0xA9BD}, - {0xA9E5, 0xA9E5}, - {0xAA29, 0xAA2E}, - {0xAA31, 0xAA32}, - {0xAA35, 0xAA36}, - {0xAA43, 0xAA43}, - {0xAA4C, 0xAA4C}, - {0xAA7C, 0xAA7C}, - {0xAAB0, 0xAAB0}, - {0xAAB2, 0xAAB4}, - {0xAAB7, 0xAAB8}, - {0xAABE, 0xAABF}, - {0xAAC1, 0xAAC1}, - {0xAAEC, 0xAAED}, - {0xAAF6, 0xAAF6}, - {0xABE5, 0xABE5}, - {0xABE8, 0xABE8}, - {0xABED, 0xABED}, - {0xFB1E, 0xFB1E}, - {0xFE00, 0xFE0F}, - {0xFE20, 0xFE2F}, -}; diff --git a/src/include/common/unicode_width_table.h b/src/include/common/unicode_width_table.h new file mode 100644 index 00000000000..3a02ed5c626 --- /dev/null +++ b/src/include/common/unicode_width_table.h @@ -0,0 +1,196 @@ +/* generated by src/common/unicode/generate-unicode_width_table.pl, do not edit */ + +static const struct mbinterval combining[] = { + {0x0300, 0x036F}, + {0x0483, 0x0489}, + {0x0591, 0x05BD}, + {0x05BF, 0x05BF}, + {0x05C1, 0x05C2}, + {0x05C4, 0x05C5}, + {0x05C7, 0x05C7}, + {0x0610, 0x061A}, + {0x064B, 0x065F}, + {0x0670, 0x0670}, + {0x06D6, 0x06DC}, + {0x06DF, 0x06E4}, + {0x06E7, 0x06E8}, + {0x06EA, 0x06ED}, + {0x0711, 0x0711}, + {0x0730, 0x074A}, + {0x07A6, 0x07B0}, + {0x07EB, 0x07F3}, + {0x07FD, 0x07FD}, + {0x0816, 0x0819}, + {0x081B, 0x0823}, + {0x0825, 0x0827}, + {0x0829, 0x082D}, + {0x0859, 0x085B}, + {0x08D3, 0x08E1}, + {0x08E3, 0x0902}, + {0x093A, 0x093A}, + {0x093C, 0x093C}, + {0x0941, 0x0948}, + {0x094D, 0x094D}, + {0x0951, 0x0957}, + {0x0962, 0x0963}, + {0x0981, 0x0981}, + {0x09BC, 0x09BC}, + {0x09C1, 0x09C4}, + {0x09CD, 0x09CD}, + {0x09E2, 0x09E3}, + {0x09FE, 0x0A02}, + {0x0A3C, 0x0A3C}, + {0x0A41, 0x0A51}, + {0x0A70, 0x0A71}, + {0x0A75, 0x0A75}, + {0x0A81, 0x0A82}, + {0x0ABC, 0x0ABC}, + {0x0AC1, 0x0AC8}, + {0x0ACD, 0x0ACD}, + {0x0AE2, 0x0AE3}, + {0x0AFA, 0x0B01}, + {0x0B3C, 0x0B3C}, + {0x0B3F, 0x0B3F}, + {0x0B41, 0x0B44}, + {0x0B4D, 0x0B56}, + {0x0B62, 0x0B63}, + {0x0B82, 0x0B82}, + {0x0BC0, 0x0BC0}, + {0x0BCD, 0x0BCD}, + {0x0C00, 0x0C00}, + {0x0C04, 0x0C04}, + {0x0C3E, 0x0C40}, + {0x0C46, 0x0C56}, + {0x0C62, 0x0C63}, + {0x0C81, 0x0C81}, + {0x0CBC, 0x0CBC}, + {0x0CBF, 0x0CBF}, + {0x0CC6, 0x0CC6}, + {0x0CCC, 0x0CCD}, + {0x0CE2, 0x0CE3}, + {0x0D00, 0x0D01}, + {0x0D3B, 0x0D3C}, + {0x0D41, 0x0D44}, + {0x0D4D, 0x0D4D}, + {0x0D62, 0x0D63}, + {0x0D81, 0x0D81}, + {0x0DCA, 0x0DCA}, + {0x0DD2, 0x0DD6}, + {0x0E31, 0x0E31}, + {0x0E34, 0x0E3A}, + {0x0E47, 0x0E4E}, + {0x0EB1, 0x0EB1}, + {0x0EB4, 0x0EBC}, + {0x0EC8, 0x0ECD}, + {0x0F18, 0x0F19}, + {0x0F35, 0x0F35}, + {0x0F37, 0x0F37}, + {0x0F39, 0x0F39}, + {0x0F71, 0x0F7E}, + {0x0F80, 0x0F84}, + {0x0F86, 0x0F87}, + {0x0F8D, 0x0FBC}, + {0x0FC6, 0x0FC6}, + {0x102D, 0x1030}, + {0x1032, 0x1037}, + {0x1039, 0x103A}, + {0x103D, 0x103E}, + {0x1058, 0x1059}, + {0x105E, 0x1060}, + {0x1071, 0x1074}, + {0x1082, 0x1082}, + {0x1085, 0x1086}, + {0x108D, 0x108D}, + {0x109D, 0x109D}, + {0x135D, 0x135F}, + {0x1712, 0x1714}, + {0x1732, 0x1734}, + {0x1752, 0x1753}, + {0x1772, 0x1773}, + {0x17B4, 0x17B5}, + {0x17B7, 0x17BD}, + {0x17C6, 0x17C6}, + {0x17C9, 0x17D3}, + {0x17DD, 0x17DD}, + {0x180B, 0x180D}, + {0x1885, 0x1886}, + {0x18A9, 0x18A9}, + {0x1920, 0x1922}, + {0x1927, 0x1928}, + {0x1932, 0x1932}, + {0x1939, 0x193B}, + {0x1A17, 0x1A18}, + {0x1A1B, 0x1A1B}, + {0x1A56, 0x1A56}, + {0x1A58, 0x1A60}, + {0x1A62, 0x1A62}, + {0x1A65, 0x1A6C}, + {0x1A73, 0x1A7F}, + {0x1AB0, 0x1B03}, + {0x1B34, 0x1B34}, + {0x1B36, 0x1B3A}, + {0x1B3C, 0x1B3C}, + {0x1B42, 0x1B42}, + {0x1B6B, 0x1B73}, + {0x1B80, 0x1B81}, + {0x1BA2, 0x1BA5}, + {0x1BA8, 0x1BA9}, + {0x1BAB, 0x1BAD}, + {0x1BE6, 0x1BE6}, + {0x1BE8, 0x1BE9}, + {0x1BED, 0x1BED}, + {0x1BEF, 0x1BF1}, + {0x1C2C, 0x1C33}, + {0x1C36, 0x1C37}, + {0x1CD0, 0x1CD2}, + {0x1CD4, 0x1CE0}, + {0x1CE2, 0x1CE8}, + {0x1CED, 0x1CED}, + {0x1CF4, 0x1CF4}, + {0x1CF8, 0x1CF9}, + {0x1DC0, 0x1DFF}, + {0x20D0, 0x20F0}, + {0x2CEF, 0x2CF1}, + {0x2D7F, 0x2D7F}, + {0x2DE0, 0x2DFF}, + {0x302A, 0x302D}, + {0x3099, 0x309A}, + {0xA66F, 0xA672}, + {0xA674, 0xA67D}, + {0xA69E, 0xA69F}, + {0xA6F0, 0xA6F1}, + {0xA802, 0xA802}, + {0xA806, 0xA806}, + {0xA80B, 0xA80B}, + {0xA825, 0xA826}, + {0xA82C, 0xA82C}, + {0xA8C4, 0xA8C5}, + {0xA8E0, 0xA8F1}, + {0xA8FF, 0xA8FF}, + {0xA926, 0xA92D}, + {0xA947, 0xA951}, + {0xA980, 0xA982}, + {0xA9B3, 0xA9B3}, + {0xA9B6, 0xA9B9}, + {0xA9BC, 0xA9BD}, + {0xA9E5, 0xA9E5}, + {0xAA29, 0xAA2E}, + {0xAA31, 0xAA32}, + {0xAA35, 0xAA36}, + {0xAA43, 0xAA43}, + {0xAA4C, 0xAA4C}, + {0xAA7C, 0xAA7C}, + {0xAAB0, 0xAAB0}, + {0xAAB2, 0xAAB4}, + {0xAAB7, 0xAAB8}, + {0xAABE, 0xAABF}, + {0xAAC1, 0xAAC1}, + {0xAAEC, 0xAAED}, + {0xAAF6, 0xAAF6}, + {0xABE5, 0xABE5}, + {0xABE8, 0xABE8}, + {0xABED, 0xABED}, + {0xFB1E, 0xFB1E}, + {0xFE00, 0xFE0F}, + {0xFE20, 0xFE2F}, +}; -- cgit v1.2.3