summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tom Lane, 2015-05-15 15:03:54 +0000
committer: Tom Lane, 2015-05-15 15:04:02 +0000
commit: a868931fecdf93f3ceb1c9431bb93757b706269d (patch)
tree: 060d4ad6cd3574abe0e6720d4d7e3af0e7024517
parent: aff27e33797c5161a322a10b44a260d848b73154 (diff)
Fix insufficiently-paranoid GB18030 encoding verifier.
The previous coding effectively only verified that the second byte of a multibyte character was in the expected range; moreover, it wasn't careful to make sure that the second byte even exists in the buffer before touching it. The latter seems unlikely to cause any real problems in the field (in particular, it could never be a problem with null-terminated input), but it's still a bug. Since GB18030 is not a supported backend encoding, the only thing we'd really be doing with GB18030 text is converting it to UTF8 in LocalToUtf, which would fail anyway on any invalid character for lack of a match in its lookup table. So the only user-visible consequence of this change should be that you'll get "invalid byte sequence for encoding" rather than "character has no equivalent" for malformed GB18030 input. However, impending changes to the GB18030 conversion code will require these tighter up-front checks to avoid producing bogus results.
-rw-r--r-- src/backend/utils/mb/wchar.c 52
1 files changed, 29 insertions, 23 deletions
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index 0cc753e6684..fd51eedf7c6 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -1070,9 +1070,9 @@ pg_uhc_dsplen(const unsigned char *s)
}
/*
- * * GB18030
- * * Added by Bill Huang <[email protected]>,<[email protected]>
- * */
+ * GB18030
+ * Added by Bill Huang <[email protected]>,<[email protected]>
+ */
static int
pg_gb18030_mblen(const unsigned char *s)
{
@@ -1080,15 +1080,10 @@ pg_gb18030_mblen(const unsigned char *s)
if (!IS_HIGHBIT_SET(*s))
len = 1; /* ASCII */
+ else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
+ len = 4;
else
- {
- if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) || (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
- len = 2;
- else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
- len = 4;
- else
- len = 2;
- }
+ len = 2;
return len;
}
@@ -1403,21 +1398,32 @@ pg_uhc_verifier(const unsigned char *s, int len)
static int
pg_gb18030_verifier(const unsigned char *s, int len)
{
- int l,
- mbl;
-
- l = mbl = pg_gb18030_mblen(s);
-
- if (len < l)
- return -1;
+ int l;
- while (--l > 0)
+ if (!IS_HIGHBIT_SET(*s))
+ l = 1; /* ASCII */
+ else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
{
- if (*++s == '\0')
- return -1;
+ /* Should be 4-byte, validate remaining bytes */
+ if (*s >= 0x81 && *s <= 0xfe &&
+ *(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
+ *(s + 3) >= 0x30 && *(s + 3) <= 0x39)
+ l = 4;
+ else
+ l = -1;
}
-
- return mbl;
+ else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
+ {
+ /* Should be 2-byte, validate */
+ if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
+ (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
+ l = 2;
+ else
+ l = -1;
+ }
+ else
+ l = -1;
+ return l;
}
static int