Modify wchar conversion routines to not fetch the next byte past the end
author: Tom Lane <[email protected]>
Thu, 8 Mar 2001 00:24:34 +0000 (00:24 +0000)
committer: Tom Lane <[email protected]>
Thu, 8 Mar 2001 00:24:34 +0000 (00:24 +0000)
of a counted input string.  Marinos Yannikos' recent crash report turns
out to be due to applying pg_ascii2wchar_with_len to a TEXT object that
is smack up against the end of memory.  This is the second
just-barely-reproducible bug report I have seen that traces to some bit of code
fetching one more byte than it is allowed to.  Let's be more careful
out there, boys and girls.
While at it, I changed the code to not risk a similar crash when there
is a truncated multibyte character at the end of an input string.  The
output in this case might not be the most reasonable output possible;
if anyone wants to improve it further, step right up...

src/backend/utils/mb/mbutils.c
src/backend/utils/mb/wchar.c

index 842bfd908693c40366769f452c2ad17db171affa..2529005b9a5274b16fede060ad9f99b9e525baa5 100644 (file)
@@ -230,7 +230,7 @@ pg_mbstrlen_with_len(const unsigned char *mbstr, int limit)
        int                     len = 0;
        int                     l;
 
-       while (*mbstr && limit > 0)
+       while (limit > 0 && *mbstr)
        {
                l = pg_mblen(mbstr);
                limit -= l;
@@ -252,7 +252,7 @@ pg_mbcliplen(const unsigned char *mbstr, int len, int limit)
        int                     clen = 0;
        int                     l;
 
-       while (*mbstr && len > 0)
+       while (len > 0 && *mbstr)
        {
                l = pg_mblen(mbstr);
                if ((clen + l) > limit)
@@ -267,7 +267,7 @@ pg_mbcliplen(const unsigned char *mbstr, int len, int limit)
 }
 
 /*
- * fuctions for utils/init
+ * functions for utils/init
  */
 static int     DatabaseEncoding = MULTIBYTE;
 
index 9e23cf19db3fb8ff9843c583dfdf4ff69a47062c..ccdb0af9d3f0676f90a30b4d323061974f91935b 100644 (file)
@@ -27,7 +27,7 @@ static int pg_ascii2wchar_with_len
 {
        int cnt = 0;
 
-       while (*from && len > 0)
+       while (len > 0 && *from)
        {
                *to++ = *from++;
                len--;
@@ -52,23 +52,22 @@ static int pg_euc2wchar_with_len
 {
        int cnt = 0;
 
-       while (*from && len > 0)
+       while (len > 0 && *from)
        {
-               if (*from == SS2)
+               if (*from == SS2 && len >= 2)
                {
                        from++;
-                       len--;
                        *to = 0xff & *from++;
-                       len--;
+                       len -= 2;
                }
-               else if (*from == SS3)
+               else if (*from == SS3 && len >= 3)
                {
                        from++;
                        *to = *from++ << 8;
                        *to |= 0x3f & *from++;
                        len -= 3;
                }
-               else if (*from & 0x80)
+               else if ((*from & 0x80) && len >= 2)
                {
                        *to = *from++ << 8;
                        *to |= *from++;
@@ -140,24 +139,23 @@ static int pg_euccn2wchar_with_len
 {
        int cnt = 0;
 
-       while (*from && len > 0)
+       while (len > 0 && *from)
        {
-               if (*from == SS2)
+               if (*from == SS2 && len >= 3)
                {
                        from++;
-                       len--;
                        *to = 0x3f00 & (*from++ << 8);
                        *to = *from++;
-                       len -= 2;
+                       len -= 3;
                }
-               else if (*from == SS3)
+               else if (*from == SS3 && len >= 3)
                {
                        from++;
                        *to = *from++ << 8;
                        *to |= 0x3f & *from++;
                        len -= 3;
                }
-               else if (*from & 0x80)
+               else if ((*from & 0x80) && len >= 2)
                {
                        *to = *from++ << 8;
                        *to |= *from++;
@@ -195,25 +193,24 @@ static int pg_euctw2wchar_with_len
 {
        int cnt = 0;
 
-       while (*from && len > 0)
+       while (len > 0 && *from)
        {
-               if (*from == SS2)
+               if (*from == SS2 && len >= 4)
                {
                        from++;
-                       len--;
                        *to = *from++ << 16;
                        *to |= *from++ << 8;
                        *to |= *from++;
-                       len -= 3;
+                       len -= 4;
                }
-               else if (*from == SS3)
+               else if (*from == SS3 && len >= 3)
                {
                        from++;
                        *to = *from++ << 8;
                        *to |= 0x3f & *from++;
                        len -= 3;
                }
-               else if (*from & 0x80)
+               else if ((*from & 0x80) && len >= 2)
                {
                        *to = *from++ << 8;
                        *to |= *from++;
@@ -261,30 +258,30 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar * to, int len)
                                c3;
        int cnt = 0;
 
-       while (*from && len > 0)
+       while (len > 0 && *from)
        {
                if ((*from & 0x80) == 0)
                {
                        *to = *from++;
                        len--;
                }
-               else if ((*from & 0xe0) == 0xc0)
+               else if ((*from & 0xe0) == 0xc0 && len >= 2)
                {
                        c1 = *from++ & 0x1f;
                        c2 = *from++ & 0x3f;
-                       len -= 2;
                        *to = c1 << 6;
                        *to |= c2;
+                       len -= 2;
                }
-               else if ((*from & 0xe0) == 0xe0)
+               else if ((*from & 0xe0) == 0xe0 && len >= 3)
                {
                        c1 = *from++ & 0x0f;
                        c2 = *from++ & 0x3f;
                        c3 = *from++ & 0x3f;
-                       len -= 3;
                        *to = c1 << 12;
                        *to |= c2 << 6;
                        *to |= c3;
+                       len -= 3;
                }
                else
                {
@@ -326,29 +323,29 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar * to, int len)
 {
        int cnt = 0;
 
-       while (*from && len > 0)
+       while (len > 0 && *from)
        {
-               if (IS_LC1(*from))
+               if (IS_LC1(*from) && len >= 2)
                {
                        *to = *from++ << 16;
                        *to |= *from++;
                        len -= 2;
                }
-               else if (IS_LCPRV1(*from))
+               else if (IS_LCPRV1(*from) && len >= 3)
                {
                        from++;
                        *to = *from++ << 16;
                        *to |= *from++;
                        len -= 3;
                }
-               else if (IS_LC2(*from))
+               else if (IS_LC2(*from) && len >= 3)
                {
                        *to = *from++ << 16;
                        *to |= *from++ << 8;
                        *to |= *from++;
                        len -= 3;
                }
-               else if (IS_LCPRV2(*from))
+               else if (IS_LCPRV2(*from) && len >= 4)
                {
                        from++;
                        *to = *from++ << 16;
@@ -396,9 +393,10 @@ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar * to, int len)
 {
        int cnt = 0;
 
-       while (*from && len-- > 0)
+       while (len > 0 && *from)
        {
                *to++ = *from++;
+               len--;
                cnt++;
        }
        *to = 0;