Avoid character classification in regex escape parsing.
author: Jeff Davis <[email protected]>
Fri, 21 Apr 2023 15:19:41 +0000 (08:19 -0700)
committer: Jeff Davis <[email protected]>
Fri, 21 Apr 2023 15:20:47 +0000 (08:20 -0700)
For regex escape sequences, just test directly for the relevant ASCII
characters rather than using locale-sensitive character
classification.

This fixes an assertion failure when a locale considers a non-ASCII
character, such as "൧", to be a digit.

Reported-by: Richard Guo
Discussion: https://postgr.es/m/CAMbWs49Q6UoKGeT8pBkMtJGJd+16CBFZaaWUk9Du+2ERE5g_YA@mail.gmail.com
Backpatch-through: 11

src/backend/regex/regc_lex.c

index d573032db67a610b3bd95191ad14ed79f3a8bec5..4e6fa5c61325df7ab183b00000164dac8974b3b0 100644 (file)
@@ -750,7 +750,11 @@ lexescape(struct vars *v)
 
    assert(!ATEOS());
    c = *v->now++;
-   if (!iscalnum(c))
+
+   /* if it's not alphanumeric ASCII, treat it as a plain character */
+   if (!('a' <= c && c <= 'z') &&
+       !('A' <= c && c <= 'Z') &&
+       !('0' <= c && c <= '9'))
        RETV(PLAIN, c);
 
    NOTE(REG_UNONPOSIX);
@@ -892,8 +896,11 @@ lexescape(struct vars *v)
            RETV(PLAIN, c);
            break;
        default:
-           assert(iscalpha(c));
-           FAILW(REG_EESCAPE); /* unknown alphabetic escape */
+           /*
+            * Throw an error for unrecognized ASCII alpha escape sequences,
+            * which reserves them for future use if needed.
+            */
+           FAILW(REG_EESCAPE);
            break;
    }
    assert(NOTREACHED);