diff options
author | Juha Vuolle <[email protected]> | 2024-06-07 07:24:59 +0300 |
---|---|---|
committer | Marc Mutz <[email protected]> | 2024-06-17 10:34:57 +0000 |
commit | f39b39b8c72b5c3428396d5e74cf15cafd5bbc42 (patch) | |
tree | f79c83b94df07eb852bb71aeea029e8212facb7e | |
parent | 4111059b2ee3818ab68033198fb1dd057e6d09eb (diff) |
Relax QHttpHeaders value field checks to allow UTF-8
UTF-8 doesn't per se contain illegal characters from
an HTTP header RFC point of view. UTF-8 is the dominant
character encoding outside of ASCII, and is used
directly at least with HTTP multipart headers.
[ChangeLog][QtNetwork][QHttpHeaders] Allows UTF-8 in header
values now.
Task-number: QTBUG-125985
Pick-to: 6.8 6.7
Change-Id: I2d8d8f00855c96c1d2ba190f2e27e2d48fcd4975
Reviewed-by: Marc Mutz <[email protected]>
-rw-r--r-- | src/network/access/qhttpheaders.cpp | 50 | ||||
-rw-r--r-- | tests/auto/network/access/qhttpheaders/tst_qhttpheaders.cpp | 3 |
2 files changed, 4 insertions, 49 deletions
diff --git a/src/network/access/qhttpheaders.cpp b/src/network/access/qhttpheaders.cpp index c63da899a84..bfe6cb1419a 100644 --- a/src/network/access/qhttpheaders.cpp +++ b/src/network/access/qhttpheaders.cpp @@ -988,50 +988,6 @@ QDebug operator<<(QDebug debug, const QHttpHeaders &headers) } #endif -// A clarification on string encoding: -// Setters and getters only accept names and values that are Latin-1 representable: -// Either they are directly ASCII/Latin-1, or if they are UTF-X, they only use first 256 -// of the unicode points. For example using a '€' (U+20AC) in value would yield a warning -// and the call is ignored. -// Furthermore the 'name' has more strict rules than the 'value' - -// TODO FIXME REMOVEME once this is merged: -// https://codereview.qt-project.org/c/qt/qtbase/+/508829 -static bool isUtf8Latin1Representable(QUtf8StringView s) noexcept -{ - // L1 encoded in UTF8 has at most the form - // - 0b0XXX'XXXX - US-ASCII - // - 0b1100'00XX 0b10XX'XXXX - at most 8 non-zero LSB bits allowed in L1 - bool inMultibyte = false; - for (unsigned char c : s) { - if (c < 128) { // US-ASCII - if (inMultibyte) - return false; // invalid sequence - } else { - // decode as UTF-8: - if ((c & 0b1110'0000) == 0b1100'0000) { // two-octet UTF-8 leader - if (inMultibyte) - return false; // invalid sequence - inMultibyte = true; - const auto bits_7_to_11 = c & 0b0001'1111; - if (bits_7_to_11 < 0b10) - return false; // invalid sequence (US-ASCII encoded in two octets) - if (bits_7_to_11 > 0b11) // more than the two LSB - return false; // outside L1 - } else if ((c & 0b1100'0000) == 0b1000'0000) { // trailing UTF-8 octet - if (!inMultibyte) - return false; // invalid sequence - inMultibyte = false; // only one continuation allowed - } else { - return false; // invalid sequence or outside of L1 - } - } - } - if (inMultibyte) - return false; // invalid sequence: premature end - return true; -} - static constexpr auto 
isValidHttpHeaderNameChar = [](uchar c) noexcept { // RFC 9110 Chapters "5.1 Field Names" and "5.6.2 Tokens" @@ -1106,8 +1062,10 @@ static bool headerValueValidImpl(QLatin1StringView value) noexcept static bool headerValueValidImpl(QUtf8StringView value) noexcept { - if (!isUtf8Latin1Representable(value)) // TODO FIXME see the function - return false; + // UTF-8 byte sequences are also used as values directly + // => allow them as such. UTF-8 byte sequences for characters + // outside of ASCII should all fit into obs-text (>= 0x80) + // (see isValidHttpHeaderValueChar) return std::all_of(value.begin(), value.end(), isValidHttpHeaderValueChar); } diff --git a/tests/auto/network/access/qhttpheaders/tst_qhttpheaders.cpp b/tests/auto/network/access/qhttpheaders/tst_qhttpheaders.cpp index 457d30feeb8..5b6be3c7b4c 100644 --- a/tests/auto/network/access/qhttpheaders/tst_qhttpheaders.cpp +++ b/tests/auto/network/access/qhttpheaders/tst_qhttpheaders.cpp @@ -436,9 +436,6 @@ void tst_QHttpHeaders::headerValueField() QRegularExpression re("HTTP header value contained illegal character*"); TEST_ILLEGAL_HEADER_VALUE_CHARACTER("foo\x08" "bar"); // BS TEST_ILLEGAL_HEADER_VALUE_CHARACTER("foo\x1B" "bar"); // ESC - // Disallowed UTF-8 characters - TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u8"€"); - TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u8"ππ΄ππ‘ππππππ©πΈπΉπππππππππππππ΄π΅π‘π’π£π"); // Disallowed UTF-16 characters TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u"€"); TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u"ππ΄ππ‘ππππππ©πΈπΉπππππππππππππ΄π΅π‘π’π£π"); |