summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Juha Vuolle <[email protected]> 2024-06-07 07:24:59 +0300
committer Marc Mutz <[email protected]> 2024-06-17 10:34:57 +0000
commit f39b39b8c72b5c3428396d5e74cf15cafd5bbc42 (patch)
tree f79c83b94df07eb852bb71aeea029e8212facb7e
parent 4111059b2ee3818ab68033198fb1dd057e6d09eb (diff)
Relax QHttpHeaders value field checks to allow UTF-8
UTF-8 doesn't per se contain illegal characters from an HTTP header RFC
point of view. UTF-8 is the dominant character encoding outside of ASCII,
and is used directly at least with HTTP multipart headers.

[ChangeLog][QtNetwork][QHttpHeaders] Allows UTF-8 in header values now.

Task-number: QTBUG-125985
Pick-to: 6.8 6.7
Change-Id: I2d8d8f00855c96c1d2ba190f2e27e2d48fcd4975
Reviewed-by: Marc Mutz <[email protected]>
-rw-r--r-- src/network/access/qhttpheaders.cpp 50
-rw-r--r-- tests/auto/network/access/qhttpheaders/tst_qhttpheaders.cpp 3
2 files changed, 4 insertions, 49 deletions
diff --git a/src/network/access/qhttpheaders.cpp b/src/network/access/qhttpheaders.cpp
index c63da899a84..bfe6cb1419a 100644
--- a/src/network/access/qhttpheaders.cpp
+++ b/src/network/access/qhttpheaders.cpp
@@ -988,50 +988,6 @@ QDebug operator<<(QDebug debug, const QHttpHeaders &headers)
}
#endif
-// A clarification on string encoding:
-// Setters and getters only accept names and values that are Latin-1 representable:
-// Either they are directly ASCII/Latin-1, or if they are UTF-X, they only use first 256
-// of the unicode points. For example using a '€' (U+20AC) in value would yield a warning
-// and the call is ignored.
-// Furthermore the 'name' has more strict rules than the 'value'
-
-// TODO FIXME REMOVEME once this is merged:
-// https://codereview.qt-project.org/c/qt/qtbase/+/508829
-static bool isUtf8Latin1Representable(QUtf8StringView s) noexcept
-{
- // L1 encoded in UTF8 has at most the form
- // - 0b0XXX'XXXX - US-ASCII
- // - 0b1100'00XX 0b10XX'XXXX - at most 8 non-zero LSB bits allowed in L1
- bool inMultibyte = false;
- for (unsigned char c : s) {
- if (c < 128) { // US-ASCII
- if (inMultibyte)
- return false; // invalid sequence
- } else {
- // decode as UTF-8:
- if ((c & 0b1110'0000) == 0b1100'0000) { // two-octet UTF-8 leader
- if (inMultibyte)
- return false; // invalid sequence
- inMultibyte = true;
- const auto bits_7_to_11 = c & 0b0001'1111;
- if (bits_7_to_11 < 0b10)
- return false; // invalid sequence (US-ASCII encoded in two octets)
- if (bits_7_to_11 > 0b11) // more than the two LSB
- return false; // outside L1
- } else if ((c & 0b1100'0000) == 0b1000'0000) { // trailing UTF-8 octet
- if (!inMultibyte)
- return false; // invalid sequence
- inMultibyte = false; // only one continuation allowed
- } else {
- return false; // invalid sequence or outside of L1
- }
- }
- }
- if (inMultibyte)
- return false; // invalid sequence: premature end
- return true;
-}
-
static constexpr auto isValidHttpHeaderNameChar = [](uchar c) noexcept
{
// RFC 9110 Chapters "5.1 Field Names" and "5.6.2 Tokens"
@@ -1106,8 +1062,10 @@ static bool headerValueValidImpl(QLatin1StringView value) noexcept
static bool headerValueValidImpl(QUtf8StringView value) noexcept
{
- if (!isUtf8Latin1Representable(value)) // TODO FIXME see the function
- return false;
+ // UTF-8 byte sequences are also used as values directly
+ // => allow them as such. UTF-8 byte sequences for characters
+ // outside of ASCII should all fit into obs-text (>= 0x80)
+ // (see isValidHttpHeaderValueChar)
return std::all_of(value.begin(), value.end(), isValidHttpHeaderValueChar);
}
diff --git a/tests/auto/network/access/qhttpheaders/tst_qhttpheaders.cpp b/tests/auto/network/access/qhttpheaders/tst_qhttpheaders.cpp
index 457d30feeb8..5b6be3c7b4c 100644
--- a/tests/auto/network/access/qhttpheaders/tst_qhttpheaders.cpp
+++ b/tests/auto/network/access/qhttpheaders/tst_qhttpheaders.cpp
@@ -436,9 +436,6 @@ void tst_QHttpHeaders::headerValueField()
QRegularExpression re("HTTP header value contained illegal character*");
TEST_ILLEGAL_HEADER_VALUE_CHARACTER("foo\x08" "bar"); // BS
TEST_ILLEGAL_HEADER_VALUE_CHARACTER("foo\x1B" "bar"); // ESC
- // Disallowed UTF-8 characters
- TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u8"€");
- TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u8"𝒜𝒴𝟘𝟡𐎀𐎜𐒀𐒐𝓐𝓩𝔸𝔹𝕀𝕁𝕌𝕍𓂀𓂁𓃀𓃁𓇋𓇌𓉐𓉑𓋴𓋵𓎡𓎢𓎣𓏏");
// Disallowed UTF-16 characters
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u"€");
TEST_ILLEGAL_HEADER_VALUE_CHARACTER(u"𝒜𝒴𝟘𝟡𐎀𐎜𐒀𐒐𝓐𝓩𝔸𝔹𝕀𝕁𝕌𝕍𓂀𓂁𓃀𓃁𓇋𓇌𓉐𓉑𓋴𓋵𓎡𓎢𓎣𓏏");