summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <[email protected]>2024-09-13 13:19:19 +0200
committerAllan Sandfeld Jensen <[email protected]>2024-09-17 08:27:53 +0200
commit308bca94a72f83624e2e2c92449719e06940e77f (patch)
tree35125707bb3440df20e703a5f9f1b324cc490840
parent0681e720a9851f1873ce5a5f99b5567d2b418261 (diff)
Clean-up MSVC ARM64 NEON support
Also fixing clang-cl support. Pick-to: 6.8 6.8.0 Change-Id: If2130091edfadc0cc4d4cecd95c2256522efc69a Reviewed-by: Maurice Kalinowski <[email protected]> Reviewed-by: Eirik Aavitsland <[email protected]>
-rw-r--r--src/corelib/global/qsimd_p.h53
-rw-r--r--src/corelib/text/qstring.cpp12
-rw-r--r--src/corelib/text/qstringconverter.cpp21
-rw-r--r--src/gui/painting/qcolortransform.cpp13
-rw-r--r--src/gui/painting/qdrawhelper_neon.cpp20
-rw-r--r--src/gui/painting/qpixellayout.cpp8
-rw-r--r--src/gui/painting/qrgba64_p.h6
7 files changed, 69 insertions, 64 deletions
diff --git a/src/corelib/global/qsimd_p.h b/src/corelib/global/qsimd_p.h
index b9cd296c9e8..a5dc6487c3b 100644
--- a/src/corelib/global/qsimd_p.h
+++ b/src/corelib/global/qsimd_p.h
@@ -283,6 +283,59 @@ inline uint8_t vaddv_u8(uint8x8_t v8)
}
#endif
+// Missing NEON intrinsics, needed due different type definitions:
+inline uint16x8_t qvsetq_n_u16(uint16_t v1, uint16_t v2, uint16_t v3, uint16_t v4,
+ uint16_t v5, uint16_t v6, uint16_t v7, uint16_t v8) {
+#if defined(Q_CC_MSVC) && !defined(Q_CC_CLANG)
+ using u64 = uint64_t;
+ const uint16x8_t vmask = {
+ v1 | (v2 << 16) | (u64(v3) << 32) | (u64(v4) << 48),
+ v5 | (v6 << 16) | (u64(v7) << 32) | (u64(v8) << 48)
+ };
+#else
+ const uint16x8_t vmask = { v1, v2, v3, v4, v5, v6, v7, v8 };
+#endif
+ return vmask;
+}
+inline uint8x8_t qvset_n_u8(uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
+ uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8) {
+#if defined(Q_CC_MSVC) && !defined(Q_CC_CLANG)
+ using u64 = uint64_t;
+ const uint8x8_t vmask = {
+ v1 | (v2 << 8) | (v3 << 16) | (v4 << 24) |
+ (u64(v5) << 32) | (u64(v6) << 40) | (u64(v7) << 48) | (u64(v8) << 56)
+ };
+#else
+ const uint8x8_t vmask = { v1, v2, v3, v4, v5, v6, v7, v8 };
+#endif
+ return vmask;
+}
+inline uint8x16_t qvsetq_n_u8(uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
+ uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8,
+ uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12,
+ uint8_t v13, uint8_t v14, uint8_t v15, uint8_t v16) {
+#if defined(Q_CC_MSVC) && !defined(Q_CC_CLANG)
+ using u64 = uint64_t;
+ const uint8x16_t vmask = {
+ v1 | (v2 << 8) | (v3 << 16) | (v4 << 24) |
+ (u64(v5) << 32) | (u64(v6) << 40) | (u64(v7) << 48) | (u64(v8) << 56),
+ v9 | (v10 << 8) | (v11 << 16) | (v12 << 24) |
+ (u64(v13) << 32) | (u64(v14) << 40) | (u64(v15) << 48) | (u64(v16) << 56)
+ };
+#else
+ const uint8x16_t vmask = { v1, v2, v3, v4, v5, v6, v7, v8,
+ v9, v10, v11, v12, v13, v14, v15, v16};
+#endif
+ return vmask;
+}
+inline uint32x4_t qvsetq_n_u32(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+#if defined(Q_CC_MSVC) && !defined(Q_CC_CLANG)
+ return uint32x4_t{ (uint64_t(b) << 32) | a, (uint64_t(d) << 32) | c };
+#else
+ return uint32x4_t{ a, b, c, d };
+#endif
+}
#endif
#if defined(Q_PROCESSOR_ARM) && defined(__ARM_FEATURE_CRC32)
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 077d9accded..c3d1594ac6b 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -749,11 +749,7 @@ const char16_t *QtPrivate::qustrchr(QStringView str, char16_t c) noexcept
[=](qsizetype i) { return n + i; });
# endif
#elif defined(__ARM_NEON__)
-#ifdef _MSC_VER
- const uint16x8_t vmask = { 0x0008000400020001ULL, 0x0080004000200010ULL };
-#else
- const uint16x8_t vmask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
-#endif
+ const uint16x8_t vmask = qvsetq_n_u16(1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7);
const uint16x8_t ch_vec = vdupq_n_u16(c);
for (const char16_t *next = n + 8; next <= e; n = next, next += 8) {
uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t *>(n));
@@ -1294,11 +1290,7 @@ static int ucstrncmp(const char16_t *a, const char16_t *b, size_t l)
# elif defined(__ARM_NEON__)
if (l >= 8) {
const char16_t *end = a + l;
-#ifdef _MSC_VER
- const uint16x8_t mask = { 0x0008000400020001ULL, 0x0080004000200010ULL };
-#else
- const uint16x8_t mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
-#endif
+ const uint16x8_t mask = qvsetq_n_u16( 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 );
while (end - a > 7) {
uint16x8_t da = vld1q_u16(reinterpret_cast<const uint16_t *>(a));
uint16x8_t db = vld1q_u16(reinterpret_cast<const uint16_t *>(b));
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index e411990a456..104c10a7674 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -351,12 +351,7 @@ static void simdCompareAscii(const qchar8_t *&src8, const qchar8_t *end8, const
static inline bool simdEncodeAscii(uchar *&dst, const char16_t *&nextAscii, const char16_t *&src, const char16_t *end)
{
uint16x8_t maxAscii = vdupq_n_u16(0x7f);
-#ifdef _MSC_VER
- uint16_t mask1t[8] = { 1, 1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 };
- uint16x8_t mask1 = vld1q_u16(mask1t);
-#else
- uint16x8_t mask1 = { 1, 1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 };
-#endif
+ uint16x8_t mask1 = qvsetq_n_u16(1, 1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 );
uint16x8_t mask2 = vshlq_n_u16(mask1, 1);
// do sixteen characters at a time
@@ -394,12 +389,7 @@ static inline bool simdDecodeAscii(char16_t *&dst, const uchar *&nextAscii, cons
{
// do eight characters at a time
uint8x8_t msb_mask = vdup_n_u8(0x80);
-#ifdef _MSC_VER
- uint8_t add_maskt[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
- uint8x8_t add_mask = vld1_u8(add_maskt);
-#else
- uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
-#endif
+ uint8x8_t add_mask = qvset_n_u8(1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 );
for ( ; end - src >= 8; src += 8, dst += 8) {
uint8x8_t c = vld1_u8(src);
uint8_t n = vaddv_u8(vand_u8(vcge_u8(c, msb_mask), add_mask));
@@ -435,12 +425,7 @@ static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end,
// do eight characters at a time
uint8x8_t msb_mask = vdup_n_u8(0x80);
-#ifdef _MSC_VER
- uint8_t add_maskt[8] = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
- uint8x8_t add_mask = vld1_u8(add_maskt);
-#else
- uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
-#endif
+ uint8x8_t add_mask = qvset_n_u8(1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7);
for ( ; end - src >= 8; src += 8) {
uint8x8_t c = vld1_u8(src);
uint8_t n = vaddv_u8(vand_u8(vcge_u8(c, msb_mask), add_mask));
diff --git a/src/gui/painting/qcolortransform.cpp b/src/gui/painting/qcolortransform.cpp
index 2cbc19b9626..42315986b7a 100644
--- a/src/gui/painting/qcolortransform.cpp
+++ b/src/gui/painting/qcolortransform.cpp
@@ -662,21 +662,12 @@ static inline bool test_all_zero(uint32x4_t p)
#endif
}
-static inline uint32x4_t vsetq_u32(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
-{
-#ifdef _MSC_VER
- return uint32x4_t{ (uint64_t(b) << 32) | a, (uint64_t(d) << 32) | c };
-#else
- return uint32x4_t{ a, b, c, d };
-#endif
-}
-
template<typename T>
static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
{
constexpr bool isARGB = isArgb<T>();
const float iFF00 = 1.0f / (255 * 256);
- const uint32x4_t vRangeMax = vsetq_u32(
+ const uint32x4_t vRangeMax = qvsetq_n_u32(
isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
: d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
@@ -749,7 +740,7 @@ void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len
{
constexpr bool isARGB = isArgb<T>();
const float iFF00 = 1.0f / (255 * 256);
- const uint32x4_t vRangeMax = vsetq_u32(
+ const uint32x4_t vRangeMax = qvsetq_n_u32(
isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
: d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index f8de0a85116..9233468a878 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -1069,12 +1069,10 @@ const uint * QT_FASTCALL qt_fetchUntransformed_888_neon(uint *buffer, const Oper
#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
static inline uint32x4_t vrgba2argb(uint32x4_t srcVector)
{
-#if defined(Q_PROCESSOR_ARM_64) && defined(_MSC_VER)
- const uint8x16_t rgbaMask = { 0x0704050603000102ULL, 0x0F0C0D0E0B08090AULL };
-#elif defined(Q_PROCESSOR_ARM_64)
- const uint8x16_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
+#if defined(Q_PROCESSOR_ARM_64)
+ const uint8x16_t rgbaMask = qvsetq_n_u8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
#else
- const uint8x8_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7 };
+ const uint8x8_t rgbaMask = qvset_n_u8(2, 1, 0, 3, 6, 5, 4, 7);
#endif
#if defined(Q_PROCESSOR_ARM_64)
srcVector = vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(srcVector), rgbaMask));
@@ -1091,11 +1089,7 @@ template<bool RGBA>
static inline void convertARGBToARGB32PM_neon(uint *buffer, const uint *src, int count)
{
int i = 0;
-#ifdef _MSC_VER
- const uint8x8_t shuffleMask = { 0x0707070703030303ULL };
-#else
- const uint8x8_t shuffleMask = { 3, 3, 3, 3, 7, 7, 7, 7};
-#endif
+ const uint8x8_t shuffleMask = qvset_n_u8(3, 3, 3, 3, 7, 7, 7, 7);
const uint32x4_t blendMask = vdupq_n_u32(0xff000000);
for (; i < count - 3; i += 4) {
@@ -1147,11 +1141,7 @@ static inline void convertARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uint *src
if (count <= 0)
return;
-#ifdef _MSC_VER
- const uint8x8_t shuffleMask = { 0x0707070703030303ULL };
-#else
- const uint8x8_t shuffleMask = { 3, 3, 3, 3, 7, 7, 7, 7};
-#endif
+ const uint8x8_t shuffleMask = qvset_n_u8(3, 3, 3, 3, 7, 7, 7, 7);
const uint64x2_t blendMask = vdupq_n_u64(Q_UINT64_C(0xffff000000000000));
int i = 0;
diff --git a/src/gui/painting/qpixellayout.cpp b/src/gui/painting/qpixellayout.cpp
index 31646d2f23d..dab337260d6 100644
--- a/src/gui/painting/qpixellayout.cpp
+++ b/src/gui/painting/qpixellayout.cpp
@@ -1087,12 +1087,10 @@ static inline void qConvertARGB32PMToRGBA64PM_neon(QRgba64 *buffer, const uint *
return;
const uint32x4_t amask = vdupq_n_u32(0xff000000);
-#if defined(Q_PROCESSOR_ARM_64) && defined(_MSC_VER)
- const uint8x16_t rgbaMask = { 0x0704050603000102ULL, 0x0F0C0D0E0B08090AULL };
-#elif defined(Q_PROCESSOR_ARM_64)
- const uint8x16_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
+#if defined(Q_PROCESSOR_ARM_64)
+ const uint8x16_t rgbaMask = qvsetq_n_u8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15);
#else
- const uint8x8_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7 };
+ const uint8x8_t rgbaMask = qvset_n_u8(2, 1, 0, 3, 6, 5, 4, 7);
#endif
int i = 0;
for (; i < count-3; i += 4) {
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h
index 6809f1d52cf..058d77f7e90 100644
--- a/src/gui/painting/qrgba64_p.h
+++ b/src/gui/painting/qrgba64_p.h
@@ -232,11 +232,7 @@ static inline uint toArgb32(QRgba64 rgba64)
#elif defined __ARM_NEON__
uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
-#ifdef _MSC_VER
- const uint8x8_t shuffleMask = { 0x0706010003020504ULL };
-#else
- const uint8x8_t shuffleMask = { 4, 5, 2, 3, 0, 1, 6, 7 };
-#endif
+ const uint8x8_t shuffleMask = qvset_n_u8(4, 5, 2, 3, 0, 1, 6, 7);
v = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(v), shuffleMask));
#else
v = vext_u16(v, v, 3);