5 files changed, 46 insertions, 48 deletions
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 30d0aab8558..99a35345453 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -605,7 +605,7 @@ public:
     }
 };
 
-static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) {
+static inline uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) {
     uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
     t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8;
     t &= 0xff00ff;
@@ -619,7 +619,7 @@ static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b
 
 #if Q_PROCESSOR_WORDSIZE == 8 // 64-bit versions
 
-static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+static inline uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
     quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
     t += (((quint64(y)) | ((quint64(y)) << 24)) & 0x00ff00ff00ff00ff) * b;
     t >>= 8;
@@ -627,7 +627,7 @@ static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b
     return (uint(t)) | (uint(t >> 24));
 }
 
-static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
+static inline uint BYTE_MUL(uint x, uint a) {
     quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
     t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8;
     t &= 0x00ff00ff00ff00ff;
@@ -636,7 +636,7 @@ static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
 
 #else // 32-bit versions
 
-static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+static inline uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
     uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
     t >>= 8;
     t &= 0xff00ff;
@@ -647,7 +647,7 @@ static Q_ALWAYS_INLINE uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b
     return x;
 }
 
-static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
+static inline uint BYTE_MUL(uint x, uint a) {
     uint t = (x & 0xff00ff) * a;
     t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8;
     t &= 0xff00ff;
@@ -660,7 +660,7 @@ static Q_ALWAYS_INLINE uint BYTE_MUL(uint x, uint a) {
 }
 #endif
 
-static Q_ALWAYS_INLINE void blend_pixel(quint32 &dst, const quint32 src)
+static inline void blend_pixel(quint32 &dst, const quint32 src)
 {
     if (src >= 0xff000000)
         dst = src;
@@ -668,7 +668,7 @@ static Q_ALWAYS_INLINE void blend_pixel(quint32 &dst, const quint32 src)
         dst = src + BYTE_MUL(dst, qAlpha(~src));
 }
 
-static Q_ALWAYS_INLINE void blend_pixel(quint32 &dst, const quint32 src, const int const_alpha)
+static inline void blend_pixel(quint32 &dst, const quint32 src, const int const_alpha)
 {
     if (const_alpha == 255)
         return blend_pixel(dst, src);
@@ -679,7 +679,7 @@ static Q_ALWAYS_INLINE void blend_pixel(quint32 &dst, const quint32 src, const i
 }
 
 #if defined(__SSE2__)
-static Q_ALWAYS_INLINE uint interpolate_4_pixels_sse2(__m128i vt, __m128i vb, uint distx, uint disty)
+static inline uint Q_DECL_VECTORCALL interpolate_4_pixels_sse2(__m128i vt, __m128i vb, uint distx, uint disty)
 {
     // First interpolate top and bottom pixels in parallel.
     vt = _mm_unpacklo_epi8(vt, _mm_setzero_si128());
@@ -720,7 +720,7 @@ static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint dis
 static constexpr inline bool hasFastInterpolate4() { return true; }
 
 #elif defined(__ARM_NEON__)
-static Q_ALWAYS_INLINE uint interpolate_4_pixels_neon(uint32x2_t vt32, uint32x2_t vb32, uint distx, uint disty)
+static inline uint interpolate_4_pixels_neon(uint32x2_t vt32, uint32x2_t vb32, uint distx, uint disty)
 {
     uint16x8_t vt16 = vmovl_u8(vreinterpret_u8_u32(vt32));
     uint16x8_t vb16 = vmovl_u8(vreinterpret_u8_u32(vb32));
@@ -844,24 +844,24 @@ static inline QRgba64 interpolate_4_pixels_rgb64(const QRgba64 t[], const QRgba6
 }
 #endif // __SSE2__
 
-static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16(uint x, uint a) {
+static inline uint BYTE_MUL_RGB16(uint x, uint a) { // scales the 0x07e0 field of x by (a+1)/256 and the 0xf81f fields by ((a+1)>>2)/64
     a += 1;
     uint t = (((x & 0x07e0)*a) >> 8) & 0x07e0;
     t |= (((x & 0xf81f)*(a>>2)) >> 6) & 0xf81f;
     return t;
 }
 
-static Q_ALWAYS_INLINE uint BYTE_MUL_RGB16_32(uint x, uint a) {
+static inline uint BYTE_MUL_RGB16_32(uint x, uint a) { // scales the 0xf81f07e0 and 0x07e0f81f field groups of x by a/32, then recombines them
     uint t = (((x & 0xf81f07e0) >> 5)*a) & 0xf81f07e0;
     t |= (((x & 0x07e0f81f)*a) >> 5) & 0x07e0f81f;
     return t;
 }
 
 // qt_div_255 is a fast rounded division by 255 using an approximation that is accurate for all positive 16-bit integers
-static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE int qt_div_255(int x) { return (x + (x>>8) + 0x80) >> 8; }
-static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_257_floor(uint x) { return  (x - (x >> 8)) >> 8; }
-static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_257(uint x) { return qt_div_257_floor(x + 128); }
-static Q_DECL_CONSTEXPR Q_ALWAYS_INLINE uint qt_div_65535(uint x) { return (x + (x>>16) + 0x8000U) >> 16; }
+static Q_DECL_CONSTEXPR inline int qt_div_255(int x) { return (x + (x>>8) + 0x80) >> 8; } // shift-and-add form; 0x80 is the rounding bias
+static Q_DECL_CONSTEXPR inline uint qt_div_257_floor(uint x) { return  (x - (x >> 8)) >> 8; } // presumably x/257 rounded down, per the name; valid input range not shown here
+static Q_DECL_CONSTEXPR inline uint qt_div_257(uint x) { return qt_div_257_floor(x + 128); } // adds a bias of 128 before the floor variant
+static Q_DECL_CONSTEXPR inline uint qt_div_65535(uint x) { return (x + (x>>16) + 0x8000U) >> 16; } // same shape as qt_div_255 with 16-bit shifts and a 0x8000 bias
 
 template <class T> inline void qt_memfill_template(T *dest, T color, qsizetype count)
 {
diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
index cc8d230fa82..8b91c590b86 100644
--- a/src/gui/painting/qdrawingprimitive_sse2_p.h
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -232,7 +232,7 @@ QT_END_NAMESPACE
 QT_BEGIN_NAMESPACE
 #if QT_COMPILER_SUPPORTS_HERE(SSE4_1)
 QT_FUNCTION_TARGET(SSE2)
-Q_ALWAYS_INLINE void Q_DECL_VECTORCALL reciprocal_mul_ss(__m128 &ia, const __m128 a, float mul)
+static inline void Q_DECL_VECTORCALL reciprocal_mul_ss(__m128 &ia, const __m128 a, float mul)
 {
     ia = _mm_rcp_ss(a); // Approximate 1/a
     // Improve precision of ia using Newton-Raphson
@@ -242,7 +242,7 @@ Q_ALWAYS_INLINE void Q_DECL_VECTORCALL reciprocal_mul_ss(__m128 &ia, const __m12
 }
 
 QT_FUNCTION_TARGET(SSE4_1)
-inline QRgb qUnpremultiply_sse4(QRgb p)
+static inline QRgb qUnpremultiply_sse4(QRgb p)
 {
     const uint alpha = qAlpha(p);
     if (alpha == 255)
@@ -262,7 +262,7 @@ inline QRgb qUnpremultiply_sse4(QRgb p)
 
 template<enum QtPixelOrder PixelOrder>
 QT_FUNCTION_TARGET(SSE4_1)
-inline uint qConvertArgb32ToA2rgb30_sse4(QRgb p)
+static inline uint qConvertArgb32ToA2rgb30_sse4(QRgb p)
 {
     const uint alpha = qAlpha(p);
     if (alpha == 255)
@@ -292,7 +292,7 @@ inline uint qConvertArgb32ToA2rgb30_sse4(QRgb p)
 
 template<enum QtPixelOrder PixelOrder>
 QT_FUNCTION_TARGET(SSE4_1)
-inline uint qConvertRgba64ToRgb32_sse4(QRgba64 p)
+static inline uint qConvertRgba64ToRgb32_sse4(QRgba64 p)
 {
     if (p.isTransparent())
         return 0;
diff --git a/src/gui/painting/qpixellayout.cpp b/src/gui/painting/qpixellayout.cpp
index f573ce27422..ad6df37228b 100644
--- a/src/gui/painting/qpixellayout.cpp
+++ b/src/gui/painting/qpixellayout.cpp
@@ -219,7 +219,7 @@ inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar *dest, int index,
 }
 
 template<QImage::Format Format>
-static Q_ALWAYS_INLINE uint convertPixelToRGB32(uint s)
+static inline uint convertPixelToRGB32(uint s)
 {
     Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
     Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1);
@@ -274,7 +274,7 @@ static const uint *QT_FASTCALL fetchRGBToRGB32(uint *buffer, const uchar *src, i
 }
 
 template<QImage::Format Format>
-static Q_ALWAYS_INLINE QRgba64 convertPixelToRGB64(uint s)
+static inline QRgba64 convertPixelToRGB64(uint s) // converts s with convertPixelToRGB32<Format>, then widens the result via QRgba64::fromArgb32
 {
     return QRgba64::fromArgb32(convertPixelToRGB32<Format>(s));
 }
@@ -298,7 +298,7 @@ static const QRgba64 *QT_FASTCALL fetchRGBToRGB64(QRgba64 *buffer, const uchar *
 }
 
 template<QImage::Format Format>
-static Q_ALWAYS_INLINE uint convertPixelToARGB32PM(uint s)
+static inline uint convertPixelToARGB32PM(uint s)
 {
     Q_CONSTEXPR uint alphaMask = ((1 << alphaWidth<Format>()) - 1);
     Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
@@ -365,7 +365,7 @@ static const uint *QT_FASTCALL fetchARGBPMToARGB32PM(uint *buffer, const uchar *
 }
 
 template<QImage::Format Format>
-static Q_ALWAYS_INLINE QRgba64 convertPixelToRGBA64PM(uint s)
+static inline QRgba64 convertPixelToRGBA64PM(uint s) // converts s with convertPixelToARGB32PM<Format>, then widens the result via QRgba64::fromArgb32
 {
     return QRgba64::fromArgb32(convertPixelToARGB32PM<Format>(s));
 }
diff --git a/src/gui/painting/qpixellayout_p.h b/src/gui/painting/qpixellayout_p.h
index 0cbf514dede..99d98ac36e4 100644
--- a/src/gui/painting/qpixellayout_p.h
+++ b/src/gui/painting/qpixellayout_p.h
@@ -225,26 +225,26 @@ inline uint qRgbSwapRgb30(uint c)
 }
 
 #if Q_BYTE_ORDER == Q_BIG_ENDIAN
-Q_ALWAYS_INLINE quint32 RGBA2ARGB(quint32 x) {
+static inline quint32 RGBA2ARGB(quint32 x) { // big-endian build: rotates x right by 8 bits, moving the low byte to the top
     quint32 rgb = x >> 8;
     quint32 a = x << 24;
     return a | rgb;
 }
 
-Q_ALWAYS_INLINE quint32 ARGB2RGBA(quint32 x) {
+static inline quint32 ARGB2RGBA(quint32 x) { // big-endian build: rotates x left by 8 bits, moving the top byte to the bottom
     quint32 rgb = x << 8;
     quint32 a = x >> 24;
     return a | rgb;
 }
 #else
-Q_ALWAYS_INLINE quint32 RGBA2ARGB(quint32 x) {
+static inline quint32 RGBA2ARGB(quint32 x) { // keeps bytes 1 and 3 in place and swaps bytes 0 and 2
     // RGBA8888 is ABGR32 on little endian.
     quint32 ag = x & 0xff00ff00;
     quint32 rg = x & 0x00ff00ff;
     return ag | (rg  << 16) | (rg >> 16);
 }
 
-Q_ALWAYS_INLINE quint32 ARGB2RGBA(quint32 x) {
+static inline quint32 ARGB2RGBA(quint32 x) { // the byte swap done by RGBA2ARGB in this branch is its own inverse, so it is reused
     return RGBA2ARGB(x);
 }
 #endif
diff --git a/src/gui/painting/qrgba64_p.h b/src/gui/painting/qrgba64_p.h
index 0a19604c5d3..58eefad68bd 100644
--- a/src/gui/painting/qrgba64_p.h
+++ b/src/gui/painting/qrgba64_p.h
@@ -73,7 +73,7 @@ inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535)
 }
 
 #ifdef __SSE2__
-Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, __m128i va)
+static inline __m128i Q_DECL_VECTORCALL multiplyAlpha65535(__m128i rgba64, __m128i va)
 {
     __m128i vs = rgba64;
     vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va));
@@ -83,21 +83,19 @@ Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, __m128i va)
     vs = _mm_packs_epi32(vs, _mm_setzero_si128());
     return vs;
 }
-Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, uint alpha65535)
+static inline __m128i Q_DECL_VECTORCALL multiplyAlpha65535(__m128i rgba64, uint alpha65535) // copies alpha65535 into the four low 16-bit lanes, then calls the (__m128i, __m128i) overload
 {
     const __m128i va = _mm_shufflelo_epi16(_mm_cvtsi32_si128(alpha65535), _MM_SHUFFLE(0, 0, 0, 0));
     return multiplyAlpha65535(rgba64, va);
 }
-#endif
-
-#if defined(__ARM_NEON__)
-Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint16x4_t alpha65535)
+#elif defined(__ARM_NEON__)
+static inline uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint16x4_t alpha65535) // widening lane-wise multiply, then the (v + (v >> 16) + 0x8000) >> 16 reduction back to 16 bits
 {
     uint32x4_t vs32 = vmull_u16(rgba64, alpha65535); // vs = vs * alpha
     vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
     return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16
 }
-Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint alpha65535)
+static inline uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint alpha65535)
 {
     uint32x4_t vs32 = vmull_n_u16(rgba64, alpha65535); // vs = vs * alpha
     vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
@@ -106,7 +104,7 @@ Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint alpha65535
 #endif
 
 template<typename T>
-inline T multiplyAlpha255(T rgba64, uint alpha255)
+static inline T Q_DECL_VECTORCALL multiplyAlpha255(T rgba64, uint alpha255)
 {
 #if defined(__SSE2__) || defined(__ARM_NEON__)
     return multiplyAlpha65535(rgba64, alpha255 * 257);
@@ -124,14 +122,14 @@ inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
 }
 
 #if defined __SSE2__
-Q_ALWAYS_INLINE __m128i interpolate255(__m128i x, uint alpha1, __m128i y, uint alpha2)
+static inline __m128i Q_DECL_VECTORCALL interpolate255(__m128i x, uint alpha1, __m128i y, uint alpha2) // multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2), added per 32-bit lane
 {
     return _mm_add_epi32(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
 }
 #endif
 
 #if defined __ARM_NEON__
-Q_ALWAYS_INLINE uint16x4_t interpolate255(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
+static inline uint16x4_t interpolate255(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2) // lane-wise multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2); static because multiplyAlpha255 has internal linkage
 {
     return vadd_u16(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
 }
@@ -143,23 +141,23 @@ inline QRgba64 interpolate65535(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
 }
 
 #if defined __SSE2__
-Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, uint alpha1, __m128i y, uint alpha2)
+static inline __m128i Q_DECL_VECTORCALL interpolate65535(__m128i x, uint alpha1, __m128i y, uint alpha2) // multiplyAlpha65535(x, alpha1) + multiplyAlpha65535(y, alpha2), added per 32-bit lane
 {
     return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
 }
 // alpha2 below is const-ref because otherwise MSVC2015 complains that it can't 16-byte align the argument.
-Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, __m128i alpha1, __m128i y, const __m128i &alpha2)
+static inline __m128i Q_DECL_VECTORCALL interpolate65535(__m128i x, __m128i alpha1, __m128i y, const __m128i &alpha2) // vector-alpha overload: sums the two multiplyAlpha65535 products per 32-bit lane
 {
     return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
 }
 #endif
 
 #if defined __ARM_NEON__
-Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
+static inline uint16x4_t interpolate65535(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2) // lane-wise multiplyAlpha65535(x, alpha1) + multiplyAlpha65535(y, alpha2); static because the NEON multiplyAlpha65535 overloads have internal linkage
 {
     return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
 }
-Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint16x4_t alpha1, uint16x4_t y, uint16x4_t alpha2)
+static inline uint16x4_t interpolate65535(uint16x4_t x, uint16x4_t alpha1, uint16x4_t y, uint16x4_t alpha2) // vector-alpha overload: lane-wise sum of the two multiplyAlpha65535 products; static because those helpers have internal linkage
 {
     return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
 }
@@ -175,7 +173,7 @@ inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b)
 
 #if QT_COMPILER_SUPPORTS_HERE(SSE2)
 QT_FUNCTION_TARGET(SSE2)
-Q_ALWAYS_INLINE uint toArgb32(__m128i v)
+static inline uint Q_DECL_VECTORCALL toArgb32(__m128i v)
 {
     v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
     v = _mm_add_epi32(v, _mm_set1_epi32(128));
@@ -186,7 +184,7 @@ Q_ALWAYS_INLINE uint toArgb32(__m128i v)
     return _mm_cvtsi128_si32(v);
 }
 #elif defined __ARM_NEON__
-Q_ALWAYS_INLINE uint toArgb32(uint16x4_t v)
+static inline uint toArgb32(uint16x4_t v)
 {
     v = vsub_u16(v, vrshr_n_u16(v, 8));
     v = vrshr_n_u16(v, 8);
@@ -195,7 +193,7 @@ Q_ALWAYS_INLINE uint toArgb32(uint16x4_t v)
 }
 #endif
 
-Q_ALWAYS_INLINE uint toArgb32(QRgba64 rgba64)
+static inline uint toArgb32(QRgba64 rgba64)
 {
 #if defined __SSE2__
     __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
@@ -215,7 +213,7 @@ Q_ALWAYS_INLINE uint toArgb32(QRgba64 rgba64)
 #endif
 }
 
-Q_ALWAYS_INLINE uint toRgba8888(QRgba64 rgba64)
+static inline uint toRgba8888(QRgba64 rgba64)
 {
 #if defined __SSE2__
     __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
@@ -228,7 +226,7 @@ Q_ALWAYS_INLINE uint toRgba8888(QRgba64 rgba64)
 #endif
 }
 
-inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha)
+static inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha)
 {
     QRgba64 blend;
 #if defined(__SSE2__)
@@ -274,7 +272,7 @@ inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha)
     return blend;
 }
 
-static Q_ALWAYS_INLINE void blend_pixel(QRgba64 &dst, QRgba64 src)
+static inline void blend_pixel(QRgba64 &dst, QRgba64 src)
 {
     if (src.isOpaque())
         dst = src;
@@ -282,7 +280,7 @@ static Q_ALWAYS_INLINE void blend_pixel(QRgba64 &dst, QRgba64 src)
         dst = src + multiplyAlpha65535(dst, 65535 - src.alpha());
 }
 
-static Q_ALWAYS_INLINE void blend_pixel(QRgba64 &dst, QRgba64 src, const int const_alpha)
+static inline void blend_pixel(QRgba64 &dst, QRgba64 src, const int const_alpha)
 {
     if (const_alpha == 255)
         return blend_pixel(dst, src);