Use native support for fp16 where available

Use F16C or ARM FP16 if available at compile time. Configure check added because older clang compilers have F16C defines and flags but not all the intrinsics. Change-Id: I71f358b8fd003e70ab8fcf35097414591e485112 Reviewed-by: Thiago Macieira <[email protected]>
author: Allan Sandfeld Jensen <[email protected]> 2017-02-10 14:51:12 +0100
committer: Allan Sandfeld Jensen <[email protected]> 2017-02-16 12:03:45 +0000
commit: 925a3c65297edc37e394b9837782e79d837d20d1 (patch)
tree: 62a10dac9c308b11aac2286ba0d2e66c529f41b5
parent: 5486882b5253c858cc545cc3a08f0f89af59578e (diff)
6 files changed, 95 insertions, 1 deletions
diff --git a/config.tests/common/f16c/f16c.cpp b/config.tests/common/f16c/f16c.cpp
new file mode 100644
index 00000000000..fc73e4fc55b
--- /dev/null
+++ b/config.tests/common/f16c/f16c.cpp
@@ -0,0 +1,54 @@
+/****************************************************************************
+**
+** Copyright (C) 2017 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the config.tests of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <immintrin.h>
+
+int main(int, char**) // configure compile probe: references each F16C intrinsic so the build fails if any is missing
+{
+    float f = 1.f;
+    unsigned short s = _cvtss_sh(f, 0); // scalar float -> half-precision bits
+    float g = _cvtsh_ss(s); // scalar half-precision bits -> float
+    bool result = f == g;
+    (void)result; // cast to void so the otherwise-unused value draws no warning
+    __m128i a = _mm_setzero_si128();
+    __m256 b = _mm256_cvtph_ps(a); // packed half -> packed float (256-bit form)
+    __m128i c = _mm256_cvtps_ph(b, 0); // packed float -> packed half
+    (void)c;
+    return 0; // exit status is unconditional; the probe only has to build
+}
diff --git a/config.tests/common/f16c/f16c.pro b/config.tests/common/f16c/f16c.pro
new file mode 100644
index 00000000000..6b36c99d964
--- /dev/null
+++ b/config.tests/common/f16c/f16c.pro
@@ -0,0 +1,5 @@
+SOURCES = f16c.cpp
+CONFIG -= qt dylib release debug_and_release # build the probe as a plain non-Qt program
+CONFIG += debug console
+!defined(QMAKE_CFLAGS_F16C, "var"):error("This compiler does not support F16C") # fail when the mkspec defines no F16C flag variable
+else:QMAKE_CXXFLAGS += $$QMAKE_CFLAGS_F16C # otherwise compile the probe with the mkspec's F16C flag
diff --git a/configure.json b/configure.json
index f8660e6d7fd..276bb095e5a 100644
--- a/configure.json
+++ b/configure.json
@@ -74,6 +74,7 @@
             "developer-build": "void",
             "device": "string",
             "device-option": "addString",
+            "f16c": "boolean",
             "force-asserts": { "type": "boolean", "name": "force_asserts" },
             "force-debug-info": { "type": "boolean", "name": "force_debug_info" },
             "force-pkg-config": { "type": "void", "name": "pkg-config" },
@@ -316,6 +317,11 @@
             "type": "compile",
             "test": "common/sse4_2"
         },
+        "f16c": {
+            "label": "F16C instructions",
+            "type": "compile",
+            "test": "common/f16c"
+        },
         "avx": {
             "label": "AVX instructions",
             "type": "compile",
@@ -777,6 +783,14 @@
                 { "type": "define", "name": "QT_COMPILER_SUPPORTS_AVX", "value": 1 }
             ]
         },
+        "f16c": {
+            "label": "F16C",
+            "condition": "features.avx && tests.f16c",
+            "output": [
+                "privateConfig",
+                { "type": "define", "name": "QT_COMPILER_SUPPORTS_F16C", "value": 1 }
+            ]
+        },
         "avx2": {
             "label": "AVX2",
             "condition": "features.avx && tests.avx2",
@@ -1120,7 +1134,7 @@ Configure with '-qreal float' to create a build that is binary-compatible with 5
                         {
                             "message": "AVX",
                             "type": "featureList",
-                            "args": "avx avx2",
+                            "args": "avx avx2 f16c",
                             "condition": "(arch.i386 || arch.x86_64)"
                         },
                         {
diff --git a/mkspecs/common/gcc-base.conf b/mkspecs/common/gcc-base.conf
index 529beff4aba..9ddebae506b 100644
--- a/mkspecs/common/gcc-base.conf
+++ b/mkspecs/common/gcc-base.conf
@@ -85,6 +85,7 @@ QMAKE_CFLAGS_SSE3      += -msse3
 QMAKE_CFLAGS_SSSE3     += -mssse3
 QMAKE_CFLAGS_SSE4_1    += -msse4.1
 QMAKE_CFLAGS_SSE4_2    += -msse4.2
+QMAKE_CFLAGS_F16C      += -mf16c
 QMAKE_CFLAGS_AVX       += -mavx
 QMAKE_CFLAGS_AVX2      += -mavx2
 QMAKE_CFLAGS_AVX512F   += -mavx512f
diff --git a/mkspecs/features/simd.prf b/mkspecs/features/simd.prf
index 4aafdbe5ed5..953fc52c65a 100644
--- a/mkspecs/features/simd.prf
+++ b/mkspecs/features/simd.prf
@@ -104,6 +104,7 @@ addSimdCompiler(avx512bw)
 addSimdCompiler(avx512vl)
 addSimdCompiler(avx512ifma)
 addSimdCompiler(avx512vbmi)
+addSimdCompiler(f16c)
 addSimdCompiler(neon)
 addSimdCompiler(mips_dsp)
 addSimdCompiler(mips_dspr2)
diff --git a/src/corelib/global/qfloat16.h b/src/corelib/global/qfloat16.h
index 67c062d349e..8fd2f212af2 100644
--- a/src/corelib/global/qfloat16.h
+++ b/src/corelib/global/qfloat16.h
@@ -44,6 +44,10 @@
 #include <QtCore/qmetatype.h>
 #include <string.h>
 
+#if defined __F16C__
+#include <immintrin.h>
+#endif
+
 QT_BEGIN_NAMESPACE
 
 #if 0
@@ -111,19 +115,34 @@ inline int qIntCast(qfloat16 f) Q_DECL_NOTHROW
 
 inline qfloat16::qfloat16(float f) Q_DECL_NOTHROW
 {
+#if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__) // configure verified the intrinsics and the compiler advertises F16C
+    b16 = _cvtss_sh(f, 0); // native float -> half; immediate 0 selects round-to-nearest-even per Intel's intrinsic docs
+#elif defined (__ARM_FP16_FORMAT_IEEE) // ARM toolchain providing the IEEE-format __fp16 type
+    __fp16 f16 = f; // let the compiler perform the float -> half conversion
+    memcpy(&b16, &f16, sizeof(quint16)); // store the raw half bits in b16
+#else // portable fallback: table lookup driven by the float's bit pattern
     quint32 u;
     memcpy(&u, &f, sizeof(quint32));
     b16 = basetable[(u >> 23) & 0x1ff]
           + ((u & 0x007fffff) >> shifttable[(u >> 23) & 0x1ff]);
+#endif
 }
 
 inline qfloat16::operator float() const Q_DECL_NOTHROW
 {
+#if defined(QT_COMPILER_SUPPORTS_F16C) && defined(__F16C__) // configure verified the intrinsics and the compiler advertises F16C
+    return _cvtsh_ss(b16); // native half -> float conversion
+#elif defined (__ARM_FP16_FORMAT_IEEE) // ARM toolchain providing the IEEE-format __fp16 type
+    __fp16 f16;
+    memcpy(&f16, &b16, sizeof(quint16)); // load the stored half bits into an __fp16
+    return f16; // implicit __fp16 -> float conversion on return
+#else // portable fallback: rebuild the float's bits from the lookup tables
     quint32 u = mantissatable[offsettable[b16 >> 10] + (b16 & 0x3ff)]
                 + exponenttable[b16 >> 10];
     float f;
     memcpy(&f, &u, sizeof(quint32));
     return f;
+#endif
 }
 
 inline qfloat16::operator double() const Q_DECL_NOTHROW
author	Allan Sandfeld Jensen <[email protected]>	2017-02-10 14:51:12 +0100
committer	Allan Sandfeld Jensen <[email protected]>	2017-02-16 12:03:45 +0000
commit	925a3c65297edc37e394b9837782e79d837d20d1 (patch)
tree	62a10dac9c308b11aac2286ba0d2e66c529f41b5
parent	5486882b5253c858cc545cc3a08f0f89af59578e (diff)