From b165de7649b7c8930c638884e8385ed58b47dc2f Mon Sep 17 00:00:00 2001
From: Sreelakshmi Haridas Maruthur <sharidas@quicinc.com>
Date: Mon, 19 Oct 2020 16:08:06 -0600
Subject: [PATCH] conversions: Use ARM emulation for aarch64 (#967)

The host compiler will not always calculate reference values
the same, depending on optimization level.  It generates
instructions that do not respond to CPU rounding mode in
the same way.  Use QCOM rounding mode emulation to correctly
calculate reference values on aarch64.
---
 test_conformance/conversions/CMakeLists.txt   |  2 +-
 .../conversions/basic_test_conversions.cpp    | 36 ++++++++++++-------
 .../conversions/test_conversions.cpp          | 20 +++++++----
 3 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt
index 8886ba0f..523b6ead 100644
--- a/test_conformance/conversions/CMakeLists.txt
+++ b/test_conformance/conversions/CMakeLists.txt
@@ -4,7 +4,7 @@ set (${MODULE_NAME}_SOURCES
       Sleep.cpp test_conversions.cpp basic_test_conversions.cpp
 )
 
-if("${CLConform_TARGET_ARCH}" STREQUAL "ARM")
+if("${CLConform_TARGET_ARCH}" STREQUAL "ARM" OR "${CLConform_TARGET_ARCH}" STREQUAL "ARM64")
     list(APPEND ${MODULE_NAME}_SOURCES fplib.cpp)
 endif()
 
diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp
index d32694a1..44417262 100644
--- a/test_conformance/conversions/basic_test_conversions.cpp
+++ b/test_conformance/conversions/basic_test_conversions.cpp
@@ -21,11 +21,11 @@
 
 #include "harness/mt19937.h"
 
-#if defined( __arm__ ) && defined( __GNUC__ )
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
 #include "fplib.h"
 #endif
 
-#if defined( __arm__ ) && defined( __GNUC__ )
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
 /* Rounding modes and saturation for use with qcom 64 bit to float conversion library */
     bool            qcom_sat;
     roundingMode    qcom_rm;
@@ -759,12 +759,18 @@ static void ulong2float( void *out, void *in)
     ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result));
 #else
     cl_ulong l = ((cl_ulong*) in)[0];
-#if defined( __arm__ ) && defined( __GNUC__ )
-    /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code
-     * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case)
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+    /* ARM VFP doesn't have hardware instruction for converting from 64-bit
+     * integer to float types, hence GCC ARM uses the floating-point emulation
+     * code despite which -mfloat-abi setting it is. But the emulation code in
+     * libgcc.a has only one rounding mode (round to nearest even in this case)
      * and ignores the user rounding mode setting in hardware.
-     * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler
-     * so for testing different rounding modes, we need to use alternative reference function */
+     * As a result setting rounding modes in hardware won't give correct
+     * rounding results for type convert from 64-bit integer to float using GCC
+     * for ARM compiler so for testing different rounding modes, we need to use
+     * alternative reference function. ARM64 does have an instruction, however
+     * we cannot guarantee the compiler will use it.  On all ARM architectures
+     * use emulation to calculate reference.*/
     ((float*) out)[0] = qcom_u64_2_f32(l, qcom_sat, qcom_rm);
 #else
     ((float*) out)[0] = (l == 0 ? 0.0f : (float) l);        // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
@@ -806,12 +812,18 @@ static void long2float( void *out, void *in)
     ((float*) out)[0] = (l == 0 ? 0.0f : result);        // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
 #else
     cl_long l = ((cl_long*) in)[0];
-#if defined( __arm__ ) && defined( __GNUC__ )
-    /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code
-     * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case)
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+    /* ARM VFP doesn't have hardware instruction for converting from 64-bit
+     * integer to float types, hence GCC ARM uses the floating-point emulation
+     * code despite which -mfloat-abi setting it is. But the emulation code in
+     * libgcc.a has only one rounding mode (round to nearest even in this case)
      * and ignores the user rounding mode setting in hardware.
-     * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler
-     * so for testing different rounding modes, we need to use alternative reference function */
+     * As a result setting rounding modes in hardware won't give correct
+     * rounding results for type convert from 64-bit integer to float using GCC
+     * for ARM compiler so for testing different rounding modes, we need to use
+     * alternative reference function. ARM64 does have an instruction, however
+     * we cannot guarantee the compiler will use it.  On all ARM architectures
+     * use emulation to calculate reference.*/
     ((float*) out)[0] = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm));
 #else
     ((float*) out)[0] = (l == 0 ? 0.0f : (float) l);        // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp
index 2af85558..87b8ead7 100644
--- a/test_conformance/conversions/test_conversions.cpp
+++ b/test_conformance/conversions/test_conversions.cpp
@@ -65,7 +65,7 @@
 
 #define      kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */)
 
-#if defined( __arm__ ) && defined( __GNUC__ )
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
 #include "fplib.h"
     extern bool            qcom_sat;
     extern roundingMode    qcom_rm;
@@ -884,12 +884,18 @@ cl_int PrepareReference( cl_uint job_id, cl_uint thread_id, void *p )
         if( info->sat )
             f = gSaturatedConversions[ outType ][ inType ];
 
-#if defined( __arm__ ) && defined( __GNUC__ )
-       /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code
-        * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case)
-        * and ignores the user rounding mode setting in hardware.
-        * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler
-        * so for testing different rounding modes, we need to use alternative reference function */
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+        /* ARM VFP doesn't have hardware instruction for converting from 64-bit
+         * integer to float types, hence GCC ARM uses the floating-point
+         * emulation code despite which -mfloat-abi setting it is. But the
+         * emulation code in libgcc.a has only one rounding mode (round to
+         * nearest even in this case) and ignores the user rounding mode setting
+         * in hardware. As a result setting rounding modes in hardware won't
+         * give correct rounding results for type convert from 64-bit integer to
+         * float using GCC for ARM compiler so for testing different rounding
+         * modes, we need to use alternative reference function. ARM64 does have
+         * an instruction, however we cannot guarantee the compiler will use it.
+         * On all ARM architectures use emulation to calculate reference.*/
         switch (round)
         {
             /* conversions to floating-point type use the current rounding mode.