From 119af24d54d1ba75b03abdf056aa5dc2d3cbefb7 Mon Sep 17 00:00:00 2001
From: Yilong Guo <yilong.guo@intel.com>
Date: Wed, 17 Dec 2025 00:37:33 +0800
Subject: [PATCH] c11_atomics: unify host half representation and conversion
 with wrapper class (#2503)

Introduce `HostHalf` wrapper class to eliminate explicit
`cl_half_from_float`
and `cl_half_to_float` conversions throughout the test code. The wrapper
provides semantic value constructors/operators and automatic
conversions,
simplifying half-precision arithmetic operations.

Key improvements:
- `HostHalf` class with operator overloading for arithmetic and
comparisons
- Type traits `is_host_atomic_fp_v` and `is_host_fp_v` for generic FP
handling
- Unified floating-point atomic operations (add/sub/min/max/exchange)
- Removed 300+ lines of half-specific conditional branches
- Consistent calculation for all FP types
---
 test_conformance/c11_atomics/common.cpp       |   8 +-
 test_conformance/c11_atomics/common.h         |  21 +-
 test_conformance/c11_atomics/host_atomics.h   | 173 ++++--
 test_conformance/c11_atomics/test_atomics.cpp | 540 ++++--------------
 4 files changed, 248 insertions(+), 494 deletions(-)
diff --git a/test_conformance/c11_atomics/common.cpp b/test_conformance/c11_atomics/common.cpp
index 3be3fbc1..7bb2da76 100644
--- a/test_conformance/c11_atomics/common.cpp
+++ b/test_conformance/c11_atomics/common.cpp
@@ -194,9 +194,9 @@ template<> cl_int AtomicTypeExtendedInfo<cl_int>::MinValue() {return CL_INT_MIN;
 template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;}
 template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;}
 template <> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() { return 0; }
-template <> cl_half AtomicTypeExtendedInfo<cl_half>::MinValue()
+template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MinValue()
 {
-    return cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode);
+    return -CL_HALF_MAX;
 }
 template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue()
 {
@@ -217,9 +217,9 @@ template <> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue()
 }
 template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;}
 template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
-template <> cl_half AtomicTypeExtendedInfo<cl_half>::MaxValue()
+template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MaxValue()
 {
-    return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode);
+    return CL_HALF_MAX;
 }
 template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue()
 {
diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index 09535a88..c9494c6c 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -183,7 +183,8 @@ public:
                         const std::vector<HostAtomicType> &testValues,
                         cl_uint whichDestValue)
     {
-        return expected != testValues[whichDestValue];
+        return expected
+            != static_cast<HostDataType>(testValues[whichDestValue]);
     }
     virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                               MTdata d)
@@ -911,12 +912,9 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
             + ss.str() + "] = {\n";
         ss.str("");
 
-        if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
-            == TYPE_ATOMIC_FLOAT)
-            ss << std::setprecision(10) << _startValue;
-        else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
-                 == TYPE_ATOMIC_HALF)
-            ss << cl_half_to_float(static_cast<cl_half>(_startValue));
+        if constexpr (is_host_fp_v<HostDataType>)
+            ss << std::hexfloat
+               << _startValue; // use hex format for accurate representation
         else
             ss << _startValue;
 
@@ -1305,7 +1303,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
     numDestItems = NumResults(threadCount, deviceID);
 
     destItems.resize(numDestItems);
-    for (cl_uint i = 0; i < numDestItems; i++) destItems[i] = _startValue;
+    for (cl_uint i = 0; i < numDestItems; i++)
+        destItems[i] = static_cast<HostAtomicType>(_startValue);
 
     // Create main buffer with atomic variables (array size dependent on
     // particular test)
@@ -1483,7 +1482,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
             std::stringstream logLine;
             logLine << "ERROR: Result " << i
                     << " from kernel does not validate! (should be " << expected
-                    << ", was " << destItems[i] << ")\n";
+                    << ", was " << static_cast<HostDataType>(destItems[i])
+                    << ")\n";
             log_error("%s", logLine.str().c_str());
             for (i = 0; i < threadCount; i++)
             {
@@ -1550,7 +1550,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
                                  // clEnqueueNDRangeKernel
     {
         /* Re-write the starting value */
-        for (size_t i = 0; i < numDestItems; i++) destItems[i] = _startValue;
+        for (size_t i = 0; i < numDestItems; i++)
+            destItems[i] = static_cast<HostAtomicType>(_startValue);
         refValues[0] = 0;
         if (deviceThreadCount > 0)
         {
diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h
index fac21642..8d875bc9 100644
--- a/test_conformance/c11_atomics/host_atomics.h
+++ b/test_conformance/c11_atomics/host_atomics.h
@@ -24,6 +24,8 @@
 #include "Windows.h"
 #endif
 
+extern cl_half_rounding_mode gHalfRoundingMode;
+
 //flag for test verification (good test should discover non-atomic functions and fail)
 //#define NON_ATOMIC_FUNCTIONS
 
@@ -37,6 +39,93 @@ enum TExplicitMemoryOrderType
   MEMORY_ORDER_SEQ_CST
 };
 
+// Wrapper class for half-precision
+class HostHalf {
+public:
+    // Convert from semantic values
+    HostHalf(cl_uint value = 0)
+        : value(
+            cl_half_from_float(static_cast<float>(value), gHalfRoundingMode))
+    {}
+    HostHalf(int value): HostHalf(static_cast<cl_uint>(value)) {}
+    HostHalf(float value): value(cl_half_from_float(value, gHalfRoundingMode))
+    {}
+    HostHalf(double value): HostHalf(static_cast<float>(value)) {}
+
+    // Convert to semantic values
+    operator cl_uint() const
+    {
+        return static_cast<cl_uint>(cl_half_to_float(value));
+    }
+    operator float() const { return cl_half_to_float(value); }
+    operator double() const
+    {
+        return static_cast<double>(cl_half_to_float(value));
+    }
+
+    // Construct from bit representation
+    HostHalf(cl_half value): value(value) {}
+
+    // Get the underlying bit representation
+    operator cl_half() const { return value; }
+
+    HostHalf operator-() const
+    {
+        return HostHalf(
+            cl_half_from_float(-cl_half_to_float(value), gHalfRoundingMode));
+    }
+
+#define GENERIC_OP(RetType, op)                                                \
+    RetType operator op(const HostHalf &other) const                           \
+    {                                                                          \
+        return RetType(cl_half_to_float(value)                                 \
+                           op cl_half_to_float(other.value));                  \
+    }
+
+    GENERIC_OP(bool, ==)
+    GENERIC_OP(bool, !=)
+    GENERIC_OP(bool, <)
+    GENERIC_OP(bool, <=)
+    GENERIC_OP(bool, >)
+    GENERIC_OP(bool, >=)
+    GENERIC_OP(HostHalf, +)
+    GENERIC_OP(HostHalf, -)
+    GENERIC_OP(HostHalf, *)
+    GENERIC_OP(HostHalf, /)
+#undef GENERIC_OP
+
+#define INPLACE_OP(op)                                                         \
+    HostHalf &operator op##=(const HostHalf &other)                            \
+    {                                                                          \
+        value = cl_half_from_float(cl_half_to_float(value)                     \
+                                       op cl_half_to_float(other.value),       \
+                                   gHalfRoundingMode);                         \
+        return *this;                                                          \
+    }
+    INPLACE_OP(+)
+    INPLACE_OP(-)
+    INPLACE_OP(*)
+    INPLACE_OP(/)
+#undef INPLACE_OP
+
+    friend std::ostream &operator<<(std::ostream &os, const HostHalf &hh)
+    {
+        float f = cl_half_to_float(hh.value);
+        os << f;
+        return os;
+    }
+
+private:
+    cl_half value;
+};
+
+namespace std {
+inline HostHalf abs(const HostHalf &value)
+{
+    return value < HostHalf(0) ? -value : value;
+}
+} // namespace std
+
 // host atomic types (applicable for atomic functions supported on host OS)
 #ifdef WIN32
 #define HOST_ATOMIC_INT         unsigned long
@@ -73,7 +162,7 @@ enum TExplicitMemoryOrderType
 #define HOST_UINT               cl_uint
 #define HOST_LONG               cl_long
 #define HOST_ULONG              cl_ulong
-#define HOST_HALF cl_half
+#define HOST_HALF HostHalf
 #define HOST_FLOAT              cl_float
 #define HOST_DOUBLE             cl_double
 
@@ -91,6 +180,18 @@ enum TExplicitMemoryOrderType
 
 extern cl_half_rounding_mode gHalfRoundingMode;
 
+template <typename HostAtomicType>
+constexpr bool is_host_atomic_fp_v =
+    std::disjunction_v<std::is_same<HostAtomicType, HOST_ATOMIC_HALF>,
+                       std::is_same<HostAtomicType, HOST_ATOMIC_FLOAT>,
+                       std::is_same<HostAtomicType, HOST_ATOMIC_DOUBLE>>;
+
+template <typename HostDataType>
+constexpr bool is_host_fp_v =
+    std::disjunction_v<std::is_same<HostDataType, HOST_HALF>,
+                       std::is_same<HostDataType, HOST_FLOAT>,
+                       std::is_same<HostDataType, HOST_DOUBLE>>;
+
 // host atomic functions
 void host_atomic_thread_fence(TExplicitMemoryOrderType order);
 
@@ -98,24 +199,13 @@ template <typename AtomicType, typename CorrespondingType>
 CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
                                         TExplicitMemoryOrderType order)
 {
-    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
+    if constexpr (is_host_atomic_fp_v<AtomicType>)
     {
         static std::mutex mx;
         std::lock_guard<std::mutex> lock(mx);
         CorrespondingType old_value = *a;
-        *a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)),
-                                gHalfRoundingMode);
-        return old_value;
-    }
-    else if constexpr (
-        std::is_same_v<
-            AtomicType,
-            HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
-    {
-        static std::mutex mx;
-        std::lock_guard<std::mutex> lock(mx);
-        CorrespondingType old_value = *a;
-        *a += c;
+        CorrespondingType new_value = old_value + c;
+        *a = static_cast<AtomicType>(new_value);
         return old_value;
     }
     else
@@ -135,24 +225,13 @@ template <typename AtomicType, typename CorrespondingType>
 CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
                                         TExplicitMemoryOrderType order)
 {
-    if constexpr (
-        std::is_same_v<
-            AtomicType,
-            HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
+    if constexpr (is_host_atomic_fp_v<AtomicType>)
     {
         static std::mutex mx;
         std::lock_guard<std::mutex> lock(mx);
         CorrespondingType old_value = *a;
-        *a -= c;
-        return old_value;
-    }
-    else if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
-    {
-        static std::mutex mx;
-        std::lock_guard<std::mutex> lock(mx);
-        CorrespondingType old_value = *a;
-        *a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)),
-                                gHalfRoundingMode);
+        CorrespondingType new_value = old_value - c;
+        *a = static_cast<AtomicType>(new_value);
         return old_value;
     }
     else
@@ -173,12 +252,14 @@ CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType
                                        TExplicitMemoryOrderType order)
 {
 #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
-    if (sizeof(CorrespondingType) == 2)
-        return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a), c);
+    if constexpr (sizeof(CorrespondingType) == 2)
+        return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a),
+                                     *reinterpret_cast<SHORT *>(&c));
     else
-        return InterlockedExchange(reinterpret_cast<volatile LONG *>(a), c);
+        return InterlockedExchange(reinterpret_cast<volatile LONG *>(a),
+                                   *reinterpret_cast<LONG *>(&c));
 #elif defined(__GNUC__)
-    return __sync_lock_test_and_set(a, c);
+    return __sync_lock_test_and_set(a, *reinterpret_cast<AtomicType *>(&c));
 #else
   log_info("Host function not implemented: atomic_exchange\n");
   return 0;
@@ -195,30 +276,14 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
                                   TExplicitMemoryOrderType order_failure)
 {
     CorrespondingType tmp;
-    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
+    if constexpr (is_host_atomic_fp_v<AtomicType>)
     {
         static std::mutex mtx;
         std::lock_guard<std::mutex> lock(mtx);
-        tmp = *reinterpret_cast<volatile cl_half *>(a);
-
-        if (cl_half_to_float(tmp) == cl_half_to_float(*expected))
-        {
-            *reinterpret_cast<volatile cl_half *>(a) = desired;
-            return true;
-        }
-        *expected = tmp;
-    }
-    else if constexpr (
-        std::is_same_v<
-            AtomicType,
-            HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
-    {
-        static std::mutex mtx;
-        std::lock_guard<std::mutex> lock(mtx);
-        tmp = *reinterpret_cast<volatile float *>(a);
+        tmp = static_cast<CorrespondingType>(*a);
         if (tmp == *expected)
         {
-            *a = desired;
+            *a = static_cast<AtomicType>(desired);
             return true;
         }
         *expected = tmp;
@@ -244,8 +309,8 @@ CorrespondingType host_atomic_load(volatile AtomicType *a,
                                    TExplicitMemoryOrderType order)
 {
 #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
-    if (sizeof(CorrespondingType) == 2)
-        auto prev = InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0);
+    if constexpr (sizeof(CorrespondingType) == 2)
+        return InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0);
     else
         return InterlockedExchangeAdd(reinterpret_cast<volatile LONG *>(a), 0);
 #elif defined(__GNUC__)
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index 56f350ad..b1326e28 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -94,13 +94,7 @@ public:
                                HostDataType *startRefValues,
                                cl_uint whichDestValue)
     {
-        if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
-                ._type
-            != TYPE_ATOMIC_HALF)
-            expected = (HostDataType)whichDestValue;
-        else
-            expected = cl_half_from_float(static_cast<float>(whichDestValue),
-                                          gHalfRoundingMode);
+        expected = static_cast<HostDataType>(whichDestValue);
         return true;
     }
 };
@@ -401,13 +395,7 @@ public:
                                HostDataType *startRefValues,
                                cl_uint whichDestValue)
     {
-        if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
-                ._type
-            != TYPE_ATOMIC_HALF)
-            expected = (HostDataType)whichDestValue;
-        else
-            expected = cl_half_from_float(static_cast<float>(whichDestValue),
-                                          gHalfRoundingMode);
+        expected = static_cast<HostDataType>(whichDestValue);
         return true;
     }
     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
@@ -417,25 +405,11 @@ public:
         correct = true;
         for (cl_uint i = 0; i < threadCount; i++)
         {
-            if constexpr (std::is_same_v<HostDataType, cl_half>)
+            if (refValues[i] != (HostDataType)i)
             {
-                HostDataType test = cl_half_from_float(static_cast<float>(i),
-                                                       gHalfRoundingMode);
-                if (refValues[i] != test)
-                {
-                    log_error("Invalid value for thread %u\n", (cl_uint)i);
-                    correct = false;
-                    return true;
-                }
-            }
-            else
-            {
-                if (refValues[i] != (HostDataType)i)
-                {
-                    log_error("Invalid value for thread %u\n", (cl_uint)i);
-                    correct = false;
-                    return true;
-                }
+                log_error("Invalid value for thread %u\n", (cl_uint)i);
+                correct = false;
+                return true;
             }
         }
         return true;
@@ -553,11 +527,7 @@ public:
         : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                 useSVM)
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-            StartValue(cl_half_from_float(static_cast<float>(1234),
-                                          gHalfRoundingMode));
-        else
-            StartValue(123456);
+        StartValue(1234);
     }
     virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                   cl_command_queue queue)
@@ -619,32 +589,19 @@ public:
         /* Any repeated value is treated as an error */
         std::vector<bool> tidFound(threadCount);
         bool startValueFound = false;
-        cl_uint startVal = StartValue();
-
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-            startVal = static_cast<cl_uint>(
-                cl_half_to_float(static_cast<cl_half>(StartValue())));
+        cl_uint startVal = static_cast<cl_uint>(StartValue());
 
         for (cl_uint i = 0; i <= threadCount; i++)
         {
             cl_uint value = 0;
             if (i == threadCount)
             {
-                if constexpr (!std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-                    value =
-                        (cl_uint)finalValues[0]; // additional value from atomic
-                                                 // variable (last written)
-                else
-                    value =
-                        cl_half_to_float(static_cast<cl_half>(finalValues[0]));
+                value = static_cast<cl_uint>(
+                    static_cast<HostDataType>(finalValues[0]));
             }
             else
             {
-                if constexpr (!std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-                    value = (cl_uint)refValues[i];
-                else
-                    value =
-                        cl_half_to_float(static_cast<cl_half>(refValues[i]));
+                value = static_cast<cl_uint>(refValues[i]);
             }
 
             if (value == startVal)
@@ -1201,85 +1158,24 @@ public:
                                                                 useSVM),
           min_range(-999.0), max_range(999.0), max_error(0.0)
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             StartValue((HostDataType)0.0);
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
+
+            // Narrow down range for half to avoid overflow to infinity
+            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+            {
+                min_range = -50.0;
+                max_range = 50.0;
+            }
         }
     }
-    template <typename Iterator> float accum_halfs(Iterator begin, Iterator end)
-    {
-        cl_half sum = 0;
-        for (auto it = begin; it != end; ++it)
-        {
-            sum = cl_half_from_float(cl_half_to_float(sum)
-                                         + cl_half_to_float(*it),
-                                     gHalfRoundingMode);
-        }
-        return cl_half_to_float(sum);
-    }
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (threadCount > ref_vals.size())
-            {
-                ref_vals.resize(threadCount);
-
-                for (cl_uint i = 0; i < threadCount; i++)
-                    ref_vals[i] = cl_half_from_float(
-                        get_random_float(min_range, max_range, d),
-                        gHalfRoundingMode);
-
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * ref_vals.size());
-
-                // Estimate highest possible summation error for given set.
-                std::vector<float> sums;
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return cl_half_to_float(a) < cl_half_to_float(b);
-                          });
-
-                sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
-
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return std::abs(cl_half_to_float(a))
-                                  < std::abs(cl_half_to_float(b));
-                          });
-
-                float precise = 0.f;
-                for (auto elem : ref_vals) precise += cl_half_to_float(elem);
-                sums.push_back(precise);
-
-                sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
-
-                std::sort(sums.begin(), sums.end());
-                max_error = std::abs(sums.front() - sums.back());
-
-                // restore unsorted order
-                memcpy(ref_vals.data(), startRefValues,
-                       sizeof(HostDataType) * ref_vals.size());
-            }
-            else
-            {
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * threadCount);
-            }
-            return true;
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (threadCount > ref_vals.size())
             {
@@ -1299,11 +1195,12 @@ public:
                 std::vector<HostDataType> sums;
                 std::sort(ref_vals.begin(), ref_vals.end());
 
-                sums.push_back(
-                    std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
+                                               static_cast<HostDataType>(0.f)));
 
-                sums.push_back(
-                    std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.rbegin(),
+                                               ref_vals.rend(),
+                                               static_cast<HostDataType>(0.f)));
 
                 std::sort(ref_vals.begin(), ref_vals.end(),
                           [](HostDataType a, HostDataType b) {
@@ -1318,15 +1215,25 @@ public:
 
                 sums.push_back(precise);
 
-                sums.push_back(
-                    std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
+                                               static_cast<HostDataType>(0.f)));
 
-                sums.push_back(
-                    std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.rbegin(),
+                                               ref_vals.rend(),
+                                               static_cast<HostDataType>(0.f)));
 
                 std::sort(sums.begin(), sums.end());
+                assert(std::all_of(sums.begin(), sums.end(),
+                                   [](const HostDataType &val) {
+                                       return std::isfinite(
+                                           static_cast<double>(val));
+                                   })
+                       && "Infinite summation value detected!");
                 max_error = std::abs(sums.front() - sums.back());
 
+                log_info("Max allowed error for %u elements: %.10f\n",
+                         threadCount, max_error);
+
                 // restore unsorted order
                 memcpy(ref_vals.data(), startRefValues,
                        sizeof(HostDataType) * ref_vals.size());
@@ -1345,10 +1252,7 @@ public:
         std::string memoryOrderScope = MemoryOrderScopeStr();
         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
 
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return "  atomic_fetch_add" + postfix + "(&destMemory[0], ("
                 + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1382,10 +1286,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
                                   MemoryOrder());
@@ -1411,23 +1312,7 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    expected = cl_half_from_float(
-                        cl_half_to_float(expected)
-                            + cl_half_to_float(startRefValues[i]),
-                        gHalfRoundingMode);
-                }
-            }
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
@@ -1446,21 +1331,13 @@ public:
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
-                return std::abs(cl_half_to_float(expected)
-                                - cl_half_to_float(testValues[whichDestValue]))
-                    > max_error;
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
-        {
-            if (whichDestValue == 0)
-                return std::abs((HostDataType)expected
-                                - testValues[whichDestValue])
+                return std::abs(
+                           static_cast<double>(expected
+                                               - static_cast<HostDataType>(
+                                                   testValues[whichDestValue])))
                     > max_error;
         }
         return CBasicTestMemOrderScope<
@@ -1471,10 +1348,7 @@ public:
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                     HostAtomicType *finalValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             correct = true;
             for (cl_uint i = 1; i < threadCount; i++)
@@ -1534,10 +1408,7 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return threadCount;
         }
@@ -1657,8 +1528,8 @@ template <> double kahan_sub<double>(const std::vector<double> &nums)
     double compensation = 0.0;
     for (double num : nums)
     {
-        double y = num - compensation;
-        double t = sum - y;
+        double y = -num - compensation;
+        double t = sum + y;
         compensation = (t - sum) - y;
         sum = t;
     }
@@ -1685,14 +1556,18 @@ public:
                                                                 useSVM),
           min_range(-999.0), max_range(999.0), max_error(0.0)
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             StartValue(0);
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
+
+            // Narrow down range for half to avoid overflow to infinity
+            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+            {
+                min_range = -50.0;
+                max_range = 50.0;
+            }
         }
     }
     template <typename Iterator>
@@ -1702,25 +1577,10 @@ public:
         for (auto it = begin; it != end; ++it) res = res - *it;
         return res;
     }
-    template <typename Iterator>
-    float subtract_halfs(Iterator begin, Iterator end)
-    {
-        cl_half res = 0;
-        for (auto it = begin; it != end; ++it)
-        {
-            res = cl_half_from_float(cl_half_to_float(res)
-                                         - cl_half_to_float(*it),
-                                     gHalfRoundingMode);
-        }
-        return cl_half_to_float(res);
-    }
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (threadCount > ref_vals.size())
             {
@@ -1736,105 +1596,36 @@ public:
                 // Estimate highest possible subtraction error for given set.
                 std::vector<HostDataType> sums;
                 std::sort(ref_vals.begin(), ref_vals.end());
+                sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
+                sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
 
+                std::sort(ref_vals.begin(), ref_vals.end(),
+                          [](HostDataType a, HostDataType b) {
+                              return std::abs(a) < std::abs(b);
+                          });
+
+                double precise = 0.0;
                 if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
-                {
-                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
-
-                    sums.push_back(
-                        subtract(ref_vals.rbegin(), ref_vals.rend()));
-
-                    std::sort(ref_vals.begin(), ref_vals.end(),
-                              [](double a, double b) {
-                                  return std::abs(a) < std::abs(b);
-                              });
-
-                    double precise = kahan_sub(ref_vals);
-                    sums.push_back(precise);
-
-                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
-
-                    sums.push_back(
-                        subtract(ref_vals.rbegin(), ref_vals.rend()));
-
-                    std::sort(sums.begin(), sums.end());
-                    max_error = std::abs((double)sums.front() - sums.back());
-                }
+                    precise = kahan_sub(ref_vals);
                 else
-                {
-                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
-                    sums.push_back(
-                        subtract(ref_vals.rbegin(), ref_vals.rend()));
-
-                    std::sort(ref_vals.begin(), ref_vals.end(),
-                              [](float a, float b) {
-                                  return std::abs(a) < std::abs(b);
-                              });
-
-                    double precise = 0.0;
                     for (auto elem : ref_vals) precise += double(elem);
-                    sums.push_back(precise);
-                    sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
-                    sums.push_back(
-                        subtract(ref_vals.rbegin(), ref_vals.rend()));
-
-                    std::sort(sums.begin(), sums.end());
-                    max_error =
-                        std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
-                }
-
-                // restore unsorted order
-                memcpy(ref_vals.data(), startRefValues,
-                       sizeof(HostDataType) * ref_vals.size());
-            }
-            else
-            {
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * threadCount);
-            }
-            return true;
-        }
-        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (threadCount > ref_vals.size())
-            {
-                ref_vals.resize(threadCount);
-                for (cl_uint i = 0; i < threadCount; i++)
-                    ref_vals[i] = cl_half_from_float(
-                        get_random_float(min_range, max_range, d),
-                        gHalfRoundingMode);
-
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * ref_vals.size());
-
-                // Estimate highest possible summation error for given set.
-                std::vector<float> sums;
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return cl_half_to_float(a) < cl_half_to_float(b);
-                          });
-
-                sums.push_back(
-                    subtract_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(
-                    subtract_halfs(ref_vals.rbegin(), ref_vals.rend()));
-
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return std::abs(cl_half_to_float(a))
-                                  < std::abs(cl_half_to_float(b));
-                          });
-
-                float precise = 0.f;
-                for (auto elem : ref_vals) precise -= cl_half_to_float(elem);
                 sums.push_back(precise);
-                sums.push_back(
-                    subtract_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(
-                    subtract_halfs(ref_vals.rbegin(), ref_vals.rend()));
+
+                sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
+                sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
 
                 std::sort(sums.begin(), sums.end());
+                assert(std::all_of(sums.begin(), sums.end(),
+                                   [](const HostDataType &val) {
+                                       return std::isfinite(
+                                           static_cast<double>(val));
+                                   })
+                       && "Infinite subtraction value detected!");
                 max_error = std::abs(sums.front() - sums.back());
+
+                log_info("Max allowed error for %u elements: %.10f\n",
+                         threadCount, max_error);
+
                 // restore unsorted order
                 memcpy(ref_vals.data(), startRefValues,
                        sizeof(HostDataType) * ref_vals.size());
@@ -1853,10 +1644,7 @@ public:
         std::string memoryOrderScope = MemoryOrderScopeStr();
         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
 
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return "  atomic_fetch_sub" + postfix + "(&destMemory[0], ("
                 + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1878,10 +1666,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
                                   MemoryOrder());
@@ -1903,29 +1688,12 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
                     expected -= startRefValues[i];
         }
-        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    expected = cl_half_from_float(
-                        cl_half_to_float(expected)
-                            - cl_half_to_float(startRefValues[i]),
-                        gHalfRoundingMode);
-                }
-            }
-        }
         else
         {
             for (cl_uint i = 0; i < threadCount; i++)
@@ -1938,21 +1706,13 @@ public:
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
-                return std::abs((HOST_ATOMIC_FLOAT)expected
-                                - testValues[whichDestValue])
-                    > max_error;
-        }
-        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-                return std::abs(cl_half_to_float(expected)
-                                - cl_half_to_float(testValues[whichDestValue]))
+                return std::abs(
+                           static_cast<double>(expected
+                                               - static_cast<HostDataType>(
+                                                   testValues[whichDestValue])))
                     > max_error;
         }
         return CBasicTestMemOrderScope<
@@ -2023,10 +1783,7 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return threadCount;
         }
@@ -2947,10 +2704,7 @@ public:
           min_range(-999.0), max_range(999.0)
     {
         StartValue(DataType().MaxValue());
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
@@ -2960,10 +2714,7 @@ public:
     {
         std::string memoryOrderScope = MemoryOrderScopeStr();
         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return "  atomic_fetch_min" + postfix
                 + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
@@ -2982,10 +2733,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_min(&destMemory[0], oldValues[tid],
                                   MemoryOrder());
@@ -3040,19 +2788,7 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    if (cl_half_to_float(startRefValues[i])
-                        < cl_half_to_float(expected))
-                        expected = startRefValues[i];
-                }
-            }
-        }
-        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
@@ -3072,9 +2808,7 @@ public:
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
@@ -3089,17 +2823,16 @@ public:
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                     HostAtomicType *finalValues) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same<HostDataType, HOST_FLOAT>::value)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             correct = true;
             for (cl_uint i = 1; i < threadCount; i++)
             {
                 if (refValues[i] != StartValue())
                 {
-                    log_error("Thread %d found %d mismatch(es)\n", i,
-                              (cl_uint)refValues[i]);
+                    log_error(
+                        "Thread %d found %lf mismatch(es), start value=%lf\n",
+                        i, (double)refValues[i], (double)StartValue());
                     correct = false;
                 }
             }
@@ -3156,10 +2889,7 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return threadCount;
         }
@@ -3287,21 +3017,11 @@ public:
                                                                 useSVM),
           min_range(-999.0), max_range(999.0)
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        StartValue(DataType().MinValue());
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
-            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-                StartValue(cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode));
-            else
-                StartValue(-DataType().MaxValue());
-        }
-        else
-        {
-            StartValue(DataType().MinValue());
         }
     }
     std::string ProgramCore() override
@@ -3330,10 +3050,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_max(&destMemory[0], oldValues[tid],
                                   MemoryOrder());
@@ -3349,23 +3066,12 @@ public:
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             for (cl_uint i = 0; i < threadCount; i++)
             {
-                startRefValues[i] = cl_half_from_float(
-                    get_random_float(min_range, max_range, d),
-                    gHalfRoundingMode);
-            }
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
-        {
-            for (cl_uint i = 0; i < threadCount; i++)
-            {
-                startRefValues[i] = get_random_float(min_range, max_range, d);
+                startRefValues[i] = static_cast<HostDataType>(
+                    get_random_float(min_range, max_range, d));
             }
         }
         else
@@ -3388,19 +3094,7 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    if (cl_half_to_float(startRefValues[i])
-                        > cl_half_to_float(expected))
-                        expected = startRefValues[i];
-                }
-            }
-        }
-        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
@@ -3420,9 +3114,7 @@ public:
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
@@ -3437,17 +3129,16 @@ public:
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                     HostAtomicType *finalValues) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             correct = true;
             for (cl_uint i = 1; i < threadCount; i++)
             {
                 if (refValues[i] != StartValue())
                 {
-                    log_error("Thread %d found %d mismatch(es)\n", i,
-                              (cl_uint)refValues[i]);
+                    log_error(
+                        "Thread %d found %lf mismatch(es), start value=%lf\n",
+                        i, (double)refValues[i], (double)StartValue());
                     correct = false;
                 }
             }
@@ -3504,10 +3195,7 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return threadCount;
         }