From 62972418c3c9c9a06e84738c926f278f59a518f2 Mon Sep 17 00:00:00 2001
From: Marcin Hajder <marcin.hajder@gmail.com>
Date: Wed, 22 Oct 2025 16:01:48 +0200
Subject: [PATCH] Added support for cl_ext_float_atomics in CBasicTestFetchAdd
 with atomic_half (#2350)

Related to https://github.com/KhronosGroup/OpenCL-CTS/issues/2142,
according to the work plan, extending CBasicTestFetchAdd with support
for atomic_half.

I wasn't able to test that PR completely due to missing
`CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT`/`CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT`
capabilities for atomic_half. I appreciate reviewers' attention, thanks.
---
 test_conformance/c11_atomics/host_atomics.h   |  13 +-
 test_conformance/c11_atomics/test_atomics.cpp | 125 ++++++++++++++++--
 2 files changed, 123 insertions(+), 15 deletions(-)
diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h
index 4471897b..27560822 100644
--- a/test_conformance/c11_atomics/host_atomics.h
+++ b/test_conformance/c11_atomics/host_atomics.h
@@ -99,7 +99,16 @@ template <typename AtomicType, typename CorrespondingType>
 CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
                                         TExplicitMemoryOrderType order)
 {
-    if constexpr (
+    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
+    {
+        static std::mutex mx;
+        std::lock_guard<std::mutex> lock(mx);
+        CorrespondingType old_value = *a;
+        *a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)),
+                                gHalfRoundingMode);
+        return old_value;
+    }
+    else if constexpr (
         std::is_same_v<
             AtomicType,
             HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
@@ -112,7 +121,7 @@ CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingTyp
     }
     else
     {
-#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
+#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
         return InterlockedExchangeAdd(a, c);
 #elif defined(__GNUC__)
         return __sync_fetch_and_add(a, c);
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index d73bb6b8..0712f109 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -417,7 +417,7 @@ public:
         correct = true;
         for (cl_uint i = 0; i < threadCount; i++)
         {
-            if constexpr (std::is_same<HostDataType, cl_half>::value)
+            if constexpr (std::is_same_v<HostDataType, cl_half>)
             {
                 HostDataType test = cl_half_from_float(static_cast<float>(i),
                                                        gHalfRoundingMode);
@@ -1204,17 +1204,79 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
         {
             StartValue((HostDataType)0.0);
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
         }
     }
+    template <typename Iterator> float accum_halfs(Iterator begin, Iterator end)
+    {
+        cl_half sum = 0;
+        for (auto it = begin; it != end; ++it)
+        {
+            sum = cl_half_from_float(cl_half_to_float(sum)
+                                         + cl_half_to_float(*it),
+                                     gHalfRoundingMode);
+        }
+        return cl_half_to_float(sum);
+    }
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (threadCount > ref_vals.size())
+            {
+                ref_vals.resize(threadCount);
+
+                for (cl_uint i = 0; i < threadCount; i++)
+                    ref_vals[i] = cl_half_from_float(
+                        get_random_float(min_range, max_range, d),
+                        gHalfRoundingMode);
+
+                memcpy(startRefValues, ref_vals.data(),
+                       sizeof(HostDataType) * ref_vals.size());
+
+                // Estimate highest possible summation error for given set.
+                std::vector<float> sums;
+                std::sort(ref_vals.begin(), ref_vals.end(),
+                          [](cl_half a, cl_half b) {
+                              return cl_half_to_float(a) < cl_half_to_float(b);
+                          });
+
+                sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
+                sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
+
+                std::sort(ref_vals.begin(), ref_vals.end(),
+                          [](cl_half a, cl_half b) {
+                              return std::abs(cl_half_to_float(a))
+                                  < std::abs(cl_half_to_float(b));
+                          });
+
+                float precise = 0.f;
+                for (auto elem : ref_vals) precise += cl_half_to_float(elem);
+                sums.push_back(precise);
+
+                sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
+                sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
+
+                std::sort(sums.begin(), sums.end());
+                max_error = std::abs(sums.front() - sums.back());
+
+                // restore unsorted order
+                memcpy(ref_vals.data(), startRefValues,
+                       sizeof(HostDataType) * ref_vals.size());
+            }
+            else
+            {
+                memcpy(startRefValues, ref_vals.data(),
+                       sizeof(HostDataType) * threadCount);
+            }
+            return true;
+        }
+        else if constexpr (
             std::is_same_v<
                 HostDataType,
                 HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
@@ -1286,7 +1348,7 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             return "  atomic_fetch_add" + postfix + "(&destMemory[0], ("
                 + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1323,7 +1385,7 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
                                   MemoryOrder());
@@ -1349,7 +1411,20 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (whichDestValue == 0)
+            {
+                for (cl_uint i = 0; i < threadCount; i++)
+                {
+                    expected = cl_half_from_float(
+                        cl_half_to_float(expected)
+                            + cl_half_to_float(startRefValues[i]),
+                        gHalfRoundingMode);
+                }
+            }
+        }
+        else if constexpr (
             std::is_same_v<
                 HostDataType,
                 HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
@@ -1371,10 +1446,17 @@ public:
                              const std::vector<HostAtomicType> &testValues,
                              cl_uint whichDestValue) override
     {
-        if constexpr (
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (whichDestValue == 0)
+                return std::abs(cl_half_to_float(expected)
+                                - cl_half_to_float(testValues[whichDestValue]))
+                    > max_error;
+        }
+        else if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_DOUBLE> || std::is_same<HostDataType, HOST_FLOAT>::value)
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             if (whichDestValue == 0)
                 return std::abs((HostDataType)expected
@@ -1389,8 +1471,10 @@ public:
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                     HostAtomicType *finalValues) override
     {
-        if (std::is_same<HostDataType, HOST_DOUBLE>::value
-            || std::is_same<HostDataType, HOST_FLOAT>::value)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             correct = true;
             for (cl_uint i = 1; i < threadCount; i++)
@@ -1413,7 +1497,17 @@ public:
     int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                           cl_command_queue queue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
+        {
+            if (LocalMemory()
+                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0;
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
         {
             if (LocalMemory()
                 && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1443,7 +1537,7 @@ public:
         if constexpr (
             std::is_same_v<
                 HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             return threadCount;
         }
@@ -1478,6 +1572,11 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
 
     if (gFloatAtomicsSupported)
     {
+        CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
+            TYPE_ATOMIC_HALF, useSVM);
+        EXECUTE_TEST(error,
+                     test_half.Execute(deviceID, context, queue, num_elements));
+
         CBasicTestFetchAdd<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
             TYPE_ATOMIC_DOUBLE, useSVM);
         EXECUTE_TEST(
@@ -1737,7 +1836,7 @@ public:
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                     HostAtomicType *finalValues) override
     {
-        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
+        if (std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             correct = true;
             for (cl_uint i = 1; i < threadCount; i++)