Added support for cl_ext_float_atomics in CBasicTestFetchAdd with atomic_float (#2345)

Related to #2142, according to the work plan, extending
CBasicTestFetchAdd with support for atomic_float.
This commit is contained in:
Marcin Hajder
2025-09-02 17:38:56 +02:00
committed by GitHub
parent d417d7670d
commit fbba22770d
4 changed files with 253 additions and 42 deletions

View File

@@ -74,9 +74,11 @@ extern int
gMaxDeviceThreads; // maximum number of threads executed on OCL device gMaxDeviceThreads; // maximum number of threads executed on OCL device
extern cl_device_atomic_capabilities gAtomicMemCap, extern cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device gAtomicFenceCap; // atomic memory and fence capabilities for this device
extern cl_half_rounding_mode gHalfRoundingMode; extern cl_half_rounding_mode gHalfRoundingMode;
extern bool gFloatAtomicsSupported; extern bool gFloatAtomicsSupported;
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps; extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps;
extern const char * extern const char *
get_memory_order_type_name(TExplicitMemoryOrderType orderType); get_memory_order_type_name(TExplicitMemoryOrderType orderType);
@@ -174,6 +176,13 @@ public:
{ {
return false; return false;
} }
// Per-result failure predicate: returns true when the value read back from
// the device does NOT match the host-computed expectation for slot
// `whichDestValue`. The default is exact equality; derived tests (e.g. the
// floating-point fetch_add test) override this to compare within an error
// bound instead.
virtual bool
IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue)
{
return expected != testValues[whichDestValue];
}
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) MTdata d)
{ {
@@ -1449,7 +1458,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
startRefValues.size() ? &startRefValues[0] : 0, i)) startRefValues.size() ? &startRefValues[0] : 0, i))
break; // no expected value function provided break; // no expected value function provided
if (expected != destItems[i]) if (IsTestNotAsExpected(expected, destItems, i))
{ {
std::stringstream logLine; std::stringstream logLine;
logLine << "ERROR: Result " << i logLine << "ERROR: Result " << i

View File

@@ -17,6 +17,7 @@
#define HOST_ATOMICS_H_ #define HOST_ATOMICS_H_
#include "harness/testHarness.h" #include "harness/testHarness.h"
#include <mutex>
#ifdef WIN32 #ifdef WIN32
#include "Windows.h" #include "Windows.h"
@@ -94,14 +95,25 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c, CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order) TExplicitMemoryOrderType order)
{ {
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a += c;
return old_value;
}
else
{
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
return InterlockedExchangeAdd(a, c); return InterlockedExchangeAdd(a, c);
#elif defined(__GNUC__) #elif defined(__GNUC__)
return __sync_fetch_and_add(a, c); return __sync_fetch_and_add(a, c);
#else #else
log_info("Host function not implemented: atomic_fetch_add\n"); log_info("Host function not implemented: atomic_fetch_add\n");
return 0; return 0;
#endif #endif
}
} }
template <typename AtomicType, typename CorrespondingType> template <typename AtomicType, typename CorrespondingType>

View File

@@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap,
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
bool gFloatAtomicsSupported = false; bool gFloatAtomicsSupported = false;
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
test_status InitCL(cl_device_id device) { test_status InitCL(cl_device_id device) {
auto version = get_device_cl_version(device); auto version = get_device_cl_version(device);
@@ -132,6 +133,11 @@ test_status InitCL(cl_device_id device) {
if (is_extension_available(device, "cl_ext_float_atomics")) if (is_extension_available(device, "cl_ext_float_atomics"))
{ {
gFloatAtomicsSupported = true; gFloatAtomicsSupported = true;
cl_int error = clGetDeviceInfo(
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
if (is_extension_available(device, "cl_khr_fp16")) if (is_extension_available(device, "cl_khr_fp16"))
{ {
cl_int error = clGetDeviceInfo( cl_int error = clGetDeviceInfo(

View File

@@ -16,10 +16,13 @@
#include "harness/testHarness.h" #include "harness/testHarness.h"
#include "harness/kernelHelpers.h" #include "harness/kernelHelpers.h"
#include "harness/typeWrappers.h" #include "harness/typeWrappers.h"
#include "harness/conversions.h"
#include "common.h" #include "common.h"
#include "host_atomics.h" #include "host_atomics.h"
#include <algorithm>
#include <numeric>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
@@ -1163,61 +1166,233 @@ REGISTER_TEST(svm_atomic_compare_exchange_weak)
template <typename HostAtomicType, typename HostDataType> template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd class CBasicTestFetchAdd
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> { : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
double min_range;
double max_range;
double max_error_fp32;
std::vector<HostDataType> ref_vals;
public: public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder; using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr; HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue; using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType; using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM) useSVM),
{} min_range(-999.0), max_range(999.0), max_error_fp32(0.0)
virtual std::string ProgramCore() {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
{
StartValue(0.f);
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
}
}
// Generates the per-thread random float addends for the atomic_float
// fetch_add test and caches them in `ref_vals`, so repeated executions with
// the same (or smaller) thread count reuse an identical input set. Also
// derives `max_error_fp32`, an upper bound on the rounding error of summing
// the set in any order, used later by IsTestNotAsExpected.
// Returns true when it produced the reference values itself; false for
// non-float instantiations so the harness falls back to its default
// generation.
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
{
if (threadCount > ref_vals.size())
{
// Fresh (or larger) run: regenerate the whole set in range
// [min_range, max_range] and publish it to the harness buffer.
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
ref_vals[i] = get_random_float(min_range, max_range, d);
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * ref_vals.size());
// Estimate highest possible summation error for given set.
// Several summation orders are tried (ascending, descending,
// and by increasing magnitude), in single precision, against a
// double-precision reference; the spread between the smallest
// and largest result bounds the order-dependent rounding error.
std::vector<HostDataType> sums;
std::sort(ref_vals.begin(), ref_vals.end());
sums.push_back(
std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
sums.push_back(
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
std::sort(
ref_vals.begin(), ref_vals.end(),
[](float a, float b) { return std::abs(a) < std::abs(b); });
double precise = 0.0;
for (auto elem : ref_vals) precise += double(elem);
sums.push_back(precise);
sums.push_back(
std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
sums.push_back(
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
std::sort(sums.begin(), sums.end());
max_error_fp32 =
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
// restore unsorted order
// (startRefValues still holds the original ordering saved above)
memcpy(ref_vals.data(), startRefValues,
sizeof(HostDataType) * ref_vals.size());
}
else
{
// Cached run: replay the first threadCount values verbatim.
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * threadCount);
}
return true;
}
return false;
}
std::string ProgramCore() override
{ {
std::string memoryOrderScope = MemoryOrderScopeStr(); std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return " oldValues[tid] = atomic_fetch_add" + postfix
+ "(&destMemory[0], (" + DataType().AddSubOperandTypeName() if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+ ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add" {
+ postfix + "(&destMemory[0], (" return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
+ ");\n" + memoryOrderScope + ");\n"
" atomic_fetch_add" + " oldValues[tid] = atomic_fetch_add" + postfix
+ postfix + "(&destMemory[0], (" + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
+ DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + ")0" + memoryOrderScope + ");\n";
+ ");\n" }
" atomic_fetch_add" else
+ postfix + "(&destMemory[0], ((" {
+ DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" return " oldValues[tid] = atomic_fetch_add" + postfix
+ DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope + "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
+ ");\n"; + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add"
+ postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")tid + 3"
+ memoryOrderScope
+ ");\n"
" atomic_fetch_add"
+ postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")tid + 3"
+ memoryOrderScope
+ ");\n"
" atomic_fetch_add"
+ postfix + "(&destMemory[0], (("
+ DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
+ DataType().AddSubOperandTypeName() + ")-1)*8"
+ memoryOrderScope + ");\n";
}
} }
virtual void HostFunction(cl_uint tid, cl_uint threadCount, void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory, volatile HostAtomicType *destMemory,
HostDataType *oldValues) HostDataType *oldValues) override
{ {
oldValues[tid] = host_atomic_fetch_add( if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); {
host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
MemoryOrder()); MemoryOrder());
host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, oldValues[tid] = host_atomic_fetch_add(
MemoryOrder()); &destMemory[tid], (HostDataType)0, MemoryOrder());
host_atomic_fetch_add(&destMemory[0], }
((HostDataType)tid + 3) else
<< (sizeof(HostDataType) - 1) * 8, {
MemoryOrder()); oldValues[tid] = host_atomic_fetch_add(
&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
MemoryOrder());
host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
MemoryOrder());
host_atomic_fetch_add(
&destMemory[0],
(((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
MemoryOrder());
}
} }
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues, HostDataType *startRefValues,
cl_uint whichDestValue) cl_uint whichDestValue) override
{ {
expected = StartValue(); expected = StartValue();
for (cl_uint i = 0; i < threadCount; i++) if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
expected += ((HostDataType)i + 3) * 3 {
+ (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
expected += startRefValues[i];
}
else
{
for (cl_uint i = 0; i < threadCount; i++)
expected += ((HostDataType)i + 3) * 3
+ (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
}
return true; return true;
} }
bool IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override
{
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
{
if (whichDestValue == 0)
return std::abs((HOST_ATOMIC_FLOAT)expected
- testValues[whichDestValue])
> max_error_fp32;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
testValues,
whichDestValue);
}
// Verifies the per-thread oldValues buffer for the float test: after the
// kernel, every slot except 0 must still hold StartValue() (each thread's
// second fetch_add read back its own untouched destMemory[tid] slot).
// Sets `correct` accordingly.
// NOTE(review): returns !correct, i.e. reports "handled" only when a
// mismatch was found, otherwise falls through to the harness's default
// verification — confirm this matches the VerifyRefs contract.
// Fix vs. previous revision: the log_error format string passed cl_uint
// (unsigned) arguments to %d and mislabeled the observed value as a
// mismatch count; it now uses %u / %f and states what was actually seen.
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                HostAtomicType *finalValues) override
{
    if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
    {
        correct = true;
        // Slot 0 is the accumulator and is checked elsewhere; start at 1.
        for (cl_uint i = 1; i < threadCount; i++)
        {
            if (refValues[i] != StartValue())
            {
                log_error("Thread %u observed unexpected value %f "
                          "(expected start value)\n",
                          i, (double)refValues[i]);
                correct = false;
            }
        }
        return !correct;
    }
    return CBasicTestMemOrderScope<HostAtomicType,
                                   HostDataType>::VerifyRefs(correct,
                                                             threadCount,
                                                             refValues,
                                                             finalValues);
}
// Float atomic add support is an optional cl_ext_float_atomics capability,
// reported separately for local and global memory. When the device lacks the
// capability for the memory region under test, return 0 (skip, not a
// failure); otherwise run the regular base-class test path.
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                      cl_command_queue queue) override
{
    if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
    {
        const cl_device_fp_atomic_capabilities_ext required =
            LocalMemory() ? CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT
                          : CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT;
        if ((gFloatAtomicCaps & required) == 0)
            return 0; // skip test - not applicable
    }
    return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
        ExecuteSingleTest(deviceID, context, queue);
}
// The float variant reads back one destination slot per thread (slot 0 is
// the shared accumulator, slots 1..N-1 are per-thread), so it needs
// threadCount results; all other data types keep the base-class sizing.
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
    if constexpr (!std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
    {
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
            NumResults(threadCount, deviceID);
    }
    return threadCount;
}
}; };
static int test_atomic_fetch_add_generic(cl_device_id deviceID, static int test_atomic_fetch_add_generic(cl_device_id deviceID,
@@ -1242,6 +1417,15 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
TYPE_ATOMIC_ULONG, useSVM); TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error, EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements)); test_ulong.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{ {
CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>