Added support for cl_ext_float_atomics in CBasicTestFetchMin/Max with atomic_float (#2353)

Related to #2142, according to the work plan, extending
`CBasicTestFetchMin`/`CBasicTestFetchMax` with support for atomic_float.
This commit is contained in:
Marcin Hajder
2025-09-09 17:45:35 +02:00
committed by GitHub
parent a0bd81d574
commit df61cad39f
4 changed files with 348 additions and 64 deletions

View File

@@ -24,8 +24,9 @@
#include "CL/cl_half.h"
#include <vector>
#include <iomanip>
#include <sstream>
#include <vector>
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
#define MAX_HOST_THREADS GetThreadCount()
@@ -892,14 +893,15 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
header += std::string("__global volatile ") + aTypeName + " destMemory["
+ ss.str() + "] = {\n";
ss.str("");
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
!= TYPE_ATOMIC_HALF)
ss << _startValue;
else
== TYPE_ATOMIC_FLOAT)
ss << std::setprecision(10) << _startValue;
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
== TYPE_ATOMIC_HALF)
ss << static_cast<HostDataType>(
cl_half_to_float(static_cast<cl_half>(_startValue)));
else
ss << _startValue;
for (cl_uint i = 0; i < maxNumDestItems; i++)
{
if (aTypeName == "atomic_flag")

View File

@@ -172,19 +172,34 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
TExplicitMemoryOrderType order_success,
TExplicitMemoryOrderType order_failure)
{
CorrespondingType tmp;
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
tmp = InterlockedCompareExchange(a, desired, *expected);
CorrespondingType tmp;
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile float *>(a);
if (tmp == *expected)
{
*reinterpret_cast<volatile float *>(a) = desired;
return true;
}
*expected = tmp;
}
else
{
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
tmp = InterlockedCompareExchange(a, desired, *expected);
#elif defined(__GNUC__)
tmp = __sync_val_compare_and_swap(a, *expected, desired);
tmp = __sync_val_compare_and_swap(a, *expected, desired);
#else
log_info("Host function not implemented: atomic_compare_exchange\n");
tmp = 0;
log_info("Host function not implemented: atomic_compare_exchange\n");
tmp = 0;
#endif
if(tmp == *expected)
return true;
*expected = tmp;
return false;
if (tmp == *expected) return true;
*expected = tmp;
}
return false;
}
template <typename AtomicType, typename CorrespondingType>

View File

@@ -138,6 +138,7 @@ test_status InitCL(cl_device_id device) {
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
if (is_extension_available(device, "cl_khr_fp16"))
{
cl_int error = clGetDeviceInfo(

View File

@@ -2608,54 +2608,178 @@ REGISTER_TEST(svm_atomic_fetch_xor2)
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMin
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
    // Range used when generating random float reference inputs.
    double min_range;
    double max_range;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM),
          min_range(-999.0), max_range(999.0)
    {
        // Destination starts at the maximum so any generated value can
        // lower it via atomic_fetch_min.
        StartValue(DataType().MaxValue());
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // The float variant keeps per-thread results in destMemory[tid];
            // the generic old-value bookkeeping does not apply to it.
            CBasicTestMemOrderScope<HostAtomicType,
                                    HostDataType>::OldValueCheck(false);
        }
    }
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // 1) fold this thread's value into destMemory[0];
            // 2) read back destMemory[tid] (min with 0 as operand) so the
            //    per-thread slot's previous value lands in oldValues[tid].
            return " atomic_fetch_min" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
                + " oldValues[tid] = atomic_fetch_min" + postfix
                + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
                + ")0" + memoryOrderScope + ");\n";
        }
        else
        {
            return " oldValues[tid] = atomic_fetch_min" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope
                + ");\n";
        }
    }
    void HostFunction(cl_uint tid, cl_uint threadCount,
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Mirror of the float kernel: accumulate into slot 0, then
            // fetch the per-thread slot's old value.
            host_atomic_fetch_min(&destMemory[0], oldValues[tid],
                                  MemoryOrder());
            oldValues[tid] = host_atomic_fetch_min(
                &destMemory[tid], (HostDataType)0, MemoryOrder());
        }
        else
        {
            oldValues[tid] = host_atomic_fetch_min(
                &destMemory[0], oldValues[tid], MemoryOrder());
        }
    }
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = get_random_float(min_range, max_range, d);
            }
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = genrand_int32(d);
                if (sizeof(HostDataType) >= 8)
                {
                    // Widen 64-bit reference values beyond 32 random bits.
                    cl_ulong v = startRefValues[i];
                    v |= (cl_ulong)genrand_int32(d) << 16;
                    startRefValues[i] = v;
                }
            }
        }
        return true;
    }
    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                       HostDataType *startRefValues,
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Only slot 0 accumulates the global minimum; other slots are
            // ignored by IsTestNotAsExpected below.
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
                    if (startRefValues[i] < expected)
                        expected = startRefValues[i];
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                if (startRefValues[i] < expected) expected = startRefValues[i];
            }
        }
        return true;
    }
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             cl_uint whichDestValue) override
    {
        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
        {
            if (whichDestValue == 0)
                return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
                    IsTestNotAsExpected(expected, testValues, whichDestValue);
            return false; // ignore all but 0 which stores final result
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                               testValues,
                                                               whichDestValue);
    }
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
        {
            correct = true;
            // refValues[i] (i > 0) holds the old value of destMemory[i],
            // which only thread i touched, so it must still be StartValue().
            // (Fixed: the diff had the for-line duplicated in place of this
            // comparison, reporting every thread as a mismatch.)
            for (cl_uint i = 1; i < threadCount; i++)
            {
                if (refValues[i] != StartValue())
                {
                    log_error("Thread %d found %d mismatch(es)\n", i,
                              (cl_uint)refValues[i]);
                    correct = false;
                }
            }
            // NOTE(review): mirrors CBasicTestFetchMax::VerifyRefs; confirm
            // the return convention (true = verification handled) upstream.
            return !correct;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::VerifyRefs(correct,
                                                                 threadCount,
                                                                 refValues,
                                                                 finalValues);
    }
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Skip when the device lacks the relevant FP min/max capability
            // from cl_ext_float_atomics for the targeted memory space.
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // One destination slot per thread in the float variant.
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::NumResults(threadCount,
                                                                 deviceID);
    }
};
static int test_atomic_fetch_min_generic(cl_device_id deviceID,
@@ -2680,6 +2804,15 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID,
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestFetchMin<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
@@ -2742,54 +2875,178 @@ REGISTER_TEST(svm_atomic_fetch_min)
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMax
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
    // Range used when generating random float reference inputs.
    double min_range;
    double max_range;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM),
          min_range(-999.0), max_range(999.0)
    {
        // Destination starts at the minimum so any generated value can
        // raise it via atomic_fetch_max.
        StartValue(DataType().MinValue());
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // The float variant keeps per-thread results in destMemory[tid];
            // the generic old-value bookkeeping does not apply to it.
            CBasicTestMemOrderScope<HostAtomicType,
                                    HostDataType>::OldValueCheck(false);
        }
    }
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // 1) fold this thread's value into destMemory[0];
            // 2) read back destMemory[tid] (max with 0 as operand) so the
            //    per-thread slot's previous value lands in oldValues[tid].
            return " atomic_fetch_max" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
                + " oldValues[tid] = atomic_fetch_max" + postfix
                + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
                + ")0" + memoryOrderScope + ");\n";
        }
        else
        {
            return " oldValues[tid] = atomic_fetch_max" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope
                + ");\n";
        }
    }
    void HostFunction(cl_uint tid, cl_uint threadCount,
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Mirror of the float kernel: accumulate into slot 0, then
            // fetch the per-thread slot's old value.
            host_atomic_fetch_max(&destMemory[0], oldValues[tid],
                                  MemoryOrder());
            oldValues[tid] = host_atomic_fetch_max(
                &destMemory[tid], (HostDataType)0, MemoryOrder());
        }
        else
        {
            oldValues[tid] = host_atomic_fetch_max(
                &destMemory[0], oldValues[tid], MemoryOrder());
        }
    }
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = get_random_float(min_range, max_range, d);
            }
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = genrand_int32(d);
                if (sizeof(HostDataType) >= 8)
                {
                    // Widen 64-bit reference values beyond 32 random bits.
                    cl_ulong v = startRefValues[i];
                    v |= (cl_ulong)genrand_int32(d) << 16;
                    startRefValues[i] = v;
                }
            }
        }
        return true;
    }
    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                       HostDataType *startRefValues,
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Only slot 0 accumulates the global maximum; other slots are
            // ignored by IsTestNotAsExpected below.
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
                    if (startRefValues[i] > expected)
                        expected = startRefValues[i];
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                if (startRefValues[i] > expected) expected = startRefValues[i];
            }
        }
        return true;
    }
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             cl_uint whichDestValue) override
    {
        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
        {
            if (whichDestValue == 0)
                return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
                    IsTestNotAsExpected(expected, testValues, whichDestValue);
            return false; // ignore all but 0 which stores final result
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                               testValues,
                                                               whichDestValue);
    }
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
        {
            correct = true;
            // refValues[i] (i > 0) holds the old value of destMemory[i],
            // which only thread i touched, so it must still be StartValue().
            for (cl_uint i = 1; i < threadCount; i++)
            {
                if (refValues[i] != StartValue())
                {
                    log_error("Thread %d found %d mismatch(es)\n", i,
                              (cl_uint)refValues[i]);
                    correct = false;
                }
            }
            // NOTE(review): confirm the return convention (true =
            // verification handled) against the base class contract.
            return !correct;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::VerifyRefs(correct,
                                                                 threadCount,
                                                                 refValues,
                                                                 finalValues);
    }
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Skip when the device lacks the relevant FP min/max capability
            // from cl_ext_float_atomics for the targeted memory space.
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // One destination slot per thread in the float variant.
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::NumResults(threadCount,
                                                                 deviceID);
    }
};
static int test_atomic_fetch_max_generic(cl_device_id deviceID,
@@ -2814,6 +3071,15 @@ static int test_atomic_fetch_max_generic(cl_device_id deviceID,
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestFetchMax<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>