Added support for cl_ext_float_atomics in CBasicTestFetchMin/Max with atomic_half (#2357)

Related to #2142. Following the work plan, this change extends CBasicTestFetchMin and CBasicTestFetchMax with support for atomic_half.
2026-03-19 06:09:01 +00:00 · 2025-10-21 17:43:05 +02:00
parent 34745bd936
commit 940c8bb973
4 changed files with 167 additions and 25 deletions
--- a/test_conformance/c11_atomics/common.cpp
+++ b/test_conformance/c11_atomics/common.cpp
@@ -194,14 +194,28 @@ template<> cl_int AtomicTypeExtendedInfo<cl_int>::MinValue() {return CL_INT_MIN;
 template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;}
 template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;}
 template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() {return 0;}
-template<> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue() {return CL_FLT_MIN;}
+template <> cl_half AtomicTypeExtendedInfo<cl_half>::MinValue()
+{
+    return cl_half_from_float(CL_HALF_MIN, gHalfRoundingMode);
+}
+template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue()
+{
+    return CL_FLT_MIN;
+}
 template<> cl_double AtomicTypeExtendedInfo<cl_double>::MinValue() {return CL_DBL_MIN;}

 template<> cl_int AtomicTypeExtendedInfo<cl_int>::MaxValue() {return CL_INT_MAX;}
 template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue() {return CL_UINT_MAX;}
 template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;}
 template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
-template<> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue() {return CL_FLT_MAX;}
+template <> cl_half AtomicTypeExtendedInfo<cl_half>::MaxValue()
+{
+    return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode);
+}
+template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue()
+{
+    return CL_FLT_MAX;
+}
 template<> cl_double AtomicTypeExtendedInfo<cl_double>::MaxValue() {return CL_DBL_MAX;}

 cl_int getSupportedMemoryOrdersAndScopes(
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -894,15 +894,16 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
        header += std::string("__global volatile ") + aTypeName + " destMemory["
            + ss.str() + "] = {\n";
        ss.str("");
+
        if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
            == TYPE_ATOMIC_FLOAT)
            ss << std::setprecision(10) << _startValue;
        else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
                 == TYPE_ATOMIC_HALF)
-            ss << static_cast<HostDataType>(
-                cl_half_to_float(static_cast<cl_half>(_startValue)));
+            ss << cl_half_to_float(static_cast<cl_half>(_startValue));
        else
            ss << _startValue;
+
        for (cl_uint i = 0; i < maxNumDestItems; i++)
        {
            if (aTypeName == "atomic_flag")
--- a/test_conformance/c11_atomics/host_atomics.h
+++ b/test_conformance/c11_atomics/host_atomics.h
@@ -176,7 +176,20 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
                                  TExplicitMemoryOrderType order_failure)
 {
    CorrespondingType tmp;
-    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
+    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
+    {
+        static std::mutex mtx;
+        std::lock_guard<std::mutex> lock(mtx);
+        tmp = *reinterpret_cast<volatile cl_half *>(a);
+
+        if (cl_half_to_float(tmp) == cl_half_to_float(*expected))
+        {
+            *reinterpret_cast<volatile cl_half *>(a) = desired;
+            return true;
+        }
+        *expected = tmp;
+    }
+    else if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
    {
        static std::mutex mtx;
        std::lock_guard<std::mutex> lock(mtx);
@@ -191,7 +204,6 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
    else
    {
 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
-
        tmp = InterlockedCompareExchange(a, desired, *expected);
 #elif defined(__GNUC__)
        tmp = __sync_val_compare_and_swap(a, *expected, desired);
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -2687,7 +2687,10 @@ public:
          min_range(-999.0), max_range(999.0)
    {
        StartValue(DataType().MaxValue());
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            CBasicTestMemOrderScope<HostAtomicType,
                                    HostDataType>::OldValueCheck(false);
@@ -2697,7 +2700,10 @@ public:
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            return "  atomic_fetch_min" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
@@ -2716,7 +2722,10 @@ public:
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            host_atomic_fetch_min(&destMemory[0], oldValues[tid],
                                  MemoryOrder());
@@ -2732,7 +2741,16 @@ public:
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            for (cl_uint i = 0; i < threadCount; i++)
+            {
+                startRefValues[i] = cl_half_from_float(
+                    get_random_float(min_range, max_range, d),
+                    gHalfRoundingMode);
+            }
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
@@ -2759,7 +2777,19 @@ public:
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (whichDestValue == 0)
+            {
+                for (cl_uint i = 0; i < threadCount; i++)
+                {
+                    if (cl_half_to_float(startRefValues[i])
+                        < cl_half_to_float(expected))
+                        expected = startRefValues[i];
+                }
+            }
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
@@ -2779,7 +2809,9 @@ public:
                             const std::vector<HostAtomicType> &testValues,
                             cl_uint whichDestValue) override
    {
-        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
+        if (std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same<HostDataType, HOST_FLOAT>::value)
        {
            if (whichDestValue == 0)
                return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
@@ -2794,7 +2826,9 @@ public:
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
-        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
+        if (std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same<HostDataType, HOST_FLOAT>::value)
        {
            correct = true;
            for (cl_uint i = 1; i < threadCount; i++)
@@ -2817,7 +2851,19 @@ public:
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (LocalMemory()
+                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
+                    == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
+                    == 0)
+                return 0;
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
@@ -2835,7 +2881,10 @@ public:
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            return threadCount;
        }
@@ -2870,6 +2919,11 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID,

    if (gFloatAtomicsSupported)
    {
+        CBasicTestFetchMin<HOST_ATOMIC_HALF, HOST_HALF> test_half(
+            TYPE_ATOMIC_HALF, useSVM);
+        EXECUTE_TEST(error,
+                     test_half.Execute(deviceID, context, queue, num_elements));
+
        CBasicTestFetchMin<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
            TYPE_ATOMIC_FLOAT, useSVM);
        EXECUTE_TEST(
@@ -2953,18 +3007,31 @@ public:
                                                                useSVM),
          min_range(-999.0), max_range(999.0)
    {
-        StartValue(DataType().MinValue());
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            CBasicTestMemOrderScope<HostAtomicType,
                                    HostDataType>::OldValueCheck(false);
+            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+                StartValue(cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode));
+            else
+                StartValue(-DataType().MaxValue());
+        }
+        else
+        {
+            StartValue(DataType().MinValue());
        }
    }
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            return "  atomic_fetch_max" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
@@ -2983,7 +3050,10 @@ public:
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            host_atomic_fetch_max(&destMemory[0], oldValues[tid],
                                  MemoryOrder());
@@ -2999,7 +3069,16 @@ public:
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            for (cl_uint i = 0; i < threadCount; i++)
+            {
+                startRefValues[i] = cl_half_from_float(
+                    get_random_float(min_range, max_range, d),
+                    gHalfRoundingMode);
+            }
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
@@ -3026,7 +3105,19 @@ public:
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (whichDestValue == 0)
+            {
+                for (cl_uint i = 0; i < threadCount; i++)
+                {
+                    if (cl_half_to_float(startRefValues[i])
+                        > cl_half_to_float(expected))
+                        expected = startRefValues[i];
+                }
+            }
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
@@ -3046,7 +3137,9 @@ public:
                             const std::vector<HostAtomicType> &testValues,
                             cl_uint whichDestValue) override
    {
-        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
+        if (std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same<HostDataType, HOST_FLOAT>::value)
        {
            if (whichDestValue == 0)
                return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
@@ -3061,7 +3154,9 @@ public:
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
-        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
+        if (std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same<HostDataType, HOST_FLOAT>::value)
        {
            correct = true;
            for (cl_uint i = 1; i < threadCount; i++)
@@ -3084,7 +3179,19 @@ public:
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (LocalMemory()
+                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
+                    == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
+                    == 0)
+                return 0;
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
@@ -3102,7 +3209,10 @@ public:
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            return threadCount;
        }
@@ -3137,6 +3247,11 @@ static int test_atomic_fetch_max_generic(cl_device_id deviceID,

    if (gFloatAtomicsSupported)
    {
+        CBasicTestFetchMax<HOST_ATOMIC_HALF, HOST_HALF> test_half(
+            TYPE_ATOMIC_HALF, useSVM);
+        EXECUTE_TEST(error,
+                     test_half.Execute(deviceID, context, queue, num_elements));
+
        CBasicTestFetchMax<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
            TYPE_ATOMIC_FLOAT, useSVM);
        EXECUTE_TEST(