Added support for cl_ext_float_atomics in CBasicTestFetchAddSpecialFloats with atomic_half (#2386)

Related to #2142: following the work plan, this change extends
CBasicTestFetchAddSpecialFloats with support for atomic_half.
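
For context, a minimal host-side sketch of the all-to-all indexing scheme the new test relies on. It is illustrative only: plain floats stand in for the cl_half special values, and `vals` is a hypothetical placeholder, not data from the test.

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
    // Hypothetical stand-ins for the half special values.
    const std::vector<float> vals = { 0.0f, 1.0f, -1.0f };
    const std::size_t N = vals.size();
    // With N special values and N*N work-items, work-item tid ends up
    // holding vals[tid % N] + vals[tid / N], i.e. every special value
    // is added to every other one exactly once.
    for (std::size_t tid = 0; tid < N * N; ++tid)
    {
        const float result = vals[tid % N] + vals[tid / N];
        std::printf("tid %zu: %g + %g = %g\n", tid, vals[tid % N],
                    vals[tid / N], result);
    }
    return 0;
}
```

In the diff below, the same mapping appears as `oldValues[tid/(int)start_value]` in the kernel source, with `start_value` initialized to the number of special values via `StartValue()`.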
Marcin Hajder
2026-01-27 17:46:57 +01:00
committed by GitHub
parent 584f27afd4
commit 6f38c799c1
3 changed files with 266 additions and 18 deletions


@@ -1418,6 +1418,219 @@ public:
}
};
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAddSpecialFloats
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
std::vector<HostDataType> ref_vals;
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::DeclaredInProgram;
CBasicTestFetchAddSpecialFloats(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
// StartValue is used as an index divisor in the following test
// logic. It is set to the number of special values, which allows
// threads to be mapped deterministically onto the input data array.
// This enables repeated add operations arranged so that every
// special value is added to every other one ("all-to-all").
auto spec_vals = GetSpecialValues();
StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode));
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
}
}
static std::vector<HostDataType> &GetSpecialValues()
{
static std::vector<HostDataType> special_values;
if (special_values.empty())
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
special_values = {
0xffff, /* NaN (all bits set) */
0x0000, /* +0 */
0x7c00, /* +INFINITY */
0xfc00, /* -INFINITY */
0x8000, /* -0 */
0x7bff, /* HALF_MAX */
0x0400, /* HALF_MIN */
0x3c00, /* 1 */
0xbc00, /* -1 */
0x3555, /* nearest value to 1/3 */
0x3bff, /* largest number less than one */
0xc000, /* -2 */
0xfbff, /* -HALF_MAX */
0x8400, /* -HALF_MIN */
0x4248, /* M_PI_H */
0xc248, /* -M_PI_H */
0xbbff, /* largest negative fraction */
};
if (0 != (CL_FP_DENORM & gHalfFPConfig))
{
special_values.push_back(0x0001 /* Smallest denormal */);
special_values.push_back(0x03ff /* Largest denormal */);
}
}
}
return special_values;
}
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (threadCount > ref_vals.size())
{
ref_vals.assign(threadCount, 0);
auto spec_vals = GetSpecialValues();
cl_uint total_cnt = 0;
while (total_cnt < threadCount)
{
cl_uint block_cnt =
std::min((cl_int)(threadCount - total_cnt),
(cl_int)spec_vals.size());
memcpy(&ref_vals.at(total_cnt), spec_vals.data(),
sizeof(HostDataType) * block_cnt);
total_cnt += block_cnt;
}
}
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * threadCount);
return true;
}
return false;
}
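// Emits the per-work-item OpenCL C body; HostFunction below performs
// the same store and fetch-add on the host for comparison.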
std::string ProgramCore() override
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
// The start_value variable (set by StartValue) is used
// as a divisor of the thread index when selecting the operand for
// atomic_fetch_add. This groups threads into blocks corresponding
// to the number of special values and implements an "all-to-all"
// addition pattern. As a result, each destination element is
// updated using different combinations of input values, enabling
// consistent comparison between host and device execution.
return std::string(DataType().AddSubOperandTypeName())
+ " start_value = atomic_load_explicit(destMemory+tid, "
"memory_order_relaxed, memory_scope_work_group);\n"
" atomic_store_explicit(destMemory+tid, oldValues[tid], "
"memory_order_relaxed, memory_scope_work_group);\n"
" atomic_fetch_add"
+ postfix + "(&destMemory[tid], ("
+ DataType().AddSubOperandTypeName()
+ ")oldValues[tid/(int)start_value]" + memoryOrderScope
+ ");\n";
}
return ""; // not reached for non-half instantiations
}
void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues) override
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
auto spec_vals = GetSpecialValues();
host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
MEMORY_ORDER_SEQ_CST);
host_atomic_fetch_add(
&destMemory[tid],
(HostDataType)oldValues[tid / spec_vals.size()], MemoryOrder());
}
}
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
auto spec_vals = GetSpecialValues();
expected = cl_half_from_float(
cl_half_to_float(startRefValues[whichDestValue])
+ cl_half_to_float(
startRefValues[whichDestValue / spec_vals.size()]),
gHalfRoundingMode);
}
return true;
}
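// cl_half is an unsigned 16-bit integer type, so expected and device
// results are compared as raw bit patterns.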
bool IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
return static_cast<cl_half>(expected) != testValues[whichDestValue];
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
testValues,
whichDestValue);
}
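// Skip half configurations not covered by the device's reported
// cl_ext_float_atomics capabilities.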
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (DeclaredInProgram()) return 0; // skip test - not applicable
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory() && DeclaredInProgram())
{
if ((gHalfFPConfig & CL_FP_INF_NAN) == 0) return 0;
}
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
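// Every work-item owns a separate destination element, so the half
// test reports one result per thread.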
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
return threadCount;
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::NumResults(threadCount,
deviceID);
}
};
static int test_atomic_fetch_add_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
@@ -1443,6 +1656,17 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported)
{
auto spec_vals_halfs =
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF,
HOST_HALF>::GetSpecialValues();
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF, HOST_HALF>
test_spec_half(TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_spec_half.Execute(deviceID, context, queue,
spec_vals_halfs.size()
* spec_vals_halfs.size()));
CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,