Added support for cl_ext_float_atomics in CBasicTestFetchSub with atomic_double (#2368)

Related to #2142: following the work plan, this change extends
CBasicTestFetchSub with support for atomic_double.
This commit is contained in:
Marcin Hajder
2025-12-02 17:39:16 +01:00
committed by GitHub
parent 2bb364bb1d
commit 0cca0ee869
3 changed files with 102 additions and 25 deletions

View File

@@ -135,7 +135,10 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order)
{
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
AtomicType,
HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);

View File

@@ -134,7 +134,6 @@ test_status InitCL(cl_device_id device) {
if (is_extension_available(device, "cl_ext_float_atomics"))
{
gFloatAtomicsSupported = true;
if (is_extension_available(device, "cl_khr_fp64"))
{
cl_int error = clGetDeviceInfo(

View File

@@ -1647,6 +1647,23 @@ REGISTER_TEST(svm_atomic_fetch_add)
true);
}
// Fallback for non-double element types: the compensated subtraction is only
// implemented for double (see the specialization below), so other types
// report 0.0. @param nums ignored. @return 0.0.
template <typename T> double kahan_sub(const std::vector<T> &nums)
{
    return 0.0;
}
// Compensated (Kahan) running subtraction: returns 0.0 - nums[0] - nums[1] -
// ... with an error-compensation term that recovers the low-order bits lost
// by each floating-point subtraction, giving a much tighter bound than the
// naive loop. Used as the high-precision reference when estimating the
// worst-case error of atomic_fetch_sub on doubles.
// @param nums values to subtract from a running total starting at 0.0.
// @return the compensated value of -(sum of nums).
template <> double kahan_sub<double>(const std::vector<double> &nums)
{
    double sum = 0.0;
    double compensation = 0.0; // low-order error currently missing from sum
    for (double num : nums)
    {
        // Fold previously lost low-order bits into the subtrahend. This is
        // textbook Kahan summation with the sign of y flipped for
        // subtraction: y = num + c; t = sum - y; c = (t - sum) + y.
        double y = num + compensation;
        double t = sum - y;
        // (t - sum) + y is exactly the rounding error of `sum - y`.
        // BUGFIX: the previous code computed (t - sum) - y with
        // y = num - compensation, which evaluates to roughly -2*y instead of
        // the tiny residual and corrupts the total (e.g. {1,2,3} -> -16
        // instead of -6).
        compensation = (t - sum) + y;
        sum = t;
    }
    return sum;
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchSub
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
@@ -1671,7 +1688,7 @@ public:
if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
{
StartValue(0);
CBasicTestMemOrderScope<HostAtomicType,
@@ -1700,13 +1717,18 @@ public:
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (threadCount > ref_vals.size())
{
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
ref_vals[i] = get_random_float(min_range, max_range, d);
ref_vals[i] = (HostDataType)get_random_double(min_range,
max_range, d);
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * ref_vals.size());
@@ -1714,22 +1736,52 @@ public:
// Estimate highest possible subtraction error for given set.
std::vector<HostDataType> sums;
std::sort(ref_vals.begin(), ref_vals.end());
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(
ref_vals.begin(), ref_vals.end(),
[](float a, float b) { return std::abs(a) < std::abs(b); });
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(
subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(ref_vals.begin(), ref_vals.end(),
[](double a, double b) {
return std::abs(a) < std::abs(b);
});
double precise = kahan_sub(ref_vals);
sums.push_back(precise);
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(
subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(sums.begin(), sums.end());
max_error = std::abs((double)sums.front() - sums.back());
}
else
{
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(
subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(ref_vals.begin(), ref_vals.end(),
[](float a, float b) {
return std::abs(a) < std::abs(b);
});
double precise = 0.0;
for (auto elem : ref_vals) precise += double(elem);
sums.push_back(precise);
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
sums.push_back(
subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(sums.begin(), sums.end());
max_error =
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
}
// restore unsorted order
memcpy(ref_vals.data(), startRefValues,
@@ -1742,7 +1794,7 @@ public:
}
return true;
}
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (threadCount > ref_vals.size())
{
@@ -1804,7 +1856,7 @@ public:
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
return " atomic_fetch_sub" + postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1829,7 +1881,7 @@ public:
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
MemoryOrder());
@@ -1851,7 +1903,11 @@ public:
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
@@ -1882,7 +1938,10 @@ public:
const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
return std::abs((HOST_ATOMIC_FLOAT)expected
@@ -1927,7 +1986,18 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1956,7 +2026,7 @@ public:
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
return threadCount;
}
@@ -1991,6 +2061,11 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported)
{
CBasicTestFetchSub<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(