From 51445f3743e9026886bb802326c82b77484497a3 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 7 Oct 2025 17:42:02 +0200 Subject: [PATCH] Added support for cl_ext_float_atomics in CBasicTestFetchAdd with atomic_double (#2347) Related to https://github.com/KhronosGroup/OpenCL-CTS/issues/2142, according to the work plan, extending CBasicTestFetchAdd with support for atomic_double. --- test_conformance/c11_atomics/common.h | 1 + test_conformance/c11_atomics/host_atomics.h | 5 +- test_conformance/c11_atomics/main.cpp | 8 ++ test_conformance/c11_atomics/test_atomics.cpp | 105 ++++++++++++++---- 4 files changed, 97 insertions(+), 22 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index d321819f..fe2bd37d 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -79,6 +79,7 @@ extern cl_device_atomic_capabilities gAtomicMemCap, extern cl_half_rounding_mode gHalfRoundingMode; extern bool gFloatAtomicsSupported; extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps; +extern cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps; extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps; extern const char * diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index d9482fb7..aabbfdde 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -99,7 +99,10 @@ template CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + AtomicType, + HOST_ATOMIC_FLOAT> || std::is_same_v) { static std::mutex mx; std::lock_guard lock(mx); diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index 485445f7..78291f06 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -34,6 
+34,7 @@ cl_device_atomic_capabilities gAtomicMemCap, cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; bool gFloatAtomicsSupported = false; cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0; +cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0; test_status InitCL(cl_device_id device) { @@ -133,6 +134,13 @@ test_status InitCL(cl_device_id device) { if (is_extension_available(device, "cl_ext_float_atomics")) { gFloatAtomicsSupported = true; + if (is_extension_available(device, "cl_khr_fp64")) + { + cl_int error = clGetDeviceInfo( + device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT, + sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr); + test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); + } cl_int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index b51f4461..2a99ffa2 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1163,13 +1163,30 @@ REGISTER_TEST(svm_atomic_compare_exchange_weak) num_elements, true); } +template double kahan_sum(const std::vector &nums) +{ + return 0.0; +} +template <> double kahan_sum(const std::vector &nums) +{ + double sum = 0.0; + double compensation = 0.0; + for (double num : nums) + { + double y = num - compensation; + double t = sum + y; + compensation = (t - sum) - y; + sum = t; + } + return sum; +} template class CBasicTestFetchAdd : public CBasicTestMemOrderScope { double min_range; double max_range; - double max_error_fp32; + double max_error; std::vector ref_vals; public: @@ -1182,11 +1199,14 @@ public: CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM), - min_range(-999.0), max_range(999.0), max_error_fp32(0.0) + min_range(-999.0), max_range(999.0), max_error(0.0) { - if constexpr 
(std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_FLOAT> || std::is_same_v) { - StartValue(0.f); + StartValue((HostDataType)0.0); CBasicTestMemOrderScope::OldValueCheck(false); } @@ -1194,14 +1214,21 @@ public: bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_FLOAT> || std::is_same_v) { if (threadCount > ref_vals.size()) { ref_vals.resize(threadCount); for (cl_uint i = 0; i < threadCount; i++) - ref_vals[i] = get_random_float(min_range, max_range, d); + if constexpr (std::is_same_v) + ref_vals[i] = + get_random_double(min_range, max_range, d); + else + ref_vals[i] = get_random_float(min_range, max_range, d); memcpy(startRefValues, ref_vals.data(), sizeof(HostDataType) * ref_vals.size()); @@ -1216,12 +1243,17 @@ public: sums.push_back( std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); - std::sort( - ref_vals.begin(), ref_vals.end(), - [](float a, float b) { return std::abs(a) < std::abs(b); }); + std::sort(ref_vals.begin(), ref_vals.end(), + [](HostDataType a, HostDataType b) { + return std::abs(a) < std::abs(b); + }); double precise = 0.0; - for (auto elem : ref_vals) precise += double(elem); + if constexpr (std::is_same_v) + precise = kahan_sum(ref_vals); + else + for (auto elem : ref_vals) precise += double(elem); + sums.push_back(precise); sums.push_back( @@ -1231,8 +1263,7 @@ public: std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); std::sort(sums.begin(), sums.end()); - max_error_fp32 = - std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back()); + max_error = std::abs(sums.front() - sums.back()); // restore unsorted order memcpy(ref_vals.data(), startRefValues, @@ -1252,7 +1283,10 @@ public: std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) { return " atomic_fetch_add" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")oldValues[tid]" @@ -1286,7 +1320,10 @@ public: volatile HostAtomicType *destMemory, HostDataType *oldValues) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) { host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid], MemoryOrder()); @@ -1312,7 +1349,10 @@ public: cl_uint whichDestValue) override { expected = StartValue(); - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) @@ -1331,12 +1371,15 @@ public: const std::vector &testValues, cl_uint whichDestValue) override { - if (std::is_same::value) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same::value) { if (whichDestValue == 0) - return std::abs((HOST_ATOMIC_FLOAT)expected + return std::abs((HostDataType)expected - testValues[whichDestValue]) - > max_error_fp32; + > max_error; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, @@ -1346,7 +1389,8 @@ public: bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) override { - if (std::is_same::value) + if (std::is_same::value + || std::is_same::value) { correct = true; for (cl_uint i = 1; i < threadCount; i++) @@ -1369,7 +1413,18 @@ public: int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gDoubleAtomicCaps & 
CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) + == 0) + return 0; + } + else if constexpr (std::is_same_v) { if (LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) @@ -1385,7 +1440,10 @@ public: } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) { return threadCount; } @@ -1420,6 +1478,11 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { + CBasicTestFetchAdd test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST( + error, test_double.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_float( TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST(