From 5673883005aad3f8bec6fa64e1bfb663e85ee1af Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 24 Feb 2026 17:37:29 +0100 Subject: [PATCH] Added support for cl_ext_float_atomics in CBasicTestFetchAddSpecialFloats with atomic_double (#2388) Related to #2142, according to the work plan, extending CBasicTestFetchAddSpecialFloats with support for atomic_double. --- test_conformance/c11_atomics/common.h | 8 +- test_conformance/c11_atomics/main.cpp | 7 +- test_conformance/c11_atomics/test_atomics.cpp | 98 ++++++++++++++----- 3 files changed, 86 insertions(+), 27 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 5f917949..6a8a0082 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -77,6 +77,7 @@ extern int extern cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device +extern cl_device_fp_config gDoubleFPConfig; extern cl_device_fp_config gFloatFPConfig; extern cl_device_fp_config gHalfFPConfig; @@ -924,12 +925,15 @@ CBasicTest::ProgramHeader(cl_uint maxNumDestItems) + ss.str() + "] = {\n"; ss.str(""); - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_ATOMIC_DOUBLE> || std::is_same_v) { if (std::isinf(_startValue)) ss << (_startValue < 0 ? "-" : "") << "INFINITY"; else if (std::isnan(_startValue)) - ss << "0.0f / 0.0f"; + ss << "0.0 / 0.0"; else ss << std::setprecision( std::numeric_limits::max_digits10) diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index 7d636151..ee83dfdb 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -31,7 +31,7 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device - +cl_device_fp_config gDoubleFPConfig = 0; cl_device_fp_config gFloatFPConfig = 0; cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; bool gFloatAtomicsSupported = false; @@ -143,6 +143,11 @@ test_status InitCL(cl_device_id device) { device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT, sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr); test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); + + error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, + sizeof(gDoubleFPConfig), &gDoubleFPConfig, + NULL); + test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); } cl_int error = clGetDeviceInfo( diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index df6d1e58..06f2fd9e 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1442,8 +1442,10 @@ public: // threads to be mapped deterministically onto the input data array. // This enables repeated add operations arranged so that every // special value is added to every other one (“all-to-all”). - - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) { auto spec_vals = GetSpecialValues(); StartValue(spec_vals.size()); @@ -1452,7 +1454,6 @@ public: } else if constexpr (std::is_same_v) { - auto spec_vals = GetSpecialValues(); StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode)); CBasicTestMemOrderScope &GetSpecialValues() { - const float test_value_zero = 0.0f; - const float test_value_minus_zero = -0.0f; - const float test_value_without_fraction = 2.0f; - const float test_value_with_fraction = 2.2f; - static std::vector special_values; - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) { + const HostDataType test_value_zero = + static_cast(0.0f); + const HostDataType test_value_minus_zero = + static_cast(-0.0f); + const HostDataType test_value_without_fraction = + static_cast(2.0f); + const HostDataType test_value_with_fraction = + static_cast(2.2f); + if (special_values.empty()) { special_values = { @@ -1488,10 +1496,21 @@ public: std::numeric_limits::max(), }; - if (0 != (CL_FP_DENORM & gFloatFPConfig)) + if constexpr (std::is_same_v) { - special_values.push_back( - std::numeric_limits::denorm_min()); + if (0 != (CL_FP_DENORM & gDoubleFPConfig)) + { + special_values.push_back( + std::numeric_limits::denorm_min()); + } + } + else if constexpr (std::is_same_v) + { + if (0 != (CL_FP_DENORM & gFloatFPConfig)) + { + special_values.push_back( + std::numeric_limits::denorm_min()); + } } } } @@ -1524,7 +1543,6 @@ public: } } } - return special_values; } @@ -1534,7 +1552,7 @@ public: if constexpr ( std::is_same_v< HostDataType, - HOST_HALF> || std::is_same_v) + HOST_HALF> || std::is_same_v || std::is_same_v) { if (threadCount > ref_vals.size()) { @@ -1568,7 +1586,7 @@ public: if constexpr ( std::is_same_v< HostDataType, - HOST_HALF> || std::is_same_v) + HOST_HALF> || std::is_same_v || std::is_same_v) { // The start_value variable (set by StartValue) is used // as a divisor of the thread index when selecting the operand for @@ -1597,7 +1615,7 @@ public: if constexpr ( std::is_same_v< HostDataType, - HOST_HALF> || std::is_same_v) + HOST_HALF> || std::is_same_v || std::is_same_v) { auto spec_vals = GetSpecialValues(); host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid], @@ -1612,8 +1630,10 @@ public: cl_uint whichDestValue) override { expected = StartValue(); - - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) { auto spec_vals = GetSpecialValues(); expected = startRefValues[whichDestValue] @@ -1635,17 +1655,17 @@ public: const std::vector &testValues, cl_uint whichDestValue) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + return static_cast(expected) != testValues[whichDestValue]; + } + else { if (std::isnan(testValues[whichDestValue]) && std::isnan(expected)) return false; else return expected != testValues[whichDestValue]; } - else if constexpr (std::is_same_v) - { - return static_cast(expected) != testValues[whichDestValue]; - } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, @@ -1655,6 +1675,25 @@ public: int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) + == 0) + return 0; + + if (!CBasicTestMemOrderScope::LocalMemory() + && CBasicTestMemOrderScope::DeclaredInProgram()) + { + if ((gDoubleFPConfig & CL_FP_INF_NAN) == 0) return 0; + } + } if constexpr (std::is_same_v) { if (LocalMemory() @@ -1702,7 +1741,7 @@ public: if constexpr ( std::is_same_v< HostDataType, - HOST_HALF> || std::is_same_v) + HOST_HALF> || std::is_same_v || std::is_same_v) { return threadCount; } @@ -1737,6 +1776,17 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { + auto spec_vals_fp64 = + CBasicTestFetchAddSpecialFloats::GetSpecialValues(); + + CBasicTestFetchAddSpecialFloats + test_spec_double(TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_spec_double.Execute(deviceID, context, queue, + spec_vals_fp64.size() + * spec_vals_fp64.size())); + auto spec_vals_fp32 = CBasicTestFetchAddSpecialFloats::GetSpecialValues();