From 9d3ae4c734382016dc0cd789f399b27519ad99dc Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 17 Feb 2026 17:41:46 +0100 Subject: [PATCH] Added support for cl_ext_float_atomics in CBasicTestFetchAddSpecialFloats with atomic_float (#2371) Related to #2142, according to the work plan, extending CBasicTestFetchAddSpecialFloats with support for atomic_float. --- test_conformance/c11_atomics/common.h | 19 ++- test_conformance/c11_atomics/main.cpp | 9 ++ test_conformance/c11_atomics/test_atomics.cpp | 128 +++++++++++++++--- 3 files changed, 132 insertions(+), 24 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 5bf4cd0b..5f917949 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -22,13 +22,13 @@ #include "host_atomics.h" -#include "CL/cl_half.h" - #include #include #include #include +#include "CL/cl_half.h" + #define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads) #define MAX_HOST_THREADS GetThreadCount() @@ -76,9 +76,8 @@ extern int gMaxDeviceThreads; // maximum number of threads executed on OCL device extern cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device -extern cl_half_rounding_mode gHalfRoundingMode; -extern bool gFloatAtomicsSupported; -extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps; + +extern cl_device_fp_config gFloatFPConfig; extern cl_device_fp_config gHalfFPConfig; extern cl_half_rounding_mode gHalfRoundingMode; @@ -183,6 +182,7 @@ public: { return false; } + virtual bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, @@ -926,7 +926,14 @@ CBasicTest::ProgramHeader(cl_uint maxNumDestItems) if constexpr (std::is_same_v) { - ss << std::setprecision(10) << _startValue; + if (std::isinf(_startValue)) + ss << (_startValue < 0 ? "-" : "") << "INFINITY"; + else if (std::isnan(_startValue)) + ss << "0.0f / 0.0f"; + else + ss << std::setprecision( + std::numeric_limits::max_digits10) + << _startValue; } else if constexpr (std::is_same_v) { diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index 1d20bb47..7d636151 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -31,6 +31,8 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device + +cl_device_fp_config gFloatFPConfig = 0; cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; bool gFloatAtomicsSupported = false; cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0; @@ -148,6 +150,13 @@ test_status InitCL(cl_device_id device) { sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr); test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); + error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, + sizeof(gFloatFPConfig), &gFloatFPConfig, NULL); + test_error_ret( + error, + "Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)", + TEST_FAIL); + if (is_extension_available(device, "cl_khr_fp16")) { cl_int error = clGetDeviceInfo( diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 135574d8..df6d1e58 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1437,13 +1437,21 @@ public: : CBasicTestMemOrderScope(dataType, useSVM) { - if constexpr (std::is_same_v) + // StartValue is used as an index divisor in the following test + // logic. It is set to the number of special values, which allows + // threads to be mapped deterministically onto the input data array. + // This enables repeated add operations arranged so that every + // special value is added to every other one (“all-to-all”). + + if constexpr (std::is_same_v) + { + auto spec_vals = GetSpecialValues(); + StartValue(spec_vals.size()); + CBasicTestMemOrderScope::OldValueCheck(false); + } + else if constexpr (std::is_same_v) { - // StartValue is used as an index divisor in the following test - // logic. It is set to the number of special values, which allows - // threads to be mapped deterministically onto the input data array. - // This enables repeated add operations arranged so that every - // special value is added to every other one (“all-to-all”). auto spec_vals = GetSpecialValues(); StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode)); @@ -1454,10 +1462,42 @@ public: static std::vector &GetSpecialValues() { + const float test_value_zero = 0.0f; + const float test_value_minus_zero = -0.0f; + const float test_value_without_fraction = 2.0f; + const float test_value_with_fraction = 2.2f; + static std::vector special_values; - if (special_values.empty()) + if constexpr (std::is_same_v) { - if constexpr (std::is_same_v) + if (special_values.empty()) + { + special_values = { + static_cast(test_value_minus_zero), + static_cast(test_value_zero), + static_cast(test_value_without_fraction), + static_cast(test_value_with_fraction), + std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN(), + std::numeric_limits::signaling_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::signaling_NaN(), + std::numeric_limits::lowest(), + std::numeric_limits::min(), + std::numeric_limits::max(), + }; + + if (0 != (CL_FP_DENORM & gFloatFPConfig)) + { + special_values.push_back( + std::numeric_limits::denorm_min()); + } + } + } + else if constexpr (std::is_same_v) + { + if (special_values.empty()) { special_values = { 0xffff, 0x0000, 0x7c00, /*INFINITY*/ @@ -1491,7 +1531,10 @@ public: bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { if (threadCount > ref_vals.size()) { @@ -1522,7 +1565,10 @@ public: std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { // The start_value variable (set by StartValue) is used // as a divisor of the thread index when selecting the operand for @@ -1548,7 +1594,10 @@ public: volatile HostAtomicType *destMemory, HostDataType *oldValues) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { auto spec_vals = GetSpecialValues(); host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid], @@ -1563,7 +1612,14 @@ public: cl_uint whichDestValue) override { expected = StartValue(); - if constexpr (std::is_same_v) + + if constexpr (std::is_same_v) + { + auto spec_vals = GetSpecialValues(); + expected = startRefValues[whichDestValue] + + startRefValues[whichDestValue / spec_vals.size()]; + } + else if constexpr (std::is_same_v) { auto spec_vals = GetSpecialValues(); expected = cl_half_from_float( @@ -1575,13 +1631,18 @@ public: return true; } - bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, cl_uint whichDestValue) override { - - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (std::isnan(testValues[whichDestValue]) && std::isnan(expected)) + return false; + else + return expected != testValues[whichDestValue]; + } + else if constexpr (std::is_same_v) { return static_cast(expected) != testValues[whichDestValue]; } @@ -1591,11 +1652,28 @@ public: testValues, whichDestValue); } - int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; + + if (!CBasicTestMemOrderScope::LocalMemory() + && CBasicTestMemOrderScope::DeclaredInProgram()) + { + if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0; + } + } + else if constexpr (std::is_same_v) { if (DeclaredInProgram()) return 0; // skip test - not applicable @@ -1621,7 +1699,10 @@ public: } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { return threadCount; } @@ -1656,6 +1737,17 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { + auto spec_vals_fp32 = + CBasicTestFetchAddSpecialFloats::GetSpecialValues(); + + CBasicTestFetchAddSpecialFloats + test_spec_float(TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST(error, + test_spec_float.Execute(deviceID, context, queue, + spec_vals_fp32.size() + * spec_vals_fp32.size())); + auto spec_vals_halfs = CBasicTestFetchAddSpecialFloats::GetSpecialValues();