From 940c8bb973692ad227a20d7a79694282f527ec2d Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 21 Oct 2025 17:43:05 +0200 Subject: [PATCH] Added support for cl_ext_float_atomics in CBasicTestFetchMin/Max with atomic_half (#2357) Related to #2142, according to the work plan, extending CBasicTestFetchMin/CBasicTestFetchMax with support for atomic_half. --- test_conformance/c11_atomics/common.cpp | 18 ++- test_conformance/c11_atomics/common.h | 5 +- test_conformance/c11_atomics/host_atomics.h | 16 +- test_conformance/c11_atomics/test_atomics.cpp | 153 +++++++++++++++--- 4 files changed, 167 insertions(+), 25 deletions(-) diff --git a/test_conformance/c11_atomics/common.cpp b/test_conformance/c11_atomics/common.cpp index 414d877b..4838c347 100644 --- a/test_conformance/c11_atomics/common.cpp +++ b/test_conformance/c11_atomics/common.cpp @@ -194,14 +194,28 @@ template<> cl_int AtomicTypeExtendedInfo::MinValue() {return CL_INT_MIN; template<> cl_uint AtomicTypeExtendedInfo::MinValue() {return 0;} template<> cl_long AtomicTypeExtendedInfo::MinValue() {return CL_LONG_MIN;} template<> cl_ulong AtomicTypeExtendedInfo::MinValue() {return 0;} -template<> cl_float AtomicTypeExtendedInfo::MinValue() {return CL_FLT_MIN;} +template <> cl_half AtomicTypeExtendedInfo::MinValue() +{ + return cl_half_from_float(CL_HALF_MIN, gHalfRoundingMode); +} +template <> cl_float AtomicTypeExtendedInfo::MinValue() +{ + return CL_FLT_MIN; +} template<> cl_double AtomicTypeExtendedInfo::MinValue() {return CL_DBL_MIN;} template<> cl_int AtomicTypeExtendedInfo::MaxValue() {return CL_INT_MAX;} template<> cl_uint AtomicTypeExtendedInfo::MaxValue() {return CL_UINT_MAX;} template<> cl_long AtomicTypeExtendedInfo::MaxValue() {return CL_LONG_MAX;} template<> cl_ulong AtomicTypeExtendedInfo::MaxValue() {return CL_ULONG_MAX;} -template<> cl_float AtomicTypeExtendedInfo::MaxValue() {return CL_FLT_MAX;} +template <> cl_half AtomicTypeExtendedInfo::MaxValue() +{ + return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode); +} +template <> cl_float AtomicTypeExtendedInfo::MaxValue() +{ + return CL_FLT_MAX; +} template<> cl_double AtomicTypeExtendedInfo::MaxValue() {return CL_DBL_MAX;} cl_int getSupportedMemoryOrdersAndScopes( diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index fe2bd37d..aee5173d 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -894,15 +894,16 @@ CBasicTest::ProgramHeader(cl_uint maxNumDestItems) header += std::string("__global volatile ") + aTypeName + " destMemory[" + ss.str() + "] = {\n"; ss.str(""); + if (CBasicTest::DataType()._type == TYPE_ATOMIC_FLOAT) ss << std::setprecision(10) << _startValue; else if (CBasicTest::DataType()._type == TYPE_ATOMIC_HALF) - ss << static_cast( - cl_half_to_float(static_cast(_startValue))); + ss << cl_half_to_float(static_cast(_startValue)); else ss << _startValue; + for (cl_uint i = 0; i < maxNumDestItems; i++) { if (aTypeName == "atomic_flag") diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index aabbfdde..4471897b 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -176,7 +176,20 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp TExplicitMemoryOrderType order_failure) { CorrespondingType tmp; - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + static std::mutex mtx; + std::lock_guard lock(mtx); + tmp = *reinterpret_cast(a); + + if (cl_half_to_float(tmp) == cl_half_to_float(*expected)) + { + *reinterpret_cast(a) = desired; + return true; + } + *expected = tmp; + } + else if constexpr (std::is_same_v) { static std::mutex mtx; std::lock_guard lock(mtx); @@ -191,7 +204,6 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp else { #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) - tmp = InterlockedCompareExchange(a, desired, *expected); #elif defined(__GNUC__) tmp = __sync_val_compare_and_swap(a, *expected, desired); diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 40cc8d27..d73bb6b8 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -2687,7 +2687,10 @@ public: min_range(-999.0), max_range(999.0) { StartValue(DataType().MaxValue()); - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { CBasicTestMemOrderScope::OldValueCheck(false); @@ -2697,7 +2700,10 @@ public: { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { return " atomic_fetch_min" + postfix + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" @@ -2716,7 +2722,10 @@ public: volatile HostAtomicType *destMemory, HostDataType *oldValues) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { host_atomic_fetch_min(&destMemory[0], oldValues[tid], MemoryOrder()); @@ -2732,7 +2741,16 @@ public: bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = cl_half_from_float( + get_random_float(min_range, max_range, d), + gHalfRoundingMode); + } + } + else if constexpr (std::is_same_v) { for (cl_uint i = 0; i < threadCount; i++) { @@ -2759,7 +2777,19 @@ public: cl_uint whichDestValue) override { expected = StartValue(); - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + { + for (cl_uint i = 0; i < threadCount; i++) + { + if (cl_half_to_float(startRefValues[i]) + < cl_half_to_float(expected)) + expected = startRefValues[i]; + } + } + } + else if constexpr (std::is_same_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) @@ -2779,7 +2809,9 @@ public: const std::vector &testValues, cl_uint whichDestValue) override { - if (std::is_same::value) + if (std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same::value) { if (whichDestValue == 0) return CBasicTestMemOrderScope:: @@ -2794,7 +2826,9 @@ public: bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) override { - if (std::is_same::value) + if (std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same::value) { correct = true; for (cl_uint i = 1; i < threadCount; i++) @@ -2817,7 +2851,19 @@ public: int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; + } + else if constexpr (std::is_same_v) { if (LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) @@ -2835,7 +2881,10 @@ public: } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { return threadCount; } @@ -2870,6 +2919,11 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { + CBasicTestFetchMin test_half( + TYPE_ATOMIC_HALF, useSVM); + EXECUTE_TEST(error, + test_half.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_float( TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST( @@ -2953,18 +3007,31 @@ public: useSVM), min_range(-999.0), max_range(999.0) { - StartValue(DataType().MinValue()); - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { CBasicTestMemOrderScope::OldValueCheck(false); + if constexpr (std::is_same_v) + StartValue(cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode)); + else + StartValue(-DataType().MaxValue()); + } + else + { + StartValue(DataType().MinValue()); } } std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { return " atomic_fetch_max" + postfix + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" @@ -2983,7 +3050,10 @@ public: volatile HostAtomicType *destMemory, HostDataType *oldValues) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { host_atomic_fetch_max(&destMemory[0], oldValues[tid], MemoryOrder()); @@ -2999,7 +3069,16 @@ public: bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = cl_half_from_float( + get_random_float(min_range, max_range, d), + gHalfRoundingMode); + } + } + else if constexpr (std::is_same_v) { for (cl_uint i = 0; i < threadCount; i++) { @@ -3026,7 +3105,19 @@ public: cl_uint whichDestValue) override { expected = StartValue(); - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + { + for (cl_uint i = 0; i < threadCount; i++) + { + if (cl_half_to_float(startRefValues[i]) + > cl_half_to_float(expected)) + expected = startRefValues[i]; + } + } + } + else if constexpr (std::is_same_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) @@ -3046,7 +3137,9 @@ public: const std::vector &testValues, cl_uint whichDestValue) override { - if (std::is_same::value) + if (std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same::value) { if (whichDestValue == 0) return CBasicTestMemOrderScope:: @@ -3061,7 +3154,9 @@ public: bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) override { - if (std::is_same::value) + if (std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same::value) { correct = true; for (cl_uint i = 1; i < threadCount; i++) @@ -3084,7 +3179,19 @@ public: int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; + } + else if constexpr (std::is_same_v) { if (LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) @@ -3102,7 +3209,10 @@ public: } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { return threadCount; } @@ -3137,6 +3247,11 @@ static int test_atomic_fetch_max_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { + CBasicTestFetchMax test_half( + TYPE_ATOMIC_HALF, useSVM); + EXECUTE_TEST(error, + test_half.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_float( TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST(