From fbba22770ddf84c5ac1e471e18134ed0bdd376a1 Mon Sep 17 00:00:00 2001
From: Marcin Hajder
Date: Tue, 2 Sep 2025 17:38:56 +0200
Subject: [PATCH] Added support for cl_ext_float_atomics in CBasicTestFetchAdd
 with atomic_float (#2345)

Related to #2142, according to the work plan, extending CBasicTestFetchAdd
with support for atomic_float.
---
 test_conformance/c11_atomics/common.h         |  11 +-
 test_conformance/c11_atomics/host_atomics.h   |  20 +-
 test_conformance/c11_atomics/main.cpp         |   6 +
 test_conformance/c11_atomics/test_atomics.cpp | 258 +++++++++++++++---
 4 files changed, 253 insertions(+), 42 deletions(-)

diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index 3cab98ce..1fca36b8 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -74,9 +74,11 @@ extern int
     gMaxDeviceThreads; // maximum number of threads executed on OCL device
 extern cl_device_atomic_capabilities gAtomicMemCap,
     gAtomicFenceCap; // atomic memory and fence capabilities for this device
+
 extern cl_half_rounding_mode gHalfRoundingMode;
 extern bool gFloatAtomicsSupported;
 extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
+extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps;
 
 extern const char *
 get_memory_order_type_name(TExplicitMemoryOrderType orderType);
@@ -174,6 +176,13 @@ public:
     {
         return false;
     }
+    virtual bool
+    IsTestNotAsExpected(const HostDataType &expected,
+                        const std::vector<HostDataType> &testValues,
+                        cl_uint whichDestValue)
+    {
+        return expected != testValues[whichDestValue];
+    }
     virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                               MTdata d)
     {
@@ -1449,7 +1458,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
                     startRefValues.size() ? 
&startRefValues[0] : 0, i))
                 break; // no expected value function provided
-            if (expected != destItems[i])
+            if (IsTestNotAsExpected(expected, destItems, i))
             {
                 std::stringstream logLine;
                 logLine << "ERROR: Result " << i
diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h
index e5b1d328..3bc88a23 100644
--- a/test_conformance/c11_atomics/host_atomics.h
+++ b/test_conformance/c11_atomics/host_atomics.h
@@ -17,6 +17,7 @@
 #define HOST_ATOMICS_H_
 
 #include "harness/testHarness.h"
+#include <mutex>
 
 #ifdef WIN32
 #include "Windows.h"
@@ -94,14 +95,25 @@ template <typename AtomicType, typename CorrespondingType>
 CorrespondingType host_atomic_fetch_add(volatile AtomicType *a,
                                         CorrespondingType c,
                                         TExplicitMemoryOrderType order)
 {
+    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
+    {
+        static std::mutex mx;
+        std::lock_guard<std::mutex> lock(mx);
+        CorrespondingType old_value = *a;
+        *a += c;
+        return old_value;
+    }
+    else
+    {
 #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
-    return InterlockedExchangeAdd(a, c);
+        return InterlockedExchangeAdd(a, c);
 #elif defined(__GNUC__)
-    return __sync_fetch_and_add(a, c);
+        return __sync_fetch_and_add(a, c);
 #else
-    log_info("Host function not implemented: atomic_fetch_add\n");
-    return 0;
+        log_info("Host function not implemented: atomic_fetch_add\n");
+        return 0;
 #endif
+    }
 }
 
 template <typename AtomicType, typename CorrespondingType>
diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp
index 40972b26..f089d6da 100644
--- a/test_conformance/c11_atomics/main.cpp
+++ b/test_conformance/c11_atomics/main.cpp
@@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap,
 cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
 bool gFloatAtomicsSupported = false;
 cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
+cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
 
 test_status InitCL(cl_device_id device) {
     auto version = get_device_cl_version(device);
@@ -132,6 +133,11 @@ test_status InitCL(cl_device_id device)
     if (is_extension_available(device, 
"cl_ext_float_atomics"))
     {
         gFloatAtomicsSupported = true;
+
+        cl_int error = clGetDeviceInfo(
+            device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
+            sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
+        test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
         if (is_extension_available(device, "cl_khr_fp16"))
         {
             cl_int error = clGetDeviceInfo(
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index a08a0daf..a2be0549 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -16,10 +16,13 @@
 #include "harness/testHarness.h"
 #include "harness/kernelHelpers.h"
 #include "harness/typeWrappers.h"
+#include "harness/conversions.h"
 
 #include "common.h"
 #include "host_atomics.h"
 
+#include <algorithm>
+#include <numeric>
 #include <sstream>
 #include <vector>
 
@@ -1163,61 +1166,233 @@ REGISTER_TEST(svm_atomic_compare_exchange_weak)
 template <typename HostAtomicType, typename HostDataType>
 class CBasicTestFetchAdd
     : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
 {
+
+    double min_range;
+    double max_range;
+    double max_error_fp32;
+    std::vector<HostDataType> ref_vals;
+
 public:
     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
     using CBasicTestMemOrderScope<HostAtomicType,
                                   HostDataType>::MemoryOrderScopeStr;
     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
     using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
     CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
         : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
-                                                                useSVM)
-    {}
-    virtual std::string ProgramCore()
+                                                                useSVM),
+          min_range(-999.0), max_range(999.0), max_error_fp32(0.0)
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        {
+            StartValue(0.f);
+            CBasicTestMemOrderScope<HostAtomicType,
+                                    HostDataType>::OldValueCheck(false);
+        }
+    }
+    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
+                      MTdata d) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        {
+            if (threadCount > ref_vals.size())
+            {
+                ref_vals.resize(threadCount);
+
+                for (cl_uint i = 0; i < threadCount; i++)
+                    ref_vals[i] = get_random_float(min_range, max_range, d);
+
+                memcpy(startRefValues, ref_vals.data(), 
sizeof(HostDataType) * ref_vals.size());
+
+                // Estimate highest possible summation error for given set.
+                std::vector<double> sums;
+                std::sort(ref_vals.begin(), ref_vals.end());
+
+                sums.push_back(
+                    std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
+
+                sums.push_back(
+                    std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
+
+                std::sort(
+                    ref_vals.begin(), ref_vals.end(),
+                    [](float a, float b) { return std::abs(a) < std::abs(b); });
+
+                double precise = 0.0;
+                for (auto elem : ref_vals) precise += double(elem);
+                sums.push_back(precise);
+
+                sums.push_back(
+                    std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
+
+                sums.push_back(
+                    std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
+
+                std::sort(sums.begin(), sums.end());
+                max_error_fp32 =
+                    std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
+
+                // restore unsorted order
+                memcpy(ref_vals.data(), startRefValues,
+                       sizeof(HostDataType) * ref_vals.size());
+            }
+            else
+            {
+                memcpy(startRefValues, ref_vals.data(),
+                       sizeof(HostDataType) * threadCount);
+            }
+            return true;
+        }
+        return false;
+    }
+    std::string ProgramCore() override
     {
         std::string memoryOrderScope = MemoryOrderScopeStr();
         std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit");
-        return " oldValues[tid] = atomic_fetch_add" + postfix
-            + "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
-            + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add"
-            + postfix + "(&destMemory[0], ("
-            + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
-            + ");\n"
-              " atomic_fetch_add"
-            + postfix + "(&destMemory[0], ("
-            + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
-            + ");\n"
-              " atomic_fetch_add"
-            + postfix + "(&destMemory[0], (("
-            + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
-            + DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope
-            + ");\n";
+
+        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        {
+            return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
+                + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
+                + memoryOrderScope + ");\n"
+                + " oldValues[tid] = atomic_fetch_add" + postfix
+                + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
+                + ")0" + memoryOrderScope + ");\n";
+        }
+        else
+        {
+            return " oldValues[tid] = atomic_fetch_add" + postfix
+                + "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
+                + ")tid + 3" + memoryOrderScope
+                + ");\n"
+                  " atomic_fetch_add"
+                + postfix + "(&destMemory[0], ("
+                + DataType().AddSubOperandTypeName() + ")tid + 3"
+                + memoryOrderScope
+                + ");\n"
+                  " atomic_fetch_add"
+                + postfix + "(&destMemory[0], ("
+                + DataType().AddSubOperandTypeName() + ")tid + 3"
+                + memoryOrderScope
+                + ");\n"
+                  " atomic_fetch_add"
+                + postfix + "(&destMemory[0], (("
+                + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
+                + DataType().AddSubOperandTypeName() + ")-1)*8"
+                + memoryOrderScope + ");\n";
+        }
     }
-    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
-                              volatile HostAtomicType *destMemory,
-                              HostDataType *oldValues)
+    void HostFunction(cl_uint tid, cl_uint threadCount,
+                      volatile HostAtomicType *destMemory,
+                      HostDataType *oldValues) override
     {
-        oldValues[tid] = host_atomic_fetch_add(
-            &destMemory[0], 
(HostDataType)tid + 3, MemoryOrder());
-        host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
-                              MemoryOrder());
-        host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
-                              MemoryOrder());
-        host_atomic_fetch_add(&destMemory[0],
-                              ((HostDataType)tid + 3)
-                                  << (sizeof(HostDataType) - 1) * 8,
-                              MemoryOrder());
+        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        {
+            host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
+                                  MemoryOrder());
+            oldValues[tid] = host_atomic_fetch_add(
+                &destMemory[tid], (HostDataType)0, MemoryOrder());
+        }
+        else
+        {
+            oldValues[tid] = host_atomic_fetch_add(
+                &destMemory[0], (HostDataType)tid + 3, MemoryOrder());
+            host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
+                                  MemoryOrder());
+            host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
+                                  MemoryOrder());
+            host_atomic_fetch_add(
+                &destMemory[0],
+                (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
+                MemoryOrder());
+        }
     }
-    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
-                               HostDataType *startRefValues,
-                               cl_uint whichDestValue)
+    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+                       HostDataType *startRefValues,
+                       cl_uint whichDestValue) override
     {
         expected = StartValue();
-        for (cl_uint i = 0; i < threadCount; i++)
-            expected += ((HostDataType)i + 3) * 3
-                + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
+        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        {
+            if (whichDestValue == 0)
+                for (cl_uint i = 0; i < threadCount; i++)
+                    expected += startRefValues[i];
+        }
+        else
+        {
+            for (cl_uint i = 0; i < threadCount; i++)
+                expected += ((HostDataType)i + 3) * 3
+                    + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
+        }
+
         return true;
     }
+    bool IsTestNotAsExpected(const HostDataType &expected,
+                             const std::vector<HostDataType> &testValues,
+                             cl_uint whichDestValue) override
+    {
+        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
+        {
+            if (whichDestValue == 0)
+                return std::abs((HOST_ATOMIC_FLOAT)expected
+                                - testValues[whichDestValue])
+                    > max_error_fp32;
+        }
+ 
return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                               testValues,
                                                               whichDestValue);
+    }
+    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
+                    HostAtomicType *finalValues) override
+    {
+        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
+        {
+            correct = true;
+            for (cl_uint i = 1; i < threadCount; i++)
+            {
+                if (refValues[i] != StartValue())
+                {
+                    log_error("Thread %d found %d mismatch(es)\n", i,
+                              (cl_uint)refValues[i]);
+                    correct = false;
+                }
+            }
+            return !correct;
+        }
+        return CBasicTestMemOrderScope<HostAtomicType,
+                                       HostDataType>::VerifyRefs(correct,
+                                                                 threadCount,
+                                                                 refValues,
+                                                                 finalValues);
+    }
+    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+                          cl_command_queue queue) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        {
+            if (LocalMemory()
+                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0;
+        }
+        return CBasicTestMemOrderScope<
+            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
+                                                             queue);
+    }
+    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
+        {
+            return threadCount;
+        }
+        return CBasicTestMemOrderScope<HostAtomicType,
+                                       HostDataType>::NumResults(threadCount,
+                                                                 deviceID);
+    }
 };
 
 static int test_atomic_fetch_add_generic(cl_device_id deviceID,
@@ -1242,6 +1417,15 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
                                TYPE_ATOMIC_ULONG, useSVM);
     EXECUTE_TEST(error,
                  test_ulong.Execute(deviceID, context, queue, num_elements));
+
+    if (gFloatAtomicsSupported)
+    {
+        CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_ATOMIC_FLOAT> test_float(
+            TYPE_ATOMIC_FLOAT, useSVM);
+        EXECUTE_TEST(
+            error, test_float.Execute(deviceID, context, queue, num_elements));
+    }
+
     if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
     {
         CBasicTestFetchAdd