diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 5f33441b..5d12b2ab 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -22,6 +22,7 @@ #include "host_atomics.h" +#include #include #include #include @@ -96,6 +97,37 @@ extern cl_int getSupportedMemoryOrdersAndScopes( cl_device_id device, std::vector &memoryOrders, std::vector &memoryScopes); +union FloatIntUnion { + float f; + uint32_t i; +}; + +template bool is_qnan(const HostDataType &value) +{ + if constexpr (std::is_same_v) + { + FloatIntUnion u; + u.f = value; + if ((u.i & 0x7F800000) != 0x7F800000) return false; + return (u.i & 0x00400000) != 0; + } + else + return std::isnan(value); +} + +template bool is_snan(const HostDataType &value) +{ + if constexpr (std::is_same_v) + { + FloatIntUnion u; + u.f = value; + if ((u.i & 0x7F800000) != 0x7F800000) return false; + return (u.i & 0x00400000) == 0; + } + else + return std::isnan(value); +} + class AtomicTypeInfo { public: TExplicitAtomicType _type; @@ -187,6 +219,7 @@ public: virtual bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, + const std::vector &startRefValues, cl_uint whichDestValue) { return expected @@ -928,7 +961,7 @@ CBasicTest::ProgramHeader(cl_uint maxNumDestItems) if constexpr ( std::is_same_v< HostDataType, - HOST_ATOMIC_DOUBLE> || std::is_same_v) + HOST_DOUBLE> || std::is_same_v) { if (std::isinf(_startValue)) ss << (_startValue < 0 ? "-" : "") << "INFINITY"; @@ -1505,7 +1538,7 @@ int CBasicTest::ExecuteSingleTest( startRefValues.size() ? 
&startRefValues[0] : 0, i)) break; // no expected value function provided - if (IsTestNotAsExpected(expected, destItems, i)) + if (IsTestNotAsExpected(expected, destItems, startRefValues, i)) { std::stringstream logLine; logLine << "ERROR: Result " << i diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index 8d875bc9..4a65d8da 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -18,6 +18,7 @@ #include "harness/testHarness.h" #include + #include "CL/cl_half.h" #ifdef WIN32 diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 06f2fd9e..d3dd3703 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1329,6 +1329,7 @@ public: } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, + const std::vector &startRefValues, cl_uint whichDestValue) override { if constexpr (is_host_fp_v) @@ -1343,6 +1344,7 @@ public: return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, + startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, @@ -1653,6 +1655,7 @@ public: } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, + const std::vector &startRefValues, cl_uint whichDestValue) override { if constexpr (std::is_same_v) @@ -1670,6 +1673,7 @@ public: return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, + startRefValues, whichDestValue); } int ExecuteSingleTest(cl_device_id deviceID, cl_context context, @@ -1776,38 +1780,23 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { - auto spec_vals_fp64 = - CBasicTestFetchAddSpecialFloats::GetSpecialValues(); - CBasicTestFetchAddSpecialFloats 
test_spec_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, - test_spec_double.Execute(deviceID, context, queue, - spec_vals_fp64.size() - * spec_vals_fp64.size())); - - auto spec_vals_fp32 = - CBasicTestFetchAddSpecialFloats::GetSpecialValues(); + EXECUTE_TEST( + error, + test_spec_double.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAddSpecialFloats test_spec_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, - test_spec_float.Execute(deviceID, context, queue, - spec_vals_fp32.size() - * spec_vals_fp32.size())); - - auto spec_vals_halfs = - CBasicTestFetchAddSpecialFloats::GetSpecialValues(); + EXECUTE_TEST( + error, + test_spec_float.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAddSpecialFloats test_spec_half(TYPE_ATOMIC_HALF, useSVM); - EXECUTE_TEST(error, - test_spec_half.Execute(deviceID, context, queue, - spec_vals_halfs.size() - * spec_vals_halfs.size())); + EXECUTE_TEST( + error, + test_spec_half.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_half( TYPE_ATOMIC_HALF, useSVM); @@ -2070,6 +2059,7 @@ public: } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, + const std::vector &startRefValues, cl_uint whichDestValue) override { if constexpr (is_host_fp_v) @@ -2084,6 +2074,7 @@ public: return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, + startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, @@ -3172,18 +3163,21 @@ public: } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, + const std::vector &startRefValues, cl_uint whichDestValue) override { if constexpr (is_host_fp_v) { if (whichDestValue == 0) return CBasicTestMemOrderScope:: - IsTestNotAsExpected(expected, testValues, whichDestValue); + IsTestNotAsExpected(expected, testValues, startRefValues, + whichDestValue); return false; // ignore all but 0 
which stores final result } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, + startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, @@ -3265,6 +3259,251 @@ public: } }; +template +class CBasicTestFetchMinSpecialFloats + : public CBasicTestMemOrderScope { + + std::vector ref_vals; + +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::LocalMemory; + CBasicTestFetchMinSpecialFloats(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + // StartValue is used as an index divisor in the following test + // logic. It is set to the number of special values, which allows + // threads to be mapped deterministically onto the input data array. + // This enables repeated min operations arranged so that every + // special value is compared with every other one (“all-to-all”). 
+ + if constexpr (std::is_same_v) + { + auto spec_vals = GetSpecialValues(); + StartValue(spec_vals.size()); + CBasicTestMemOrderScope::OldValueCheck(false); + } + } + + static std::vector &GetSpecialValues() + { + static std::vector special_values; + if constexpr (std::is_same_v) + { + const HostDataType test_value_zero = + static_cast(0.0f); + const HostDataType test_value_minus_zero = + static_cast(-0.0f); + const HostDataType test_value_without_fraction = + static_cast(2.0f); + const HostDataType test_value_with_fraction = + static_cast(2.2f); + + if (special_values.empty()) + { + special_values = { + static_cast(test_value_minus_zero), + static_cast(test_value_zero), + static_cast(test_value_without_fraction), + static_cast(test_value_with_fraction), + std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN(), + std::numeric_limits::signaling_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::signaling_NaN(), + std::numeric_limits::lowest(), + std::numeric_limits::min(), + std::numeric_limits::max(), + }; + + if (0 != (CL_FP_DENORM & gFloatFPConfig)) + { + special_values.push_back( + std::numeric_limits::denorm_min()); + } + } + } + + return special_values; + } + + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + if (threadCount > ref_vals.size()) + { + ref_vals.assign(threadCount, 0); + auto spec_vals = GetSpecialValues(); + + cl_uint total_cnt = 0; + while (total_cnt < threadCount) + { + cl_uint block_cnt = + std::min((cl_int)(threadCount - total_cnt), + (cl_int)spec_vals.size()); + memcpy(&ref_vals.at(total_cnt), spec_vals.data(), + sizeof(HostDataType) * block_cnt); + total_cnt += block_cnt; + } + } + + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * threadCount); + + return true; + } + return false; + } + std::string ProgramCore() override + { + // The start_value variable (set by StartValue) is 
used + // as a divisor of the thread index when selecting the operand for + // atomic_fetch_min. This groups threads into blocks corresponding + // to the number of special values and implements an “all-to-all” + // minimum-selection pattern. As a result, each destination element is + // updated using different combinations of input values, enabling + // consistent comparison between host and device execution. + + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(DataType().AddSubOperandTypeName()) + + " start_value = atomic_load_explicit(destMemory+tid, " + "memory_order_relaxed, memory_scope_work_group);\n" + " atomic_store_explicit(destMemory+tid, oldValues[tid], " + "memory_order_relaxed, memory_scope_work_group);\n" + " atomic_fetch_min" + + postfix + "(&destMemory[tid], (" + + DataType().AddSubOperandTypeName() + + ")oldValues[tid/(int)start_value]" + memoryOrderScope + ");\n"; + } + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override + { + auto spec_vals = GetSpecialValues(); + host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid], + MEMORY_ORDER_SEQ_CST); + host_atomic_fetch_min(&destMemory[tid], + (HostDataType)oldValues[tid / spec_vals.size()], + MemoryOrder()); + } + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override + { + expected = StartValue(); + if constexpr (std::is_same_v) + { + auto spec_vals = GetSpecialValues(); + expected = + std::min(startRefValues[whichDestValue], + startRefValues[whichDestValue / spec_vals.size()]); + } + return true; + } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + const std::vector &startRefValues, + cl_uint whichDestValue) override + { + if (testValues[whichDestValue] != expected) + { + auto spec_vals = GetSpecialValues(); + // 
special cases + // min(-0, +0) = min(+0, -0) = +0 or -0, + if (((startRefValues[whichDestValue] == -0.f) + && (startRefValues[whichDestValue / spec_vals.size()] == 0.f)) + || ((startRefValues[whichDestValue] == 0.f) + && (startRefValues[whichDestValue / spec_vals.size()] + == -0.f))) + return false; + else if (is_qnan(startRefValues[whichDestValue / spec_vals.size()]) + || is_qnan(startRefValues[whichDestValue])) + { + // min(x, qNaN) = min(qNaN, x) = x, + // min(qNaN, qNaN) = qNaN, + if (is_qnan(startRefValues[whichDestValue / spec_vals.size()]) + && is_qnan(startRefValues[whichDestValue])) + return !is_qnan(testValues[whichDestValue]); + else if (is_qnan( + startRefValues[whichDestValue / spec_vals.size()])) + return !std::isnan(testValues[whichDestValue]) + && testValues[whichDestValue] + != startRefValues[whichDestValue]; // NaN != NaN always + // true + else + return !std::isnan(testValues[whichDestValue]) + && testValues[whichDestValue] + != startRefValues[whichDestValue / spec_vals.size()]; + } + else if (is_snan(startRefValues[whichDestValue / spec_vals.size()]) + || is_snan(startRefValues[whichDestValue])) + { + // min(x, sNaN) = min(sNaN, x) = NaN or x, and + // min(NaN, sNaN) = min(sNaN, NaN) = NaN + if (std::isnan(testValues[whichDestValue]) + || testValues[whichDestValue] + == startRefValues[whichDestValue] + || testValues[whichDestValue] + == startRefValues[whichDestValue / spec_vals.size()]) + return false; + } + } + + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + startRefValues, + whichDestValue); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) + == 0) + return 0; + + if 
(!CBasicTestMemOrderScope::LocalMemory() + && CBasicTestMemOrderScope::DeclaredInProgram()) + { + if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0; + } + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } +}; + static int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, @@ -3290,6 +3529,12 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { + CBasicTestFetchMinSpecialFloats + test_spec_float(TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, + test_spec_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST( @@ -3478,18 +3723,21 @@ public: } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, + const std::vector &startRefValues, cl_uint whichDestValue) override { if constexpr (is_host_fp_v) { if (whichDestValue == 0) return CBasicTestMemOrderScope:: - IsTestNotAsExpected(expected, testValues, whichDestValue); + IsTestNotAsExpected(expected, testValues, startRefValues, + whichDestValue); return false; // ignore all but 0 which stores final result } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, + startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,