diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index 3bc88a23..e1787849 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -19,6 +19,8 @@ #include "harness/testHarness.h" #include +#include "CL/cl_half.h" + #ifdef WIN32 #include "Windows.h" #endif @@ -88,6 +90,8 @@ enum TExplicitMemoryOrderType #define HOST_FLAG cl_int +extern cl_half_rounding_mode gHalfRoundingMode; + // host atomic functions void host_atomic_thread_fence(TExplicitMemoryOrderType order); @@ -120,14 +124,26 @@ template CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { -#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) - return InterlockedExchangeSubtract(a, c); + if constexpr (std::is_same_v) + { + static std::mutex mx; + std::lock_guard lock(mx); + CorrespondingType old_value = *a; + *a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)), + gHalfRoundingMode); + return old_value; + } + else + { +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + return InterlockedExchangeSubtract(a, c); #elif defined(__GNUC__) - return __sync_fetch_and_sub(a, c); + return __sync_fetch_and_sub(a, c); #else - log_info("Host function not implemented: atomic_fetch_sub\n"); - return 0; + log_info("Host function not implemented: atomic_fetch_sub\n"); + return 0; #endif + } } template diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index a2be0549..73599dc1 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1488,46 +1488,239 @@ REGISTER_TEST(svm_atomic_fetch_add) template class CBasicTestFetchSub : public CBasicTestMemOrderScope { + + double min_range; + double max_range; + double max_error; + std::vector ref_vals; + public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, - useSVM) - {} - virtual std::string ProgramCore() + useSVM), + min_range(-999.0), max_range(999.0), max_error(0.0) + { + if constexpr (std::is_same_v) + { + StartValue(0); + CBasicTestMemOrderScope::OldValueCheck(false); + } + } + template + float subtract_halfs(Iterator begin, Iterator end) + { + cl_half res = 0; + for (auto it = begin; it != end; ++it) + { + res = cl_half_from_float(cl_half_to_float(res) + - cl_half_to_float(*it), + gHalfRoundingMode); + } + return cl_half_to_float(res); + } + bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) override + { + if constexpr (std::is_same_v) + { + if (threadCount > ref_vals.size()) + { + ref_vals.resize(threadCount); + + for (cl_uint i = 0; i < threadCount; i++) + ref_vals[i] = cl_half_from_float( + get_random_float(min_range, max_range, d), + gHalfRoundingMode); + + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * ref_vals.size()); + + // Estimate highest possible summation error for given set. + std::vector sums; + std::sort(ref_vals.begin(), ref_vals.end(), + [](cl_half a, cl_half b) { + return cl_half_to_float(a) < cl_half_to_float(b); + }); + + sums.push_back( + subtract_halfs(ref_vals.begin(), ref_vals.end())); + sums.push_back( + subtract_halfs(ref_vals.rbegin(), ref_vals.rend())); + + std::sort(ref_vals.begin(), ref_vals.end(), + [](cl_half a, cl_half b) { + return std::abs(cl_half_to_float(a)) + < std::abs(cl_half_to_float(b)); + }); + + float precise = 0.f; + for (auto elem : ref_vals) precise -= cl_half_to_float(elem); + sums.push_back(precise); + + sums.push_back( + subtract_halfs(ref_vals.begin(), ref_vals.end())); + sums.push_back( + subtract_halfs(ref_vals.rbegin(), ref_vals.rend())); + + std::sort(sums.begin(), sums.end()); + max_error = std::abs(sums.front() - sums.back()); + + // restore unsorted order + memcpy(ref_vals.data(), startRefValues, + sizeof(HostDataType) * ref_vals.size()); + } + else + { + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * threadCount); + } + return true; + } + return false; + } + std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return " oldValues[tid] = atomic_fetch_sub" + postfix - + "(&destMemory[0], tid + 3 +(((" - + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" - + DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope - + ");\n"; + + if constexpr (std::is_same_v) + { + return " atomic_fetch_sub" + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")oldValues[tid]" + + memoryOrderScope + ");\n" + + " oldValues[tid] = atomic_fetch_sub" + postfix + + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + + ")0" + memoryOrderScope + ");\n"; + } + else + { + return " oldValues[tid] = atomic_fetch_sub" + postfix + + "(&destMemory[0], tid + 3 +(((" + + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + + DataType().AddSubOperandTypeName() + ")-1)*8)" + + memoryOrderScope + ");\n"; + } } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, - volatile HostAtomicType *destMemory, - HostDataType *oldValues) + void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) override { - oldValues[tid] = host_atomic_fetch_sub( - &destMemory[0], - (HostDataType)tid + 3 - + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8), - MemoryOrder()); + if constexpr (std::is_same_v) + { + host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid], + MemoryOrder()); + oldValues[tid] = host_atomic_fetch_sub( + &destMemory[tid], (HostDataType)0, MemoryOrder()); + } + else + { + oldValues[tid] = + host_atomic_fetch_sub(&destMemory[0], + (HostDataType)tid + 3 + + (((HostDataType)tid + 3) + << (sizeof(HostDataType) - 1) * 8), + MemoryOrder()); + } } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, - HostDataType *startRefValues, - cl_uint whichDestValue) + bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) override { expected = StartValue(); - for (cl_uint i = 0; i < threadCount; i++) - expected -= (HostDataType)i + 3 - + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + { + for (cl_uint i = 0; i < threadCount; i++) + { + expected = cl_half_from_float( + cl_half_to_float(expected) + - cl_half_to_float(startRefValues[i]), + gHalfRoundingMode); + } + } + } + else + { + for (cl_uint i = 0; i < threadCount; i++) + expected -= (HostDataType)i + 3 + + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + } return true; } + bool IsTestNotAsExpected(const HostDataType &expected, + const std::vector &testValues, + cl_uint whichDestValue) override + { + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + return std::abs(cl_half_to_float(expected) + - cl_half_to_float(testValues[whichDestValue])) + > max_error; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, + testValues, + whichDestValue); + } + bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, + HostAtomicType *finalValues) override + { + if (std::is_same::value) + { + correct = true; + for (cl_uint i = 1; i < threadCount; i++) + { + if (refValues[i] != StartValue()) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return !correct; + } + return CBasicTestMemOrderScope::VerifyRefs(correct, + threadCount, + refValues, + finalValues); + } + int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) override + { + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override + { + if constexpr (std::is_same_v) + { + return threadCount; + } + return CBasicTestMemOrderScope::NumResults(threadCount, + deviceID); + } }; static int test_atomic_fetch_sub_generic(cl_device_id deviceID, @@ -1552,6 +1745,15 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID, TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + + if (gFloatAtomicsSupported) + { + CBasicTestFetchSub test_half( + TYPE_ATOMIC_HALF, useSVM); + EXECUTE_TEST(error, + test_half.Execute(deviceID, context, queue, num_elements)); + } + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchSub