diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index a0588ef4..9a33f26c 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -18,7 +18,6 @@ #include "harness/testHarness.h" #include - #include "CL/cl_half.h" #ifdef WIN32 @@ -136,7 +135,15 @@ template CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + static std::mutex mx; + std::lock_guard lock(mx); + CorrespondingType old_value = *a; + *a -= c; + return old_value; + } + else if constexpr (std::is_same_v) { static std::mutex mx; std::lock_guard lock(mx); diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index 78291f06..e2f1888f 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -134,6 +134,7 @@ test_status InitCL(cl_device_id device) { if (is_extension_available(device, "cl_ext_float_atomics")) { gFloatAtomicsSupported = true; + if (is_extension_available(device, "cl_khr_fp64")) { cl_int error = clGetDeviceInfo( diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 5a553a06..f46520ca 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1668,7 +1668,10 @@ public: useSVM), min_range(-999.0), max_range(999.0), max_error(0.0) { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_FLOAT> || std::is_same_v) { StartValue(0); CBasicTestMemOrderScope + HostDataType subtract(Iterator begin, Iterator end) + { + HostDataType res = 0; + for (auto it = begin; it != end; ++it) res = res - *it; + return res; + } + template float subtract_halfs(Iterator begin, Iterator end) { cl_half res = 0; @@ -1690,12 +1700,53 @@ public: bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) { if (threadCount > ref_vals.size()) { ref_vals.resize(threadCount); + for (cl_uint i = 0; i < threadCount; i++) + ref_vals[i] = get_random_float(min_range, max_range, d); + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * ref_vals.size()); + + // Estimate highest possible subtraction error for given set. + std::vector sums; + std::sort(ref_vals.begin(), ref_vals.end()); + sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); + sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend())); + + std::sort( + ref_vals.begin(), ref_vals.end(), + [](float a, float b) { return std::abs(a) < std::abs(b); }); + + double precise = 0.0; + for (auto elem : ref_vals) precise += double(elem); + sums.push_back(precise); + sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); + sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend())); + + std::sort(sums.begin(), sums.end()); + max_error = + std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back()); + + // restore unsorted order + memcpy(ref_vals.data(), startRefValues, + sizeof(HostDataType) * ref_vals.size()); + } + else + { + memcpy(startRefValues, ref_vals.data(), + sizeof(HostDataType) * threadCount); + } + return true; + } + if constexpr (std::is_same_v) + { + if (threadCount > ref_vals.size()) + { + ref_vals.resize(threadCount); for (cl_uint i = 0; i < threadCount; i++) ref_vals[i] = cl_half_from_float( get_random_float(min_range, max_range, d), @@ -1725,7 +1776,6 @@ public: float precise = 0.f; for (auto elem : ref_vals) precise -= cl_half_to_float(elem); sums.push_back(precise); - sums.push_back( subtract_halfs(ref_vals.begin(), ref_vals.end())); sums.push_back( @@ -1733,7 +1783,6 @@ public: std::sort(sums.begin(), sums.end()); max_error = std::abs(sums.front() - sums.back()); - // restore unsorted order memcpy(ref_vals.data(), startRefValues, sizeof(HostDataType) * ref_vals.size()); @@ -1752,7 +1801,10 @@ public: std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { return " atomic_fetch_sub" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")oldValues[tid]" @@ -1774,7 +1826,10 @@ public: volatile HostAtomicType *destMemory, HostDataType *oldValues) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid], MemoryOrder()); @@ -1796,7 +1851,13 @@ public: cl_uint whichDestValue) override { expected = StartValue(); - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + for (cl_uint i = 0; i < threadCount; i++) + expected -= startRefValues[i]; + } + else if constexpr (std::is_same_v) { if (whichDestValue == 0) { @@ -1821,7 +1882,14 @@ public: const std::vector &testValues, cl_uint whichDestValue) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (whichDestValue == 0) + return std::abs((HOST_ATOMIC_FLOAT)expected + - testValues[whichDestValue]) + > max_error; + } + else if constexpr (std::is_same_v) { if (whichDestValue == 0) return std::abs(cl_half_to_float(expected) @@ -1859,7 +1927,17 @@ public: int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; + } + else if constexpr (std::is_same_v) { if (LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) @@ -1875,7 +1953,10 @@ public: } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_HALF> || std::is_same_v) { return threadCount; } @@ -1910,6 +1991,11 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { + CBasicTestFetchSub test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST( + error, test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_half( TYPE_ATOMIC_HALF, useSVM); EXECUTE_TEST(error,