diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index 9a33f26c..fac21642 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -135,7 +135,10 @@ template CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c, TExplicitMemoryOrderType order) { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + AtomicType, + HOST_ATOMIC_DOUBLE> || std::is_same_v) { static std::mutex mx; std::lock_guard lock(mx); diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index e2f1888f..78291f06 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -134,7 +134,6 @@ test_status InitCL(cl_device_id device) { if (is_extension_available(device, "cl_ext_float_atomics")) { gFloatAtomicsSupported = true; - if (is_extension_available(device, "cl_khr_fp64")) { cl_int error = clGetDeviceInfo( diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index f46520ca..56f350ad 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1647,6 +1647,23 @@ REGISTER_TEST(svm_atomic_fetch_add) true); } +template double kahan_sub(const std::vector &nums) +{ + return 0.0; +} +template <> double kahan_sub(const std::vector &nums) +{ + double sum = 0.0; + double compensation = 0.0; + for (double num : nums) + { + double y = num - compensation; + double t = sum - y; + compensation = (t - sum) - y; + sum = t; + } + return sum; +} template class CBasicTestFetchSub : public CBasicTestMemOrderScope { @@ -1671,7 +1688,7 @@ public: if constexpr ( std::is_same_v< HostDataType, - HOST_FLOAT> || std::is_same_v) + HOST_DOUBLE> || std::is_same_v || std::is_same_v) { StartValue(0); CBasicTestMemOrderScope) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) { if (threadCount > ref_vals.size()) { ref_vals.resize(threadCount); + for (cl_uint i = 0; i < threadCount; i++) - ref_vals[i] = get_random_float(min_range, max_range, d); + ref_vals[i] = (HostDataType)get_random_double(min_range, + max_range, d); memcpy(startRefValues, ref_vals.data(), sizeof(HostDataType) * ref_vals.size()); @@ -1714,22 +1736,52 @@ public: // Estimate highest possible subtraction error for given set. std::vector sums; std::sort(ref_vals.begin(), ref_vals.end()); - sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); - sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend())); - std::sort( - ref_vals.begin(), ref_vals.end(), - [](float a, float b) { return std::abs(a) < std::abs(b); }); + if constexpr (std::is_same_v) + { + sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); - double precise = 0.0; - for (auto elem : ref_vals) precise += double(elem); - sums.push_back(precise); - sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); - sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend())); + sums.push_back( + subtract(ref_vals.rbegin(), ref_vals.rend())); - std::sort(sums.begin(), sums.end()); - max_error = - std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back()); + std::sort(ref_vals.begin(), ref_vals.end(), + [](double a, double b) { + return std::abs(a) < std::abs(b); + }); + + double precise = kahan_sub(ref_vals); + sums.push_back(precise); + + sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); + + sums.push_back( + subtract(ref_vals.rbegin(), ref_vals.rend())); + + std::sort(sums.begin(), sums.end()); + max_error = std::abs((double)sums.front() - sums.back()); + } + else + { + sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); + sums.push_back( + subtract(ref_vals.rbegin(), ref_vals.rend())); + + std::sort(ref_vals.begin(), ref_vals.end(), + [](float a, float b) { + return std::abs(a) < std::abs(b); + }); + + double precise = 0.0; + for (auto elem : ref_vals) precise += double(elem); + sums.push_back(precise); + sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); + sums.push_back( + subtract(ref_vals.rbegin(), ref_vals.rend())); + + std::sort(sums.begin(), sums.end()); + max_error = + std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back()); + } // restore unsorted order memcpy(ref_vals.data(), startRefValues, @@ -1742,7 +1794,7 @@ public: } return true; } - if constexpr (std::is_same_v) + else if constexpr (std::is_same_v) { if (threadCount > ref_vals.size()) { @@ -1804,7 +1856,7 @@ public: if constexpr ( std::is_same_v< HostDataType, - HOST_HALF> || std::is_same_v) + HOST_DOUBLE> || std::is_same_v || std::is_same_v) { return " atomic_fetch_sub" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")oldValues[tid]" @@ -1829,7 +1881,7 @@ public: if constexpr ( std::is_same_v< HostDataType, - HOST_HALF> || std::is_same_v) + HOST_DOUBLE> || std::is_same_v || std::is_same_v) { host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid], MemoryOrder()); @@ -1851,7 +1903,11 @@ public: cl_uint whichDestValue) override { expected = StartValue(); - if constexpr (std::is_same_v) + + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) @@ -1882,7 +1938,10 @@ public: const std::vector &testValues, cl_uint whichDestValue) override { - if constexpr (std::is_same_v) + if constexpr ( + std::is_same_v< + HostDataType, + HOST_DOUBLE> || std::is_same_v) { if (whichDestValue == 0) return std::abs((HOST_ATOMIC_FLOAT)expected @@ -1927,7 +1986,18 @@ public: int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) + { + if (LocalMemory() + && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) + return 0; // skip test - not applicable + + if (!LocalMemory() + && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) + == 0) + return 0; + } + else if constexpr (std::is_same_v) { if (LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) @@ -1956,7 +2026,7 @@ public: if constexpr ( std::is_same_v< HostDataType, - HOST_HALF> || std::is_same_v) + HOST_DOUBLE> || std::is_same_v || std::is_same_v) { return threadCount; } @@ -1991,6 +2061,11 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID, if (gFloatAtomicsSupported) { + CBasicTestFetchSub test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST( + error, test_double.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_float( TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST(