Added support for cl_ext_float_atomics in CBasicTestFetchSub with atomic_double (#2368)

Related to #2142: following the work plan, this change extends
CBasicTestFetchSub with support for atomic_double.
This commit is contained in:
Marcin Hajder
2025-12-02 17:39:16 +01:00
committed by GitHub
parent 2bb364bb1d
commit 0cca0ee869
3 changed files with 102 additions and 25 deletions

View File

@@ -135,7 +135,10 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order)
{
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
if constexpr (
std::is_same_v<
AtomicType,
HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);

View File

@@ -134,7 +134,6 @@ test_status InitCL(cl_device_id device) {
if (is_extension_available(device, "cl_ext_float_atomics"))
{
gFloatAtomicsSupported = true;
if (is_extension_available(device, "cl_khr_fp64"))
{
cl_int error = clGetDeviceInfo(

View File

@@ -1647,6 +1647,23 @@ REGISTER_TEST(svm_atomic_fetch_add)
true);
}
// Fallback for non-double element types: the compensated subtraction is only
// implemented for double (see the specialization below), so other types
// report 0.0. @param nums ignored. @return 0.0.
template <typename T> double kahan_sub(const std::vector<T> &nums)
{
    return 0.0;
}
// Compensated (Kahan) running subtraction: returns 0.0 - nums[0] - nums[1] -
// ... with an error-compensation term that recovers the low-order bits lost
// by each floating-point subtraction, giving a much tighter bound than the
// naive loop. Used as the high-precision reference when estimating the
// worst-case error of atomic_fetch_sub on doubles.
// @param nums values to subtract from a running total starting at 0.0.
// @return the compensated value of -(sum of nums).
template <> double kahan_sub<double>(const std::vector<double> &nums)
{
    double sum = 0.0;
    double compensation = 0.0; // low-order error currently missing from sum
    for (double num : nums)
    {
        // Fold previously lost low-order bits into the subtrahend. This is
        // textbook Kahan summation with the sign of y flipped for
        // subtraction: y = num + c; t = sum - y; c = (t - sum) + y.
        double y = num + compensation;
        double t = sum - y;
        // (t - sum) + y is exactly the rounding error of `sum - y`.
        // BUGFIX: the previous code computed (t - sum) - y with
        // y = num - compensation, which evaluates to roughly -2*y instead of
        // the tiny residual and corrupts the total (e.g. {1,2,3} -> -16
        // instead of -6).
        compensation = (t - sum) + y;
        sum = t;
    }
    return sum;
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchSub
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
@@ -1671,7 +1688,7 @@ public:
if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
{
StartValue(0);
CBasicTestMemOrderScope<HostAtomicType,
@@ -1700,13 +1717,18 @@ public:
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (threadCount > ref_vals.size())
{
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
ref_vals[i] = get_random_float(min_range, max_range, d);
ref_vals[i] = (HostDataType)get_random_double(min_range,
max_range, d);
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * ref_vals.size());
@@ -1714,22 +1736,52 @@ public:
// Estimate highest possible subtraction error for given set.
std::vector<HostDataType> sums;
std::sort(ref_vals.begin(), ref_vals.end());
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(
ref_vals.begin(), ref_vals.end(),
[](float a, float b) { return std::abs(a) < std::abs(b); });
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(
subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(ref_vals.begin(), ref_vals.end(),
[](double a, double b) {
return std::abs(a) < std::abs(b);
});
double precise = kahan_sub(ref_vals);
sums.push_back(precise);
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(
subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(sums.begin(), sums.end());
max_error = std::abs((double)sums.front() - sums.back());
}
else
{
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(
subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(ref_vals.begin(), ref_vals.end(),
[](float a, float b) {
return std::abs(a) < std::abs(b);
});
double precise = 0.0;
for (auto elem : ref_vals) precise += double(elem);
sums.push_back(precise);
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
sums.push_back(
subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(sums.begin(), sums.end());
max_error =
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
}
// restore unsorted order
memcpy(ref_vals.data(), startRefValues,
@@ -1742,7 +1794,7 @@ public:
}
return true;
}
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (threadCount > ref_vals.size())
{
@@ -1804,7 +1856,7 @@ public:
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
return " atomic_fetch_sub" + postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1829,7 +1881,7 @@ public:
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
MemoryOrder());
@@ -1851,7 +1903,11 @@ public:
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
@@ -1882,7 +1938,10 @@ public:
const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
return std::abs((HOST_ATOMIC_FLOAT)expected
@@ -1927,7 +1986,18 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1956,7 +2026,7 @@ public:
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
return threadCount;
}
@@ -1991,6 +2061,11 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported)
{
CBasicTestFetchSub<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(