Added support for cl_ext_float_atomics in CBasicTestFetchAdd with atomic_double (#2347)

Related to https://github.com/KhronosGroup/OpenCL-CTS/issues/2142,
according to the work plan, extending CBasicTestFetchAdd with support
for atomic_double.
This commit is contained in:
Marcin Hajder
2025-10-07 17:42:02 +02:00
committed by GitHub
parent 93c37f17fc
commit 51445f3743
4 changed files with 97 additions and 22 deletions

View File

@@ -79,6 +79,7 @@ extern cl_device_atomic_capabilities gAtomicMemCap,
extern cl_half_rounding_mode gHalfRoundingMode; extern cl_half_rounding_mode gHalfRoundingMode;
extern bool gFloatAtomicsSupported; extern bool gFloatAtomicsSupported;
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps; extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
extern cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps;
extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps; extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps;
extern const char * extern const char *

View File

@@ -99,7 +99,10 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c, CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order) TExplicitMemoryOrderType order)
{ {
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
AtomicType,
HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
{ {
static std::mutex mx; static std::mutex mx;
std::lock_guard<std::mutex> lock(mx); std::lock_guard<std::mutex> lock(mx);

View File

@@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap,
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
bool gFloatAtomicsSupported = false; bool gFloatAtomicsSupported = false;
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps = 0;
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
test_status InitCL(cl_device_id device) { test_status InitCL(cl_device_id device) {
@@ -133,6 +134,13 @@ test_status InitCL(cl_device_id device) {
if (is_extension_available(device, "cl_ext_float_atomics")) if (is_extension_available(device, "cl_ext_float_atomics"))
{ {
gFloatAtomicsSupported = true; gFloatAtomicsSupported = true;
if (is_extension_available(device, "cl_khr_fp64"))
{
cl_int error = clGetDeviceInfo(
device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
}
cl_int error = clGetDeviceInfo( cl_int error = clGetDeviceInfo(
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,

View File

@@ -1163,13 +1163,30 @@ REGISTER_TEST(svm_atomic_compare_exchange_weak)
num_elements, true); num_elements, true);
} }
// Compensated (Kahan) summation, used to build a high-accuracy reference
// sum for the floating-point atomic_fetch_add tests.
//
// Accumulates in double precision for any element type, so both float and
// double inputs get a correctly compensated sum. (The previous primary
// template was a stub that returned a hard-coded 0.0 for every non-double
// type — if it were ever instantiated, e.g. for float, it would silently
// yield a bogus reference value. The generic implementation below makes a
// separate double specialization unnecessary: for T == double the
// double(num) conversion is an identity, so the operation sequence — and
// therefore the rounding behavior — is bit-identical to the old
// specialization.)
//
// @param nums  values to accumulate, in order
// @return      compensated sum of nums as a double (0.0 for an empty vector)
template <typename T> double kahan_sum(const std::vector<T> &nums)
{
    double sum = 0.0;
    double compensation = 0.0; // low-order bits lost by earlier additions
    for (const T &num : nums)
    {
        double y = double(num) - compensation;
        double t = sum + y;
        // (t - sum) recovers the part of y that was actually absorbed into
        // the total; subtracting y isolates the rounding error, which is
        // folded back in on the next iteration.
        compensation = (t - sum) - y;
        sum = t;
    }
    return sum;
}
template <typename HostAtomicType, typename HostDataType> template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd class CBasicTestFetchAdd
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> { : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
double min_range; double min_range;
double max_range; double max_range;
double max_error_fp32; double max_error;
std::vector<HostDataType> ref_vals; std::vector<HostDataType> ref_vals;
public: public:
@@ -1182,11 +1199,14 @@ public:
CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM), useSVM),
min_range(-999.0), max_range(999.0), max_error_fp32(0.0) min_range(-999.0), max_range(999.0), max_error(0.0)
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
StartValue(0.f); StartValue((HostDataType)0.0);
CBasicTestMemOrderScope<HostAtomicType, CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false); HostDataType>::OldValueCheck(false);
} }
@@ -1194,13 +1214,20 @@ public:
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override MTdata d) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
{ {
if (threadCount > ref_vals.size()) if (threadCount > ref_vals.size())
{ {
ref_vals.resize(threadCount); ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++) for (cl_uint i = 0; i < threadCount; i++)
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
ref_vals[i] =
get_random_double(min_range, max_range, d);
else
ref_vals[i] = get_random_float(min_range, max_range, d); ref_vals[i] = get_random_float(min_range, max_range, d);
memcpy(startRefValues, ref_vals.data(), memcpy(startRefValues, ref_vals.data(),
@@ -1216,12 +1243,17 @@ public:
sums.push_back( sums.push_back(
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
std::sort( std::sort(ref_vals.begin(), ref_vals.end(),
ref_vals.begin(), ref_vals.end(), [](HostDataType a, HostDataType b) {
[](float a, float b) { return std::abs(a) < std::abs(b); }); return std::abs(a) < std::abs(b);
});
double precise = 0.0; double precise = 0.0;
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
precise = kahan_sum(ref_vals);
else
for (auto elem : ref_vals) precise += double(elem); for (auto elem : ref_vals) precise += double(elem);
sums.push_back(precise); sums.push_back(precise);
sums.push_back( sums.push_back(
@@ -1231,8 +1263,7 @@ public:
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f)); std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
std::sort(sums.begin(), sums.end()); std::sort(sums.begin(), sums.end());
max_error_fp32 = max_error = std::abs(sums.front() - sums.back());
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
// restore unsorted order // restore unsorted order
memcpy(ref_vals.data(), startRefValues, memcpy(ref_vals.data(), startRefValues,
@@ -1252,7 +1283,10 @@ public:
std::string memoryOrderScope = MemoryOrderScopeStr(); std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
return " atomic_fetch_add" + postfix + "(&destMemory[0], (" return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]" + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1286,7 +1320,10 @@ public:
volatile HostAtomicType *destMemory, volatile HostAtomicType *destMemory,
HostDataType *oldValues) override HostDataType *oldValues) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid], host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
MemoryOrder()); MemoryOrder());
@@ -1312,7 +1349,10 @@ public:
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
expected = StartValue(); expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++) for (cl_uint i = 0; i < threadCount; i++)
@@ -1331,12 +1371,15 @@ public:
const std::vector<HostAtomicType> &testValues, const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value) if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same<HostDataType, HOST_FLOAT>::value)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
return std::abs((HOST_ATOMIC_FLOAT)expected return std::abs((HostDataType)expected
- testValues[whichDestValue]) - testValues[whichDestValue])
> max_error_fp32; > max_error;
} }
return CBasicTestMemOrderScope< return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
@@ -1346,7 +1389,8 @@ public:
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override HostAtomicType *finalValues) override
{ {
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value) if (std::is_same<HostDataType, HOST_DOUBLE>::value
|| std::is_same<HostDataType, HOST_FLOAT>::value)
{ {
correct = true; correct = true;
for (cl_uint i = 1; i < threadCount; i++) for (cl_uint i = 1; i < threadCount; i++)
@@ -1369,7 +1413,18 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context, int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override cl_command_queue queue) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if (LocalMemory() if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1385,7 +1440,10 @@ public:
} }
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>) if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
return threadCount; return threadCount;
} }
@@ -1420,6 +1478,11 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported) if (gFloatAtomicsSupported)
{ {
CBasicTestFetchAdd<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float( CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM); TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST( EXECUTE_TEST(