Added support for cl_ext_float_atomics in CBasicTestFetchSub with atomic_float (#2367)

Related to #2142: following the work plan, this change extends
CBasicTestFetchSub with support for atomic_float.
This commit is contained in:
Marcin Hajder
2025-10-28 16:41:24 +01:00
committed by GitHub
parent 52ba127f79
commit d0aa95be2f
3 changed files with 106 additions and 12 deletions

View File

@@ -18,7 +18,6 @@
#include "harness/testHarness.h" #include "harness/testHarness.h"
#include <mutex> #include <mutex>
#include "CL/cl_half.h" #include "CL/cl_half.h"
#ifdef WIN32 #ifdef WIN32
@@ -136,7 +135,15 @@ template <typename AtomicType, typename CorrespondingType>
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c, CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order) TExplicitMemoryOrderType order)
{ {
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>) if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
CorrespondingType old_value = *a;
*a -= c;
return old_value;
}
else if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{ {
static std::mutex mx; static std::mutex mx;
std::lock_guard<std::mutex> lock(mx); std::lock_guard<std::mutex> lock(mx);

View File

@@ -134,6 +134,7 @@ test_status InitCL(cl_device_id device) {
if (is_extension_available(device, "cl_ext_float_atomics")) if (is_extension_available(device, "cl_ext_float_atomics"))
{ {
gFloatAtomicsSupported = true; gFloatAtomicsSupported = true;
if (is_extension_available(device, "cl_khr_fp64")) if (is_extension_available(device, "cl_khr_fp64"))
{ {
cl_int error = clGetDeviceInfo( cl_int error = clGetDeviceInfo(

View File

@@ -1668,7 +1668,10 @@ public:
useSVM), useSVM),
min_range(-999.0), max_range(999.0), max_error(0.0) min_range(-999.0), max_range(999.0), max_error(0.0)
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
{ {
StartValue(0); StartValue(0);
CBasicTestMemOrderScope<HostAtomicType, CBasicTestMemOrderScope<HostAtomicType,
@@ -1676,6 +1679,13 @@ public:
} }
} }
template <typename Iterator> template <typename Iterator>
// Folds the range [begin, end) by repeated subtraction starting from zero,
// i.e. evaluates ((0 - e0) - e1) - ... in iteration order, accumulating in
// HostDataType.
HostDataType subtract(Iterator begin, Iterator end)
{
    HostDataType accumulated = 0;
    while (begin != end)
    {
        accumulated = accumulated - *begin;
        ++begin;
    }
    return accumulated;
}
template <typename Iterator>
float subtract_halfs(Iterator begin, Iterator end) float subtract_halfs(Iterator begin, Iterator end)
{ {
cl_half res = 0; cl_half res = 0;
@@ -1690,12 +1700,53 @@ public:
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override MTdata d) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if (threadCount > ref_vals.size()) if (threadCount > ref_vals.size())
{ {
ref_vals.resize(threadCount); ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
    ref_vals[i] = get_random_float(min_range, max_range, d);
memcpy(startRefValues, ref_vals.data(),
       sizeof(HostDataType) * ref_vals.size());

// Estimate highest possible subtraction error for given set: evaluate
// 0 - v0 - v1 - ... in several orders and use the spread between the
// smallest and largest result as the tolerance.
std::vector<HostDataType> sums;
std::sort(ref_vals.begin(), ref_vals.end());
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));

std::sort(ref_vals.begin(), ref_vals.end(),
          [](float a, float b) { return std::abs(a) < std::abs(b); });

// Double-precision reference. It has to be accumulated by subtraction like
// every other candidate in `sums`; accumulating by addition would put
// +sum next to -sum and inflate max_error to roughly 2 * |sum|, which
// would let almost any device result pass.
double precise = 0.0;
for (auto elem : ref_vals) precise -= double(elem);
sums.push_back(precise);

sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));

std::sort(sums.begin(), sums.end());
max_error =
    std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());

// restore unsorted order
memcpy(ref_vals.data(), startRefValues,
       sizeof(HostDataType) * ref_vals.size());
}
else
{
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * threadCount);
}
return true;
}
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (threadCount > ref_vals.size())
{
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++) for (cl_uint i = 0; i < threadCount; i++)
ref_vals[i] = cl_half_from_float( ref_vals[i] = cl_half_from_float(
get_random_float(min_range, max_range, d), get_random_float(min_range, max_range, d),
@@ -1725,7 +1776,6 @@ public:
float precise = 0.f; float precise = 0.f;
for (auto elem : ref_vals) precise -= cl_half_to_float(elem); for (auto elem : ref_vals) precise -= cl_half_to_float(elem);
sums.push_back(precise); sums.push_back(precise);
sums.push_back( sums.push_back(
subtract_halfs(ref_vals.begin(), ref_vals.end())); subtract_halfs(ref_vals.begin(), ref_vals.end()));
sums.push_back( sums.push_back(
@@ -1733,7 +1783,6 @@ public:
std::sort(sums.begin(), sums.end()); std::sort(sums.begin(), sums.end());
max_error = std::abs(sums.front() - sums.back()); max_error = std::abs(sums.front() - sums.back());
// restore unsorted order // restore unsorted order
memcpy(ref_vals.data(), startRefValues, memcpy(ref_vals.data(), startRefValues,
sizeof(HostDataType) * ref_vals.size()); sizeof(HostDataType) * ref_vals.size());
@@ -1752,7 +1801,10 @@ public:
std::string memoryOrderScope = MemoryOrderScopeStr(); std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
return " atomic_fetch_sub" + postfix + "(&destMemory[0], (" return " atomic_fetch_sub" + postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]" + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1774,7 +1826,10 @@ public:
volatile HostAtomicType *destMemory, volatile HostAtomicType *destMemory,
HostDataType *oldValues) override HostDataType *oldValues) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid], host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
MemoryOrder()); MemoryOrder());
@@ -1796,7 +1851,13 @@ public:
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
expected = StartValue(); expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
expected -= startRefValues[i];
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
{ {
@@ -1821,7 +1882,14 @@ public:
const std::vector<HostAtomicType> &testValues, const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (whichDestValue == 0)
return std::abs((HOST_ATOMIC_FLOAT)expected
- testValues[whichDestValue])
> max_error;
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
if (whichDestValue == 0) if (whichDestValue == 0)
return std::abs(cl_half_to_float(expected) return std::abs(cl_half_to_float(expected)
@@ -1859,7 +1927,17 @@ public:
int ExecuteSingleTest(cl_device_id deviceID, cl_context context, int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override cl_command_queue queue) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
if (LocalMemory() if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1875,7 +1953,10 @@ public:
} }
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
return threadCount; return threadCount;
} }
@@ -1910,6 +1991,11 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported) if (gFloatAtomicsSupported)
{ {
CBasicTestFetchSub<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_HALF, HOST_HALF> test_half( CBasicTestFetchSub<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM); TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error, EXECUTE_TEST(error,