Added support for cl_ext_float_atomics in CBasicTestFetchSub with atomic_half (#2366)

Related to #2142: following the work plan, this change extends
CBasicTestFetchSub with support for atomic_half.

I was not able to fully test this PR because the
CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT/CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT
capabilities for atomic_half were missing. I would appreciate the reviewers' careful attention, thanks.
This commit is contained in:
Marcin Hajder
2025-09-09 17:40:50 +02:00
committed by GitHub
parent 913e6e4388
commit 1aeca1360b
2 changed files with 245 additions and 27 deletions

View File

@@ -19,6 +19,8 @@
#include "harness/testHarness.h"
#include <mutex>
#include "CL/cl_half.h"
#ifdef WIN32
#include "Windows.h"
#endif
@@ -88,6 +90,8 @@ enum TExplicitMemoryOrderType
#define HOST_FLAG cl_int
// Rounding mode handed to cl_half_from_float when host code converts a float
// result back to cl_half (see host_atomic_fetch_sub). Only declared here; the
// definition is not visible in this view.
extern cl_half_rounding_mode gHalfRoundingMode;
// host atomic functions
// Host-side fence taking an explicit memory order; only the declaration is
// visible here.
void host_atomic_thread_fence(TExplicitMemoryOrderType order);
@@ -120,14 +124,26 @@ template <typename AtomicType, typename CorrespondingType>
// Host reference implementation of fetch-and-subtract: subtracts `c` from the
// object pointed to by `a`.
//
//   a     - pointer to the host-side atomic object to modify
//   c     - value to subtract
//   order - requested memory order; not referenced anywhere in the visible body
//
// For HOST_ATOMIC_HALF the previous contents of *a are returned. The other
// paths return whatever InterlockedExchangeSubtract / __sync_fetch_and_sub
// return, or 0 when no implementation is available for the compiler.
//
// NOTE(review): this text looks like a diff rendered without its +/- markers.
// The first `#if` below has no matching `#endif` in this view, and several
// return/log lines appear twice; presumably the first `#if`/return pair and
// one copy of each duplicate are the removed (pre-change) lines - confirm
// against the actual header before relying on this listing.
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
TExplicitMemoryOrderType order)
{
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
return InterlockedExchangeSubtract(a, c);
// Half-precision path: the subtraction is done in float and the update is
// guarded by a mutex instead of a hardware atomic instruction.
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
{
// Function-local static mutex: it serializes callers of this function only.
// NOTE(review): other host_atomic_* helpers touching the same object would
// not take this lock - confirm they are never mixed concurrently.
static std::mutex mx;
std::lock_guard<std::mutex> lock(mx);
// Remember the value before the update; this is what gets returned.
CorrespondingType old_value = *a;
// Convert both operands to float, subtract, then round the result back to
// half using the globally configured rounding mode.
*a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)),
gHalfRoundingMode);
return old_value;
}
else
{
// All other types: pick the compiler-specific intrinsic at preprocessing
// time.
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
return InterlockedExchangeSubtract(a, c);
#elif defined(__GNUC__)
return __sync_fetch_and_sub(a, c);
return __sync_fetch_and_sub(a, c);
#else
// No intrinsic known for this compiler: report it and return 0.
log_info("Host function not implemented: atomic_fetch_sub\n");
return 0;
log_info("Host function not implemented: atomic_fetch_sub\n");
return 0;
#endif
}
}
template <typename AtomicType, typename CorrespondingType>