Added support for cl_ext_float_atomics in CBasicTestFetchAddSpecialFloats with atomic_half (#2386)

Related to #2142: following the work plan, this change extends
CBasicTestFetchAddSpecialFloats with support for atomic_half.
This commit is contained in:
Marcin Hajder
2026-01-27 17:46:57 +01:00
committed by GitHub
parent 584f27afd4
commit 6f38c799c1
3 changed files with 266 additions and 18 deletions

View File

@@ -25,6 +25,7 @@
#include "CL/cl_half.h"
#include <iomanip>
#include <limits>
#include <sstream>
#include <vector>
@@ -75,6 +76,10 @@ extern int
gMaxDeviceThreads; // maximum number of threads executed on OCL device
extern cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device
// Globals used for half-precision and floating-point atomics testing. They are
// only declared here (extern); the definitions live in another translation
// unit. Each global is declared exactly once, since repeating an extern
// declaration is redundant and triggers -Wredundant-decls.
// NOTE(review): going by the names, these hold the half rounding mode, whether
// cl_ext_float_atomics is available, and the device's half atomic / FP
// capabilities - confirm against the code that initializes them.
extern cl_half_rounding_mode gHalfRoundingMode;
extern bool gFloatAtomicsSupported;
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
extern cl_device_fp_config gHalfFPConfig;
@@ -154,12 +159,12 @@ public:
return 0;
}
CBasicTest(TExplicitAtomicType dataType, bool useSVM)
: CTest(), _maxDeviceThreads(MAX_DEVICE_THREADS), _dataType(dataType),
_useSVM(useSVM), _startValue(255), _localMemory(false),
_declaredInProgram(false), _usedInFunction(false),
_genericAddrSpace(false), _oldValueCheck(true),
_localRefValues(false), _maxGroupSize(0), _passCount(0),
_iterations(gInternalIterations)
: CTest(), _dataType(dataType), _useSVM(useSVM), _startValue(255),
_localMemory(false), _declaredInProgram(false),
_usedInFunction(false), _genericAddrSpace(false),
_oldValueCheck(true), _localRefValues(false), _maxGroupSize(0),
_passCount(0), _iterations(gInternalIterations),
_maxDeviceThreads(MAX_DEVICE_THREADS), _deviceThreads(0)
{}
virtual ~CBasicTest()
{
@@ -240,12 +245,12 @@ public:
cl_command_queue queue)
{
int error = 0;
DeclaredInProgram(false);
SetDeclaredInProgram(false);
EXECUTE_TEST(error,
ExecuteForEachPointerType(deviceID, context, queue));
if (!UseSVM())
{
DeclaredInProgram(true);
SetDeclaredInProgram(true);
EXECUTE_TEST(error,
ExecuteForEachPointerType(deviceID, context, queue));
}
@@ -256,13 +261,13 @@ public:
cl_command_queue queue)
{
int error = 0;
if (_maxDeviceThreads > 0 && !UseSVM())
if (_deviceThreads > 0 && !UseSVM())
{
SetLocalMemory(true);
EXECUTE_TEST(
error, ExecuteForEachDeclarationType(deviceID, context, queue));
}
if (_maxDeviceThreads + MaxHostThreads() > 0)
if (_deviceThreads + MaxHostThreads() > 0)
{
SetLocalMemory(false);
EXECUTE_TEST(
@@ -271,7 +276,7 @@ public:
return error;
}
virtual int Execute(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
cl_command_queue queue, int num_elements) override
{
if (sizeof(HostAtomicType) != DataType().Size(deviceID))
{
@@ -311,7 +316,12 @@ public:
if (UseSVM()) return 0;
_maxDeviceThreads = 0;
}
if (_maxDeviceThreads + MaxHostThreads() == 0) return 0;
_deviceThreads = (num_elements > 0)
? std::min(cl_uint(num_elements), _maxDeviceThreads)
: _maxDeviceThreads;
if (_deviceThreads + MaxHostThreads() == 0) return 0;
return ExecuteForEachParameterSet(deviceID, context, queue);
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
@@ -324,7 +334,7 @@ public:
{
return AtomicTypeExtendedInfo<HostDataType>(_dataType);
}
cl_uint _maxDeviceThreads;
virtual cl_uint MaxHostThreads()
{
if (UseSVM() || gHost)
@@ -421,7 +431,7 @@ public:
// Returns the current value of _startValue.
HostDataType StartValue() { return _startValue; }
// Stores `local` in the _localMemory flag; LocalMemory() reads it back.
void SetLocalMemory(bool local) { _localMemory = local; }
// Returns the current _localMemory flag (the one SetLocalMemory() assigns).
bool LocalMemory() { return _localMemory; }
void DeclaredInProgram(bool declaredInProgram)
void SetDeclaredInProgram(bool declaredInProgram)
{
_declaredInProgram = declaredInProgram;
}
@@ -478,6 +488,8 @@ private:
cl_uint _currentGroupSize;
cl_uint _passCount;
const cl_int _iterations;
cl_uint _maxDeviceThreads;
cl_uint _deviceThreads;
};
template <typename HostAtomicType, typename HostDataType>
@@ -912,9 +924,15 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
+ ss.str() + "] = {\n";
ss.str("");
if constexpr (is_host_fp_v<HostDataType>)
ss << std::hexfloat
<< _startValue; // use hex format for accurate representation
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
ss << std::setprecision(10) << _startValue;
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
ss << std::setprecision(std::numeric_limits<float>::max_digits10)
<< cl_half_to_float(_startValue);
}
else
ss << _startValue;
@@ -1151,7 +1169,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
MTdata d;
size_t typeSize = DataType().Size(deviceID);
deviceThreadCount = _maxDeviceThreads;
deviceThreadCount = _deviceThreads;
hostThreadCount = MaxHostThreads();
threadCount = deviceThreadCount + hostThreadCount;