Added support for cl_ext_float_atomics in CBasicTestFetchAddSpecialFloats with atomic_float (#2371)

Related to #2142. Following the work plan, this change extends
CBasicTestFetchAddSpecialFloats with support for atomic_float.
This commit is contained in:
Marcin Hajder
2026-02-17 17:41:46 +01:00
committed by GitHub
parent 50ba8c8c1c
commit 9d3ae4c734
3 changed files with 132 additions and 24 deletions

View File

@@ -22,13 +22,13 @@
#include "host_atomics.h" #include "host_atomics.h"
#include "CL/cl_half.h"
#include <iomanip> #include <iomanip>
#include <limits> #include <limits>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
#include "CL/cl_half.h"
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads) #define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
#define MAX_HOST_THREADS GetThreadCount() #define MAX_HOST_THREADS GetThreadCount()
@@ -76,9 +76,8 @@ extern int
gMaxDeviceThreads; // maximum number of threads executed on OCL device gMaxDeviceThreads; // maximum number of threads executed on OCL device
extern cl_device_atomic_capabilities gAtomicMemCap, extern cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device gAtomicFenceCap; // atomic memory and fence capabilities for this device
extern cl_half_rounding_mode gHalfRoundingMode;
extern bool gFloatAtomicsSupported; extern cl_device_fp_config gFloatFPConfig;
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
extern cl_device_fp_config gHalfFPConfig; extern cl_device_fp_config gHalfFPConfig;
extern cl_half_rounding_mode gHalfRoundingMode; extern cl_half_rounding_mode gHalfRoundingMode;
@@ -183,6 +182,7 @@ public:
{ {
return false; return false;
} }
virtual bool virtual bool
IsTestNotAsExpected(const HostDataType &expected, IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues, const std::vector<HostAtomicType> &testValues,
@@ -926,7 +926,14 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>) if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
ss << std::setprecision(10) << _startValue; if (std::isinf(_startValue))
ss << (_startValue < 0 ? "-" : "") << "INFINITY";
else if (std::isnan(_startValue))
ss << "0.0f / 0.0f";
else
ss << std::setprecision(
std::numeric_limits<HostDataType>::max_digits10)
<< _startValue;
} }
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>) else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {

View File

@@ -31,6 +31,8 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio
int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device
cl_device_atomic_capabilities gAtomicMemCap, cl_device_atomic_capabilities gAtomicMemCap,
gAtomicFenceCap; // atomic memory and fence capabilities for this device gAtomicFenceCap; // atomic memory and fence capabilities for this device
cl_device_fp_config gFloatFPConfig = 0;
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
bool gFloatAtomicsSupported = false; bool gFloatAtomicsSupported = false;
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0; cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
@@ -148,6 +150,13 @@ test_status InitCL(cl_device_id device) {
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr); sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL); test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
sizeof(gFloatFPConfig), &gFloatFPConfig, NULL);
test_error_ret(
error,
"Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)",
TEST_FAIL);
if (is_extension_available(device, "cl_khr_fp16")) if (is_extension_available(device, "cl_khr_fp16"))
{ {
cl_int error = clGetDeviceInfo( cl_int error = clGetDeviceInfo(

View File

@@ -1437,13 +1437,21 @@ public:
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM) useSVM)
{ {
if constexpr (std::is_same_v<HostDataType, HOST_HALF>) // StartValue is used as an index divisor in the following test
// logic. It is set to the number of special values, which allows
// threads to be mapped deterministically onto the input data array.
// This enables repeated add operations arranged so that every
// special value is added to every other one (“all-to-all”).
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
auto spec_vals = GetSpecialValues();
StartValue(spec_vals.size());
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
// StartValue is used as an index divisor in the following test
// logic. It is set to the number of special values, which allows
// threads to be mapped deterministically onto the input data array.
// This enables repeated add operations arranged so that every
// special value is added to every other one (“all-to-all”).
auto spec_vals = GetSpecialValues(); auto spec_vals = GetSpecialValues();
StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode)); StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode));
@@ -1454,10 +1462,42 @@ public:
static std::vector<HostDataType> &GetSpecialValues() static std::vector<HostDataType> &GetSpecialValues()
{ {
const float test_value_zero = 0.0f;
const float test_value_minus_zero = -0.0f;
const float test_value_without_fraction = 2.0f;
const float test_value_with_fraction = 2.2f;
static std::vector<HostDataType> special_values; static std::vector<HostDataType> special_values;
if (special_values.empty()) if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if constexpr (std::is_same_v<HostDataType, HOST_HALF>) if (special_values.empty())
{
special_values = {
static_cast<HostDataType>(test_value_minus_zero),
static_cast<HostDataType>(test_value_zero),
static_cast<HostDataType>(test_value_without_fraction),
static_cast<HostDataType>(test_value_with_fraction),
std::numeric_limits<HostDataType>::infinity(),
std::numeric_limits<HostDataType>::quiet_NaN(),
std::numeric_limits<HostDataType>::signaling_NaN(),
-std::numeric_limits<HostDataType>::infinity(),
-std::numeric_limits<HostDataType>::quiet_NaN(),
-std::numeric_limits<HostDataType>::signaling_NaN(),
std::numeric_limits<HostDataType>::lowest(),
std::numeric_limits<HostDataType>::min(),
std::numeric_limits<HostDataType>::max(),
};
if (0 != (CL_FP_DENORM & gFloatFPConfig))
{
special_values.push_back(
std::numeric_limits<HostDataType>::denorm_min());
}
}
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (special_values.empty())
{ {
special_values = { special_values = {
0xffff, 0x0000, 0x7c00, /*INFINITY*/ 0xffff, 0x0000, 0x7c00, /*INFINITY*/
@@ -1491,7 +1531,10 @@ public:
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override MTdata d) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
if (threadCount > ref_vals.size()) if (threadCount > ref_vals.size())
{ {
@@ -1522,7 +1565,10 @@ public:
std::string memoryOrderScope = MemoryOrderScopeStr(); std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (std::is_same_v<HostDataType, HOST_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
// The start_value variable (set by StartValue) is used // The start_value variable (set by StartValue) is used
// as a divisor of the thread index when selecting the operand for // as a divisor of the thread index when selecting the operand for
@@ -1548,7 +1594,10 @@ public:
volatile HostAtomicType *destMemory, volatile HostAtomicType *destMemory,
HostDataType *oldValues) override HostDataType *oldValues) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
auto spec_vals = GetSpecialValues(); auto spec_vals = GetSpecialValues();
host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid], host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
@@ -1563,7 +1612,14 @@ public:
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
expected = StartValue(); expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
auto spec_vals = GetSpecialValues();
expected = startRefValues[whichDestValue]
+ startRefValues[whichDestValue / spec_vals.size()];
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
auto spec_vals = GetSpecialValues(); auto spec_vals = GetSpecialValues();
expected = cl_half_from_float( expected = cl_half_from_float(
@@ -1575,13 +1631,18 @@ public:
return true; return true;
} }
bool IsTestNotAsExpected(const HostDataType &expected, bool IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues, const std::vector<HostAtomicType> &testValues,
cl_uint whichDestValue) override cl_uint whichDestValue) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
if constexpr (std::is_same_v<HostDataType, HOST_HALF>) {
if (std::isnan(testValues[whichDestValue]) && std::isnan(expected))
return false;
else
return expected != testValues[whichDestValue];
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
return static_cast<cl_half>(expected) != testValues[whichDestValue]; return static_cast<cl_half>(expected) != testValues[whichDestValue];
} }
@@ -1591,11 +1652,28 @@ public:
testValues, testValues,
whichDestValue); whichDestValue);
} }
int ExecuteSingleTest(cl_device_id deviceID, cl_context context, int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override cl_command_queue queue) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_HALF>) if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
if (!CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::LocalMemory()
&& CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::DeclaredInProgram())
{
if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0;
}
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{ {
if (DeclaredInProgram()) return 0; // skip test - not applicable if (DeclaredInProgram()) return 0; // skip test - not applicable
@@ -1621,7 +1699,10 @@ public:
} }
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{ {
if constexpr (std::is_same_v<HostDataType, HOST_HALF>) if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
{ {
return threadCount; return threadCount;
} }
@@ -1656,6 +1737,17 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
if (gFloatAtomicsSupported) if (gFloatAtomicsSupported)
{ {
auto spec_vals_fp32 =
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT,
HOST_FLOAT>::GetSpecialValues();
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT, HOST_FLOAT>
test_spec_float(TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(error,
test_spec_float.Execute(deviceID, context, queue,
spec_vals_fp32.size()
* spec_vals_fp32.size()));
auto spec_vals_halfs = auto spec_vals_halfs =
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF, CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF,
HOST_HALF>::GetSpecialValues(); HOST_HALF>::GetSpecialValues();