Added support for cl_ext_float_atomics in CBasicTestFetchMin/Max with atomic_float (#2353)

Related to #2142, according to the work plan, extending
`CBasicTestFetchMin`/`CBasicTestFetchMax` with support for atomic_float.
This commit is contained in:
Marcin Hajder
2025-09-09 17:45:35 +02:00
committed by GitHub
parent a0bd81d574
commit df61cad39f
4 changed files with 348 additions and 64 deletions

View File

@@ -24,8 +24,9 @@
#include "CL/cl_half.h"
#include <vector>
#include <iomanip>
#include <sstream>
#include <vector>
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
#define MAX_HOST_THREADS GetThreadCount()
@@ -892,14 +893,15 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
header += std::string("__global volatile ") + aTypeName + " destMemory["
+ ss.str() + "] = {\n";
ss.str("");
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
!= TYPE_ATOMIC_HALF)
ss << _startValue;
else
== TYPE_ATOMIC_FLOAT)
ss << std::setprecision(10) << _startValue;
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
== TYPE_ATOMIC_HALF)
ss << static_cast<HostDataType>(
cl_half_to_float(static_cast<cl_half>(_startValue)));
else
ss << _startValue;
for (cl_uint i = 0; i < maxNumDestItems; i++)
{
if (aTypeName == "atomic_flag")

View File

@@ -172,19 +172,34 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
TExplicitMemoryOrderType order_success,
TExplicitMemoryOrderType order_failure)
{
CorrespondingType tmp;
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
tmp = InterlockedCompareExchange(a, desired, *expected);
CorrespondingType tmp;
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
{
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
tmp = *reinterpret_cast<volatile float *>(a);
if (tmp == *expected)
{
*reinterpret_cast<volatile float *>(a) = desired;
return true;
}
*expected = tmp;
}
else
{
#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
tmp = InterlockedCompareExchange(a, desired, *expected);
#elif defined(__GNUC__)
tmp = __sync_val_compare_and_swap(a, *expected, desired);
tmp = __sync_val_compare_and_swap(a, *expected, desired);
#else
log_info("Host function not implemented: atomic_compare_exchange\n");
tmp = 0;
log_info("Host function not implemented: atomic_compare_exchange\n");
tmp = 0;
#endif
if(tmp == *expected)
return true;
*expected = tmp;
return false;
if (tmp == *expected) return true;
*expected = tmp;
}
return false;
}
template <typename AtomicType, typename CorrespondingType>

View File

@@ -138,6 +138,7 @@ test_status InitCL(cl_device_id device) {
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
if (is_extension_available(device, "cl_khr_fp16"))
{
cl_int error = clGetDeviceInfo(

View File

@@ -2608,54 +2608,178 @@ REGISTER_TEST(svm_atomic_fetch_xor2)
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMin
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
    // Range used when generating random float reference inputs.
    double min_range;
    double max_range;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM),
          min_range(-999.0), max_range(999.0)
    {
        // Destination starts at the maximum so any generated value can
        // lower it via atomic_fetch_min.
        StartValue(DataType().MaxValue());
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // The float variant keeps per-thread results in destMemory[tid];
            // the generic old-value bookkeeping does not apply to it.
            CBasicTestMemOrderScope<HostAtomicType,
                                    HostDataType>::OldValueCheck(false);
        }
    }
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // 1) fold this thread's value into destMemory[0];
            // 2) read back destMemory[tid] (min with 0 as operand) so the
            //    per-thread slot's previous value lands in oldValues[tid].
            return " atomic_fetch_min" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
                + " oldValues[tid] = atomic_fetch_min" + postfix
                + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
                + ")0" + memoryOrderScope + ");\n";
        }
        else
        {
            return " oldValues[tid] = atomic_fetch_min" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope
                + ");\n";
        }
    }
    void HostFunction(cl_uint tid, cl_uint threadCount,
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Mirror of the float kernel: accumulate into slot 0, then
            // fetch the per-thread slot's old value.
            host_atomic_fetch_min(&destMemory[0], oldValues[tid],
                                  MemoryOrder());
            oldValues[tid] = host_atomic_fetch_min(
                &destMemory[tid], (HostDataType)0, MemoryOrder());
        }
        else
        {
            oldValues[tid] = host_atomic_fetch_min(
                &destMemory[0], oldValues[tid], MemoryOrder());
        }
    }
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = get_random_float(min_range, max_range, d);
            }
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = genrand_int32(d);
                if (sizeof(HostDataType) >= 8)
                {
                    // Widen 64-bit reference values beyond 32 random bits.
                    cl_ulong v = startRefValues[i];
                    v |= (cl_ulong)genrand_int32(d) << 16;
                    startRefValues[i] = v;
                }
            }
        }
        return true;
    }
    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                       HostDataType *startRefValues,
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Only slot 0 accumulates the global minimum; other slots are
            // ignored by IsTestNotAsExpected below.
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
                    if (startRefValues[i] < expected)
                        expected = startRefValues[i];
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                if (startRefValues[i] < expected) expected = startRefValues[i];
            }
        }
        return true;
    }
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             cl_uint whichDestValue) override
    {
        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
        {
            if (whichDestValue == 0)
                return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
                    IsTestNotAsExpected(expected, testValues, whichDestValue);
            return false; // ignore all but 0 which stores final result
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                               testValues,
                                                               whichDestValue);
    }
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
        {
            correct = true;
            // refValues[i] (i > 0) holds the old value of destMemory[i],
            // which only thread i touched, so it must still be StartValue().
            // (Fixed: the diff had the for-line duplicated in place of this
            // comparison, reporting every thread as a mismatch.)
            for (cl_uint i = 1; i < threadCount; i++)
            {
                if (refValues[i] != StartValue())
                {
                    log_error("Thread %d found %d mismatch(es)\n", i,
                              (cl_uint)refValues[i]);
                    correct = false;
                }
            }
            // NOTE(review): mirrors CBasicTestFetchMax::VerifyRefs; confirm
            // the return convention (true = verification handled) upstream.
            return !correct;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::VerifyRefs(correct,
                                                                 threadCount,
                                                                 refValues,
                                                                 finalValues);
    }
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Skip when the device lacks the relevant FP min/max capability
            // from cl_ext_float_atomics for the targeted memory space.
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // One destination slot per thread in the float variant.
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::NumResults(threadCount,
                                                                 deviceID);
    }
};
static int test_atomic_fetch_min_generic(cl_device_id deviceID,
@@ -2680,6 +2804,15 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID,
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestFetchMin<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
@@ -2742,54 +2875,178 @@ REGISTER_TEST(svm_atomic_fetch_min)
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMax
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
    // Range used when generating random float reference inputs.
    double min_range;
    double max_range;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM),
          min_range(-999.0), max_range(999.0)
    {
        // Destination starts at the minimum so any generated value can
        // raise it via atomic_fetch_max.
        StartValue(DataType().MinValue());
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // The float variant keeps per-thread results in destMemory[tid];
            // the generic old-value bookkeeping does not apply to it.
            CBasicTestMemOrderScope<HostAtomicType,
                                    HostDataType>::OldValueCheck(false);
        }
    }
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // 1) fold this thread's value into destMemory[0];
            // 2) read back destMemory[tid] (max with 0 as operand) so the
            //    per-thread slot's previous value lands in oldValues[tid].
            return " atomic_fetch_max" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
                + " oldValues[tid] = atomic_fetch_max" + postfix
                + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
                + ")0" + memoryOrderScope + ");\n";
        }
        else
        {
            return " oldValues[tid] = atomic_fetch_max" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope
                + ");\n";
        }
    }
    void HostFunction(cl_uint tid, cl_uint threadCount,
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Mirror of the float kernel: accumulate into slot 0, then
            // fetch the per-thread slot's old value.
            host_atomic_fetch_max(&destMemory[0], oldValues[tid],
                                  MemoryOrder());
            oldValues[tid] = host_atomic_fetch_max(
                &destMemory[tid], (HostDataType)0, MemoryOrder());
        }
        else
        {
            oldValues[tid] = host_atomic_fetch_max(
                &destMemory[0], oldValues[tid], MemoryOrder());
        }
    }
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = get_random_float(min_range, max_range, d);
            }
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = genrand_int32(d);
                if (sizeof(HostDataType) >= 8)
                {
                    // Widen 64-bit reference values beyond 32 random bits.
                    cl_ulong v = startRefValues[i];
                    v |= (cl_ulong)genrand_int32(d) << 16;
                    startRefValues[i] = v;
                }
            }
        }
        return true;
    }
    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                       HostDataType *startRefValues,
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Only slot 0 accumulates the global maximum; other slots are
            // ignored by IsTestNotAsExpected below.
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
                    if (startRefValues[i] > expected)
                        expected = startRefValues[i];
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                if (startRefValues[i] > expected) expected = startRefValues[i];
            }
        }
        return true;
    }
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             cl_uint whichDestValue) override
    {
        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
        {
            if (whichDestValue == 0)
                return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
                    IsTestNotAsExpected(expected, testValues, whichDestValue);
            return false; // ignore all but 0 which stores final result
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                               testValues,
                                                               whichDestValue);
    }
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
        if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
        {
            correct = true;
            // refValues[i] (i > 0) holds the old value of destMemory[i],
            // which only thread i touched, so it must still be StartValue().
            for (cl_uint i = 1; i < threadCount; i++)
            {
                if (refValues[i] != StartValue())
                {
                    log_error("Thread %d found %d mismatch(es)\n", i,
                              (cl_uint)refValues[i]);
                    correct = false;
                }
            }
            // NOTE(review): confirm the return convention (true =
            // verification handled) against the base class contract.
            return !correct;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::VerifyRefs(correct,
                                                                 threadCount,
                                                                 refValues,
                                                                 finalValues);
    }
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // Skip when the device lacks the relevant FP min/max capability
            // from cl_ext_float_atomics for the targeted memory space.
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
        {
            // One destination slot per thread in the float variant.
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::NumResults(threadCount,
                                                                 deviceID);
    }
};
static int test_atomic_fetch_max_generic(cl_device_id deviceID,
@@ -2814,6 +3071,15 @@ static int test_atomic_fetch_max_generic(cl_device_id deviceID,
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestFetchMax<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>