mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-24 15:59:03 +00:00
Added support for cl_ext_float_atomics in CBasicTestFetchAddSpecialFloats with atomic_half (#2386)
Related to #2142, according to the work plan, extending CBasicTestFetchAddSpecialFloats with support for atomic_half.
This commit is contained in:
@@ -1418,6 +1418,219 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <typename HostAtomicType, typename HostDataType>
|
||||
class CBasicTestFetchAddSpecialFloats
|
||||
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
||||
|
||||
std::vector<HostDataType> ref_vals;
|
||||
|
||||
public:
|
||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
|
||||
using CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::MemoryOrderScopeStr;
|
||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
|
||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
|
||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
|
||||
using CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::DeclaredInProgram;
|
||||
CBasicTestFetchAddSpecialFloats(TExplicitAtomicType dataType, bool useSVM)
|
||||
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
||||
useSVM)
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
// StartValue is used as an index divisor in the following test
|
||||
// logic. It is set to the number of special values, which allows
|
||||
// threads to be mapped deterministically onto the input data array.
|
||||
// This enables repeated add operations arranged so that every
|
||||
// special value is added to every other one (“all-to-all”).
|
||||
|
||||
auto spec_vals = GetSpecialValues();
|
||||
StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode));
|
||||
CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::OldValueCheck(false);
|
||||
}
|
||||
}
|
||||
|
||||
static std::vector<HostDataType> &GetSpecialValues()
|
||||
{
|
||||
static std::vector<HostDataType> special_values;
|
||||
if (special_values.empty())
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
special_values = {
|
||||
0xffff, 0x0000, 0x7c00, /*INFINITY*/
|
||||
0xfc00, /*-INFINITY*/
|
||||
0x8000, /*-0*/
|
||||
0x7bff, /*HALF_MAX*/
|
||||
0x0400, /*HALF_MIN*/
|
||||
0x3c00, /* 1 */
|
||||
0xbc00, /* -1 */
|
||||
0x3555, /*nearest value to 1/3*/
|
||||
0x3bff, /*largest number less than one*/
|
||||
0xc000, /* -2 */
|
||||
0xfbff, /* -HALF_MAX */
|
||||
0x8400, /* -HALF_MIN */
|
||||
0x4248, /* M_PI_H */
|
||||
0xc248, /* -M_PI_H */
|
||||
0xbbff, /* Largest negative fraction */
|
||||
};
|
||||
|
||||
if (0 != (CL_FP_DENORM & gHalfFPConfig))
|
||||
{
|
||||
special_values.push_back(0x0001 /* Smallest denormal */);
|
||||
special_values.push_back(0x03ff /* Largest denormal */);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return special_values;
|
||||
}
|
||||
|
||||
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||
MTdata d) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (threadCount > ref_vals.size())
|
||||
{
|
||||
ref_vals.assign(threadCount, 0);
|
||||
auto spec_vals = GetSpecialValues();
|
||||
|
||||
cl_uint total_cnt = 0;
|
||||
while (total_cnt < threadCount)
|
||||
{
|
||||
cl_uint block_cnt =
|
||||
std::min((cl_int)(threadCount - total_cnt),
|
||||
(cl_int)spec_vals.size());
|
||||
memcpy(&ref_vals.at(total_cnt), spec_vals.data(),
|
||||
sizeof(HostDataType) * block_cnt);
|
||||
total_cnt += block_cnt;
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(startRefValues, ref_vals.data(),
|
||||
sizeof(HostDataType) * threadCount);
|
||||
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
std::string ProgramCore() override
|
||||
{
|
||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
// The start_value variable (set by StartValue) is used
|
||||
// as a divisor of the thread index when selecting the operand for
|
||||
// atomic_fetch_add. This groups threads into blocks corresponding
|
||||
// to the number of special values and implements an “all-to-all”
|
||||
// addition pattern. As a result, each destination element is
|
||||
// updated using different combinations of input values, enabling
|
||||
// consistent comparison between host and device execution.
|
||||
|
||||
return std::string(DataType().AddSubOperandTypeName())
|
||||
+ " start_value = atomic_load_explicit(destMemory+tid, "
|
||||
"memory_order_relaxed, memory_scope_work_group);\n"
|
||||
" atomic_store_explicit(destMemory+tid, oldValues[tid], "
|
||||
"memory_order_relaxed, memory_scope_work_group);\n"
|
||||
" atomic_fetch_add"
|
||||
+ postfix + "(&destMemory[tid], ("
|
||||
+ DataType().AddSubOperandTypeName()
|
||||
+ ")oldValues[tid/(int)start_value]" + memoryOrderScope
|
||||
+ ");\n";
|
||||
}
|
||||
}
|
||||
void HostFunction(cl_uint tid, cl_uint threadCount,
|
||||
volatile HostAtomicType *destMemory,
|
||||
HostDataType *oldValues) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
auto spec_vals = GetSpecialValues();
|
||||
host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
|
||||
MEMORY_ORDER_SEQ_CST);
|
||||
host_atomic_fetch_add(
|
||||
&destMemory[tid],
|
||||
(HostDataType)oldValues[tid / spec_vals.size()], MemoryOrder());
|
||||
}
|
||||
}
|
||||
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
||||
HostDataType *startRefValues,
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
expected = StartValue();
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
auto spec_vals = GetSpecialValues();
|
||||
expected = cl_half_from_float(
|
||||
cl_half_to_float(startRefValues[whichDestValue])
|
||||
+ cl_half_to_float(
|
||||
startRefValues[whichDestValue / spec_vals.size()]),
|
||||
gHalfRoundingMode);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IsTestNotAsExpected(const HostDataType &expected,
|
||||
const std::vector<HostAtomicType> &testValues,
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
return static_cast<cl_half>(expected) != testValues[whichDestValue];
|
||||
}
|
||||
|
||||
return CBasicTestMemOrderScope<
|
||||
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
|
||||
testValues,
|
||||
whichDestValue);
|
||||
}
|
||||
|
||||
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
if (DeclaredInProgram()) return 0; // skip test - not applicable
|
||||
|
||||
if (LocalMemory()
|
||||
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||
return 0; // skip test - not applicable
|
||||
|
||||
if (!LocalMemory()
|
||||
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||
return 0;
|
||||
|
||||
if (!CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::LocalMemory()
|
||||
&& CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::DeclaredInProgram())
|
||||
{
|
||||
if ((gHalfFPConfig & CL_FP_INF_NAN) == 0) return 0;
|
||||
}
|
||||
}
|
||||
return CBasicTestMemOrderScope<
|
||||
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
|
||||
queue);
|
||||
}
|
||||
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||
{
|
||||
return threadCount;
|
||||
}
|
||||
return CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::NumResults(threadCount,
|
||||
deviceID);
|
||||
}
|
||||
};
|
||||
|
||||
static int test_atomic_fetch_add_generic(cl_device_id deviceID,
|
||||
cl_context context,
|
||||
cl_command_queue queue,
|
||||
@@ -1443,6 +1656,17 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
|
||||
|
||||
if (gFloatAtomicsSupported)
|
||||
{
|
||||
auto spec_vals_halfs =
|
||||
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF,
|
||||
HOST_HALF>::GetSpecialValues();
|
||||
|
||||
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF, HOST_HALF>
|
||||
test_spec_half(TYPE_ATOMIC_HALF, useSVM);
|
||||
EXECUTE_TEST(error,
|
||||
test_spec_half.Execute(deviceID, context, queue,
|
||||
spec_vals_halfs.size()
|
||||
* spec_vals_halfs.size()));
|
||||
|
||||
CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
|
||||
TYPE_ATOMIC_HALF, useSVM);
|
||||
EXECUTE_TEST(error,
|
||||
|
||||
Reference in New Issue
Block a user