mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Added support for cl_ext_float_atomics in CBasicTestFetchAddSpecialFloats with atomic_float (#2371)
Related to #2142. Following the work plan, this change extends CBasicTestFetchAddSpecialFloats with support for atomic_float.
This commit is contained in:
@@ -22,13 +22,13 @@
|
|||||||
|
|
||||||
#include "host_atomics.h"
|
#include "host_atomics.h"
|
||||||
|
|
||||||
#include "CL/cl_half.h"
|
|
||||||
|
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "CL/cl_half.h"
|
||||||
|
|
||||||
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
|
#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
|
||||||
#define MAX_HOST_THREADS GetThreadCount()
|
#define MAX_HOST_THREADS GetThreadCount()
|
||||||
|
|
||||||
@@ -76,9 +76,8 @@ extern int
|
|||||||
gMaxDeviceThreads; // maximum number of threads executed on OCL device
|
gMaxDeviceThreads; // maximum number of threads executed on OCL device
|
||||||
extern cl_device_atomic_capabilities gAtomicMemCap,
|
extern cl_device_atomic_capabilities gAtomicMemCap,
|
||||||
gAtomicFenceCap; // atomic memory and fence capabilities for this device
|
gAtomicFenceCap; // atomic memory and fence capabilities for this device
|
||||||
extern cl_half_rounding_mode gHalfRoundingMode;
|
|
||||||
extern bool gFloatAtomicsSupported;
|
extern cl_device_fp_config gFloatFPConfig;
|
||||||
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
|
|
||||||
extern cl_device_fp_config gHalfFPConfig;
|
extern cl_device_fp_config gHalfFPConfig;
|
||||||
|
|
||||||
extern cl_half_rounding_mode gHalfRoundingMode;
|
extern cl_half_rounding_mode gHalfRoundingMode;
|
||||||
@@ -183,6 +182,7 @@ public:
|
|||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool
|
virtual bool
|
||||||
IsTestNotAsExpected(const HostDataType &expected,
|
IsTestNotAsExpected(const HostDataType &expected,
|
||||||
const std::vector<HostAtomicType> &testValues,
|
const std::vector<HostAtomicType> &testValues,
|
||||||
@@ -926,7 +926,14 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
|
|||||||
|
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||||
{
|
{
|
||||||
ss << std::setprecision(10) << _startValue;
|
if (std::isinf(_startValue))
|
||||||
|
ss << (_startValue < 0 ? "-" : "") << "INFINITY";
|
||||||
|
else if (std::isnan(_startValue))
|
||||||
|
ss << "0.0f / 0.0f";
|
||||||
|
else
|
||||||
|
ss << std::setprecision(
|
||||||
|
std::numeric_limits<HostDataType>::max_digits10)
|
||||||
|
<< _startValue;
|
||||||
}
|
}
|
||||||
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -31,6 +31,8 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio
|
|||||||
int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device
|
int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device
|
||||||
cl_device_atomic_capabilities gAtomicMemCap,
|
cl_device_atomic_capabilities gAtomicMemCap,
|
||||||
gAtomicFenceCap; // atomic memory and fence capabilities for this device
|
gAtomicFenceCap; // atomic memory and fence capabilities for this device
|
||||||
|
|
||||||
|
cl_device_fp_config gFloatFPConfig = 0;
|
||||||
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
|
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
|
||||||
bool gFloatAtomicsSupported = false;
|
bool gFloatAtomicsSupported = false;
|
||||||
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
|
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
|
||||||
@@ -148,6 +150,13 @@ test_status InitCL(cl_device_id device) {
|
|||||||
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
|
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
|
||||||
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
|
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
|
||||||
|
|
||||||
|
error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
|
||||||
|
sizeof(gFloatFPConfig), &gFloatFPConfig, NULL);
|
||||||
|
test_error_ret(
|
||||||
|
error,
|
||||||
|
"Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)",
|
||||||
|
TEST_FAIL);
|
||||||
|
|
||||||
if (is_extension_available(device, "cl_khr_fp16"))
|
if (is_extension_available(device, "cl_khr_fp16"))
|
||||||
{
|
{
|
||||||
cl_int error = clGetDeviceInfo(
|
cl_int error = clGetDeviceInfo(
|
||||||
|
|||||||
@@ -1437,13 +1437,21 @@ public:
|
|||||||
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
||||||
useSVM)
|
useSVM)
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
// StartValue is used as an index divisor in the following test
|
||||||
|
// logic. It is set to the number of special values, which allows
|
||||||
|
// threads to be mapped deterministically onto the input data array.
|
||||||
|
// This enables repeated add operations arranged so that every
|
||||||
|
// special value is added to every other one (“all-to-all”).
|
||||||
|
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||||
|
{
|
||||||
|
auto spec_vals = GetSpecialValues();
|
||||||
|
StartValue(spec_vals.size());
|
||||||
|
CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::OldValueCheck(false);
|
||||||
|
}
|
||||||
|
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||||
{
|
{
|
||||||
// StartValue is used as an index divisor in the following test
|
|
||||||
// logic. It is set to the number of special values, which allows
|
|
||||||
// threads to be mapped deterministically onto the input data array.
|
|
||||||
// This enables repeated add operations arranged so that every
|
|
||||||
// special value is added to every other one (“all-to-all”).
|
|
||||||
|
|
||||||
auto spec_vals = GetSpecialValues();
|
auto spec_vals = GetSpecialValues();
|
||||||
StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode));
|
StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode));
|
||||||
@@ -1454,10 +1462,42 @@ public:
|
|||||||
|
|
||||||
static std::vector<HostDataType> &GetSpecialValues()
|
static std::vector<HostDataType> &GetSpecialValues()
|
||||||
{
|
{
|
||||||
|
const float test_value_zero = 0.0f;
|
||||||
|
const float test_value_minus_zero = -0.0f;
|
||||||
|
const float test_value_without_fraction = 2.0f;
|
||||||
|
const float test_value_with_fraction = 2.2f;
|
||||||
|
|
||||||
static std::vector<HostDataType> special_values;
|
static std::vector<HostDataType> special_values;
|
||||||
if (special_values.empty())
|
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if (special_values.empty())
|
||||||
|
{
|
||||||
|
special_values = {
|
||||||
|
static_cast<HostDataType>(test_value_minus_zero),
|
||||||
|
static_cast<HostDataType>(test_value_zero),
|
||||||
|
static_cast<HostDataType>(test_value_without_fraction),
|
||||||
|
static_cast<HostDataType>(test_value_with_fraction),
|
||||||
|
std::numeric_limits<HostDataType>::infinity(),
|
||||||
|
std::numeric_limits<HostDataType>::quiet_NaN(),
|
||||||
|
std::numeric_limits<HostDataType>::signaling_NaN(),
|
||||||
|
-std::numeric_limits<HostDataType>::infinity(),
|
||||||
|
-std::numeric_limits<HostDataType>::quiet_NaN(),
|
||||||
|
-std::numeric_limits<HostDataType>::signaling_NaN(),
|
||||||
|
std::numeric_limits<HostDataType>::lowest(),
|
||||||
|
std::numeric_limits<HostDataType>::min(),
|
||||||
|
std::numeric_limits<HostDataType>::max(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if (0 != (CL_FP_DENORM & gFloatFPConfig))
|
||||||
|
{
|
||||||
|
special_values.push_back(
|
||||||
|
std::numeric_limits<HostDataType>::denorm_min());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||||
|
{
|
||||||
|
if (special_values.empty())
|
||||||
{
|
{
|
||||||
special_values = {
|
special_values = {
|
||||||
0xffff, 0x0000, 0x7c00, /*INFINITY*/
|
0xffff, 0x0000, 0x7c00, /*INFINITY*/
|
||||||
@@ -1491,7 +1531,10 @@ public:
|
|||||||
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||||
MTdata d) override
|
MTdata d) override
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if constexpr (
|
||||||
|
std::is_same_v<
|
||||||
|
HostDataType,
|
||||||
|
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||||
{
|
{
|
||||||
if (threadCount > ref_vals.size())
|
if (threadCount > ref_vals.size())
|
||||||
{
|
{
|
||||||
@@ -1522,7 +1565,10 @@ public:
|
|||||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||||
|
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if constexpr (
|
||||||
|
std::is_same_v<
|
||||||
|
HostDataType,
|
||||||
|
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||||
{
|
{
|
||||||
// The start_value variable (set by StartValue) is used
|
// The start_value variable (set by StartValue) is used
|
||||||
// as a divisor of the thread index when selecting the operand for
|
// as a divisor of the thread index when selecting the operand for
|
||||||
@@ -1548,7 +1594,10 @@ public:
|
|||||||
volatile HostAtomicType *destMemory,
|
volatile HostAtomicType *destMemory,
|
||||||
HostDataType *oldValues) override
|
HostDataType *oldValues) override
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if constexpr (
|
||||||
|
std::is_same_v<
|
||||||
|
HostDataType,
|
||||||
|
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||||
{
|
{
|
||||||
auto spec_vals = GetSpecialValues();
|
auto spec_vals = GetSpecialValues();
|
||||||
host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
|
host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
|
||||||
@@ -1563,7 +1612,14 @@ public:
|
|||||||
cl_uint whichDestValue) override
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
expected = StartValue();
|
expected = StartValue();
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||||
|
{
|
||||||
|
auto spec_vals = GetSpecialValues();
|
||||||
|
expected = startRefValues[whichDestValue]
|
||||||
|
+ startRefValues[whichDestValue / spec_vals.size()];
|
||||||
|
}
|
||||||
|
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||||
{
|
{
|
||||||
auto spec_vals = GetSpecialValues();
|
auto spec_vals = GetSpecialValues();
|
||||||
expected = cl_half_from_float(
|
expected = cl_half_from_float(
|
||||||
@@ -1575,13 +1631,18 @@ public:
|
|||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsTestNotAsExpected(const HostDataType &expected,
|
bool IsTestNotAsExpected(const HostDataType &expected,
|
||||||
const std::vector<HostAtomicType> &testValues,
|
const std::vector<HostAtomicType> &testValues,
|
||||||
cl_uint whichDestValue) override
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
{
|
||||||
|
if (std::isnan(testValues[whichDestValue]) && std::isnan(expected))
|
||||||
|
return false;
|
||||||
|
else
|
||||||
|
return expected != testValues[whichDestValue];
|
||||||
|
}
|
||||||
|
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||||
{
|
{
|
||||||
return static_cast<cl_half>(expected) != testValues[whichDestValue];
|
return static_cast<cl_half>(expected) != testValues[whichDestValue];
|
||||||
}
|
}
|
||||||
@@ -1591,11 +1652,28 @@ public:
|
|||||||
testValues,
|
testValues,
|
||||||
whichDestValue);
|
whichDestValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
||||||
cl_command_queue queue) override
|
cl_command_queue queue) override
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||||
|
{
|
||||||
|
if (LocalMemory()
|
||||||
|
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||||
|
return 0; // skip test - not applicable
|
||||||
|
|
||||||
|
if (!LocalMemory()
|
||||||
|
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::LocalMemory()
|
||||||
|
&& CBasicTestMemOrderScope<HostAtomicType,
|
||||||
|
HostDataType>::DeclaredInProgram())
|
||||||
|
{
|
||||||
|
if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||||
{
|
{
|
||||||
if (DeclaredInProgram()) return 0; // skip test - not applicable
|
if (DeclaredInProgram()) return 0; // skip test - not applicable
|
||||||
|
|
||||||
@@ -1621,7 +1699,10 @@ public:
|
|||||||
}
|
}
|
||||||
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if constexpr (
|
||||||
|
std::is_same_v<
|
||||||
|
HostDataType,
|
||||||
|
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
||||||
{
|
{
|
||||||
return threadCount;
|
return threadCount;
|
||||||
}
|
}
|
||||||
@@ -1656,6 +1737,17 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
|
|||||||
|
|
||||||
if (gFloatAtomicsSupported)
|
if (gFloatAtomicsSupported)
|
||||||
{
|
{
|
||||||
|
auto spec_vals_fp32 =
|
||||||
|
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT,
|
||||||
|
HOST_FLOAT>::GetSpecialValues();
|
||||||
|
|
||||||
|
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT, HOST_FLOAT>
|
||||||
|
test_spec_float(TYPE_ATOMIC_FLOAT, useSVM);
|
||||||
|
EXECUTE_TEST(error,
|
||||||
|
test_spec_float.Execute(deviceID, context, queue,
|
||||||
|
spec_vals_fp32.size()
|
||||||
|
* spec_vals_fp32.size()));
|
||||||
|
|
||||||
auto spec_vals_halfs =
|
auto spec_vals_halfs =
|
||||||
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF,
|
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF,
|
||||||
HOST_HALF>::GetSpecialValues();
|
HOST_HALF>::GetSpecialValues();
|
||||||
|
|||||||
Reference in New Issue
Block a user