mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
c11_atomics: unify host half representation and conversion with wrapper class (#2503)
Introduce `HostHalf` wrapper class to eliminate explicit `cl_half_from_float` and `cl_half_to_float` conversions throughout the test code. The wrapper provides semantic value constructors/operators and automatic conversions, simplifying half-precision arithmetic operations. Key improvements: - `HostHalf` class with operator overloading for arithmetic and comparisons - Type traits `is_host_atomic_fp_v` and `is_host_fp_v` for generic FP handling - Unified floating-point atomic operations (add/sub/min/max/exchange) - Removed 300+ lines of half-specific conditional branches - Consistent calculation for all FP types
This commit is contained in:
@@ -194,9 +194,9 @@ template<> cl_int AtomicTypeExtendedInfo<cl_int>::MinValue() {return CL_INT_MIN;
|
|||||||
template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;}
|
template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;}
|
||||||
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;}
|
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;}
|
||||||
template <> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() { return 0; }
|
template <> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() { return 0; }
|
||||||
template <> cl_half AtomicTypeExtendedInfo<cl_half>::MinValue()
|
template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MinValue()
|
||||||
{
|
{
|
||||||
return cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode);
|
return -CL_HALF_MAX;
|
||||||
}
|
}
|
||||||
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue()
|
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue()
|
||||||
{
|
{
|
||||||
@@ -217,9 +217,9 @@ template <> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue()
|
|||||||
}
|
}
|
||||||
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;}
|
template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;}
|
||||||
template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
|
template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
|
||||||
template <> cl_half AtomicTypeExtendedInfo<cl_half>::MaxValue()
|
template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MaxValue()
|
||||||
{
|
{
|
||||||
return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode);
|
return CL_HALF_MAX;
|
||||||
}
|
}
|
||||||
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue()
|
template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue()
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -183,7 +183,8 @@ public:
|
|||||||
const std::vector<HostAtomicType> &testValues,
|
const std::vector<HostAtomicType> &testValues,
|
||||||
cl_uint whichDestValue)
|
cl_uint whichDestValue)
|
||||||
{
|
{
|
||||||
return expected != testValues[whichDestValue];
|
return expected
|
||||||
|
!= static_cast<HostDataType>(testValues[whichDestValue]);
|
||||||
}
|
}
|
||||||
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||||
MTdata d)
|
MTdata d)
|
||||||
@@ -911,12 +912,9 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
|
|||||||
+ ss.str() + "] = {\n";
|
+ ss.str() + "] = {\n";
|
||||||
ss.str("");
|
ss.str("");
|
||||||
|
|
||||||
if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
== TYPE_ATOMIC_FLOAT)
|
ss << std::hexfloat
|
||||||
ss << std::setprecision(10) << _startValue;
|
<< _startValue; // use hex format for accurate representation
|
||||||
else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
|
|
||||||
== TYPE_ATOMIC_HALF)
|
|
||||||
ss << cl_half_to_float(static_cast<cl_half>(_startValue));
|
|
||||||
else
|
else
|
||||||
ss << _startValue;
|
ss << _startValue;
|
||||||
|
|
||||||
@@ -1305,7 +1303,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
|
|||||||
numDestItems = NumResults(threadCount, deviceID);
|
numDestItems = NumResults(threadCount, deviceID);
|
||||||
|
|
||||||
destItems.resize(numDestItems);
|
destItems.resize(numDestItems);
|
||||||
for (cl_uint i = 0; i < numDestItems; i++) destItems[i] = _startValue;
|
for (cl_uint i = 0; i < numDestItems; i++)
|
||||||
|
destItems[i] = static_cast<HostAtomicType>(_startValue);
|
||||||
|
|
||||||
// Create main buffer with atomic variables (array size dependent on
|
// Create main buffer with atomic variables (array size dependent on
|
||||||
// particular test)
|
// particular test)
|
||||||
@@ -1483,7 +1482,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
|
|||||||
std::stringstream logLine;
|
std::stringstream logLine;
|
||||||
logLine << "ERROR: Result " << i
|
logLine << "ERROR: Result " << i
|
||||||
<< " from kernel does not validate! (should be " << expected
|
<< " from kernel does not validate! (should be " << expected
|
||||||
<< ", was " << destItems[i] << ")\n";
|
<< ", was " << static_cast<HostDataType>(destItems[i])
|
||||||
|
<< ")\n";
|
||||||
log_error("%s", logLine.str().c_str());
|
log_error("%s", logLine.str().c_str());
|
||||||
for (i = 0; i < threadCount; i++)
|
for (i = 0; i < threadCount; i++)
|
||||||
{
|
{
|
||||||
@@ -1550,7 +1550,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
|
|||||||
// clEnqueueNDRangeKernel
|
// clEnqueueNDRangeKernel
|
||||||
{
|
{
|
||||||
/* Re-write the starting value */
|
/* Re-write the starting value */
|
||||||
for (size_t i = 0; i < numDestItems; i++) destItems[i] = _startValue;
|
for (size_t i = 0; i < numDestItems; i++)
|
||||||
|
destItems[i] = static_cast<HostAtomicType>(_startValue);
|
||||||
refValues[0] = 0;
|
refValues[0] = 0;
|
||||||
if (deviceThreadCount > 0)
|
if (deviceThreadCount > 0)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -24,6 +24,8 @@
|
|||||||
#include "Windows.h"
|
#include "Windows.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
extern cl_half_rounding_mode gHalfRoundingMode;
|
||||||
|
|
||||||
//flag for test verification (good test should discover non-atomic functions and fail)
|
//flag for test verification (good test should discover non-atomic functions and fail)
|
||||||
//#define NON_ATOMIC_FUNCTIONS
|
//#define NON_ATOMIC_FUNCTIONS
|
||||||
|
|
||||||
@@ -37,6 +39,93 @@ enum TExplicitMemoryOrderType
|
|||||||
MEMORY_ORDER_SEQ_CST
|
MEMORY_ORDER_SEQ_CST
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Wrapper class for half-precision
|
||||||
|
class HostHalf {
|
||||||
|
public:
|
||||||
|
// Convert from semantic values
|
||||||
|
HostHalf(cl_uint value = 0)
|
||||||
|
: value(
|
||||||
|
cl_half_from_float(static_cast<float>(value), gHalfRoundingMode))
|
||||||
|
{}
|
||||||
|
HostHalf(int value): HostHalf(static_cast<cl_uint>(value)) {}
|
||||||
|
HostHalf(float value): value(cl_half_from_float(value, gHalfRoundingMode))
|
||||||
|
{}
|
||||||
|
HostHalf(double value): HostHalf(static_cast<float>(value)) {}
|
||||||
|
|
||||||
|
// Convert to semantic values
|
||||||
|
operator cl_uint() const
|
||||||
|
{
|
||||||
|
return static_cast<cl_uint>(cl_half_to_float(value));
|
||||||
|
}
|
||||||
|
operator float() const { return cl_half_to_float(value); }
|
||||||
|
operator double() const
|
||||||
|
{
|
||||||
|
return static_cast<double>(cl_half_to_float(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Construct from bit representation
|
||||||
|
HostHalf(cl_half value): value(value) {}
|
||||||
|
|
||||||
|
// Get the underlying bit representation
|
||||||
|
operator cl_half() const { return value; }
|
||||||
|
|
||||||
|
HostHalf operator-() const
|
||||||
|
{
|
||||||
|
return HostHalf(
|
||||||
|
cl_half_from_float(-cl_half_to_float(value), gHalfRoundingMode));
|
||||||
|
}
|
||||||
|
|
||||||
|
#define GENERIC_OP(RetType, op) \
|
||||||
|
RetType operator op(const HostHalf &other) const \
|
||||||
|
{ \
|
||||||
|
return RetType(cl_half_to_float(value) \
|
||||||
|
op cl_half_to_float(other.value)); \
|
||||||
|
}
|
||||||
|
|
||||||
|
GENERIC_OP(bool, ==)
|
||||||
|
GENERIC_OP(bool, !=)
|
||||||
|
GENERIC_OP(bool, <)
|
||||||
|
GENERIC_OP(bool, <=)
|
||||||
|
GENERIC_OP(bool, >)
|
||||||
|
GENERIC_OP(bool, >=)
|
||||||
|
GENERIC_OP(HostHalf, +)
|
||||||
|
GENERIC_OP(HostHalf, -)
|
||||||
|
GENERIC_OP(HostHalf, *)
|
||||||
|
GENERIC_OP(HostHalf, /)
|
||||||
|
#undef GENERIC_OP
|
||||||
|
|
||||||
|
#define INPLACE_OP(op) \
|
||||||
|
HostHalf &operator op##=(const HostHalf &other) \
|
||||||
|
{ \
|
||||||
|
value = cl_half_from_float(cl_half_to_float(value) \
|
||||||
|
op cl_half_to_float(other.value), \
|
||||||
|
gHalfRoundingMode); \
|
||||||
|
return *this; \
|
||||||
|
}
|
||||||
|
INPLACE_OP(+)
|
||||||
|
INPLACE_OP(-)
|
||||||
|
INPLACE_OP(*)
|
||||||
|
INPLACE_OP(/)
|
||||||
|
#undef INPLACE_OP
|
||||||
|
|
||||||
|
friend std::ostream &operator<<(std::ostream &os, const HostHalf &hh)
|
||||||
|
{
|
||||||
|
float f = cl_half_to_float(hh.value);
|
||||||
|
os << f;
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
cl_half value;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace std {
|
||||||
|
inline HostHalf abs(const HostHalf &value)
|
||||||
|
{
|
||||||
|
return value < HostHalf(0) ? -value : value;
|
||||||
|
}
|
||||||
|
} // namespace std
|
||||||
|
|
||||||
// host atomic types (applicable for atomic functions supported on host OS)
|
// host atomic types (applicable for atomic functions supported on host OS)
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#define HOST_ATOMIC_INT unsigned long
|
#define HOST_ATOMIC_INT unsigned long
|
||||||
@@ -73,7 +162,7 @@ enum TExplicitMemoryOrderType
|
|||||||
#define HOST_UINT cl_uint
|
#define HOST_UINT cl_uint
|
||||||
#define HOST_LONG cl_long
|
#define HOST_LONG cl_long
|
||||||
#define HOST_ULONG cl_ulong
|
#define HOST_ULONG cl_ulong
|
||||||
#define HOST_HALF cl_half
|
#define HOST_HALF HostHalf
|
||||||
#define HOST_FLOAT cl_float
|
#define HOST_FLOAT cl_float
|
||||||
#define HOST_DOUBLE cl_double
|
#define HOST_DOUBLE cl_double
|
||||||
|
|
||||||
@@ -91,6 +180,18 @@ enum TExplicitMemoryOrderType
|
|||||||
|
|
||||||
extern cl_half_rounding_mode gHalfRoundingMode;
|
extern cl_half_rounding_mode gHalfRoundingMode;
|
||||||
|
|
||||||
|
template <typename HostAtomicType>
|
||||||
|
constexpr bool is_host_atomic_fp_v =
|
||||||
|
std::disjunction_v<std::is_same<HostAtomicType, HOST_ATOMIC_HALF>,
|
||||||
|
std::is_same<HostAtomicType, HOST_ATOMIC_FLOAT>,
|
||||||
|
std::is_same<HostAtomicType, HOST_ATOMIC_DOUBLE>>;
|
||||||
|
|
||||||
|
template <typename HostDataType>
|
||||||
|
constexpr bool is_host_fp_v =
|
||||||
|
std::disjunction_v<std::is_same<HostDataType, HOST_HALF>,
|
||||||
|
std::is_same<HostDataType, HOST_FLOAT>,
|
||||||
|
std::is_same<HostDataType, HOST_DOUBLE>>;
|
||||||
|
|
||||||
// host atomic functions
|
// host atomic functions
|
||||||
void host_atomic_thread_fence(TExplicitMemoryOrderType order);
|
void host_atomic_thread_fence(TExplicitMemoryOrderType order);
|
||||||
|
|
||||||
@@ -98,24 +199,13 @@ template <typename AtomicType, typename CorrespondingType>
|
|||||||
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
|
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
|
||||||
TExplicitMemoryOrderType order)
|
TExplicitMemoryOrderType order)
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
|
if constexpr (is_host_atomic_fp_v<AtomicType>)
|
||||||
{
|
{
|
||||||
static std::mutex mx;
|
static std::mutex mx;
|
||||||
std::lock_guard<std::mutex> lock(mx);
|
std::lock_guard<std::mutex> lock(mx);
|
||||||
CorrespondingType old_value = *a;
|
CorrespondingType old_value = *a;
|
||||||
*a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)),
|
CorrespondingType new_value = old_value + c;
|
||||||
gHalfRoundingMode);
|
*a = static_cast<AtomicType>(new_value);
|
||||||
return old_value;
|
|
||||||
}
|
|
||||||
else if constexpr (
|
|
||||||
std::is_same_v<
|
|
||||||
AtomicType,
|
|
||||||
HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
|
|
||||||
{
|
|
||||||
static std::mutex mx;
|
|
||||||
std::lock_guard<std::mutex> lock(mx);
|
|
||||||
CorrespondingType old_value = *a;
|
|
||||||
*a += c;
|
|
||||||
return old_value;
|
return old_value;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -135,24 +225,13 @@ template <typename AtomicType, typename CorrespondingType>
|
|||||||
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
|
CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
|
||||||
TExplicitMemoryOrderType order)
|
TExplicitMemoryOrderType order)
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_atomic_fp_v<AtomicType>)
|
||||||
std::is_same_v<
|
|
||||||
AtomicType,
|
|
||||||
HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
|
|
||||||
{
|
{
|
||||||
static std::mutex mx;
|
static std::mutex mx;
|
||||||
std::lock_guard<std::mutex> lock(mx);
|
std::lock_guard<std::mutex> lock(mx);
|
||||||
CorrespondingType old_value = *a;
|
CorrespondingType old_value = *a;
|
||||||
*a -= c;
|
CorrespondingType new_value = old_value - c;
|
||||||
return old_value;
|
*a = static_cast<AtomicType>(new_value);
|
||||||
}
|
|
||||||
else if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
|
|
||||||
{
|
|
||||||
static std::mutex mx;
|
|
||||||
std::lock_guard<std::mutex> lock(mx);
|
|
||||||
CorrespondingType old_value = *a;
|
|
||||||
*a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)),
|
|
||||||
gHalfRoundingMode);
|
|
||||||
return old_value;
|
return old_value;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -173,12 +252,14 @@ CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType
|
|||||||
TExplicitMemoryOrderType order)
|
TExplicitMemoryOrderType order)
|
||||||
{
|
{
|
||||||
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
||||||
if (sizeof(CorrespondingType) == 2)
|
if constexpr (sizeof(CorrespondingType) == 2)
|
||||||
return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a), c);
|
return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a),
|
||||||
|
*reinterpret_cast<SHORT *>(&c));
|
||||||
else
|
else
|
||||||
return InterlockedExchange(reinterpret_cast<volatile LONG *>(a), c);
|
return InterlockedExchange(reinterpret_cast<volatile LONG *>(a),
|
||||||
|
*reinterpret_cast<LONG *>(&c));
|
||||||
#elif defined(__GNUC__)
|
#elif defined(__GNUC__)
|
||||||
return __sync_lock_test_and_set(a, c);
|
return __sync_lock_test_and_set(a, *reinterpret_cast<AtomicType *>(&c));
|
||||||
#else
|
#else
|
||||||
log_info("Host function not implemented: atomic_exchange\n");
|
log_info("Host function not implemented: atomic_exchange\n");
|
||||||
return 0;
|
return 0;
|
||||||
@@ -195,30 +276,14 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
|
|||||||
TExplicitMemoryOrderType order_failure)
|
TExplicitMemoryOrderType order_failure)
|
||||||
{
|
{
|
||||||
CorrespondingType tmp;
|
CorrespondingType tmp;
|
||||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
|
if constexpr (is_host_atomic_fp_v<AtomicType>)
|
||||||
{
|
{
|
||||||
static std::mutex mtx;
|
static std::mutex mtx;
|
||||||
std::lock_guard<std::mutex> lock(mtx);
|
std::lock_guard<std::mutex> lock(mtx);
|
||||||
tmp = *reinterpret_cast<volatile cl_half *>(a);
|
tmp = static_cast<CorrespondingType>(*a);
|
||||||
|
|
||||||
if (cl_half_to_float(tmp) == cl_half_to_float(*expected))
|
|
||||||
{
|
|
||||||
*reinterpret_cast<volatile cl_half *>(a) = desired;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
*expected = tmp;
|
|
||||||
}
|
|
||||||
else if constexpr (
|
|
||||||
std::is_same_v<
|
|
||||||
AtomicType,
|
|
||||||
HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
|
|
||||||
{
|
|
||||||
static std::mutex mtx;
|
|
||||||
std::lock_guard<std::mutex> lock(mtx);
|
|
||||||
tmp = *reinterpret_cast<volatile float *>(a);
|
|
||||||
if (tmp == *expected)
|
if (tmp == *expected)
|
||||||
{
|
{
|
||||||
*a = desired;
|
*a = static_cast<AtomicType>(desired);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
*expected = tmp;
|
*expected = tmp;
|
||||||
@@ -244,8 +309,8 @@ CorrespondingType host_atomic_load(volatile AtomicType *a,
|
|||||||
TExplicitMemoryOrderType order)
|
TExplicitMemoryOrderType order)
|
||||||
{
|
{
|
||||||
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
||||||
if (sizeof(CorrespondingType) == 2)
|
if constexpr (sizeof(CorrespondingType) == 2)
|
||||||
auto prev = InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0);
|
return InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0);
|
||||||
else
|
else
|
||||||
return InterlockedExchangeAdd(reinterpret_cast<volatile LONG *>(a), 0);
|
return InterlockedExchangeAdd(reinterpret_cast<volatile LONG *>(a), 0);
|
||||||
#elif defined(__GNUC__)
|
#elif defined(__GNUC__)
|
||||||
|
|||||||
@@ -94,13 +94,7 @@ public:
|
|||||||
HostDataType *startRefValues,
|
HostDataType *startRefValues,
|
||||||
cl_uint whichDestValue)
|
cl_uint whichDestValue)
|
||||||
{
|
{
|
||||||
if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
|
expected = static_cast<HostDataType>(whichDestValue);
|
||||||
._type
|
|
||||||
!= TYPE_ATOMIC_HALF)
|
|
||||||
expected = (HostDataType)whichDestValue;
|
|
||||||
else
|
|
||||||
expected = cl_half_from_float(static_cast<float>(whichDestValue),
|
|
||||||
gHalfRoundingMode);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -401,13 +395,7 @@ public:
|
|||||||
HostDataType *startRefValues,
|
HostDataType *startRefValues,
|
||||||
cl_uint whichDestValue)
|
cl_uint whichDestValue)
|
||||||
{
|
{
|
||||||
if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
|
expected = static_cast<HostDataType>(whichDestValue);
|
||||||
._type
|
|
||||||
!= TYPE_ATOMIC_HALF)
|
|
||||||
expected = (HostDataType)whichDestValue;
|
|
||||||
else
|
|
||||||
expected = cl_half_from_float(static_cast<float>(whichDestValue),
|
|
||||||
gHalfRoundingMode);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
|
virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
|
||||||
@@ -417,25 +405,11 @@ public:
|
|||||||
correct = true;
|
correct = true;
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<HostDataType, cl_half>)
|
if (refValues[i] != (HostDataType)i)
|
||||||
{
|
{
|
||||||
HostDataType test = cl_half_from_float(static_cast<float>(i),
|
log_error("Invalid value for thread %u\n", (cl_uint)i);
|
||||||
gHalfRoundingMode);
|
correct = false;
|
||||||
if (refValues[i] != test)
|
return true;
|
||||||
{
|
|
||||||
log_error("Invalid value for thread %u\n", (cl_uint)i);
|
|
||||||
correct = false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (refValues[i] != (HostDataType)i)
|
|
||||||
{
|
|
||||||
log_error("Invalid value for thread %u\n", (cl_uint)i);
|
|
||||||
correct = false;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@@ -553,11 +527,7 @@ public:
|
|||||||
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
||||||
useSVM)
|
useSVM)
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
StartValue(1234);
|
||||||
StartValue(cl_half_from_float(static_cast<float>(1234),
|
|
||||||
gHalfRoundingMode));
|
|
||||||
else
|
|
||||||
StartValue(123456);
|
|
||||||
}
|
}
|
||||||
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
|
||||||
cl_command_queue queue)
|
cl_command_queue queue)
|
||||||
@@ -619,32 +589,19 @@ public:
|
|||||||
/* Any repeated value is treated as an error */
|
/* Any repeated value is treated as an error */
|
||||||
std::vector<bool> tidFound(threadCount);
|
std::vector<bool> tidFound(threadCount);
|
||||||
bool startValueFound = false;
|
bool startValueFound = false;
|
||||||
cl_uint startVal = StartValue();
|
cl_uint startVal = static_cast<cl_uint>(StartValue());
|
||||||
|
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
|
||||||
startVal = static_cast<cl_uint>(
|
|
||||||
cl_half_to_float(static_cast<cl_half>(StartValue())));
|
|
||||||
|
|
||||||
for (cl_uint i = 0; i <= threadCount; i++)
|
for (cl_uint i = 0; i <= threadCount; i++)
|
||||||
{
|
{
|
||||||
cl_uint value = 0;
|
cl_uint value = 0;
|
||||||
if (i == threadCount)
|
if (i == threadCount)
|
||||||
{
|
{
|
||||||
if constexpr (!std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
value = static_cast<cl_uint>(
|
||||||
value =
|
static_cast<HostDataType>(finalValues[0]));
|
||||||
(cl_uint)finalValues[0]; // additional value from atomic
|
|
||||||
// variable (last written)
|
|
||||||
else
|
|
||||||
value =
|
|
||||||
cl_half_to_float(static_cast<cl_half>(finalValues[0]));
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if constexpr (!std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
|
value = static_cast<cl_uint>(refValues[i]);
|
||||||
value = (cl_uint)refValues[i];
|
|
||||||
else
|
|
||||||
value =
|
|
||||||
cl_half_to_float(static_cast<cl_half>(refValues[i]));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (value == startVal)
|
if (value == startVal)
|
||||||
@@ -1201,85 +1158,24 @@ public:
|
|||||||
useSVM),
|
useSVM),
|
||||||
min_range(-999.0), max_range(999.0), max_error(0.0)
|
min_range(-999.0), max_range(999.0), max_error(0.0)
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
|
||||||
{
|
{
|
||||||
StartValue((HostDataType)0.0);
|
StartValue((HostDataType)0.0);
|
||||||
CBasicTestMemOrderScope<HostAtomicType,
|
CBasicTestMemOrderScope<HostAtomicType,
|
||||||
HostDataType>::OldValueCheck(false);
|
HostDataType>::OldValueCheck(false);
|
||||||
|
|
||||||
|
// Narrow down range for half to avoid overflow to infinity
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||||
|
{
|
||||||
|
min_range = -50.0;
|
||||||
|
max_range = 50.0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
template <typename Iterator> float accum_halfs(Iterator begin, Iterator end)
|
|
||||||
{
|
|
||||||
cl_half sum = 0;
|
|
||||||
for (auto it = begin; it != end; ++it)
|
|
||||||
{
|
|
||||||
sum = cl_half_from_float(cl_half_to_float(sum)
|
|
||||||
+ cl_half_to_float(*it),
|
|
||||||
gHalfRoundingMode);
|
|
||||||
}
|
|
||||||
return cl_half_to_float(sum);
|
|
||||||
}
|
|
||||||
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||||
MTdata d) override
|
MTdata d) override
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
{
|
|
||||||
if (threadCount > ref_vals.size())
|
|
||||||
{
|
|
||||||
ref_vals.resize(threadCount);
|
|
||||||
|
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
|
||||||
ref_vals[i] = cl_half_from_float(
|
|
||||||
get_random_float(min_range, max_range, d),
|
|
||||||
gHalfRoundingMode);
|
|
||||||
|
|
||||||
memcpy(startRefValues, ref_vals.data(),
|
|
||||||
sizeof(HostDataType) * ref_vals.size());
|
|
||||||
|
|
||||||
// Estimate highest possible summation error for given set.
|
|
||||||
std::vector<float> sums;
|
|
||||||
std::sort(ref_vals.begin(), ref_vals.end(),
|
|
||||||
[](cl_half a, cl_half b) {
|
|
||||||
return cl_half_to_float(a) < cl_half_to_float(b);
|
|
||||||
});
|
|
||||||
|
|
||||||
sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
|
|
||||||
sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
|
|
||||||
|
|
||||||
std::sort(ref_vals.begin(), ref_vals.end(),
|
|
||||||
[](cl_half a, cl_half b) {
|
|
||||||
return std::abs(cl_half_to_float(a))
|
|
||||||
< std::abs(cl_half_to_float(b));
|
|
||||||
});
|
|
||||||
|
|
||||||
float precise = 0.f;
|
|
||||||
for (auto elem : ref_vals) precise += cl_half_to_float(elem);
|
|
||||||
sums.push_back(precise);
|
|
||||||
|
|
||||||
sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
|
|
||||||
sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
|
|
||||||
|
|
||||||
std::sort(sums.begin(), sums.end());
|
|
||||||
max_error = std::abs(sums.front() - sums.back());
|
|
||||||
|
|
||||||
// restore unsorted order
|
|
||||||
memcpy(ref_vals.data(), startRefValues,
|
|
||||||
sizeof(HostDataType) * ref_vals.size());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
memcpy(startRefValues, ref_vals.data(),
|
|
||||||
sizeof(HostDataType) * threadCount);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if constexpr (
|
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
|
||||||
{
|
{
|
||||||
if (threadCount > ref_vals.size())
|
if (threadCount > ref_vals.size())
|
||||||
{
|
{
|
||||||
@@ -1299,11 +1195,12 @@ public:
|
|||||||
std::vector<HostDataType> sums;
|
std::vector<HostDataType> sums;
|
||||||
std::sort(ref_vals.begin(), ref_vals.end());
|
std::sort(ref_vals.begin(), ref_vals.end());
|
||||||
|
|
||||||
sums.push_back(
|
sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
|
||||||
std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
|
static_cast<HostDataType>(0.f)));
|
||||||
|
|
||||||
sums.push_back(
|
sums.push_back(std::accumulate(ref_vals.rbegin(),
|
||||||
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
|
ref_vals.rend(),
|
||||||
|
static_cast<HostDataType>(0.f)));
|
||||||
|
|
||||||
std::sort(ref_vals.begin(), ref_vals.end(),
|
std::sort(ref_vals.begin(), ref_vals.end(),
|
||||||
[](HostDataType a, HostDataType b) {
|
[](HostDataType a, HostDataType b) {
|
||||||
@@ -1318,15 +1215,25 @@ public:
|
|||||||
|
|
||||||
sums.push_back(precise);
|
sums.push_back(precise);
|
||||||
|
|
||||||
sums.push_back(
|
sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
|
||||||
std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
|
static_cast<HostDataType>(0.f)));
|
||||||
|
|
||||||
sums.push_back(
|
sums.push_back(std::accumulate(ref_vals.rbegin(),
|
||||||
std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
|
ref_vals.rend(),
|
||||||
|
static_cast<HostDataType>(0.f)));
|
||||||
|
|
||||||
std::sort(sums.begin(), sums.end());
|
std::sort(sums.begin(), sums.end());
|
||||||
|
assert(std::all_of(sums.begin(), sums.end(),
|
||||||
|
[](const HostDataType &val) {
|
||||||
|
return std::isfinite(
|
||||||
|
static_cast<double>(val));
|
||||||
|
})
|
||||||
|
&& "Infinite summation value detected!");
|
||||||
max_error = std::abs(sums.front() - sums.back());
|
max_error = std::abs(sums.front() - sums.back());
|
||||||
|
|
||||||
|
log_info("Max allowed error for %u elements: %.10f\n",
|
||||||
|
threadCount, max_error);
|
||||||
|
|
||||||
// restore unsorted order
|
// restore unsorted order
|
||||||
memcpy(ref_vals.data(), startRefValues,
|
memcpy(ref_vals.data(), startRefValues,
|
||||||
sizeof(HostDataType) * ref_vals.size());
|
sizeof(HostDataType) * ref_vals.size());
|
||||||
@@ -1345,10 +1252,7 @@ public:
|
|||||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||||
|
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
|
return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
|
||||||
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
|
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
|
||||||
@@ -1382,10 +1286,7 @@ public:
|
|||||||
volatile HostAtomicType *destMemory,
|
volatile HostAtomicType *destMemory,
|
||||||
HostDataType *oldValues) override
|
HostDataType *oldValues) override
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
|
host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
|
||||||
MemoryOrder());
|
MemoryOrder());
|
||||||
@@ -1411,23 +1312,7 @@ public:
|
|||||||
cl_uint whichDestValue) override
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
expected = StartValue();
|
expected = StartValue();
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
{
|
|
||||||
if (whichDestValue == 0)
|
|
||||||
{
|
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
|
||||||
{
|
|
||||||
expected = cl_half_from_float(
|
|
||||||
cl_half_to_float(expected)
|
|
||||||
+ cl_half_to_float(startRefValues[i]),
|
|
||||||
gHalfRoundingMode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if constexpr (
|
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
if (whichDestValue == 0)
|
if (whichDestValue == 0)
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
@@ -1446,21 +1331,13 @@ public:
|
|||||||
const std::vector<HostAtomicType> &testValues,
|
const std::vector<HostAtomicType> &testValues,
|
||||||
cl_uint whichDestValue) override
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
{
|
{
|
||||||
if (whichDestValue == 0)
|
if (whichDestValue == 0)
|
||||||
return std::abs(cl_half_to_float(expected)
|
return std::abs(
|
||||||
- cl_half_to_float(testValues[whichDestValue]))
|
static_cast<double>(expected
|
||||||
> max_error;
|
- static_cast<HostDataType>(
|
||||||
}
|
testValues[whichDestValue])))
|
||||||
else if constexpr (
|
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
|
||||||
if (whichDestValue == 0)
|
|
||||||
return std::abs((HostDataType)expected
|
|
||||||
- testValues[whichDestValue])
|
|
||||||
> max_error;
|
> max_error;
|
||||||
}
|
}
|
||||||
return CBasicTestMemOrderScope<
|
return CBasicTestMemOrderScope<
|
||||||
@@ -1471,10 +1348,7 @@ public:
|
|||||||
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||||
HostAtomicType *finalValues) override
|
HostAtomicType *finalValues) override
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
correct = true;
|
correct = true;
|
||||||
for (cl_uint i = 1; i < threadCount; i++)
|
for (cl_uint i = 1; i < threadCount; i++)
|
||||||
@@ -1534,10 +1408,7 @@ public:
|
|||||||
}
|
}
|
||||||
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
return threadCount;
|
return threadCount;
|
||||||
}
|
}
|
||||||
@@ -1657,8 +1528,8 @@ template <> double kahan_sub<double>(const std::vector<double> &nums)
|
|||||||
double compensation = 0.0;
|
double compensation = 0.0;
|
||||||
for (double num : nums)
|
for (double num : nums)
|
||||||
{
|
{
|
||||||
double y = num - compensation;
|
double y = -num - compensation;
|
||||||
double t = sum - y;
|
double t = sum + y;
|
||||||
compensation = (t - sum) - y;
|
compensation = (t - sum) - y;
|
||||||
sum = t;
|
sum = t;
|
||||||
}
|
}
|
||||||
@@ -1685,14 +1556,18 @@ public:
|
|||||||
useSVM),
|
useSVM),
|
||||||
min_range(-999.0), max_range(999.0), max_error(0.0)
|
min_range(-999.0), max_range(999.0), max_error(0.0)
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
|
|
||||||
{
|
{
|
||||||
StartValue(0);
|
StartValue(0);
|
||||||
CBasicTestMemOrderScope<HostAtomicType,
|
CBasicTestMemOrderScope<HostAtomicType,
|
||||||
HostDataType>::OldValueCheck(false);
|
HostDataType>::OldValueCheck(false);
|
||||||
|
|
||||||
|
// Narrow down range for half to avoid overflow to infinity
|
||||||
|
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
||||||
|
{
|
||||||
|
min_range = -50.0;
|
||||||
|
max_range = 50.0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
template <typename Iterator>
|
template <typename Iterator>
|
||||||
@@ -1702,25 +1577,10 @@ public:
|
|||||||
for (auto it = begin; it != end; ++it) res = res - *it;
|
for (auto it = begin; it != end; ++it) res = res - *it;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
template <typename Iterator>
|
|
||||||
float subtract_halfs(Iterator begin, Iterator end)
|
|
||||||
{
|
|
||||||
cl_half res = 0;
|
|
||||||
for (auto it = begin; it != end; ++it)
|
|
||||||
{
|
|
||||||
res = cl_half_from_float(cl_half_to_float(res)
|
|
||||||
- cl_half_to_float(*it),
|
|
||||||
gHalfRoundingMode);
|
|
||||||
}
|
|
||||||
return cl_half_to_float(res);
|
|
||||||
}
|
|
||||||
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||||
MTdata d) override
|
MTdata d) override
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
if (threadCount > ref_vals.size())
|
if (threadCount > ref_vals.size())
|
||||||
{
|
{
|
||||||
@@ -1736,105 +1596,36 @@ public:
|
|||||||
// Estimate highest possible subtraction error for given set.
|
// Estimate highest possible subtraction error for given set.
|
||||||
std::vector<HostDataType> sums;
|
std::vector<HostDataType> sums;
|
||||||
std::sort(ref_vals.begin(), ref_vals.end());
|
std::sort(ref_vals.begin(), ref_vals.end());
|
||||||
|
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
|
||||||
|
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
|
||||||
|
|
||||||
|
std::sort(ref_vals.begin(), ref_vals.end(),
|
||||||
|
[](HostDataType a, HostDataType b) {
|
||||||
|
return std::abs(a) < std::abs(b);
|
||||||
|
});
|
||||||
|
|
||||||
|
double precise = 0.0;
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
|
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
|
||||||
{
|
precise = kahan_sub(ref_vals);
|
||||||
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
|
|
||||||
|
|
||||||
sums.push_back(
|
|
||||||
subtract(ref_vals.rbegin(), ref_vals.rend()));
|
|
||||||
|
|
||||||
std::sort(ref_vals.begin(), ref_vals.end(),
|
|
||||||
[](double a, double b) {
|
|
||||||
return std::abs(a) < std::abs(b);
|
|
||||||
});
|
|
||||||
|
|
||||||
double precise = kahan_sub(ref_vals);
|
|
||||||
sums.push_back(precise);
|
|
||||||
|
|
||||||
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
|
|
||||||
|
|
||||||
sums.push_back(
|
|
||||||
subtract(ref_vals.rbegin(), ref_vals.rend()));
|
|
||||||
|
|
||||||
std::sort(sums.begin(), sums.end());
|
|
||||||
max_error = std::abs((double)sums.front() - sums.back());
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
|
||||||
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
|
|
||||||
sums.push_back(
|
|
||||||
subtract(ref_vals.rbegin(), ref_vals.rend()));
|
|
||||||
|
|
||||||
std::sort(ref_vals.begin(), ref_vals.end(),
|
|
||||||
[](float a, float b) {
|
|
||||||
return std::abs(a) < std::abs(b);
|
|
||||||
});
|
|
||||||
|
|
||||||
double precise = 0.0;
|
|
||||||
for (auto elem : ref_vals) precise += double(elem);
|
for (auto elem : ref_vals) precise += double(elem);
|
||||||
sums.push_back(precise);
|
|
||||||
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
|
|
||||||
sums.push_back(
|
|
||||||
subtract(ref_vals.rbegin(), ref_vals.rend()));
|
|
||||||
|
|
||||||
std::sort(sums.begin(), sums.end());
|
|
||||||
max_error =
|
|
||||||
std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
|
|
||||||
}
|
|
||||||
|
|
||||||
// restore unsorted order
|
|
||||||
memcpy(ref_vals.data(), startRefValues,
|
|
||||||
sizeof(HostDataType) * ref_vals.size());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
memcpy(startRefValues, ref_vals.data(),
|
|
||||||
sizeof(HostDataType) * threadCount);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
|
||||||
{
|
|
||||||
if (threadCount > ref_vals.size())
|
|
||||||
{
|
|
||||||
ref_vals.resize(threadCount);
|
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
|
||||||
ref_vals[i] = cl_half_from_float(
|
|
||||||
get_random_float(min_range, max_range, d),
|
|
||||||
gHalfRoundingMode);
|
|
||||||
|
|
||||||
memcpy(startRefValues, ref_vals.data(),
|
|
||||||
sizeof(HostDataType) * ref_vals.size());
|
|
||||||
|
|
||||||
// Estimate highest possible summation error for given set.
|
|
||||||
std::vector<float> sums;
|
|
||||||
std::sort(ref_vals.begin(), ref_vals.end(),
|
|
||||||
[](cl_half a, cl_half b) {
|
|
||||||
return cl_half_to_float(a) < cl_half_to_float(b);
|
|
||||||
});
|
|
||||||
|
|
||||||
sums.push_back(
|
|
||||||
subtract_halfs(ref_vals.begin(), ref_vals.end()));
|
|
||||||
sums.push_back(
|
|
||||||
subtract_halfs(ref_vals.rbegin(), ref_vals.rend()));
|
|
||||||
|
|
||||||
std::sort(ref_vals.begin(), ref_vals.end(),
|
|
||||||
[](cl_half a, cl_half b) {
|
|
||||||
return std::abs(cl_half_to_float(a))
|
|
||||||
< std::abs(cl_half_to_float(b));
|
|
||||||
});
|
|
||||||
|
|
||||||
float precise = 0.f;
|
|
||||||
for (auto elem : ref_vals) precise -= cl_half_to_float(elem);
|
|
||||||
sums.push_back(precise);
|
sums.push_back(precise);
|
||||||
sums.push_back(
|
|
||||||
subtract_halfs(ref_vals.begin(), ref_vals.end()));
|
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
|
||||||
sums.push_back(
|
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
|
||||||
subtract_halfs(ref_vals.rbegin(), ref_vals.rend()));
|
|
||||||
|
|
||||||
std::sort(sums.begin(), sums.end());
|
std::sort(sums.begin(), sums.end());
|
||||||
|
assert(std::all_of(sums.begin(), sums.end(),
|
||||||
|
[](const HostDataType &val) {
|
||||||
|
return std::isfinite(
|
||||||
|
static_cast<double>(val));
|
||||||
|
})
|
||||||
|
&& "Infinite subtraction value detected!");
|
||||||
max_error = std::abs(sums.front() - sums.back());
|
max_error = std::abs(sums.front() - sums.back());
|
||||||
|
|
||||||
|
log_info("Max allowed error for %u elements: %.10f\n",
|
||||||
|
threadCount, max_error);
|
||||||
|
|
||||||
// restore unsorted order
|
// restore unsorted order
|
||||||
memcpy(ref_vals.data(), startRefValues,
|
memcpy(ref_vals.data(), startRefValues,
|
||||||
sizeof(HostDataType) * ref_vals.size());
|
sizeof(HostDataType) * ref_vals.size());
|
||||||
@@ -1853,10 +1644,7 @@ public:
|
|||||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||||
|
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
return " atomic_fetch_sub" + postfix + "(&destMemory[0], ("
|
return " atomic_fetch_sub" + postfix + "(&destMemory[0], ("
|
||||||
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
|
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
|
||||||
@@ -1878,10 +1666,7 @@ public:
|
|||||||
volatile HostAtomicType *destMemory,
|
volatile HostAtomicType *destMemory,
|
||||||
HostDataType *oldValues) override
|
HostDataType *oldValues) override
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
|
host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
|
||||||
MemoryOrder());
|
MemoryOrder());
|
||||||
@@ -1903,29 +1688,12 @@ public:
|
|||||||
cl_uint whichDestValue) override
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
expected = StartValue();
|
expected = StartValue();
|
||||||
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
if constexpr (
|
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
if (whichDestValue == 0)
|
if (whichDestValue == 0)
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
expected -= startRefValues[i];
|
expected -= startRefValues[i];
|
||||||
}
|
}
|
||||||
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
|
||||||
{
|
|
||||||
if (whichDestValue == 0)
|
|
||||||
{
|
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
|
||||||
{
|
|
||||||
expected = cl_half_from_float(
|
|
||||||
cl_half_to_float(expected)
|
|
||||||
- cl_half_to_float(startRefValues[i]),
|
|
||||||
gHalfRoundingMode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
@@ -1938,21 +1706,13 @@ public:
|
|||||||
const std::vector<HostAtomicType> &testValues,
|
const std::vector<HostAtomicType> &testValues,
|
||||||
cl_uint whichDestValue) override
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
if (whichDestValue == 0)
|
if (whichDestValue == 0)
|
||||||
return std::abs((HOST_ATOMIC_FLOAT)expected
|
return std::abs(
|
||||||
- testValues[whichDestValue])
|
static_cast<double>(expected
|
||||||
> max_error;
|
- static_cast<HostDataType>(
|
||||||
}
|
testValues[whichDestValue])))
|
||||||
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
|
||||||
{
|
|
||||||
if (whichDestValue == 0)
|
|
||||||
return std::abs(cl_half_to_float(expected)
|
|
||||||
- cl_half_to_float(testValues[whichDestValue]))
|
|
||||||
> max_error;
|
> max_error;
|
||||||
}
|
}
|
||||||
return CBasicTestMemOrderScope<
|
return CBasicTestMemOrderScope<
|
||||||
@@ -2023,10 +1783,7 @@ public:
|
|||||||
}
|
}
|
||||||
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
return threadCount;
|
return threadCount;
|
||||||
}
|
}
|
||||||
@@ -2947,10 +2704,7 @@ public:
|
|||||||
min_range(-999.0), max_range(999.0)
|
min_range(-999.0), max_range(999.0)
|
||||||
{
|
{
|
||||||
StartValue(DataType().MaxValue());
|
StartValue(DataType().MaxValue());
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
CBasicTestMemOrderScope<HostAtomicType,
|
CBasicTestMemOrderScope<HostAtomicType,
|
||||||
HostDataType>::OldValueCheck(false);
|
HostDataType>::OldValueCheck(false);
|
||||||
@@ -2960,10 +2714,7 @@ public:
|
|||||||
{
|
{
|
||||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
|
||||||
{
|
{
|
||||||
return " atomic_fetch_min" + postfix
|
return " atomic_fetch_min" + postfix
|
||||||
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
|
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
|
||||||
@@ -2982,10 +2733,7 @@ public:
|
|||||||
volatile HostAtomicType *destMemory,
|
volatile HostAtomicType *destMemory,
|
||||||
HostDataType *oldValues) override
|
HostDataType *oldValues) override
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
|
||||||
{
|
{
|
||||||
host_atomic_fetch_min(&destMemory[0], oldValues[tid],
|
host_atomic_fetch_min(&destMemory[0], oldValues[tid],
|
||||||
MemoryOrder());
|
MemoryOrder());
|
||||||
@@ -3040,19 +2788,7 @@ public:
|
|||||||
cl_uint whichDestValue) override
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
expected = StartValue();
|
expected = StartValue();
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
{
|
|
||||||
if (whichDestValue == 0)
|
|
||||||
{
|
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
|
||||||
{
|
|
||||||
if (cl_half_to_float(startRefValues[i])
|
|
||||||
< cl_half_to_float(expected))
|
|
||||||
expected = startRefValues[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
if (whichDestValue == 0)
|
if (whichDestValue == 0)
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
@@ -3072,9 +2808,7 @@ public:
|
|||||||
const std::vector<HostAtomicType> &testValues,
|
const std::vector<HostAtomicType> &testValues,
|
||||||
cl_uint whichDestValue) override
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
if (std::is_same_v<
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
|
||||||
{
|
{
|
||||||
if (whichDestValue == 0)
|
if (whichDestValue == 0)
|
||||||
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
|
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
|
||||||
@@ -3089,17 +2823,16 @@ public:
|
|||||||
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||||
HostAtomicType *finalValues) override
|
HostAtomicType *finalValues) override
|
||||||
{
|
{
|
||||||
if (std::is_same_v<
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same<HostDataType, HOST_FLOAT>::value)
|
|
||||||
{
|
{
|
||||||
correct = true;
|
correct = true;
|
||||||
for (cl_uint i = 1; i < threadCount; i++)
|
for (cl_uint i = 1; i < threadCount; i++)
|
||||||
{
|
{
|
||||||
if (refValues[i] != StartValue())
|
if (refValues[i] != StartValue())
|
||||||
{
|
{
|
||||||
log_error("Thread %d found %d mismatch(es)\n", i,
|
log_error(
|
||||||
(cl_uint)refValues[i]);
|
"Thread %d found %lf mismatch(es), start value=%lf\n",
|
||||||
|
i, (double)refValues[i], (double)StartValue());
|
||||||
correct = false;
|
correct = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3156,10 +2889,7 @@ public:
|
|||||||
}
|
}
|
||||||
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
|
||||||
{
|
{
|
||||||
return threadCount;
|
return threadCount;
|
||||||
}
|
}
|
||||||
@@ -3287,21 +3017,11 @@ public:
|
|||||||
useSVM),
|
useSVM),
|
||||||
min_range(-999.0), max_range(999.0)
|
min_range(-999.0), max_range(999.0)
|
||||||
{
|
{
|
||||||
if constexpr (
|
StartValue(DataType().MinValue());
|
||||||
std::is_same_v<
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
CBasicTestMemOrderScope<HostAtomicType,
|
CBasicTestMemOrderScope<HostAtomicType,
|
||||||
HostDataType>::OldValueCheck(false);
|
HostDataType>::OldValueCheck(false);
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
|
||||||
StartValue(cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode));
|
|
||||||
else
|
|
||||||
StartValue(-DataType().MaxValue());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
StartValue(DataType().MinValue());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::string ProgramCore() override
|
std::string ProgramCore() override
|
||||||
@@ -3330,10 +3050,7 @@ public:
|
|||||||
volatile HostAtomicType *destMemory,
|
volatile HostAtomicType *destMemory,
|
||||||
HostDataType *oldValues) override
|
HostDataType *oldValues) override
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
|
||||||
{
|
{
|
||||||
host_atomic_fetch_max(&destMemory[0], oldValues[tid],
|
host_atomic_fetch_max(&destMemory[0], oldValues[tid],
|
||||||
MemoryOrder());
|
MemoryOrder());
|
||||||
@@ -3349,23 +3066,12 @@ public:
|
|||||||
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||||
MTdata d) override
|
MTdata d) override
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
{
|
{
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
{
|
{
|
||||||
startRefValues[i] = cl_half_from_float(
|
startRefValues[i] = static_cast<HostDataType>(
|
||||||
get_random_float(min_range, max_range, d),
|
get_random_float(min_range, max_range, d));
|
||||||
gHalfRoundingMode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if constexpr (
|
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
|
||||||
{
|
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
|
||||||
{
|
|
||||||
startRefValues[i] = get_random_float(min_range, max_range, d);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -3388,19 +3094,7 @@ public:
|
|||||||
cl_uint whichDestValue) override
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
expected = StartValue();
|
expected = StartValue();
|
||||||
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
{
|
|
||||||
if (whichDestValue == 0)
|
|
||||||
{
|
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
|
||||||
{
|
|
||||||
if (cl_half_to_float(startRefValues[i])
|
|
||||||
> cl_half_to_float(expected))
|
|
||||||
expected = startRefValues[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
||||||
{
|
{
|
||||||
if (whichDestValue == 0)
|
if (whichDestValue == 0)
|
||||||
for (cl_uint i = 0; i < threadCount; i++)
|
for (cl_uint i = 0; i < threadCount; i++)
|
||||||
@@ -3420,9 +3114,7 @@ public:
|
|||||||
const std::vector<HostAtomicType> &testValues,
|
const std::vector<HostAtomicType> &testValues,
|
||||||
cl_uint whichDestValue) override
|
cl_uint whichDestValue) override
|
||||||
{
|
{
|
||||||
if (std::is_same_v<
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
|
||||||
{
|
{
|
||||||
if (whichDestValue == 0)
|
if (whichDestValue == 0)
|
||||||
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
|
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
|
||||||
@@ -3437,17 +3129,16 @@ public:
|
|||||||
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||||
HostAtomicType *finalValues) override
|
HostAtomicType *finalValues) override
|
||||||
{
|
{
|
||||||
if (std::is_same_v<
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
|
||||||
{
|
{
|
||||||
correct = true;
|
correct = true;
|
||||||
for (cl_uint i = 1; i < threadCount; i++)
|
for (cl_uint i = 1; i < threadCount; i++)
|
||||||
{
|
{
|
||||||
if (refValues[i] != StartValue())
|
if (refValues[i] != StartValue())
|
||||||
{
|
{
|
||||||
log_error("Thread %d found %d mismatch(es)\n", i,
|
log_error(
|
||||||
(cl_uint)refValues[i]);
|
"Thread %d found %lf mismatch(es), start value=%lf\n",
|
||||||
|
i, (double)refValues[i], (double)StartValue());
|
||||||
correct = false;
|
correct = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3504,10 +3195,7 @@ public:
|
|||||||
}
|
}
|
||||||
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
|
||||||
{
|
{
|
||||||
if constexpr (
|
if constexpr (is_host_fp_v<HostDataType>)
|
||||||
std::is_same_v<
|
|
||||||
HostDataType,
|
|
||||||
HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
|
|
||||||
{
|
{
|
||||||
return threadCount;
|
return threadCount;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user