mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Added support for cl_ext_float_atomics in CBasicTestFetchAdd with atomic_float (#2345)
Related to #2142: following the work plan, this change extends CBasicTestFetchAdd with support for atomic_float.
This commit is contained in:
@@ -74,9 +74,11 @@ extern int
|
||||
gMaxDeviceThreads; // maximum number of threads executed on OCL device
|
||||
extern cl_device_atomic_capabilities gAtomicMemCap,
|
||||
gAtomicFenceCap; // atomic memory and fence capabilities for this device
|
||||
|
||||
extern cl_half_rounding_mode gHalfRoundingMode;
|
||||
extern bool gFloatAtomicsSupported;
|
||||
extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
|
||||
extern cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps;
|
||||
|
||||
extern const char *
|
||||
get_memory_order_type_name(TExplicitMemoryOrderType orderType);
|
||||
@@ -174,6 +176,13 @@ public:
|
||||
{
|
||||
return false;
|
||||
}
|
||||
virtual bool
|
||||
IsTestNotAsExpected(const HostDataType &expected,
|
||||
const std::vector<HostAtomicType> &testValues,
|
||||
cl_uint whichDestValue)
|
||||
{
|
||||
return expected != testValues[whichDestValue];
|
||||
}
|
||||
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
|
||||
MTdata d)
|
||||
{
|
||||
@@ -1449,7 +1458,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
|
||||
startRefValues.size() ? &startRefValues[0] : 0, i))
|
||||
break; // no expected value function provided
|
||||
|
||||
if (expected != destItems[i])
|
||||
if (IsTestNotAsExpected(expected, destItems, i))
|
||||
{
|
||||
std::stringstream logLine;
|
||||
logLine << "ERROR: Result " << i
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#define HOST_ATOMICS_H_
|
||||
|
||||
#include "harness/testHarness.h"
|
||||
#include <mutex>
|
||||
|
||||
#ifdef WIN32
|
||||
#include "Windows.h"
|
||||
@@ -94,6 +95,16 @@ template <typename AtomicType, typename CorrespondingType>
|
||||
CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
|
||||
TExplicitMemoryOrderType order)
|
||||
{
|
||||
if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
|
||||
{
|
||||
static std::mutex mx;
|
||||
std::lock_guard<std::mutex> lock(mx);
|
||||
CorrespondingType old_value = *a;
|
||||
*a += c;
|
||||
return old_value;
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
|
||||
return InterlockedExchangeAdd(a, c);
|
||||
#elif defined(__GNUC__)
|
||||
@@ -102,6 +113,7 @@ CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingTyp
|
||||
log_info("Host function not implemented: atomic_fetch_add\n");
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template <typename AtomicType, typename CorrespondingType>
|
||||
|
||||
@@ -34,6 +34,7 @@ cl_device_atomic_capabilities gAtomicMemCap,
|
||||
cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
|
||||
bool gFloatAtomicsSupported = false;
|
||||
cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
|
||||
cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
|
||||
|
||||
test_status InitCL(cl_device_id device) {
|
||||
auto version = get_device_cl_version(device);
|
||||
@@ -132,6 +133,11 @@ test_status InitCL(cl_device_id device) {
|
||||
if (is_extension_available(device, "cl_ext_float_atomics"))
|
||||
{
|
||||
gFloatAtomicsSupported = true;
|
||||
|
||||
cl_int error = clGetDeviceInfo(
|
||||
device, CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
|
||||
sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
|
||||
test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
|
||||
if (is_extension_available(device, "cl_khr_fp16"))
|
||||
{
|
||||
cl_int error = clGetDeviceInfo(
|
||||
|
||||
@@ -16,10 +16,13 @@
|
||||
#include "harness/testHarness.h"
|
||||
#include "harness/kernelHelpers.h"
|
||||
#include "harness/typeWrappers.h"
|
||||
#include "harness/conversions.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "host_atomics.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
@@ -1163,39 +1166,134 @@ REGISTER_TEST(svm_atomic_compare_exchange_weak)
|
||||
template <typename HostAtomicType, typename HostDataType>
|
||||
class CBasicTestFetchAdd
|
||||
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
||||
|
||||
double min_range;
|
||||
double max_range;
|
||||
double max_error_fp32;
|
||||
std::vector<HostDataType> ref_vals;
|
||||
|
||||
public:
|
||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
|
||||
using CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::MemoryOrderScopeStr;
|
||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
|
||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
|
||||
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
|
||||
CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
|
||||
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
||||
useSVM)
|
||||
{}
|
||||
virtual std::string ProgramCore()
|
||||
useSVM),
|
||||
min_range(-999.0), max_range(999.0), max_error_fp32(0.0)
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
{
|
||||
StartValue(0.f);
|
||||
CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::OldValueCheck(false);
|
||||
}
|
||||
}
|
||||
// Provides the per-thread operands used by the float fetch_add test and
// estimates how far a float summation of those operands may drift.
//
// threadCount     - number of operands the caller wants.
// startRefValues  - output buffer; receives `threadCount` operands.
// d               - random generator state forwarded to get_random_float.
//
// Returns true when operands were written (float instantiation), false for
// every other data type, where no reference values are generated.
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                  MTdata d) override
{
    if constexpr (!std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
    {
        return false;
    }
    else
    {
        if (!(threadCount > ref_vals.size()))
        {
            // The cached set is already large enough: hand out its leading
            // part. max_error_fp32 keeps the estimate made for the cached
            // set and is not recomputed here.
            memcpy(startRefValues, ref_vals.data(),
                   sizeof(HostDataType) * threadCount);
            return true;
        }

        // Grow the cache and draw a fresh operand for every slot.
        ref_vals.resize(threadCount);
        std::generate(ref_vals.begin(), ref_vals.end(), [&]() {
            return get_random_float(min_range, max_range, d);
        });

        // Publish the operands in their generated (unsorted) order before
        // the cache is reordered below.
        memcpy(startRefValues, ref_vals.data(),
               sizeof(HostDataType) * ref_vals.size());

        // Estimate highest possible summation error for given set: add the
        // same operands up in several orders and take the spread of the
        // resulting sums.
        std::vector<HostDataType> trial_sums;

        // Ascending by value, summed front-to-back and back-to-front.
        std::sort(ref_vals.begin(), ref_vals.end());
        trial_sums.push_back(
            std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
        trial_sums.push_back(
            std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));

        // Ascending by magnitude.
        std::sort(ref_vals.begin(), ref_vals.end(),
                  [](float lhs, float rhs) {
                      return std::abs(lhs) < std::abs(rhs);
                  });

        // Sum accumulated in double, then stored at float precision.
        const double wide_sum =
            std::accumulate(ref_vals.begin(), ref_vals.end(), 0.0);
        trial_sums.push_back(wide_sum);

        trial_sums.push_back(
            std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
        trial_sums.push_back(
            std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));

        // Spread between the smallest and the largest trial sum.
        const auto extremes =
            std::minmax_element(trial_sums.begin(), trial_sums.end());
        max_error_fp32 = std::abs((HOST_ATOMIC_FLOAT)(*extremes.first)
                                  - *extremes.second);

        // restore unsorted order
        memcpy(ref_vals.data(), startRefValues,
               sizeof(HostDataType) * ref_vals.size());

        return true;
    }
}
|
||||
std::string ProgramCore() override
|
||||
{
|
||||
std::string memoryOrderScope = MemoryOrderScopeStr();
|
||||
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
|
||||
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
{
|
||||
return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
|
||||
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
|
||||
+ memoryOrderScope + ");\n"
|
||||
+ " oldValues[tid] = atomic_fetch_add" + postfix
|
||||
+ "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
|
||||
+ ")0" + memoryOrderScope + ");\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
return " oldValues[tid] = atomic_fetch_add" + postfix
|
||||
+ "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
|
||||
+ ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add"
|
||||
+ postfix + "(&destMemory[0], ("
|
||||
+ DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
|
||||
+ DataType().AddSubOperandTypeName() + ")tid + 3"
|
||||
+ memoryOrderScope
|
||||
+ ");\n"
|
||||
" atomic_fetch_add"
|
||||
+ postfix + "(&destMemory[0], ("
|
||||
+ DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope
|
||||
+ DataType().AddSubOperandTypeName() + ")tid + 3"
|
||||
+ memoryOrderScope
|
||||
+ ");\n"
|
||||
" atomic_fetch_add"
|
||||
+ postfix + "(&destMemory[0], (("
|
||||
+ DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
|
||||
+ DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope
|
||||
+ ");\n";
|
||||
+ DataType().AddSubOperandTypeName() + ")-1)*8"
|
||||
+ memoryOrderScope + ");\n";
|
||||
}
|
||||
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
|
||||
}
|
||||
void HostFunction(cl_uint tid, cl_uint threadCount,
|
||||
volatile HostAtomicType *destMemory,
|
||||
HostDataType *oldValues)
|
||||
HostDataType *oldValues) override
|
||||
{
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
{
|
||||
host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
|
||||
MemoryOrder());
|
||||
oldValues[tid] = host_atomic_fetch_add(
|
||||
&destMemory[tid], (HostDataType)0, MemoryOrder());
|
||||
}
|
||||
else
|
||||
{
|
||||
oldValues[tid] = host_atomic_fetch_add(
|
||||
&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
|
||||
@@ -1203,21 +1301,98 @@ public:
|
||||
MemoryOrder());
|
||||
host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
|
||||
MemoryOrder());
|
||||
host_atomic_fetch_add(&destMemory[0],
|
||||
((HostDataType)tid + 3)
|
||||
<< (sizeof(HostDataType) - 1) * 8,
|
||||
host_atomic_fetch_add(
|
||||
&destMemory[0],
|
||||
(((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
|
||||
MemoryOrder());
|
||||
}
|
||||
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
||||
}
|
||||
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
||||
HostDataType *startRefValues,
|
||||
cl_uint whichDestValue)
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
expected = StartValue();
|
||||
if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
expected += startRefValues[i];
|
||||
}
|
||||
else
|
||||
{
|
||||
for (cl_uint i = 0; i < threadCount; i++)
|
||||
expected += ((HostDataType)i + 3) * 3
|
||||
+ (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
bool IsTestNotAsExpected(const HostDataType &expected,
|
||||
const std::vector<HostAtomicType> &testValues,
|
||||
cl_uint whichDestValue) override
|
||||
{
|
||||
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||
{
|
||||
if (whichDestValue == 0)
|
||||
return std::abs((HOST_ATOMIC_FLOAT)expected
|
||||
- testValues[whichDestValue])
|
||||
> max_error_fp32;
|
||||
}
|
||||
return CBasicTestMemOrderScope<
|
||||
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
|
||||
testValues,
|
||||
whichDestValue);
|
||||
}
|
||||
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
|
||||
HostAtomicType *finalValues) override
|
||||
{
|
||||
if (std::is_same<HostDataType, HOST_ATOMIC_FLOAT>::value)
|
||||
{
|
||||
correct = true;
|
||||
for (cl_uint i = 1; i < threadCount; i++)
|
||||
{
|
||||
if (refValues[i] != StartValue())
|
||||
{
|
||||
log_error("Thread %d found %d mismatch(es)\n", i,
|
||||
(cl_uint)refValues[i]);
|
||||
correct = false;
|
||||
}
|
||||
}
|
||||
return !correct;
|
||||
}
|
||||
return CBasicTestMemOrderScope<HostAtomicType,
|
||||
HostDataType>::VerifyRefs(correct,
|
||||
threadCount,
|
||||
refValues,
|
||||
finalValues);
|
||||
}
|
||||
// Runs one test configuration. For the float instantiation the run is
// skipped (returning 0) when gFloatAtomicCaps lacks the fetch_add
// capability bit for the memory kind under test: the LOCAL bit when
// LocalMemory() is true, the GLOBAL bit otherwise. Everything else is
// delegated to the base-class implementation.
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                      cl_command_queue queue) override
{
    if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
    {
        const auto needed_cap = LocalMemory()
            ? CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT
            : CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT;

        const bool supported = (gFloatAtomicCaps & needed_cap) != 0;
        if (!supported) return 0; // skip test - not applicable
    }

    using Base = CBasicTestMemOrderScope<HostAtomicType, HostDataType>;
    return Base::ExecuteSingleTest(deviceID, context, queue);
}
|
||||
// Number of result slots the test uses. The float instantiation uses one
// slot per thread; every other type asks the base class.
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
    if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_FLOAT>)
    {
        return threadCount;
    }
    else
    {
        using Base = CBasicTestMemOrderScope<HostAtomicType, HostDataType>;
        return Base::NumResults(threadCount, deviceID);
    }
}
|
||||
};
|
||||
|
||||
static int test_atomic_fetch_add_generic(cl_device_id deviceID,
|
||||
@@ -1242,6 +1417,15 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
|
||||
TYPE_ATOMIC_ULONG, useSVM);
|
||||
EXECUTE_TEST(error,
|
||||
test_ulong.Execute(deviceID, context, queue, num_elements));
|
||||
|
||||
if (gFloatAtomicsSupported)
|
||||
{
|
||||
CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
|
||||
TYPE_ATOMIC_FLOAT, useSVM);
|
||||
EXECUTE_TEST(
|
||||
error, test_float.Execute(deviceID, context, queue, num_elements));
|
||||
}
|
||||
|
||||
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
|
||||
{
|
||||
CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
|
||||
|
||||
Reference in New Issue
Block a user