Related to #2142: following the work plan, this change extends CBasicTestFetchAddSpecialFloats with support for atomic_double.
//
// Copyright (c) 2024 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
#include "harness/kernelHelpers.h"
#include "harness/typeWrappers.h"
#include "harness/conversions.h"

#include "common.h"
#include "host_atomics.h"

#include <algorithm>
#include <numeric>
#include <sstream>
#include <vector>

template <typename HostAtomicType, typename HostDataType>
class CBasicTestStore
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestStore(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
            || MemoryOrder() == MEMORY_ORDER_ACQ_REL)
            return 0; // skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder())
            == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
                ._type
            == TYPE_ATOMIC_HALF)
        {
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT)
                    == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT)
                    == 0)
                return 0;
        }

        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  atomic_store" + postfix + "(&destMemory[tid], tid"
            + memoryOrderScope + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = static_cast<HostDataType>(whichDestValue);
        return true;
    }
};

static int test_atomic_store_generic(cl_device_id deviceID, cl_context context,
                                     cl_command_queue queue, int num_elements,
                                     bool useSVM)
{
    int error = 0;
    CBasicTestStore<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                        useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
        TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error,
                 test_double.Execute(deviceID, context, queue, num_elements));

    if (gFloatAtomicsSupported)
    {
        CBasicTestStore<HOST_ATOMIC_HALF, HOST_HALF> test_half(TYPE_ATOMIC_HALF,
                                                               useSVM);
        EXECUTE_TEST(error,
                     test_half.Execute(deviceID, context, queue, num_elements));
    }

    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestStore<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestStore<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_store)
{
    return test_atomic_store_generic(device, context, queue, num_elements,
                                     false);
}

REGISTER_TEST(svm_atomic_store)
{
    return test_atomic_store_generic(device, context, queue, num_elements,
                                     true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestInit : public CBasicTest<HostAtomicType, HostDataType> {
public:
    using CBasicTest<HostAtomicType, HostDataType>::OldValueCheck;
    CBasicTestInit(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual std::string ProgramCore()
    {
        return "  atomic_init(&destMemory[tid], tid);\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        host_atomic_init(&destMemory[tid], (HostDataType)tid);
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = (HostDataType)whichDestValue;
        return true;
    }
};

static int test_atomic_init_generic(cl_device_id deviceID, cl_context context,
                                    cl_command_queue queue, int num_elements,
                                    bool useSVM)
{
    int error = 0;
    CBasicTestInit<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                          useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                          useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
        TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error,
                 test_double.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestInit<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestInit<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_init)
{
    return test_atomic_init_generic(device, context, queue, num_elements,
                                    false);
}

REGISTER_TEST(svm_atomic_init)
{
    return test_atomic_init_generic(device, context, queue, num_elements, true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestLoad
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        if (MemoryOrder() == MEMORY_ORDER_RELEASE
            || MemoryOrder() == MEMORY_ORDER_ACQ_REL)
            return 0; // skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder())
            == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
                ._type
            == TYPE_ATOMIC_HALF)
        {
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT)
                    == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT)
                    == 0)
                return 0;
        }

        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    virtual std::string ProgramCore()
    {
        // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store
        // should be MEMORY_ORDER_RELEASE
        std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
        std::string memoryOrderScopeStore =
            (MemoryOrder() == MEMORY_ORDER_ACQUIRE)
            ? (", memory_order_release" + MemoryScopeStr())
            : memoryOrderScopeLoad;
        std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
        return "  atomic_store" + postfix + "(&destMemory[tid], tid"
            + memoryOrderScopeStore
            + ");\n"
              "  oldValues[tid] = atomic_load"
            + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        host_atomic_store(&destMemory[tid], (HostDataType)tid,
                          MEMORY_ORDER_SEQ_CST);
        oldValues[tid] = host_atomic_load<HostAtomicType, HostDataType>(
            &destMemory[tid], MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = static_cast<HostDataType>(whichDestValue);
        return true;
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        for (cl_uint i = 0; i < threadCount; i++)
        {
            if (refValues[i] != (HostDataType)i)
            {
                log_error("Invalid value for thread %u\n", (cl_uint)i);
                correct = false;
                return true;
            }
        }
        return true;
    }
};

static int test_atomic_load_generic(cl_device_id deviceID, cl_context context,
                                    cl_command_queue queue, int num_elements,
                                    bool useSVM)
{
    int error = 0;
    CBasicTestLoad<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                          useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                          useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
        TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error,
                 test_double.Execute(deviceID, context, queue, num_elements));

    if (gFloatAtomicsSupported)
    {
        CBasicTestLoad<HOST_ATOMIC_HALF, HOST_HALF> test_half(TYPE_ATOMIC_HALF,
                                                              useSVM);
        EXECUTE_TEST(error,
                     test_half.Execute(deviceID, context, queue, num_elements));
    }

    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestLoad<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestLoad<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_load)
{
    return test_atomic_load_generic(device, context, queue, num_elements,
                                    false);
}

REGISTER_TEST(svm_atomic_load)
{
    return test_atomic_load_generic(device, context, queue, num_elements, true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestExchange
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(1234);
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT)
                    == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT)
                    == 0)
                return 0;
        }

        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  oldValues[tid] = atomic_exchange" + postfix
            + "(&destMemory[0], tid" + memoryOrderScope
            + ");\n"
              "  for(int i = 0; i < "
            + IterationsStr()
            + "; i++)\n"
              "    oldValues[tid] = atomic_exchange"
            + postfix + "(&destMemory[0], oldValues[tid]" + memoryOrderScope
            + ");\n";
    }

    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid,
                                              MemoryOrder());
        for (int i = 0; i < Iterations(); i++)
            oldValues[tid] = host_atomic_exchange(
                &destMemory[0], oldValues[tid], MemoryOrder());
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        OldValueCheck(
            Iterations() % 2
            == 0); // check is valid for even number of iterations only
        correct = true;
        /* We are expecting values from 0 to size-1 and initial value from
         * atomic variable */
        /* These values must be distributed across refValues array and atomic
         * variable finalValues[0] */
        /* Any repeated value is treated as an error */
        std::vector<bool> tidFound(threadCount);
        bool startValueFound = false;
        cl_uint startVal = static_cast<cl_uint>(StartValue());

        for (cl_uint i = 0; i <= threadCount; i++)
        {
            cl_uint value = 0;
            if (i == threadCount)
            {
                value = static_cast<cl_uint>(
                    static_cast<HostDataType>(finalValues[0]));
            }
            else
            {
                value = static_cast<cl_uint>(refValues[i]);
            }

            if (value == startVal)
            {
                // Special initial value
                if (startValueFound)
                {
                    log_error("ERROR: Starting reference value (%u) occurred "
                              "more than once\n",
                              (cl_uint)StartValue());
                    correct = false;
                    return true;
                }
                startValueFound = true;
                continue;
            }
            if (value >= threadCount)
            {
                log_error(
                    "ERROR: Reference value %u outside of valid range! (%u)\n",
                    i, value);
                correct = false;
                return true;
            }
            if (tidFound[value])
            {
                log_error("ERROR: Value (%u) occurred more than once\n", value);
                correct = false;
                return true;
            }
            tidFound[value] = true;
        }
        return true;
    }
};

static int test_atomic_exchange_generic(cl_device_id deviceID,
                                        cl_context context,
                                        cl_command_queue queue,
                                        int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestExchange<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
        TYPE_ATOMIC_FLOAT, useSVM);
    EXECUTE_TEST(error,
                 test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestExchange<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
        TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error,
                 test_double.Execute(deviceID, context, queue, num_elements));
    if (gFloatAtomicsSupported)
    {
        CBasicTestExchange<HOST_ATOMIC_HALF, HOST_HALF> test_half(
            TYPE_ATOMIC_HALF, useSVM);
        EXECUTE_TEST(error,
                     test_half.Execute(deviceID, context, queue, num_elements));
    }
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestExchange<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestExchange<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_exchange)
{
    return test_atomic_exchange_generic(device, context, queue, num_elements,
                                        false);
}

REGISTER_TEST(svm_atomic_exchange)
{
    return test_atomic_exchange_generic(device, context, queue, num_elements,
                                        true);
}

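// The compare-exchange kernels below encode per-thread outcomes in
// oldValues[tid]: a successful exchange stores the previous atomic value, a
// spurious failure (comparison held but the exchange failed, which must not
// happen for the 'strong' form) stores 'threadCount', and an unexpected early
// success stores 'threadCount + 1'. Both sentinels lie outside the valid
// 0..threadCount-1 range, so VerifyRefs can tell them apart from real data.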
template <typename HostAtomicType, typename HostDataType>
class CBasicTestCompareStrong
    : public CBasicTestMemOrder2Scope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
    using CBasicTestMemOrder2Scope<HostAtomicType,
                                   HostDataType>::MemoryOrderScope;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::IterationsStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType,
                                                                 useSVM)
    {
        StartValue(123456);
        OldValueCheck(false);
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        if (MemoryOrder2() == MEMORY_ORDER_RELEASE
            || MemoryOrder2() == MEMORY_ORDER_ACQ_REL)
            return 0; // not allowed as 'failure' argument
        if ((MemoryOrder() == MEMORY_ORDER_RELAXED
             && MemoryOrder2() != MEMORY_ORDER_RELAXED)
            || (MemoryOrder() != MEMORY_ORDER_SEQ_CST
                && MemoryOrder2() == MEMORY_ORDER_SEQ_CST))
            return 0; // failure argument shall be no stronger than the success

        if (CheckCapabilities(MemoryScope(), MemoryOrder())
            == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder2())
            == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        return CBasicTestMemOrder2Scope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScope();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string("  ") + DataType().RegularTypeName()
            + " expected, previous;\n"
              "  int successCount = 0;\n"
              "  oldValues[tid] = tid;\n"
              "  expected = tid; // force failure at the beginning\n"
              "  if(atomic_compare_exchange_strong"
            + postfix + "(&destMemory[0], &expected, oldValues[tid]"
            + memoryOrderScope
            + ") || expected == tid)\n"
              "    oldValues[tid] = threadCount+1; //mark unexpected success "
              "with invalid value\n"
              "  else\n"
              "  {\n"
              "    for(int i = 0; i < "
            + IterationsStr()
            + " || successCount == 0; i++)\n"
              "    {\n"
              "      previous = expected;\n"
              "      if(atomic_compare_exchange_strong"
            + postfix + "(&destMemory[0], &expected, oldValues[tid]"
            + memoryOrderScope
            + "))\n"
              "      {\n"
              "        oldValues[tid] = expected;\n"
              "        successCount++;\n"
              "      }\n"
              "      else\n"
              "      {\n"
              "        if(previous == expected) // spurious failure - "
              "shouldn't occur for 'strong'\n"
              "        {\n"
              "          oldValues[tid] = threadCount; //mark fail with "
              "invalid value\n"
              "          break;\n"
              "        }\n"
              "      }\n"
              "    }\n"
              "  }\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        HostDataType expected = (HostDataType)StartValue(), previous;
        oldValues[tid] = (HostDataType)tid;
        for (int i = 0; i < Iterations(); i++)
        {
            previous = expected;
            if (host_atomic_compare_exchange(&destMemory[0], &expected,
                                             oldValues[tid], MemoryOrder(),
                                             MemoryOrder2()))
                oldValues[tid] = expected;
            else
            {
                if (previous == expected) // shouldn't occur for 'strong'
                {
                    oldValues[tid] = threadCount; // mark fail with invalid
                                                  // value
                }
            }
        }
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        /* We are expecting values from 0 to size-1 and initial value from
         * atomic variable */
        /* These values must be distributed across refValues array and atomic
         * variable finalValues[0] */
        /* Any repeated value is treated as an error */
        std::vector<bool> tidFound(threadCount);
        bool startValueFound = false;
        cl_uint i;

        for (i = 0; i <= threadCount; i++)
        {
            cl_uint value;
            if (i == threadCount)
                value = (cl_uint)finalValues[0]; // additional value from atomic
                                                 // variable (last written)
            else
                value = (cl_uint)refValues[i];
            if (value == (cl_uint)StartValue())
            {
                // Special initial value
                if (startValueFound)
                {
                    log_error("ERROR: Starting reference value (%u) occurred "
                              "more than once\n",
                              (cl_uint)StartValue());
                    correct = false;
                    return true;
                }
                startValueFound = true;
                continue;
            }
            if (value >= threadCount)
            {
                if (value == threadCount)
                    log_error("ERROR: Spurious failure detected for "
                              "atomic_compare_exchange_strong\n");
                log_error(
                    "ERROR: Reference value %u outside of valid range! (%u)\n",
                    i, value);
                correct = false;
                return true;
            }
            if (tidFound[value])
            {
                log_error("ERROR: Value (%u) occurred more than once\n", value);
                correct = false;
                return true;
            }
            tidFound[value] = true;
        }
        return true;
    }
};

static int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID,
                                                       cl_context context,
                                                       cl_command_queue queue,
                                                       int num_elements,
                                                       bool useSVM)
{
    int error = 0;
    CBasicTestCompareStrong<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                                useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
        TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_LONG, HOST_LONG> test_long(
        TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareStrong<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32>
            test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64>
            test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_compare_exchange_strong)
{
    return test_atomic_compare_exchange_strong_generic(device, context, queue,
                                                       num_elements, false);
}

REGISTER_TEST(svm_atomic_compare_exchange_strong)
{
    return test_atomic_compare_exchange_strong_generic(device, context, queue,
                                                       num_elements, true);
}

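// atomic_compare_exchange_weak, unlike the strong form, may fail spuriously
// even when 'expected' matches the destination. The weak kernel therefore
// omits the spurious-failure sentinel and simply retries; its loop condition
// '|| successCount == 0' keeps iterating until at least one exchange succeeds.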
template <typename HostAtomicType, typename HostDataType>
class CBasicTestCompareWeak
    : public CBasicTestCompareStrong<HostAtomicType, HostDataType> {
public:
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestCompareStrong<HostAtomicType,
                                  HostDataType>::MemoryOrderScope;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::DataType;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestCompareStrong<HostAtomicType, HostDataType>::IterationsStr;
    CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestCompareStrong<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {}
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScope();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string("  ") + DataType().RegularTypeName()
            + " expected, previous;\n"
              "  int successCount = 0;\n"
              "  oldValues[tid] = tid;\n"
              "  expected = tid; // force failure at the beginning\n"
              "  if(atomic_compare_exchange_weak"
            + postfix + "(&destMemory[0], &expected, oldValues[tid]"
            + memoryOrderScope
            + ") || expected == tid)\n"
              "    oldValues[tid] = threadCount+1; //mark unexpected success "
              "with invalid value\n"
              "  else\n"
              "  {\n"
              "    for(int i = 0; i < "
            + IterationsStr()
            + " || successCount == 0; i++)\n"
              "    {\n"
              "      previous = expected;\n"
              "      if(atomic_compare_exchange_weak"
            + postfix + "(&destMemory[0], &expected, oldValues[tid]"
            + memoryOrderScope
            + "))\n"
              "      {\n"
              "        oldValues[tid] = expected;\n"
              "        successCount++;\n"
              "      }\n"
              "    }\n"
              "  }\n";
    }
};

static int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID,
                                                     cl_context context,
                                                     cl_command_queue queue,
                                                     int num_elements,
                                                     bool useSVM)
{
    int error = 0;
    CBasicTestCompareWeak<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
        TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_LONG, HOST_LONG> test_long(
        TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestCompareWeak<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_compare_exchange_weak)
{
    return test_atomic_compare_exchange_weak_generic(device, context, queue,
                                                     num_elements, false);
}

REGISTER_TEST(svm_atomic_compare_exchange_weak)
{
    return test_atomic_compare_exchange_weak_generic(device, context, queue,
                                                     num_elements, true);
}

template <typename T> double kahan_sum(const std::vector<T> &nums)
{
    return 0.0;
}
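
// The generic overload above is a stub so the surrounding templates compile
// for non-double element types; it is never used for error estimation. Only
// the double specialization below performs Kahan compensated summation, the
// one case where the extra precision is needed for the reference sum.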
template <> double kahan_sum<double>(const std::vector<double> &nums)
{
    double sum = 0.0;
    double compensation = 0.0;
    for (double num : nums)
    {
        double y = num - compensation;
        double t = sum + y;
        compensation = (t - sum) - y;
        sum = t;
    }
    return sum;
}
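
// How the compensation works (standard Kahan summation): when a small term is
// added to a much larger running sum, its low-order bits are rounded away;
// '(t - sum) - y' recovers exactly the part that was lost, and subtracting it
// from the next term feeds the lost amount back into the sum.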
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {

    double min_range;
    double max_range;
    double max_error;
    std::vector<HostDataType> ref_vals;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM),
          min_range(-999.0), max_range(999.0), max_error(0.0)
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            StartValue((HostDataType)0.0);
            CBasicTestMemOrderScope<HostAtomicType,
                                    HostDataType>::OldValueCheck(false);

            // Narrow down range for half to avoid overflow to infinity
            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
            {
                min_range = -50.0;
                max_range = 50.0;
            }
        }
    }
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (threadCount > ref_vals.size())
            {
                ref_vals.resize(threadCount);

                for (cl_uint i = 0; i < threadCount; i++)
                    if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
                        ref_vals[i] =
                            get_random_double(min_range, max_range, d);
                    else
                        ref_vals[i] = get_random_float(min_range, max_range, d);

                memcpy(startRefValues, ref_vals.data(),
                       sizeof(HostDataType) * ref_vals.size());

                // Estimate highest possible summation error for given set.
                std::vector<HostDataType> sums;
                std::sort(ref_vals.begin(), ref_vals.end());

                sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
                                               static_cast<HostDataType>(0.f)));

                sums.push_back(std::accumulate(ref_vals.rbegin(),
                                               ref_vals.rend(),
                                               static_cast<HostDataType>(0.f)));

                std::sort(ref_vals.begin(), ref_vals.end(),
                          [](HostDataType a, HostDataType b) {
                              return std::abs(a) < std::abs(b);
                          });

                double precise = 0.0;
                if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
                    precise = kahan_sum(ref_vals);
                else
                    for (auto elem : ref_vals) precise += double(elem);

                sums.push_back(precise);

                sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
                                               static_cast<HostDataType>(0.f)));

                sums.push_back(std::accumulate(ref_vals.rbegin(),
                                               ref_vals.rend(),
                                               static_cast<HostDataType>(0.f)));

                std::sort(sums.begin(), sums.end());
                assert(std::all_of(sums.begin(), sums.end(),
                                   [](const HostDataType &val) {
                                       return std::isfinite(
                                           static_cast<double>(val));
                                   })
                       && "Infinite summation value detected!");
                max_error = std::abs(sums.front() - sums.back());

                log_info("Max allowed error for %u elements: %.10f\n",
                         threadCount, max_error);

                // restore unsorted order
                memcpy(ref_vals.data(), startRefValues,
                       sizeof(HostDataType) * ref_vals.size());
            }
            else
            {
                memcpy(startRefValues, ref_vals.data(),
                       sizeof(HostDataType) * threadCount);
            }
            return true;
        }
        return false;
    }
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");

        if constexpr (is_host_fp_v<HostDataType>)
        {
            return "  atomic_fetch_add" + postfix + "(&destMemory[0], ("
                + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
                + memoryOrderScope + ");\n"
                + "  oldValues[tid] = atomic_fetch_add" + postfix
                + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
                + ")0" + memoryOrderScope + ");\n";
        }
        else
        {
            return "  oldValues[tid] = atomic_fetch_add" + postfix
                + "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
                + ")tid + 3" + memoryOrderScope + ");\n" + "  atomic_fetch_add"
                + postfix + "(&destMemory[0], ("
                + DataType().AddSubOperandTypeName() + ")tid + 3"
                + memoryOrderScope
                + ");\n"
                  "  atomic_fetch_add"
                + postfix + "(&destMemory[0], ("
                + DataType().AddSubOperandTypeName() + ")tid + 3"
                + memoryOrderScope
                + ");\n"
                  "  atomic_fetch_add"
                + postfix + "(&destMemory[0], (("
                + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
                + DataType().AddSubOperandTypeName() + ")-1)*8"
                + memoryOrderScope + ");\n";
        }
    }
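
    // Note on the integer kernel above: besides adding (tid + 3) to
    // destMemory[0] three times, it adds (tid + 3) shifted into the most
    // significant byte ('<< (sizeof(T)-1)*8'), so truncation of high-order
    // bytes by a faulty atomic implementation shows up in the total that
    // ExpectedValue computes. HostFunction below mirrors the same sequence.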
    void HostFunction(cl_uint tid, cl_uint threadCount,
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
                                  MemoryOrder());
            oldValues[tid] = host_atomic_fetch_add(
                &destMemory[tid], (HostDataType)0, MemoryOrder());
        }
        else
        {
            oldValues[tid] = host_atomic_fetch_add(
                &destMemory[0], (HostDataType)tid + 3, MemoryOrder());
            host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
                                  MemoryOrder());
            host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
                                  MemoryOrder());
            host_atomic_fetch_add(
                &destMemory[0],
                (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
                MemoryOrder());
        }
    }
    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                       HostDataType *startRefValues,
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
                    expected += startRefValues[i];
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
                expected += ((HostDataType)i + 3) * 3
                    + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
        }

        return true;
    }
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             cl_uint whichDestValue) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                return std::abs(
                           static_cast<double>(expected
                                               - static_cast<HostDataType>(
                                                   testValues[whichDestValue])))
                    > max_error;
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                               testValues,
                                                               whichDestValue);
    }
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            correct = true;
            for (cl_uint i = 1; i < threadCount; i++)
            {
                if (refValues[i] != StartValue())
                {
                    log_error("Thread %u found unexpected value %u\n", i,
                              (cl_uint)refValues[i]);
                    correct = false;
                }
            }
            return !correct;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::VerifyRefs(correct,
                                                                 threadCount,
                                                                 refValues,
                                                                 finalValues);
    }
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            if (LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
                    == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::NumResults(threadCount,
                                                                 deviceID);
    }
};
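
// CBasicTestFetchAdd above tolerates rounding differences for floating-point
// types: device-side atomic addition may happen in any order, so results are
// only required to fall within the max_error bound estimated in GenerateRefs.
// CBasicTestFetchAddSpecialFloats below instead exercises special values
// (signed zeros, infinities, NaNs, extremes and optionally denormals), where
// host and device results are compared exactly (with NaN matching NaN).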

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAddSpecialFloats
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {

    std::vector<HostDataType> ref_vals;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::DeclaredInProgram;
    CBasicTestFetchAddSpecialFloats(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        // StartValue is used as an index divisor in the following test
        // logic. It is set to the number of special values, which allows
        // threads to be mapped deterministically onto the input data array.
        // This enables repeated add operations arranged so that every
        // special value is added to every other one ("all-to-all").
        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>
                      || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            auto spec_vals = GetSpecialValues();
            StartValue(spec_vals.size());
            CBasicTestMemOrderScope<HostAtomicType,
                                    HostDataType>::OldValueCheck(false);
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            auto spec_vals = GetSpecialValues();
            StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode));
            CBasicTestMemOrderScope<HostAtomicType,
                                    HostDataType>::OldValueCheck(false);
        }
    }

    static std::vector<HostDataType> &GetSpecialValues()
    {
        static std::vector<HostDataType> special_values;
        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>
                      || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            const HostDataType test_value_zero =
                static_cast<HostDataType>(0.0f);
            const HostDataType test_value_minus_zero =
                static_cast<HostDataType>(-0.0f);
            const HostDataType test_value_without_fraction =
                static_cast<HostDataType>(2.0f);
            const HostDataType test_value_with_fraction =
                static_cast<HostDataType>(2.2f);

            if (special_values.empty())
            {
                special_values = {
                    static_cast<HostDataType>(test_value_minus_zero),
                    static_cast<HostDataType>(test_value_zero),
                    static_cast<HostDataType>(test_value_without_fraction),
                    static_cast<HostDataType>(test_value_with_fraction),
                    std::numeric_limits<HostDataType>::infinity(),
                    std::numeric_limits<HostDataType>::quiet_NaN(),
                    std::numeric_limits<HostDataType>::signaling_NaN(),
                    -std::numeric_limits<HostDataType>::infinity(),
                    -std::numeric_limits<HostDataType>::quiet_NaN(),
                    -std::numeric_limits<HostDataType>::signaling_NaN(),
                    std::numeric_limits<HostDataType>::lowest(),
                    std::numeric_limits<HostDataType>::min(),
                    std::numeric_limits<HostDataType>::max(),
                };

                if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
                {
                    if (0 != (CL_FP_DENORM & gDoubleFPConfig))
                    {
                        special_values.push_back(
                            std::numeric_limits<HostDataType>::denorm_min());
                    }
                }
                else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
                {
                    if (0 != (CL_FP_DENORM & gFloatFPConfig))
                    {
                        special_values.push_back(
                            std::numeric_limits<HostDataType>::denorm_min());
                    }
                }
            }
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            if (special_values.empty())
            {
                special_values = {
                    0xffff, 0x0000, 0x7c00, /*INFINITY*/
                    0xfc00, /*-INFINITY*/
                    0x8000, /*-0*/
                    0x7bff, /*HALF_MAX*/
                    0x0400, /*HALF_MIN*/
                    0x3c00, /* 1 */
                    0xbc00, /* -1 */
                    0x3555, /*nearest value to 1/3*/
                    0x3bff, /*largest number less than one*/
                    0xc000, /* -2 */
                    0xfbff, /* -HALF_MAX */
                    0x8400, /* -HALF_MIN */
                    0x4248, /* M_PI_H */
                    0xc248, /* -M_PI_H */
                    0xbbff, /* Largest negative fraction */
                };

                if (0 != (CL_FP_DENORM & gHalfFPConfig))
                {
                    special_values.push_back(0x0001 /* Smallest denormal */);
                    special_values.push_back(0x03ff /* Largest denormal */);
                }
            }
        }
        return special_values;
    }
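
    // The half-precision entries above are raw IEEE 754 binary16 bit patterns
    // (e.g. 0x3c00 is 1.0 and 0x7c00 is +infinity), since there is no native
    // host half type with std::numeric_limits support; arithmetic on them goes
    // through cl_half_to_float/cl_half_from_float, as in ExpectedValue below.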

    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>
                      || std::is_same_v<HostDataType, HOST_DOUBLE>
                      || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (threadCount > ref_vals.size())
            {
                ref_vals.assign(threadCount, 0);
                auto spec_vals = GetSpecialValues();

                cl_uint total_cnt = 0;
                while (total_cnt < threadCount)
                {
                    cl_uint block_cnt =
                        std::min((cl_int)(threadCount - total_cnt),
                                 (cl_int)spec_vals.size());
                    memcpy(&ref_vals.at(total_cnt), spec_vals.data(),
                           sizeof(HostDataType) * block_cnt);
                    total_cnt += block_cnt;
                }
            }

            memcpy(startRefValues, ref_vals.data(),
                   sizeof(HostDataType) * threadCount);

            return true;
        }
        return false;
    }
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");

        if constexpr (std::is_same_v<HostDataType, HOST_HALF>
                      || std::is_same_v<HostDataType, HOST_DOUBLE>
                      || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            // The start_value variable (set by StartValue) is used
            // as a divisor of the thread index when selecting the operand for
            // atomic_fetch_add. This groups threads into blocks corresponding
            // to the number of special values and implements an "all-to-all"
            // addition pattern. As a result, each destination element is
            // updated using different combinations of input values, enabling
            // consistent comparison between host and device execution.

            return std::string(DataType().AddSubOperandTypeName())
                + " start_value = atomic_load_explicit(destMemory+tid, "
                  "memory_order_relaxed, memory_scope_work_group);\n"
                  "  atomic_store_explicit(destMemory+tid, oldValues[tid], "
                  "memory_order_relaxed, memory_scope_work_group);\n"
                  "  atomic_fetch_add"
                + postfix + "(&destMemory[tid], ("
                + DataType().AddSubOperandTypeName()
                + ")oldValues[tid/(int)start_value]" + memoryOrderScope
                + ");\n";
        }
        return std::string();
    }
|
|
void HostFunction(cl_uint tid, cl_uint threadCount,
|
|
volatile HostAtomicType *destMemory,
|
|
HostDataType *oldValues) override
|
|
{
|
|
if constexpr (
|
|
std::is_same_v<
|
|
HostDataType,
|
|
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
{
|
|
auto spec_vals = GetSpecialValues();
|
|
host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
|
|
MEMORY_ORDER_SEQ_CST);
|
|
host_atomic_fetch_add(
|
|
&destMemory[tid],
|
|
(HostDataType)oldValues[tid / spec_vals.size()], MemoryOrder());
|
|
}
|
|
}
|
|
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
|
|
HostDataType *startRefValues,
|
|
cl_uint whichDestValue) override
|
|
{
|
|
expected = StartValue();
|
|
if constexpr (
|
|
std::is_same_v<
|
|
HostDataType,
|
|
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
|
|
{
|
|
auto spec_vals = GetSpecialValues();
|
|
expected = startRefValues[whichDestValue]
|
|
+ startRefValues[whichDestValue / spec_vals.size()];
|
|
}
|
|
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
|
|
{
|
|
auto spec_vals = GetSpecialValues();
|
|
expected = cl_half_from_float(
|
|
cl_half_to_float(startRefValues[whichDestValue])
|
|
+ cl_half_to_float(
|
|
startRefValues[whichDestValue / spec_vals.size()]),
|
|
gHalfRoundingMode);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             cl_uint whichDestValue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            // cl_half is stored as an unsigned short, so NaN bit patterns
            // compare bitwise and need no special casing.
            return static_cast<cl_half>(expected)
                != testValues[whichDestValue];
        }
        else
        {
            // Treat any NaN result as matching a NaN expectation.
            if (std::isnan(testValues[whichDestValue]) && std::isnan(expected))
                return false;
            else
                return expected != testValues[whichDestValue];
        }
    }
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            if (LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
                    == 0)
                return 0;

            if (!CBasicTestMemOrderScope<HostAtomicType,
                                         HostDataType>::LocalMemory()
                && CBasicTestMemOrderScope<HostAtomicType,
                                           HostDataType>::DeclaredInProgram())
            {
                if ((gDoubleFPConfig & CL_FP_INF_NAN) == 0) return 0;
            }
        }
        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;

            if (!CBasicTestMemOrderScope<HostAtomicType,
                                         HostDataType>::LocalMemory()
                && CBasicTestMemOrderScope<HostAtomicType,
                                           HostDataType>::DeclaredInProgram())
            {
                if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0;
            }
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            if (DeclaredInProgram()) return 0; // skip test - not applicable

            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;

            if (!CBasicTestMemOrderScope<HostAtomicType,
                                         HostDataType>::LocalMemory()
                && CBasicTestMemOrderScope<HostAtomicType,
                                           HostDataType>::DeclaredInProgram())
            {
                if ((gHalfFPConfig & CL_FP_INF_NAN) == 0) return 0;
            }
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (
            std::is_same_v<
                HostDataType,
                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::NumResults(threadCount,
                                                                 deviceID);
    }
};

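// Note: the special-float tests below are launched with N * N threads
// (N = number of special values), so every pairing (value i, value i / N)
// produced by the all-to-all pattern in CBasicTestFetchAddSpecialFloats is
// exercised at least once.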
static int test_atomic_fetch_add_generic(cl_device_id deviceID,
                                         cl_context context,
                                         cl_command_queue queue,
                                         int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchAdd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));

    if (gFloatAtomicsSupported)
    {
        auto spec_vals_fp64 =
            CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_DOUBLE,
                                            HOST_DOUBLE>::GetSpecialValues();

        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_DOUBLE, HOST_DOUBLE>
            test_spec_double(TYPE_ATOMIC_DOUBLE, useSVM);
        EXECUTE_TEST(error,
                     test_spec_double.Execute(deviceID, context, queue,
                                              spec_vals_fp64.size()
                                                  * spec_vals_fp64.size()));

        auto spec_vals_fp32 =
            CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT,
                                            HOST_FLOAT>::GetSpecialValues();

        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT, HOST_FLOAT>
            test_spec_float(TYPE_ATOMIC_FLOAT, useSVM);
        EXECUTE_TEST(error,
                     test_spec_float.Execute(deviceID, context, queue,
                                             spec_vals_fp32.size()
                                                 * spec_vals_fp32.size()));

        auto spec_vals_halfs =
            CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF,
                                            HOST_HALF>::GetSpecialValues();

        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF, HOST_HALF>
            test_spec_half(TYPE_ATOMIC_HALF, useSVM);
        EXECUTE_TEST(error,
                     test_spec_half.Execute(deviceID, context, queue,
                                            spec_vals_halfs.size()
                                                * spec_vals_halfs.size()));

        CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
            TYPE_ATOMIC_HALF, useSVM);
        EXECUTE_TEST(error,
                     test_half.Execute(deviceID, context, queue, num_elements));

        CBasicTestFetchAdd<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
            TYPE_ATOMIC_DOUBLE, useSVM);
        EXECUTE_TEST(
            error, test_double.Execute(deviceID, context, queue, num_elements));

        CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
            TYPE_ATOMIC_FLOAT, useSVM);
        EXECUTE_TEST(
            error, test_float.Execute(deviceID, context, queue, num_elements));
    }

    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fetch_add)
{
    return test_atomic_fetch_add_generic(device, context, queue, num_elements,
                                         false);
}

REGISTER_TEST(svm_atomic_fetch_add)
{
    return test_atomic_fetch_add_generic(device, context, queue, num_elements,
                                         true);
}

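// kahan_sub computes a compensated (Kahan) running subtraction: the
// `compensation` term captures the low-order bits lost when each negated
// element is folded into `sum`, so the double specialization yields a
// near-exact value of -(nums[0] + nums[1] + ...). The unspecialized template
// appears to be a placeholder that keeps the call well-formed for non-double
// types, which take the plain accumulation path in GenerateRefs below.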
template <typename T> double kahan_sub(const std::vector<T> &nums)
{
    return 0.0;
}
template <> double kahan_sub<double>(const std::vector<double> &nums)
{
    double sum = 0.0;
    double compensation = 0.0;
    for (double num : nums)
    {
        double y = -num - compensation;
        double t = sum + y;
        compensation = (t - sum) - y;
        sum = t;
    }
    return sum;
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchSub
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {

    double min_range;
    double max_range;
    double max_error;
    std::vector<HostDataType> ref_vals;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM),
          min_range(-999.0), max_range(999.0), max_error(0.0)
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            StartValue(0);
            CBasicTestMemOrderScope<HostAtomicType,
                                    HostDataType>::OldValueCheck(false);

            // Narrow down range for half to avoid overflow to infinity
            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
            {
                min_range = -50.0;
                max_range = 50.0;
            }
        }
    }
    template <typename Iterator>
    HostDataType subtract(Iterator begin, Iterator end)
    {
        HostDataType res = 0;
        for (auto it = begin; it != end; ++it) res = res - *it;
        return res;
    }
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (threadCount > ref_vals.size())
            {
                ref_vals.resize(threadCount);

                for (cl_uint i = 0; i < threadCount; i++)
                    ref_vals[i] = (HostDataType)get_random_double(min_range,
                                                                  max_range, d);

                memcpy(startRefValues, ref_vals.data(),
                       sizeof(HostDataType) * ref_vals.size());

                // Estimate highest possible subtraction error for given set.
                std::vector<HostDataType> sums;
                std::sort(ref_vals.begin(), ref_vals.end());
                sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
                sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));

                std::sort(ref_vals.begin(), ref_vals.end(),
                          [](HostDataType a, HostDataType b) {
                              return std::abs(a) < std::abs(b);
                          });

                double precise = 0.0;
                if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
                    precise = kahan_sub(ref_vals);
                else
                    // Accumulate with the same sign convention as kahan_sub
                    // (a running subtraction), so all entries in `sums` are
                    // comparable.
                    for (auto elem : ref_vals) precise -= double(elem);
                sums.push_back(precise);

                sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
                sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));

                std::sort(sums.begin(), sums.end());
                assert(std::all_of(sums.begin(), sums.end(),
                                   [](const HostDataType &val) {
                                       return std::isfinite(
                                           static_cast<double>(val));
                                   })
                       && "Infinite subtraction value detected!");
                max_error = std::abs(sums.front() - sums.back());

                log_info("Max allowed error for %u elements: %.10f\n",
                         threadCount, max_error);

                // restore unsorted order
                memcpy(ref_vals.data(), startRefValues,
                       sizeof(HostDataType) * ref_vals.size());
            }
            else
            {
                memcpy(startRefValues, ref_vals.data(),
                       sizeof(HostDataType) * threadCount);
            }
            return true;
        }
        return false;
    }
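    // The atomic subtractions on destMemory[0] can execute in any order, and
    // floating-point subtraction is not associative, so there is no single
    // correct result. The block above bounds the nondeterminism by evaluating
    // the running subtraction in several extreme orders (ascending,
    // descending, and by increasing magnitude) plus a compensated reference,
    // and takes the spread of those totals as the allowed error.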
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");

        if constexpr (is_host_fp_v<HostDataType>)
        {
            return " atomic_fetch_sub" + postfix + "(&destMemory[0], ("
                + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
                + memoryOrderScope + ");\n"
                + " oldValues[tid] = atomic_fetch_sub" + postfix
                + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
                + ")0" + memoryOrderScope + ");\n";
        }
        else
        {
            return " oldValues[tid] = atomic_fetch_sub" + postfix
                + "(&destMemory[0], tid + 3 +((("
                + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
                + DataType().AddSubOperandTypeName() + ")-1)*8)"
                + memoryOrderScope + ");\n";
        }
    }
    void HostFunction(cl_uint tid, cl_uint threadCount,
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
                                  MemoryOrder());
            oldValues[tid] = host_atomic_fetch_sub(
                &destMemory[tid], (HostDataType)0, MemoryOrder());
        }
        else
        {
            oldValues[tid] =
                host_atomic_fetch_sub(&destMemory[0],
                                      (HostDataType)tid + 3
                                          + (((HostDataType)tid + 3)
                                             << (sizeof(HostDataType) - 1) * 8),
                                      MemoryOrder());
        }
    }
    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                       HostDataType *startRefValues,
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
                    expected -= startRefValues[i];
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
                expected -= (HostDataType)i + 3
                    + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
        }
        return true;
    }
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             cl_uint whichDestValue) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                return std::abs(
                           static_cast<double>(expected
                                               - static_cast<HostDataType>(
                                                   testValues[whichDestValue])))
                    > max_error;
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                               testValues,
                                                               whichDestValue);
    }
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            correct = true;
            for (cl_uint i = 1; i < threadCount; i++)
            {
                if (refValues[i] != StartValue())
                {
                    log_error("Thread %d read unexpected value %f\n", i,
                              (double)refValues[i]);
                    correct = false;
                }
            }
            return !correct;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::VerifyRefs(correct,
                                                                 threadCount,
                                                                 refValues,
                                                                 finalValues);
    }
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            if (LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
                    == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::NumResults(threadCount,
                                                                 deviceID);
    }
};

static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
                                         cl_context context,
                                         cl_command_queue queue,
                                         int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchSub<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchSub<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));

    if (gFloatAtomicsSupported)
    {
        CBasicTestFetchSub<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
            TYPE_ATOMIC_DOUBLE, useSVM);
        EXECUTE_TEST(
            error, test_double.Execute(deviceID, context, queue, num_elements));

        CBasicTestFetchSub<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
            TYPE_ATOMIC_FLOAT, useSVM);
        EXECUTE_TEST(
            error, test_float.Execute(deviceID, context, queue, num_elements));

        CBasicTestFetchSub<HOST_ATOMIC_HALF, HOST_HALF> test_half(
            TYPE_ATOMIC_HALF, useSVM);
        EXECUTE_TEST(error,
                     test_half.Execute(deviceID, context, queue, num_elements));
    }

    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fetch_sub)
{
    return test_atomic_fetch_sub_generic(device, context, queue, num_elements,
                                         false);
}

REGISTER_TEST(svm_atomic_fetch_sub)
{
    return test_atomic_fetch_sub_generic(device, context, queue, num_elements,
                                         true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchOr
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(0);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        cl_uint numBits = DataType().Size(deviceID) * 8;

        return (threadCount + numBits - 1) / numBits;
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string(" size_t numBits = sizeof(")
            + DataType().RegularTypeName()
            + ") * 8;\n"
              " int whichResult = tid / numBits;\n"
              " int bitIndex = tid - (whichResult * numBits);\n"
              "\n"
              " oldValues[tid] = atomic_fetch_or"
            + postfix + "(&destMemory[whichResult], (("
            + DataType().RegularTypeName() + ")1 << bitIndex) "
            + memoryOrderScope + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        size_t numBits = sizeof(HostDataType) * 8;
        size_t whichResult = tid / numBits;
        size_t bitIndex = tid - (whichResult * numBits);

        oldValues[tid] =
            host_atomic_fetch_or(&destMemory[whichResult],
                                 ((HostDataType)1 << bitIndex), MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1))
            / (sizeof(HostDataType) * 8);
        if (whichDestValue < numValues - 1)
        {
            expected = ~(HostDataType)0;
            return true;
        }
        // Last item doesn't get or'ed on every bit, so we have to mask away
        cl_uint numBits =
            threadCount - whichDestValue * (sizeof(HostDataType) * 8);
        expected = StartValue();
        for (cl_uint i = 0; i < numBits; i++)
            expected |= ((HostDataType)1 << i);
        return true;
    }
};

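// Bit-per-thread sketch of the fetch_or test above: thread tid sets bit
// (tid % numBits) of word (tid / numBits), so with 32-bit data and 40
// threads, destMemory[0] must end as 0xFFFFFFFF and destMemory[1] as 0xFF
// (bits 0..7 from threads 32..39). ExpectedValue reproduces exactly that:
// all-ones for full words, and a partial mask for the final word.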
static int test_atomic_fetch_or_generic(cl_device_id deviceID,
                                        cl_context context,
                                        cl_command_queue queue,
                                        int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchOr<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                          useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOr<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOr<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOr<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOr<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOr<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fetch_or)
{
    return test_atomic_fetch_or_generic(device, context, queue, num_elements,
                                        false);
}

REGISTER_TEST(svm_atomic_fetch_or)
{
    return test_atomic_fetch_or_generic(device, context, queue, num_elements,
                                        true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchXor
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue((HostDataType)0x2f08ab418ba0541LL);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string(" int numBits = sizeof(")
            + DataType().RegularTypeName()
            + ") * 8;\n"
              " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n"
              "\n"
              " oldValues[tid] = atomic_fetch_xor"
            + postfix + "(&destMemory[0], ((" + DataType().RegularTypeName()
            + ")1 << bitIndex) " + memoryOrderScope + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        int numBits = sizeof(HostDataType) * 8;
        int bitIndex = (numBits - 1) * (tid + 1) / threadCount;

        oldValues[tid] = host_atomic_fetch_xor(
            &destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        int numBits = sizeof(HostDataType) * 8;
        expected = StartValue();
        for (cl_uint i = 0; i < threadCount; i++)
        {
            int bitIndex = (numBits - 1) * (i + 1) / threadCount;
            expected ^= ((HostDataType)1 << bitIndex);
        }
        return true;
    }
};

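// In the fetch_xor test above every thread toggles one bit of destMemory[0],
// with bitIndex = (numBits - 1) * (tid + 1) / threadCount. XOR is commutative
// and associative, so the host can replay the same bit indices in any order:
// threads that map onto the same bit cancel out pairwise, and ExpectedValue
// folds all toggles into the start pattern 0x2f08ab418ba0541.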
static int test_atomic_fetch_xor_generic(cl_device_id deviceID,
                                         cl_context context,
                                         cl_command_queue queue,
                                         int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchXor<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fetch_xor)
{
    return test_atomic_fetch_xor_generic(device, context, queue, num_elements,
                                         false);
}

REGISTER_TEST(svm_atomic_fetch_xor)
{
    return test_atomic_fetch_xor_generic(device, context, queue, num_elements,
                                         true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAnd
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(~(HostDataType)0);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        cl_uint numBits = DataType().Size(deviceID) * 8;

        return (threadCount + numBits - 1) / numBits;
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string(" size_t numBits = sizeof(")
            + DataType().RegularTypeName()
            + ") * 8;\n"
              " int whichResult = tid / numBits;\n"
              " int bitIndex = tid - (whichResult * numBits);\n"
              "\n"
              " oldValues[tid] = atomic_fetch_and"
            + postfix + "(&destMemory[whichResult], ~(("
            + DataType().RegularTypeName() + ")1 << bitIndex) "
            + memoryOrderScope + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        size_t numBits = sizeof(HostDataType) * 8;
        size_t whichResult = tid / numBits;
        size_t bitIndex = tid - (whichResult * numBits);

        oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult],
                                               ~((HostDataType)1 << bitIndex),
                                               MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1))
            / (sizeof(HostDataType) * 8);
        if (whichDestValue < numValues - 1)
        {
            expected = 0;
            return true;
        }
        // Last item doesn't get and'ed on every bit, so we have to mask away
        size_t numBits =
            threadCount - whichDestValue * (sizeof(HostDataType) * 8);
        expected = StartValue();
        for (size_t i = 0; i < numBits; i++)
            expected &= ~((HostDataType)1 << i);
        return true;
    }
};

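// fetch_and is the mirror image of the fetch_or test: starting from all-ones,
// thread tid clears bit (tid % numBits) of word (tid / numBits), so full
// words must end as 0 and the final word keeps only its unexercised bits.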
static int test_atomic_fetch_and_generic(cl_device_id deviceID,
                                         cl_context context,
                                         cl_command_queue queue,
                                         int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fetch_and)
{
    return test_atomic_fetch_and_generic(device, context, queue, num_elements,
                                         false);
}

REGISTER_TEST(svm_atomic_fetch_and)
{
    return test_atomic_fetch_and_generic(device, context, queue, num_elements,
                                         true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchOrAnd
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
    CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(0);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8);
    }
    // each thread modifies (with OR and AND operations) and verifies
    // only one bit in atomic variable
    // other bits are modified by other threads but it must not affect current
    // thread operation
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string(" int bits = sizeof(")
            + DataType().RegularTypeName() + ")*8;\n"
            + " size_t valueInd = tid/bits;\n"
              " "
            + DataType().RegularTypeName() + " value, bitMask = ("
            + DataType().RegularTypeName()
            + ")1 << tid%bits;\n"
              " oldValues[tid] = 0;\n"
              " for(int i = 0; i < "
            + IterationsStr()
            + "; i++)\n"
              " {\n"
              " value = atomic_fetch_or"
            + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
            + ");\n"
              " if(value & bitMask) // bit should be set to 0\n"
              " oldValues[tid]++;\n"
              " value = atomic_fetch_and"
            + postfix + "(destMemory+valueInd, ~bitMask" + memoryOrderScope
            + ");\n"
              " if(!(value & bitMask)) // bit should be set to 1\n"
              " oldValues[tid]++;\n"
              " }\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        int bits = sizeof(HostDataType) * 8;
        size_t valueInd = tid / bits;
        HostDataType value, bitMask = (HostDataType)1 << tid % bits;
        oldValues[tid] = 0;
        for (int i = 0; i < Iterations(); i++)
        {
            value = host_atomic_fetch_or(destMemory + valueInd, bitMask,
                                         MemoryOrder());
            if (value & bitMask) // bit should be set to 0
                oldValues[tid]++;
            value = host_atomic_fetch_and(destMemory + valueInd, ~bitMask,
                                          MemoryOrder());
            if (!(value & bitMask)) // bit should be set to 1
                oldValues[tid]++;
        }
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = 0;
        return true;
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        for (cl_uint i = 0; i < threadCount; i++)
        {
            if (refValues[i] > 0)
            {
                log_error("Thread %d found %d mismatch(es)\n", i,
                          (cl_uint)refValues[i]);
                correct = false;
            }
        }
        return true;
    }
};

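// Invariant checked above, per bit: atomic_fetch_or must observe the bit
// clear (the thread itself cleared it with fetch_and in the previous
// iteration) and atomic_fetch_and must observe it set (the thread just set
// it). Any violation increments oldValues[tid], so a fully passing run
// leaves every counter at zero regardless of what other threads do to
// neighbouring bits of the same word.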
static int test_atomic_fetch_orand_generic(cl_device_id deviceID,
                                           cl_context context,
                                           cl_command_queue queue,
                                           int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchOrAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                             useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
        TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(
        TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fetch_orand)
{
    return test_atomic_fetch_orand_generic(device, context, queue, num_elements,
                                           false);
}

REGISTER_TEST(svm_atomic_fetch_orand)
{
    return test_atomic_fetch_orand_generic(device, context, queue, num_elements,
                                           true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchXor2
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
    CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(0);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8);
    }
    // each thread modifies (with XOR operation) and verifies
    // only one bit in atomic variable
    // other bits are modified by other threads but it must not affect current
    // thread operation
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return std::string(" int bits = sizeof(")
            + DataType().RegularTypeName() + ")*8;\n"
            + " size_t valueInd = tid/bits;\n"
              " "
            + DataType().RegularTypeName() + " value, bitMask = ("
            + DataType().RegularTypeName()
            + ")1 << tid%bits;\n"
              " oldValues[tid] = 0;\n"
              " for(int i = 0; i < "
            + IterationsStr()
            + "; i++)\n"
              " {\n"
              " value = atomic_fetch_xor"
            + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
            + ");\n"
              " if(value & bitMask) // bit should be set to 0\n"
              " oldValues[tid]++;\n"
              " value = atomic_fetch_xor"
            + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
            + ");\n"
              " if(!(value & bitMask)) // bit should be set to 1\n"
              " oldValues[tid]++;\n"
              " }\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        int bits = sizeof(HostDataType) * 8;
        size_t valueInd = tid / bits;
        HostDataType value, bitMask = (HostDataType)1 << tid % bits;
        oldValues[tid] = 0;
        for (int i = 0; i < Iterations(); i++)
        {
            value = host_atomic_fetch_xor(destMemory + valueInd, bitMask,
                                          MemoryOrder());
            if (value & bitMask) // bit should be set to 0
                oldValues[tid]++;
            value = host_atomic_fetch_xor(destMemory + valueInd, bitMask,
                                          MemoryOrder());
            if (!(value & bitMask)) // bit should be set to 1
                oldValues[tid]++;
        }
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = 0;
        return true;
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        for (cl_uint i = 0; i < threadCount; i++)
        {
            if (refValues[i] > 0)
            {
                log_error("Thread %d found %d mismatches\n", i,
                          (cl_uint)refValues[i]);
                correct = false;
            }
        }
        return true;
    }
};

static int test_atomic_fetch_xor2_generic(cl_device_id deviceID,
                                          cl_context context,
                                          cl_command_queue queue,
                                          int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchXor2<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                            useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                               useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                               useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor2<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fetch_xor2)
{
    return test_atomic_fetch_xor2_generic(device, context, queue, num_elements,
                                          false);
}

REGISTER_TEST(svm_atomic_fetch_xor2)
{
    return test_atomic_fetch_xor2_generic(device, context, queue, num_elements,
                                          true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMin
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
    double min_range;
    double max_range;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM),
          min_range(-999.0), max_range(999.0)
    {
        StartValue(DataType().MaxValue());
        if constexpr (is_host_fp_v<HostDataType>)
        {
            CBasicTestMemOrderScope<HostAtomicType,
                                    HostDataType>::OldValueCheck(false);
        }
    }
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        if constexpr (is_host_fp_v<HostDataType>)
        {
            return " atomic_fetch_min" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
                + " oldValues[tid] = atomic_fetch_min" + postfix
                + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
                + ")0" + memoryOrderScope + ");\n";
        }
        else
        {
            return " oldValues[tid] = atomic_fetch_min" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope
                + ");\n";
        }
    }
    void HostFunction(cl_uint tid, cl_uint threadCount,
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            host_atomic_fetch_min(&destMemory[0], oldValues[tid],
                                  MemoryOrder());
            oldValues[tid] = host_atomic_fetch_min(
                &destMemory[tid], (HostDataType)0, MemoryOrder());
        }
        else
        {
            oldValues[tid] = host_atomic_fetch_min(
                &destMemory[0], oldValues[tid], MemoryOrder());
        }
    }
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = cl_half_from_float(
                    get_random_float(min_range, max_range, d),
                    gHalfRoundingMode);
            }
        }
        else if constexpr (
            std::is_same_v<
                HostDataType,
                HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = get_random_float(min_range, max_range, d);
            }
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = genrand_int32(d);
                if (sizeof(HostDataType) >= 8)
                {
                    cl_ulong v = startRefValues[i];
                    v |= (cl_ulong)genrand_int32(d) << 16;
                    startRefValues[i] = v;
                }
            }
        }
        return true;
    }
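    // Note on the integer path above: genrand_int32 yields 32 random bits,
    // so for 8-byte types a second draw is shifted in (by 16, giving values
    // of up to 48 bits) to make sure the minimum search exercises more than
    // the low word.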
    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                       HostDataType *startRefValues,
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
                    if (startRefValues[i] < expected)
                        expected = startRefValues[i];
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                if (startRefValues[i] < expected) expected = startRefValues[i];
            }
        }
        return true;
    }
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             cl_uint whichDestValue) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
                    IsTestNotAsExpected(expected, testValues, whichDestValue);
            return false; // ignore all but 0 which stores final result
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                               testValues,
                                                               whichDestValue);
    }
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            correct = true;
            for (cl_uint i = 1; i < threadCount; i++)
            {
                if (refValues[i] != StartValue())
                {
                    log_error(
                        "Thread %d read unexpected value %lf, start value=%lf\n",
                        i, (double)refValues[i], (double)StartValue());
                    correct = false;
                }
            }
            return !correct;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::VerifyRefs(correct,
                                                                 threadCount,
                                                                 refValues,
                                                                 finalValues);
    }
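    // Protocol for the floating-point variant: destMemory[0] accumulates the
    // global minimum of all reference values, while each destMemory[tid]
    // (tid > 0) is only touched by its own thread, whose fetch_min against 0
    // returns the untouched start value into oldValues[tid]. VerifyRefs
    // checks those returned values; IsTestNotAsExpected only judges slot 0,
    // which holds the final minimum.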
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            if (LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::NumResults(threadCount,
                                                                 deviceID);
    }
};

static int test_atomic_fetch_min_generic(cl_device_id deviceID,
                                         cl_context context,
                                         cl_command_queue queue,
                                         int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchMin<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMin<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));

    if (gFloatAtomicsSupported)
    {
        CBasicTestFetchMin<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
            TYPE_ATOMIC_DOUBLE, useSVM);
        EXECUTE_TEST(
            error, test_double.Execute(deviceID, context, queue, num_elements));

        CBasicTestFetchMin<HOST_ATOMIC_HALF, HOST_HALF> test_half(
            TYPE_ATOMIC_HALF, useSVM);
        EXECUTE_TEST(error,
                     test_half.Execute(deviceID, context, queue, num_elements));

        CBasicTestFetchMin<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
            TYPE_ATOMIC_FLOAT, useSVM);
        EXECUTE_TEST(
            error, test_float.Execute(deviceID, context, queue, num_elements));
    }

    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fetch_min)
{
    return test_atomic_fetch_min_generic(device, context, queue, num_elements,
                                         false);
}

REGISTER_TEST(svm_atomic_fetch_min)
{
    return test_atomic_fetch_min_generic(device, context, queue, num_elements,
                                         true);
}

template <typename HostAtomicType, typename HostDataType>
|
|
class CBasicTestFetchMax
|
|
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
|
|
double min_range;
|
|
double max_range;
|
|
|
|
public:
|
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
|
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
|
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
|
|
using CBasicTestMemOrderScope<HostAtomicType,
|
|
HostDataType>::MemoryOrderScopeStr;
|
|
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
|
|
CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM)
|
|
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
|
|
useSVM),
|
|
min_range(-999.0), max_range(999.0)
|
|
{
|
|
StartValue(DataType().MinValue());
|
|
if constexpr (is_host_fp_v<HostDataType>)
|
|
{
|
|
CBasicTestMemOrderScope<HostAtomicType,
|
|
HostDataType>::OldValueCheck(false);
|
|
}
|
|
}
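
    // For the floating-point instantiations the generic old-value check is
    // disabled above: as ProgramCore() below shows, each work-item first folds
    // its random input into destMemory[0] and then reuses oldValues[tid] to
    // return the previous value of its private destMemory[tid] slot, so
    // oldValues no longer holds the original reference inputs that the
    // generic check would compare against.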
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        if constexpr (
            std::is_same_v<
                HostDataType,
                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            return " atomic_fetch_max" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
                + " oldValues[tid] = atomic_fetch_max" + postfix
                + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
                + ")0" + memoryOrderScope + ");\n";
        }
        else
        {
            return " oldValues[tid] = atomic_fetch_max" + postfix
                + "(&destMemory[0], oldValues[tid] " + memoryOrderScope
                + ");\n";
        }
    }
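
    // As a concrete illustration (assuming MemoryOrderScopeStr() yields
    // ", memory_order_seq_cst, memory_scope_device" and
    // AddSubOperandTypeName() yields "float"), the floating-point branch
    // above generates:
    //
    //   atomic_fetch_max_explicit(&destMemory[0], oldValues[tid] ,
    //                             memory_order_seq_cst, memory_scope_device);
    //   oldValues[tid] = atomic_fetch_max_explicit(&destMemory[tid], (float)0,
    //                             memory_order_seq_cst, memory_scope_device);
    //
    // i.e. destMemory[0] accumulates the maximum of all inputs while each
    // destMemory[tid] slot merely reports its previous (start) value.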
    void HostFunction(cl_uint tid, cl_uint threadCount,
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            host_atomic_fetch_max(&destMemory[0], oldValues[tid],
                                  MemoryOrder());
            oldValues[tid] = host_atomic_fetch_max(
                &destMemory[tid], (HostDataType)0, MemoryOrder());
        }
        else
        {
            oldValues[tid] = host_atomic_fetch_max(
                &destMemory[0], oldValues[tid], MemoryOrder());
        }
    }
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = static_cast<HostDataType>(
                    get_random_float(min_range, max_range, d));
            }
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                startRefValues[i] = genrand_int32(d);
                if (sizeof(HostDataType) >= 8)
                {
                    cl_ulong v = startRefValues[i];
                    v |= (cl_ulong)genrand_int32(d) << 16;
                    startRefValues[i] = v;
                }
            }
        }
        return true;
    }
    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                       HostDataType *startRefValues,
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
                    if (startRefValues[i] > expected)
                        expected = startRefValues[i];
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
            {
                if (startRefValues[i] > expected) expected = startRefValues[i];
            }
        }
        return true;
    }
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             cl_uint whichDestValue) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
                    IsTestNotAsExpected(expected, testValues, whichDestValue);
            return false; // ignore all but 0 which stores final result
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                               testValues,
                                                               whichDestValue);
    }
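
    // VerifyRefs (below) starts scanning at index 1: oldValues[0] belongs to
    // the work-item whose private slot is also the shared accumulator
    // destMemory[0], so its reported previous value is timing-dependent.
    // Every other slot is only ever combined with 0 by its own work-item and
    // must therefore still report StartValue() (the type's minimum).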
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            correct = true;
            for (cl_uint i = 1; i < threadCount; i++)
            {
                if (refValues[i] != StartValue())
                {
                    log_error(
                        "Thread %u: reference value %f differs from start "
                        "value %f\n",
                        i, (double)refValues[i], (double)StartValue());
                    correct = false;
                }
            }
            return !correct;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::VerifyRefs(correct,
                                                                 threadCount,
                                                                 refValues,
                                                                 finalValues);
    }
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            if (LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0; // skip test - not applicable

            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
                    == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType,
                                       HostDataType>::NumResults(threadCount,
                                                                 deviceID);
    }
};

static int test_atomic_fetch_max_generic(cl_device_id deviceID,
                                         cl_context context,
                                         cl_command_queue queue,
                                         int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchMax<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));

    if (gFloatAtomicsSupported)
    {
        CBasicTestFetchMax<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
            TYPE_ATOMIC_DOUBLE, useSVM);
        EXECUTE_TEST(
            error, test_double.Execute(deviceID, context, queue, num_elements));

        CBasicTestFetchMax<HOST_ATOMIC_HALF, HOST_HALF> test_half(
            TYPE_ATOMIC_HALF, useSVM);
        EXECUTE_TEST(error,
                     test_half.Execute(deviceID, context, queue, num_elements));

        CBasicTestFetchMax<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
            TYPE_ATOMIC_FLOAT, useSVM);
        EXECUTE_TEST(
            error, test_float.Execute(deviceID, context, queue, num_elements));
    }

    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fetch_max)
{
    return test_atomic_fetch_max_generic(device, context, queue, num_elements,
                                         false);
}

REGISTER_TEST(svm_atomic_fetch_max)
{
    return test_atomic_fetch_max_generic(device, context, queue, num_elements,
                                         true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFlag
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
    static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(0);
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    TExplicitMemoryOrderType MemoryOrderForClear()
    {
        // Memory ordering for atomic_flag_clear function
        // ("shall not be memory_order_acquire nor memory_order_acq_rel")
        if (MemoryOrder() == MEMORY_ORDER_ACQUIRE) return MEMORY_ORDER_RELAXED;
        if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) return MEMORY_ORDER_RELEASE;
        return MemoryOrder();
    }
    std::string MemoryOrderScopeStrForClear()
    {
        std::string orderStr;
        if (MemoryOrder() != MEMORY_ORDER_EMPTY)
            orderStr = std::string(", ")
                + get_memory_order_type_name(MemoryOrderForClear());
        return orderStr + MemoryScopeStr();
    }
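
    // For example, with MemoryOrder() == MEMORY_ORDER_ACQ_REL and (assuming
    // MemoryScopeStr() yields ", memory_scope_device") this returns
    // ", memory_order_release, memory_scope_device": the acquire half is
    // dropped because atomic_flag_clear is a pure store-release operation.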
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        // This test assumes support for the memory_scope_device scope in the
        // case that LocalMemory() == false. Therefore we should skip this test
        // in that configuration on a 3.0 driver, since support for the
        // memory_scope_device scope is optional there.
        if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
        {
            if (!LocalMemory()
                && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
            {
                log_info("Skipping atomic_flag test: it uses "
                         "memory_scope_device, "
                         "which is optional and not supported on this "
                         "device\n");
                return 0; // skip test - not applicable
            }
        }
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
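
    // The kernel emitted by ProgramCore() below makes each work-item walk the
    // atomic_flag array looking for a critical section it enters first
    // (atomic_flag_test_and_set returning false). Where the chosen memory
    // order alone does not give the acquire/release semantics needed around
    // the oldValues accesses (relaxed/release on entry, relaxed/acquire on
    // exit), or where the flags live in local memory, an explicit
    // atomic_work_item_fence is inserted around the critical section.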
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        std::string program =
            " uint cnt, stop = 0;\n"
            " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread "
            "must find critical section where it is the first visitor\n"
            " {\n"
            " bool set = atomic_flag_test_and_set"
            + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n";
        if (MemoryOrder() == MEMORY_ORDER_RELAXED
            || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory())
            program += " atomic_work_item_fence("
                + std::string(
                       LocalMemory()
                           ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, "
                           : "CLK_GLOBAL_MEM_FENCE, ")
                + "memory_order_acquire,"
                + std::string(LocalMemory()
                                  ? "memory_scope_work_group"
                                  : (UseSVM() ? "memory_scope_all_svm_devices"
                                              : "memory_scope_device"))
                + ");\n";

        program += " if (!set)\n"
                   " {\n";

        if (LocalMemory())
            program += " uint csIndex = "
                       "get_enqueued_local_size(0)*get_group_id(0)+cnt;\n";
        else
            program += " uint csIndex = cnt;\n";

        std::ostringstream csNotVisited;
        csNotVisited << CRITICAL_SECTION_NOT_VISITED;
        program += " // verify that thread is the first visitor\n"
                   " if(oldValues[csIndex] == "
            + csNotVisited.str()
            + ")\n"
              " {\n"
              " oldValues[csIndex] = tid; // set the winner id for this "
              "critical section\n"
              " stop = 1;\n"
              " }\n";

        if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
            || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory())
            program += " atomic_work_item_fence("
                + std::string(
                       LocalMemory()
                           ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, "
                           : "CLK_GLOBAL_MEM_FENCE, ")
                + "memory_order_release,"
                + std::string(LocalMemory()
                                  ? "memory_scope_work_group"
                                  : (UseSVM() ? "memory_scope_all_svm_devices"
                                              : "memory_scope_device"))
                + ");\n";

        program += " atomic_flag_clear" + postfix + "(&destMemory[cnt]"
            + MemoryOrderScopeStrForClear()
            + ");\n"
              " }\n"
              " }\n";
        return program;
    }
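
    // The host-side mirror below runs the same protocol; the host threads
    // behave like a single work-group, so csIndex is simply cnt and no
    // separate fences are emitted (the host_atomic_* helpers apply the
    // requested memory order directly).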
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        cl_uint cnt, stop = 0;
        for (cnt = 0; !stop && cnt < threadCount;
             cnt++) // each thread must find critical section where it is the
                    // first visitor
        {
            if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder()))
            {
                cl_uint csIndex = cnt;
                // verify that thread is the first visitor
                if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED)
                {
                    oldValues[csIndex] =
                        tid; // set the winner id for this critical section
                    stop = 1;
                }
                host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear());
            }
        }
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = StartValue();
        return true;
    }
    virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                              MTdata d)
    {
        for (cl_uint i = 0; i < threadCount; i++)
            startRefValues[i] = CRITICAL_SECTION_NOT_VISITED;
        return true;
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        /* We are expecting unique values from 0 to threadCount-1 (each critical
         * section must be visited) */
        /* These values must be distributed across refValues array */
        std::vector<bool> tidFound(threadCount);
        cl_uint i;

        for (i = 0; i < threadCount; i++)
        {
            cl_uint value = (cl_uint)refValues[i];
            if (value == CRITICAL_SECTION_NOT_VISITED)
            {
                // Special initial value
                log_error("ERROR: Critical section %u not visited\n", i);
                correct = false;
                return true;
            }
            if (value >= threadCount)
            {
                log_error(
                    "ERROR: Reference value #%u (%u) is outside the valid "
                    "range\n",
                    i, value);
                correct = false;
                return true;
            }
            if (tidFound[value])
            {
                log_error("ERROR: Value (%u) occurred more than once\n",
                          value);
                correct = false;
                return true;
            }
            tidFound[value] = true;
        }
        return true;
    }
};

static int test_atomic_flag_generic(cl_device_id deviceID, cl_context context,
                                    cl_command_queue queue, int num_elements,
                                    bool useSVM)
{
    int error = 0;
    CBasicTestFlag<HOST_ATOMIC_FLAG, HOST_FLAG> test_flag(TYPE_ATOMIC_FLAG,
                                                          useSVM);
    EXECUTE_TEST(error,
                 test_flag.Execute(deviceID, context, queue, num_elements));
    return error;
}

REGISTER_TEST(atomic_flag)
{
    return test_atomic_flag_generic(device, context, queue, num_elements,
                                    false);
}

REGISTER_TEST(svm_atomic_flag)
{
    return test_atomic_flag_generic(device, context, queue, num_elements, true);
}

template <typename HostAtomicType, typename HostDataType>
class CBasicTestFence
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
    struct TestDefinition
    {
        bool op1IsFence;
        TExplicitMemoryOrderType op1MemOrder;
        bool op2IsFence;
        TExplicitMemoryOrderType op2MemOrder;
    };

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::DeclaredInProgram;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UsedInFunction;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType,
                                  HostDataType>::CurrentGroupSize;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalRefValues;
    CBasicTestFence(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                useSVM)
    {
        StartValue(0);
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual cl_uint NumNonAtomicVariablesPerThread()
    {
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) return 1;
        if (LocalMemory())
        {
            if (gIsEmbedded)
            {
                if (CurrentGroupSize() > 512) CurrentGroupSize(512);
                return 2; // 1KB of local memory required by spec. Clamp group
                          // size to 512 and allow 2 variables per thread
            }
            else
                return 32 * 1024 / 8 / CurrentGroupSize()
                    - 1; // 32KB of local memory required by spec
        }
        return 256;
    }
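
    // Worked example for the full-profile local-memory budget above: with the
    // 32KB minimum, the worst-case 8-byte element the divisor assumes, and a
    // work-group of 256, each work-item gets 32*1024/8/256 - 1 = 15 non-atomic
    // slots; the trailing -1 keeps the total allocation strictly below the
    // 32KB minimum.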
    virtual std::string SingleTestName()
    {
        std::string testName;
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
            testName += "seq_cst fence, ";
        else
            testName +=
                std::string(get_memory_order_type_name(_subCase.op1MemOrder))
                    .substr(sizeof("memory_order"))
                + (_subCase.op1IsFence ? " fence" : " atomic")
                + " synchronizes-with "
                + std::string(get_memory_order_type_name(_subCase.op2MemOrder))
                      .substr(sizeof("memory_order"))
                + (_subCase.op2IsFence ? " fence" : " atomic") + ", ";
        testName += CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
        testName += std::string(", ")
            + std::string(get_memory_scope_type_name(MemoryScope()))
                  .substr(sizeof("memory"));
        return testName;
    }
    virtual bool SVMDataBufferAllSVMConsistent()
    {
        // Although memory_scope_all_devices doesn't mention SVM it is just an
        // alias for memory_scope_all_svm_devices. So both scopes interact with
        // SVM allocations, on devices that support those, just the same.
        return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
            || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
    }
    virtual int ExecuteForEachParameterSet(cl_device_id deviceID,
                                           cl_context context,
                                           cl_command_queue queue)
    {
        int error = 0;
        // execute 3 (maximum) sub cases for each memory order
        for (_subCaseId = 0; _subCaseId < 3; _subCaseId++)
        {
            EXECUTE_TEST(
                error,
                (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
                     ExecuteForEachParameterSet(deviceID, context, queue)));
        }
        return error;
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        if (DeclaredInProgram() || UsedInFunction())
            return 0; // skip test - not applicable - no overloaded fence
                      // functions for different address spaces
        if (MemoryOrder() == MEMORY_ORDER_EMPTY
            || MemoryScope()
                == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since
                                       // opencl20-openclc-rev15
            return 0; // skip test - not applicable
        if ((UseSVM() || gHost) && LocalMemory())
            return 0; // skip test - not applicable for SVM and local memory
        struct TestDefinition acqTests[] = {
            // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder}
            { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE }
        };
        struct TestDefinition relTests[] = {
            { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }
        };
        struct TestDefinition arTests[] = {
            { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL },
            { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL }
        };
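
        // Each row pairs a release-side operation (op1: either the store
        // itself or a fence before a relaxed store) with an acquire-side
        // operation (op2: either the load itself or a fence after a relaxed
        // load); the three tables group the rows by which side carries the
        // memory order currently under test.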
        switch (MemoryOrder())
        {
            case MEMORY_ORDER_ACQUIRE:
                if (_subCaseId
                    >= sizeof(acqTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = acqTests[_subCaseId];
                break;
            case MEMORY_ORDER_RELEASE:
                if (_subCaseId
                    >= sizeof(relTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = relTests[_subCaseId];
                break;
            case MEMORY_ORDER_ACQ_REL:
                if (_subCaseId
                    >= sizeof(arTests) / sizeof(struct TestDefinition))
                    return 0;
                _subCase = arTests[_subCaseId];
                break;
            case MEMORY_ORDER_SEQ_CST:
                if (_subCaseId != 0) // one special case only
                    return 0;
                break;
            default: return 0;
        }
        LocalRefValues(LocalMemory());
        return CBasicTestMemOrderScope<
            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
                                                             queue);
    }
    virtual std::string ProgramHeader(cl_uint maxNumDestItems)
    {
        std::string header;
        if (gOldAPI)
        {
            if (MemoryScope() == MEMORY_SCOPE_EMPTY)
            {
                header += "#define atomic_work_item_fence(x,y) "
                          " mem_fence(x)\n";
            }
            else
            {
                header += "#define atomic_work_item_fence(x,y,z) "
                          " mem_fence(x)\n";
            }
        }
        return header
            + CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
                ProgramHeader(maxNumDestItems);
    }
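
    // Under gOldAPI (OpenCL 1.x compilation) the macro above makes every
    // generated atomic_work_item_fence(...) call collapse to its first
    // argument only, e.g.
    //   atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_acquire,
    //                          memory_scope_device)
    // becomes mem_fence(CLK_GLOBAL_MEM_FENCE); the order and scope arguments
    // are discarded because the 1.x fence has no such parameters.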
    virtual std::string ProgramCore()
    {
        std::ostringstream naValues;
        naValues << NumNonAtomicVariablesPerThread();
        std::string program, fenceType, nonAtomic;
        if (LocalMemory())
        {
            program = " size_t myId = get_local_id(0), hisId = "
                      "get_local_size(0)-1-myId;\n";
            fenceType = "CLK_LOCAL_MEM_FENCE";
            nonAtomic = "localValues";
        }
        else
        {
            program = " size_t myId = tid, hisId = threadCount-1-tid;\n";
            fenceType = "CLK_GLOBAL_MEM_FENCE";
            nonAtomic = "oldValues";
        }
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
        {
            // All threads are divided into pairs.
            // Each thread has its own atomic variable and performs the
            // following actions:
            // - increments its own variable
            // - performs fence operation to propagate its value and to see
            //   the value from the other thread
            // - reads value from other thread's variable
            // - repeats the above steps while both values are the same (and
            //   less than 500000)
            // - stores the last value read from the other thread (in an
            //   additional variable).
            // At the end of execution at least one thread should know the
            // last value from the other thread.
            program += std::string("") + " " + DataType().RegularTypeName()
                + " myValue = 0, hisValue; \n"
                  " do {\n"
                  " myValue++;\n"
                  " atomic_store_explicit(&destMemory[myId], myValue, "
                  "memory_order_relaxed"
                + MemoryScopeStr()
                + ");\n"
                  " atomic_work_item_fence("
                + fenceType + ", memory_order_seq_cst" + MemoryScopeStr()
                + "); \n"
                  " hisValue = atomic_load_explicit(&destMemory[hisId], "
                  "memory_order_relaxed"
                + MemoryScopeStr()
                + ");\n"
                  " } while(myValue == hisValue && myValue < 500000);\n"
                  " "
                + nonAtomic + "[myId] = hisValue; \n";
        }
        else
        {
            // Each thread modifies one of its non-atomic variables, increments
            // the value of its atomic variable and reads values from another
            // thread in a typical synchronizes-with scenario with:
            // - non-atomic variable (at index A) modification (value change
            //   from 0 to A)
            // - release operation (additional fence or within atomic) + atomic
            //   variable modification (value A)
            // - atomic variable read (value B) + acquire operation (additional
            //   fence or within atomic)
            // - non-atomic variable (at index B) read (value C)
            // Each thread verifies the dependency between atomic and
            // non-atomic values read from another thread. The following
            // condition must be true: B == C.
            program += std::string("") + " " + DataType().RegularTypeName()
                + " myValue = 0, hisAtomicValue, hisValue; \n"
                  " do {\n"
                  " myValue++;\n"
                  " "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue] = myValue;\n";
            if (_subCase.op1IsFence)
                program += std::string("") + " atomic_work_item_fence("
                    + fenceType + ", "
                    + get_memory_order_type_name(_subCase.op1MemOrder)
                    + MemoryScopeStr()
                    + "); \n"
                      " atomic_store_explicit(&destMemory[myId], myValue, "
                      "memory_order_relaxed"
                    + MemoryScopeStr() + ");\n";
            else
                program += std::string("")
                    + " atomic_store_explicit(&destMemory[myId], myValue, "
                    + get_memory_order_type_name(_subCase.op1MemOrder)
                    + MemoryScopeStr() + ");\n";
            if (_subCase.op2IsFence)
                program += std::string("")
                    + " hisAtomicValue = "
                      "atomic_load_explicit(&destMemory[hisId], "
                      "memory_order_relaxed"
                    + MemoryScopeStr()
                    + ");\n"
                      " atomic_work_item_fence("
                    + fenceType + ", "
                    + get_memory_order_type_name(_subCase.op2MemOrder)
                    + MemoryScopeStr() + "); \n";
            else
                program += std::string("")
                    + " hisAtomicValue = "
                      "atomic_load_explicit(&destMemory[hisId], "
                    + get_memory_order_type_name(_subCase.op2MemOrder)
                    + MemoryScopeStr() + ");\n";
            program += " hisValue = " + nonAtomic + "[hisId*"
                + naValues.str() + "+hisAtomicValue]; \n";
            if (LocalMemory())
                program += " hisId = (hisId+1)%get_local_size(0);\n";
            else
                program += " hisId = (hisId+1)%threadCount;\n";
            program += " } while(hisAtomicValue == hisValue && myValue < "
                + naValues.str()
                + "-1);\n"
                  " if(hisAtomicValue != hisValue)\n"
                  " { // fail\n"
                  " atomic_store_explicit(&destMemory[myId], myValue-1,"
                  " memory_order_relaxed, memory_scope_work_group);\n";
            if (LocalMemory())
                program += " hisId = "
                           "(hisId+get_local_size(0)-1)%get_local_size(0);\n";
            else
                program += " hisId = (hisId+threadCount-1)%threadCount;\n";
            program += " if(myValue+1 < " + naValues.str()
                + ")\n"
                  " "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue+1] = hisId;\n"
                  " if(myValue+2 < "
                + naValues.str()
                + ")\n"
                  " "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue+2] = hisAtomicValue;\n"
                  " if(myValue+3 < "
                + naValues.str()
                + ")\n"
                  " "
                + nonAtomic + "[myId*" + naValues.str()
                + "+myValue+3] = hisValue;\n";
            if (gDebug)
            {
                program += " printf(\"WI %d: atomic value (%d) at index %d "
                           "is different than non-atomic value (%d)\\n\", tid, "
                           "hisAtomicValue, hisId, hisValue);\n";
            }
            program += " }\n";
        }
        return program;
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        size_t myId = tid, hisId = threadCount - 1 - tid;
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
        {
            HostDataType myValue = 0, hisValue;
            // CPU thread typically starts faster - wait for GPU thread
            myValue++;
            host_atomic_store<HostAtomicType, HostDataType>(
                &destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST);
            while (host_atomic_load<HostAtomicType, HostDataType>(
                       &destMemory[hisId], MEMORY_ORDER_SEQ_CST)
                   == 0)
                ;
            do
            {
                myValue++;
                host_atomic_store<HostAtomicType, HostDataType>(
                    &destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
                host_atomic_thread_fence(MemoryOrder());
                hisValue = host_atomic_load<HostAtomicType, HostDataType>(
                    &destMemory[hisId], MEMORY_ORDER_RELAXED);
            } while (myValue == hisValue && hisValue < 500000);
            oldValues[tid] = hisValue;
        }
        else
        {
            HostDataType myValue = 0, hisAtomicValue, hisValue;
            do
            {
                myValue++;
                oldValues[myId * NumNonAtomicVariablesPerThread() + myValue] =
                    myValue;
                if (_subCase.op1IsFence)
                {
                    host_atomic_thread_fence(_subCase.op1MemOrder);
                    host_atomic_store<HostAtomicType, HostDataType>(
                        &destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
                }
                else
                    host_atomic_store<HostAtomicType, HostDataType>(
                        &destMemory[myId], myValue, _subCase.op1MemOrder);
                if (_subCase.op2IsFence)
                {
                    hisAtomicValue =
                        host_atomic_load<HostAtomicType, HostDataType>(
                            &destMemory[hisId], MEMORY_ORDER_RELAXED);
                    host_atomic_thread_fence(_subCase.op2MemOrder);
                }
                else
                    hisAtomicValue =
                        host_atomic_load<HostAtomicType, HostDataType>(
                            &destMemory[hisId], _subCase.op2MemOrder);
                hisValue = oldValues[hisId * NumNonAtomicVariablesPerThread()
                                     + hisAtomicValue];
                hisId = (hisId + 1) % threadCount;
            } while (hisAtomicValue == hisValue
                     && myValue
                         < (HostDataType)NumNonAtomicVariablesPerThread() - 1);
            if (hisAtomicValue != hisValue)
            { // fail
                host_atomic_store<HostAtomicType, HostDataType>(
                    &destMemory[myId], myValue - 1, MEMORY_ORDER_SEQ_CST);
                if (gDebug)
                {
                    hisId = (hisId + threadCount - 1) % threadCount;
                    printf("WI %d: atomic value (%d) at index %d is different "
                           "than non-atomic value (%d)\n",
                           tid, hisAtomicValue, hisId, hisValue);
                }
            }
        }
    }
    virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                              MTdata d)
    {
        for (cl_uint i = 0; i < threadCount * NumNonAtomicVariablesPerThread();
             i++)
            startRefValues[i] = 0;
        return true;
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount;
        for (cl_uint workOffset = 0; workOffset < threadCount;
             workOffset += workSize)
        {
            if (workOffset + workSize > threadCount)
                // last workgroup (host threads)
                workSize = threadCount - workOffset;
            for (cl_uint i = 0; i < workSize && workOffset + i < threadCount;
                 i++)
            {
                HostAtomicType myValue = finalValues[workOffset + i];
                if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
                {
                    HostDataType hisValue = refValues[workOffset + i];
                    if (myValue == hisValue)
                    {
                        // a draw - both threads should reach final value
                        // 500000
                        if (myValue != 500000)
                        {
                            log_error("ERROR: Invalid reference value #%u (%d "
                                      "instead of 500000)\n",
                                      workOffset + i, myValue);
                            correct = false;
                            return true;
                        }
                    }
                    else
                    {
                        // slower thread (in total order of seq_cst operations)
                        // must know last value written by faster thread
                        HostAtomicType hisRealValue =
                            finalValues[workOffset + workSize - 1 - i];
                        HostDataType myValueReadByHim =
                            refValues[workOffset + workSize - 1 - i];

                        // who is the winner? - thread with lower private
                        // counter value
                        if (myValue == hisRealValue) // forbidden result - fence
                                                     // doesn't work
                        {
                            log_error("ERROR: Atomic counter values #%u and "
                                      "#%u are the same (%u)\n",
                                      workOffset + i,
                                      workOffset + workSize - 1 - i, myValue);
                            log_error(
                                "ERROR: Both threads have outdated values read "
                                "from another thread (%u and %u)\n",
                                hisValue, myValueReadByHim);
                            correct = false;
                            return true;
                        }
                        if (myValue > hisRealValue) // I'm slower
                        {
                            if (hisRealValue != hisValue)
                            {
                                log_error("ERROR: Invalid reference value #%u "
                                          "(%d instead of %d)\n",
                                          workOffset + i, hisValue,
                                          hisRealValue);
                                log_error(
                                    "ERROR: Slower thread #%u should know "
                                    "value written by faster thread #%u\n",
                                    workOffset + i,
                                    workOffset + workSize - 1 - i);
                                correct = false;
                                return true;
                            }
                        }
                        else // I'm faster
                        {
                            if (myValueReadByHim != myValue)
                            {
                                log_error("ERROR: Invalid reference value #%u "
                                          "(%d instead of %d)\n",
                                          workOffset + workSize - 1 - i,
                                          myValueReadByHim, myValue);
                                log_error(
                                    "ERROR: Slower thread #%u should know "
                                    "value written by faster thread #%u\n",
                                    workOffset + workSize - 1 - i,
                                    workOffset + i);
                                correct = false;
                                return true;
                            }
                        }
                    }
                }
                else
                {
                    if (myValue != NumNonAtomicVariablesPerThread() - 1)
                    {
                        log_error("ERROR: Invalid atomic value #%u (%d instead "
                                  "of %d)\n",
                                  workOffset + i, myValue,
                                  NumNonAtomicVariablesPerThread() - 1);
                        log_error("ERROR: Thread #%u observed invalid values "
                                  "in other thread's variables\n",
                                  workOffset + i);
                        correct = false;
                        return true;
                    }
                }
            }
        }
        return true;
    }

private:
    size_t _subCaseId;
    struct TestDefinition _subCase;
};

#if 0
// The tests below are likely incorrect and have been disabled.
// See https://github.com/KhronosGroup/OpenCL-CTS/issues/2544
static int test_atomic_fence_generic(cl_device_id deviceID, cl_context context,
                                     cl_command_queue queue, int num_elements,
                                     bool useSVM)
{
    int error = 0;
    CBasicTestFence<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                        useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
                                                              useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFence<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFence<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fence)
{
    return test_atomic_fence_generic(device, context, queue, num_elements,
                                     false);
}

REGISTER_TEST(svm_atomic_fence)
{
    return test_atomic_fence_generic(device, context, queue, num_elements,
                                     true);
}
#endif