//
// Copyright (c) 2024 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
#include "harness/kernelHelpers.h"
#include "harness/typeWrappers.h"
#include "harness/conversions.h"
#include "common.h"
#include "host_atomics.h"
#include <algorithm>
#include <numeric>
#include <sstream>
#include <vector>
template <typename HostAtomicType, typename HostDataType>
class CBasicTestStore
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestStore(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
OldValueCheck(false);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return threadCount;
}
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue)
{
if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
|| MemoryOrder() == MEMORY_ORDER_ACQ_REL)
return 0; // skip test - not applicable
if (CheckCapabilities(MemoryScope(), MemoryOrder())
== TEST_SKIPPED_ITSELF)
return 0; // skip test - not applicable
if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
._type
== TYPE_ATOMIC_HALF)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT)
== 0)
return 0;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
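// Illustrative expansion (assuming memory_order_seq_cst and
// memory_scope_device were selected):
//   atomic_store_explicit(&destMemory[tid], tid,
//                         memory_order_seq_cst, memory_scope_device);
// With no explicit order/scope this reduces to:
//   atomic_store(&destMemory[tid], tid);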
return " atomic_store" + postfix + "(&destMemory[tid], tid"
+ memoryOrderScope + ");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue)
{
expected = static_cast<HostDataType>(whichDestValue);
return true;
}
};
static int test_atomic_store_generic(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements,
bool useSVM)
{
int error = 0;
CBasicTestStore<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
useSVM);
EXECUTE_TEST(error,
test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(error,
test_double.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestStore<HOST_ATOMIC_HALF, HOST_HALF> test_half(TYPE_ATOMIC_HALF,
useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestStore<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestStore<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestStore<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_store)
{
return test_atomic_store_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_store)
{
return test_atomic_store_generic(device, context, queue, num_elements,
true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestInit : public CBasicTest<HostAtomicType, HostDataType> {
public:
using CBasicTest<HostAtomicType, HostDataType>::OldValueCheck;
CBasicTestInit(TExplicitAtomicType dataType, bool useSVM)
: CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
{
OldValueCheck(false);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return threadCount;
}
virtual std::string ProgramCore()
{
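// Note: atomic_init has no _explicit variant and takes no memory order
// or scope; it performs non-atomic initialization of the atomic object.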
return " atomic_init(&destMemory[tid], tid);\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
host_atomic_init(&destMemory[tid], (HostDataType)tid);
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue)
{
expected = (HostDataType)whichDestValue;
return true;
}
};
static int test_atomic_init_generic(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements,
bool useSVM)
{
int error = 0;
CBasicTestInit<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
useSVM);
EXECUTE_TEST(error,
test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(error,
test_double.Execute(deviceID, context, queue, num_elements));
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestInit<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestInit<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestInit<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_init)
{
return test_atomic_init_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_init)
{
return test_atomic_init_generic(device, context, queue, num_elements, true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestLoad
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
OldValueCheck(false);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return threadCount;
}
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue)
{
if (MemoryOrder() == MEMORY_ORDER_RELEASE
|| MemoryOrder() == MEMORY_ORDER_ACQ_REL)
return 0; // skip test - not applicable
if (CheckCapabilities(MemoryScope(), MemoryOrder())
== TEST_SKIPPED_ITSELF)
return 0; // skip test - not applicable
if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
._type
== TYPE_ATOMIC_HALF)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT)
== 0)
return 0;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
virtual std::string ProgramCore()
{
// In the case this test is run with MEMORY_ORDER_ACQUIRE, the store
// should be MEMORY_ORDER_RELEASE
std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
std::string memoryOrderScopeStore =
(MemoryOrder() == MEMORY_ORDER_ACQUIRE)
? (", memory_order_release" + MemoryScopeStr())
: memoryOrderScopeLoad;
std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
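// Illustrative expansion for MEMORY_ORDER_ACQUIRE (assuming
// memory_scope_device):
//   atomic_store_explicit(&destMemory[tid], tid,
//                         memory_order_release, memory_scope_device);
//   oldValues[tid] = atomic_load_explicit(&destMemory[tid],
//                        memory_order_acquire, memory_scope_device);
// so the store/load pair forms release-acquire synchronization.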
return " atomic_store" + postfix + "(&destMemory[tid], tid"
+ memoryOrderScopeStore
+ ");\n"
" oldValues[tid] = atomic_load"
+ postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
host_atomic_store(&destMemory[tid], (HostDataType)tid,
MEMORY_ORDER_SEQ_CST);
oldValues[tid] = host_atomic_load<HostAtomicType, HostDataType>(
&destMemory[tid], MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue)
{
expected = static_cast<HostDataType>(whichDestValue);
return true;
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
HostDataType *refValues,
HostAtomicType *finalValues)
{
correct = true;
for (cl_uint i = 0; i < threadCount; i++)
{
if (refValues[i] != (HostDataType)i)
{
log_error("Invalid value for thread %u\n", (cl_uint)i);
correct = false;
return true;
}
}
return true;
}
};
static int test_atomic_load_generic(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements,
bool useSVM)
{
int error = 0;
CBasicTestLoad<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT,
useSVM);
EXECUTE_TEST(error,
test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(error,
test_double.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestLoad<HOST_ATOMIC_HALF, HOST_HALF> test_half(TYPE_ATOMIC_HALF,
useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestLoad<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestLoad<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_load)
{
return test_atomic_load_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_load)
{
return test_atomic_load_generic(device, context, queue, num_elements, true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestExchange
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
StartValue(1234);
}
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue)
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT)
== 0)
return 0;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return " oldValues[tid] = atomic_exchange" + postfix
+ "(&destMemory[0], tid" + memoryOrderScope
+ ");\n"
" for(int i = 0; i < "
+ IterationsStr()
+ "; i++)\n"
" oldValues[tid] = atomic_exchange"
+ postfix + "(&destMemory[0], oldValues[tid]" + memoryOrderScope
+ ");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid,
MemoryOrder());
for (int i = 0; i < Iterations(); i++)
oldValues[tid] = host_atomic_exchange(
&destMemory[0], oldValues[tid], MemoryOrder());
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
HostDataType *refValues,
HostAtomicType *finalValues)
{
OldValueCheck(
Iterations() % 2
== 0); // check is valid for even number of iterations only
correct = true;
/* We are expecting values from 0 to size-1 and initial value from
* atomic variable */
/* These values must be distributed across refValues array and atomic
* variable finalValues[0] */
/* Any repeated value is treated as an error */
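/* Worked example (illustrative): for threadCount == 4 and
* StartValue() == 1234, the multiset {refValues[0..3], finalValues[0]}
* must equal {0, 1, 2, 3, 1234} in some order. */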
std::vector<bool> tidFound(threadCount);
bool startValueFound = false;
cl_uint startVal = static_cast<cl_uint>(StartValue());
for (cl_uint i = 0; i <= threadCount; i++)
{
cl_uint value = 0;
if (i == threadCount)
{
value = static_cast<cl_uint>(
static_cast<HostDataType>(finalValues[0]));
}
else
{
value = static_cast<cl_uint>(refValues[i]);
}
if (value == startVal)
{
// Special initial value
if (startValueFound)
{
log_error("ERROR: Starting reference value (%u) occurred "
"more thane once\n",
(cl_uint)StartValue());
correct = false;
return true;
}
startValueFound = true;
continue;
}
if (value >= threadCount)
{
log_error(
"ERROR: Reference value at index %u outside of valid range! (%u)\n",
i, value);
correct = false;
return true;
}
if (tidFound[value])
{
log_error("ERROR: Value (%u) occurred more thane once\n",
value);
correct = false;
return true;
}
tidFound[value] = true;
}
return true;
}
};
static int test_atomic_exchange_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements, bool useSVM)
{
int error = 0;
CBasicTestExchange<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(error,
test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(error,
test_double.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestExchange<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestExchange<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestExchange<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestExchange<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_exchange)
{
return test_atomic_exchange_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_exchange)
{
return test_atomic_exchange_generic(device, context, queue, num_elements,
true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestCompareStrong
: public CBasicTestMemOrder2Scope<HostAtomicType, HostDataType> {
public:
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryOrder2;
using CBasicTestMemOrder2Scope<HostAtomicType,
HostDataType>::MemoryOrderScope;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::MemoryScope;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::Iterations;
using CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>::IterationsStr;
using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrder2Scope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
StartValue(123456);
OldValueCheck(false);
}
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue)
{
if (MemoryOrder2() == MEMORY_ORDER_RELEASE
|| MemoryOrder2() == MEMORY_ORDER_ACQ_REL)
return 0; // not allowed as 'failure' argument
if ((MemoryOrder() == MEMORY_ORDER_RELAXED
&& MemoryOrder2() != MEMORY_ORDER_RELAXED)
|| (MemoryOrder() != MEMORY_ORDER_SEQ_CST
&& MemoryOrder2() == MEMORY_ORDER_SEQ_CST))
return 0; // failure argument shall be no stronger than the success
if (CheckCapabilities(MemoryScope(), MemoryOrder())
== TEST_SKIPPED_ITSELF)
return 0; // skip test - not applicable
if (CheckCapabilities(MemoryScope(), MemoryOrder2())
== TEST_SKIPPED_ITSELF)
return 0; // skip test - not applicable
return CBasicTestMemOrder2Scope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScope();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
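// The generated kernel below makes each thread CAS its tid into
// destMemory[0], taking ownership of the previous value. A failed CAS
// that leaves 'expected' unchanged (previous == expected) would be a
// spurious failure, which atomic_compare_exchange_strong must never
// produce, so it is flagged with an invalid marker value.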
return std::string(" ") + DataType().RegularTypeName()
+ " expected, previous;\n"
" int successCount = 0;\n"
" oldValues[tid] = tid;\n"
" expected = tid; // force failure at the beginning\n"
" if(atomic_compare_exchange_strong"
+ postfix + "(&destMemory[0], &expected, oldValues[tid]"
+ memoryOrderScope
+ ") || expected == tid)\n"
" oldValues[tid] = threadCount+1; //mark unexpected success "
"with invalid value\n"
" else\n"
" {\n"
" for(int i = 0; i < "
+ IterationsStr()
+ " || successCount == 0; i++)\n"
" {\n"
" previous = expected;\n"
" if(atomic_compare_exchange_strong"
+ postfix + "(&destMemory[0], &expected, oldValues[tid]"
+ memoryOrderScope
+ "))\n"
" {\n"
" oldValues[tid] = expected;\n"
" successCount++;\n"
" }\n"
" else\n"
" {\n"
" if(previous == expected) // spurious failure - "
"shouldn't occur for 'strong'\n"
" {\n"
" oldValues[tid] = threadCount; //mark fail with "
"invalid value\n"
" break;\n"
" }\n"
" }\n"
" }\n"
" }\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
HostDataType expected = (HostDataType)StartValue(), previous;
oldValues[tid] = (HostDataType)tid;
for (int i = 0; i < Iterations(); i++)
{
previous = expected;
if (host_atomic_compare_exchange(&destMemory[0], &expected,
oldValues[tid], MemoryOrder(),
MemoryOrder2()))
oldValues[tid] = expected;
else
{
if (previous == expected) // shouldn't occur for 'strong'
{
oldValues[tid] = threadCount; // mark fail with invalid
// value
}
}
}
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
HostDataType *refValues,
HostAtomicType *finalValues)
{
correct = true;
/* We are expecting values from 0 to size-1 and initial value from
* atomic variable */
/* These values must be distributed across refValues array and atomic
* variable finalValues[0] */
/* Any repeated value is treated as an error */
std::vector<bool> tidFound(threadCount);
bool startValueFound = false;
cl_uint i;
for (i = 0; i <= threadCount; i++)
{
cl_uint value;
if (i == threadCount)
value = (cl_uint)finalValues[0]; // additional value from atomic
// variable (last written)
else
value = (cl_uint)refValues[i];
if (value == (cl_uint)StartValue())
{
// Special initial value
if (startValueFound)
{
log_error("ERROR: Starting reference value (%u) occurred "
"more thane once\n",
(cl_uint)StartValue());
correct = false;
return true;
}
startValueFound = true;
continue;
}
if (value >= threadCount)
{
if (value == threadCount)
log_error("ERROR: Spurious failure detected for "
"atomic_compare_exchange_strong\n");
log_error(
"ERROR: Reference value at index %u outside of valid range! (%u)\n",
i, value);
correct = false;
return true;
}
if (tidFound[value])
{
log_error("ERROR: Value (%u) occurred more thane once\n",
value);
correct = false;
return true;
}
tidFound[value] = true;
}
return true;
}
};
static int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements,
bool useSVM)
{
int error = 0;
CBasicTestCompareStrong<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_LONG, HOST_LONG> test_long(
TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32>
test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestCompareStrong<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64>
test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareStrong<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_compare_exchange_strong)
{
return test_atomic_compare_exchange_strong_generic(device, context, queue,
num_elements, false);
}
REGISTER_TEST(svm_atomic_compare_exchange_strong)
{
return test_atomic_compare_exchange_strong_generic(device, context, queue,
num_elements, true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestCompareWeak
: public CBasicTestCompareStrong<HostAtomicType, HostDataType> {
public:
using CBasicTestCompareStrong<HostAtomicType, HostDataType>::StartValue;
using CBasicTestCompareStrong<HostAtomicType,
HostDataType>::MemoryOrderScope;
using CBasicTestCompareStrong<HostAtomicType, HostDataType>::DataType;
using CBasicTestCompareStrong<HostAtomicType, HostDataType>::Iterations;
using CBasicTestCompareStrong<HostAtomicType, HostDataType>::IterationsStr;
CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestCompareStrong<HostAtomicType, HostDataType>(dataType,
useSVM)
{}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScope();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
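// Unlike the 'strong' variant above, atomic_compare_exchange_weak may
// fail spuriously (previous == expected), so the generated kernel simply
// retries until at least one success instead of treating such failures
// as errors.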
return std::string(" ") + DataType().RegularTypeName()
+ " expected , previous;\n"
" int successCount = 0;\n"
" oldValues[tid] = tid;\n"
" expected = tid; // force failure at the beginning\n"
" if(atomic_compare_exchange_weak"
+ postfix + "(&destMemory[0], &expected, oldValues[tid]"
+ memoryOrderScope
+ ") || expected == tid)\n"
" oldValues[tid] = threadCount+1; //mark unexpected success "
"with invalid value\n"
" else\n"
" {\n"
" for(int i = 0; i < "
+ IterationsStr()
+ " || successCount == 0; i++)\n"
" {\n"
" previous = expected;\n"
" if(atomic_compare_exchange_weak"
+ postfix + "(&destMemory[0], &expected, oldValues[tid]"
+ memoryOrderScope
+ "))\n"
" {\n"
" oldValues[tid] = expected;\n"
" successCount++;\n"
" }\n"
" }\n"
" }\n";
}
};
static int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements,
bool useSVM)
{
int error = 0;
CBasicTestCompareWeak<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_LONG, HOST_LONG> test_long(
TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestCompareWeak<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestCompareWeak<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_compare_exchange_weak)
{
return test_atomic_compare_exchange_weak_generic(device, context, queue,
num_elements, false);
}
REGISTER_TEST(svm_atomic_compare_exchange_weak)
{
return test_atomic_compare_exchange_weak_generic(device, context, queue,
num_elements, true);
}
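// Generic stub so the name resolves inside discarded 'if constexpr'
// branches; only the double specialization below is ever called.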
template <typename T> double kahan_sum(const std::vector<T> &nums)
{
return 0.0;
}
template <> double kahan_sum<double>(const std::vector<double> &nums)
{
double sum = 0.0;
double compensation = 0.0;
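// Kahan compensated summation: (t - sum) - y is algebraically zero, but
// in floating point it recovers the rounding error of t = sum + y;
// carrying it into the next iteration keeps the accumulated error at
// O(1) ulp instead of growing with the number of terms.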
for (double num : nums)
{
double y = num - compensation;
double t = sum + y;
compensation = (t - sum) - y;
sum = t;
}
return sum;
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
double min_range;
double max_range;
double max_error;
std::vector<HostDataType> ref_vals;
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM),
min_range(-999.0), max_range(999.0), max_error(0.0)
{
if constexpr (is_host_fp_v<HostDataType>)
{
StartValue((HostDataType)0.0);
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
// Narrow down range for half to avoid overflow to infinity
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
min_range = -50.0;
max_range = 50.0;
}
}
}
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
if (threadCount > ref_vals.size())
{
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
ref_vals[i] =
get_random_double(min_range, max_range, d);
else
ref_vals[i] = get_random_float(min_range, max_range, d);
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * ref_vals.size());
// Estimate highest possible summation error for given set.
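// The set is summed in several association orders (ascending,
// descending, by increasing magnitude, plus a near-exact Kahan or
// double-precision pass); the spread between the smallest and largest
// results is taken as the allowed error, since the device may
// accumulate the atomic adds in any order.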
std::vector<HostDataType> sums;
std::sort(ref_vals.begin(), ref_vals.end());
sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
static_cast<HostDataType>(0.f)));
sums.push_back(std::accumulate(ref_vals.rbegin(),
ref_vals.rend(),
static_cast<HostDataType>(0.f)));
std::sort(ref_vals.begin(), ref_vals.end(),
[](HostDataType a, HostDataType b) {
return std::abs(a) < std::abs(b);
});
double precise = 0.0;
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
precise = kahan_sum(ref_vals);
else
for (auto elem : ref_vals) precise += double(elem);
sums.push_back(precise);
sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
static_cast<HostDataType>(0.f)));
sums.push_back(std::accumulate(ref_vals.rbegin(),
ref_vals.rend(),
static_cast<HostDataType>(0.f)));
std::sort(sums.begin(), sums.end());
assert(std::all_of(sums.begin(), sums.end(),
[](const HostDataType &val) {
return std::isfinite(
static_cast<double>(val));
})
&& "Infinite summation value detected!");
max_error = std::abs(sums.front() - sums.back());
log_info("Max allowed error for %u elements: %.10f\n",
threadCount, max_error);
// restore unsorted order
memcpy(ref_vals.data(), startRefValues,
sizeof(HostDataType) * ref_vals.size());
}
else
{
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * threadCount);
}
return true;
}
return false;
}
std::string ProgramCore() override
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (is_host_fp_v<HostDataType>)
{
return " atomic_fetch_add" + postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
+ memoryOrderScope + ");\n"
+ " oldValues[tid] = atomic_fetch_add" + postfix
+ "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
+ ")0" + memoryOrderScope + ");\n";
}
else
{
return " oldValues[tid] = atomic_fetch_add" + postfix
+ "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
+ ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add"
+ postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")tid + 3"
+ memoryOrderScope
+ ");\n"
" atomic_fetch_add"
+ postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")tid + 3"
+ memoryOrderScope
+ ");\n"
" atomic_fetch_add"
+ postfix + "(&destMemory[0], (("
+ DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
+ DataType().AddSubOperandTypeName() + ")-1)*8"
+ memoryOrderScope + ");\n";
}
}
void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
MemoryOrder());
oldValues[tid] = host_atomic_fetch_add(
&destMemory[tid], (HostDataType)0, MemoryOrder());
}
else
{
oldValues[tid] = host_atomic_fetch_add(
&destMemory[0], (HostDataType)tid + 3, MemoryOrder());
host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
MemoryOrder());
host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
MemoryOrder());
host_atomic_fetch_add(
&destMemory[0],
(((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
MemoryOrder());
}
}
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (is_host_fp_v<HostDataType>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
expected += startRefValues[i];
}
else
{
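// Each thread performed three adds of (tid + 3) plus one add of
// (tid + 3) shifted into the most significant byte (see ProgramCore
// above), so the final value accumulates both terms per thread.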
for (cl_uint i = 0; i < threadCount; i++)
expected += ((HostDataType)i + 3) * 3
+ (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
}
return true;
}
bool IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
const std::vector<HostDataType> &startRefValues,
cl_uint whichDestValue) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
if (whichDestValue == 0)
return std::abs(
static_cast<double>(expected
- static_cast<HostDataType>(
testValues[whichDestValue])))
> max_error;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
testValues,
startRefValues,
whichDestValue);
}
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
correct = true;
for (cl_uint i = 1; i < threadCount; i++)
{
if (refValues[i] != StartValue())
{
log_error("Thread %d found %d mismatch(es)\n", i,
(cl_uint)refValues[i]);
correct = false;
}
}
return !correct;
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::VerifyRefs(correct,
threadCount,
refValues,
finalValues);
}
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
return threadCount;
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::NumResults(threadCount,
deviceID);
}
};
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAddSpecialFloats
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
std::vector<HostDataType> ref_vals;
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::DeclaredInProgram;
CBasicTestFetchAddSpecialFloats(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
// StartValue is used as an index divisor in the following test
// logic. It is set to the number of special values, which allows
// threads to be mapped deterministically onto the input data array.
// This enables repeated add operations arranged so that every
// special value is added to every other one (“all-to-all”).
if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
auto spec_vals = GetSpecialValues();
StartValue(spec_vals.size());
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
auto spec_vals = GetSpecialValues();
StartValue(cl_half_from_float((float)spec_vals.size(),
gHalfRoundingMode));
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
}
}
static std::vector<HostDataType> &GetSpecialValues()
{
static std::vector<HostDataType> special_values;
if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
const HostDataType test_value_zero =
static_cast<HostDataType>(0.0f);
const HostDataType test_value_minus_zero =
static_cast<HostDataType>(-0.0f);
const HostDataType test_value_without_fraction =
static_cast<HostDataType>(2.0f);
const HostDataType test_value_with_fraction =
static_cast<HostDataType>(2.2f);
if (special_values.empty())
{
special_values = {
static_cast<HostDataType>(test_value_minus_zero),
static_cast<HostDataType>(test_value_zero),
static_cast<HostDataType>(test_value_without_fraction),
static_cast<HostDataType>(test_value_with_fraction),
std::numeric_limits<HostDataType>::infinity(),
std::numeric_limits<HostDataType>::quiet_NaN(),
std::numeric_limits<HostDataType>::signaling_NaN(),
-std::numeric_limits<HostDataType>::infinity(),
-std::numeric_limits<HostDataType>::quiet_NaN(),
-std::numeric_limits<HostDataType>::signaling_NaN(),
std::numeric_limits<HostDataType>::lowest(),
std::numeric_limits<HostDataType>::min(),
std::numeric_limits<HostDataType>::max(),
};
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (0 != (CL_FP_DENORM & gDoubleFPConfig))
{
special_values.push_back(
std::numeric_limits<HostDataType>::denorm_min());
}
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (0 != (CL_FP_DENORM & gFloatFPConfig))
{
special_values.push_back(
std::numeric_limits<HostDataType>::denorm_min());
}
}
}
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (special_values.empty())
{
special_values = {
0xffff, /*-NaN*/ 0x0000, /*+0*/ 0x7c00, /*INFINITY*/
0xfc00, /*-INFINITY*/
0x8000, /*-0*/
0x7bff, /*HALF_MAX*/
0x0400, /*HALF_MIN*/
0x3c00, /* 1 */
0xbc00, /* -1 */
0x3555, /*nearest value to 1/3*/
0x3bff, /*largest number less than one*/
0xc000, /* -2 */
0xfbff, /* -HALF_MAX */
0x8400, /* -HALF_MIN */
0x4248, /* M_PI_H */
0xc248, /* -M_PI_H */
0xbbff, /* Largest negative fraction */
};
if (0 != (CL_FP_DENORM & gHalfFPConfig))
{
special_values.push_back(0x0001 /* Smallest denormal */);
special_values.push_back(0x03ff /* Largest denormal */);
}
}
}
return special_values;
}
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (threadCount > ref_vals.size())
{
ref_vals.assign(threadCount, 0);
auto spec_vals = GetSpecialValues();
cl_uint total_cnt = 0;
while (total_cnt < threadCount)
{
cl_uint block_cnt =
std::min((cl_int)(threadCount - total_cnt),
(cl_int)spec_vals.size());
memcpy(&ref_vals.at(total_cnt), spec_vals.data(),
sizeof(HostDataType) * block_cnt);
total_cnt += block_cnt;
}
}
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * threadCount);
return true;
}
return false;
}
std::string ProgramCore() override
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
// The start_value variable (set by StartValue) is used
// as a divisor of the thread index when selecting the operand for
// atomic_fetch_add. This groups threads into blocks corresponding
// to the number of special values and implements an “all-to-all”
// addition pattern. As a result, each destination element is
// updated using different combinations of input values, enabling
// consistent comparison between host and device execution.
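// For example (illustrative), with N special values, thread tid stores
// oldValues[tid] to destMemory[tid] and then adds oldValues[tid / N]:
// threads 0..N-1 all add oldValues[0], threads N..2N-1 add
// oldValues[1], and so on, pairing every special value with every other.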
return std::string(DataType().AddSubOperandTypeName())
+ " start_value = atomic_load_explicit(destMemory+tid, "
"memory_order_relaxed, memory_scope_work_group);\n"
" atomic_store_explicit(destMemory+tid, oldValues[tid], "
"memory_order_relaxed, memory_scope_work_group);\n"
" atomic_fetch_add"
+ postfix + "(&destMemory[tid], ("
+ DataType().AddSubOperandTypeName()
+ ")oldValues[tid/(int)start_value]" + memoryOrderScope
+ ");\n";
}
return std::string(); // fallback if the constexpr branch is discarded
}
void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues) override
{
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
auto spec_vals = GetSpecialValues();
host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
MEMORY_ORDER_SEQ_CST);
host_atomic_fetch_add(
&destMemory[tid],
(HostDataType)oldValues[tid / spec_vals.size()], MemoryOrder());
}
}
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (
std::is_same_v<
HostDataType,
HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
auto spec_vals = GetSpecialValues();
expected = startRefValues[whichDestValue]
+ startRefValues[whichDestValue / spec_vals.size()];
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
auto spec_vals = GetSpecialValues();
expected = cl_half_from_float(
cl_half_to_float(startRefValues[whichDestValue])
+ cl_half_to_float(
startRefValues[whichDestValue / spec_vals.size()]),
gHalfRoundingMode);
}
return true;
}
bool IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
const std::vector<HostDataType> &startRefValues,
cl_uint whichDestValue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
return static_cast<cl_half>(expected) != testValues[whichDestValue];
}
else
{
if (std::isnan(testValues[whichDestValue]) && std::isnan(expected))
return false;
else
return expected != testValues[whichDestValue];
}
}
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
== 0)
return 0;
if (!CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::LocalMemory()
&& CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::DeclaredInProgram())
{
if ((gDoubleFPConfig & CL_FP_INF_NAN) == 0) return 0;
}
}
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
if (!CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::LocalMemory()
&& CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::DeclaredInProgram())
{
if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0;
}
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (DeclaredInProgram()) return 0; // skip test - not applicable
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
if (!CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::LocalMemory()
&& CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::DeclaredInProgram())
{
if ((gHalfFPConfig & CL_FP_INF_NAN) == 0) return 0;
}
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
if constexpr (
std::is_same_v<
HostDataType,
HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
{
return threadCount;
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::NumResults(threadCount,
deviceID);
}
};
static int test_atomic_fetch_add_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchAdd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_DOUBLE, HOST_DOUBLE>
test_spec_double(TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error,
test_spec_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT, HOST_FLOAT>
test_spec_float(TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error,
test_spec_float.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF, HOST_HALF>
test_spec_half(TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(
error,
test_spec_half.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_fetch_add)
{
return test_atomic_fetch_add_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_fetch_add)
{
return test_atomic_fetch_add_generic(device, context, queue, num_elements,
true);
}
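// Generic stub, mirroring kahan_sum above; only the double
// specialization below is ever called.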
template <typename T> double kahan_sub(const std::vector<T> &nums)
{
return 0.0;
}
template <> double kahan_sub<double>(const std::vector<double> &nums)
{
double sum = 0.0;
double compensation = 0.0;
for (double num : nums)
{
double y = -num - compensation;
double t = sum + y;
compensation = (t - sum) - y;
sum = t;
}
return sum;
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchSub
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
double min_range;
double max_range;
double max_error;
std::vector<HostDataType> ref_vals;
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM),
min_range(-999.0), max_range(999.0), max_error(0.0)
{
if constexpr (is_host_fp_v<HostDataType>)
{
StartValue(0);
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
// Narrow down range for half to avoid overflow to infinity
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
min_range = -50.0;
max_range = 50.0;
}
}
}
template <typename Iterator>
HostDataType subtract(Iterator begin, Iterator end)
{
HostDataType res = 0;
for (auto it = begin; it != end; ++it) res = res - *it;
return res;
}
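    // The device may subtract the inputs in any order, so GenerateRefs
    // brackets the result: the set is subtracted in ascending, descending
    // and magnitude-sorted orders alongside a high-precision reference,
    // and the spread between the smallest and largest of those sums is
    // taken as the maximum tolerated rounding error.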
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
if (threadCount > ref_vals.size())
{
ref_vals.resize(threadCount);
for (cl_uint i = 0; i < threadCount; i++)
ref_vals[i] = (HostDataType)get_random_double(min_range,
max_range, d);
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * ref_vals.size());
// Estimate highest possible subtraction error for given set.
std::vector<HostDataType> sums;
std::sort(ref_vals.begin(), ref_vals.end());
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(ref_vals.begin(), ref_vals.end(),
[](HostDataType a, HostDataType b) {
return std::abs(a) < std::abs(b);
});
double precise = 0.0;
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
precise = kahan_sub(ref_vals);
else
                for (auto elem : ref_vals) precise -= double(elem);
sums.push_back(precise);
sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
std::sort(sums.begin(), sums.end());
assert(std::all_of(sums.begin(), sums.end(),
[](const HostDataType &val) {
return std::isfinite(
static_cast<double>(val));
})
&& "Infinite subtraction value detected!");
max_error = std::abs(sums.front() - sums.back());
log_info("Max allowed error for %u elements: %.10f\n",
threadCount, max_error);
// restore unsorted order
memcpy(ref_vals.data(), startRefValues,
sizeof(HostDataType) * ref_vals.size());
}
else
{
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * threadCount);
}
return true;
}
return false;
}
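    // Kernel strategy: for floating-point types every thread subtracts its
    // own random value from destMemory[0]; the second fetch_sub with an
    // operand of 0 is issued only for its return value, which reads back
    // the per-thread slot. For integer types the operand is (tid + 3) plus
    // the same value shifted into the most-significant byte, so an
    // implementation that updated only the low bits of the atomic would be
    // caught by the expected-value check.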
std::string ProgramCore() override
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (is_host_fp_v<HostDataType>)
{
return " atomic_fetch_sub" + postfix + "(&destMemory[0], ("
+ DataType().AddSubOperandTypeName() + ")oldValues[tid]"
+ memoryOrderScope + ");\n"
+ " oldValues[tid] = atomic_fetch_sub" + postfix
+ "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
+ ")0" + memoryOrderScope + ");\n";
}
else
{
return " oldValues[tid] = atomic_fetch_sub" + postfix
+ "(&destMemory[0], tid + 3 +((("
+ DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
+ DataType().AddSubOperandTypeName() + ")-1)*8)"
+ memoryOrderScope + ");\n";
}
}
void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
MemoryOrder());
oldValues[tid] = host_atomic_fetch_sub(
&destMemory[tid], (HostDataType)0, MemoryOrder());
}
else
{
oldValues[tid] =
host_atomic_fetch_sub(&destMemory[0],
(HostDataType)tid + 3
+ (((HostDataType)tid + 3)
<< (sizeof(HostDataType) - 1) * 8),
MemoryOrder());
}
}
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (is_host_fp_v<HostDataType>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
expected -= startRefValues[i];
}
else
{
for (cl_uint i = 0; i < threadCount; i++)
expected -= (HostDataType)i + 3
+ (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
}
return true;
}
bool IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
const std::vector<HostDataType> &startRefValues,
cl_uint whichDestValue) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
if (whichDestValue == 0)
return std::abs(
static_cast<double>(expected
- static_cast<HostDataType>(
testValues[whichDestValue])))
> max_error;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
testValues,
startRefValues,
whichDestValue);
}
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override
{
        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            correct = true;
            for (cl_uint i = 1; i < threadCount; i++)
            {
                if (refValues[i] != StartValue())
                {
                    log_error(
                        "Thread %d found %lf mismatch(es), start value=%lf\n",
                        i, (double)refValues[i], (double)StartValue());
correct = false;
}
}
return !correct;
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::VerifyRefs(correct,
threadCount,
refValues,
finalValues);
}
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
return 0;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
return threadCount;
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::NumResults(threadCount,
deviceID);
}
};
static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchSub<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestFetchSub<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchSub<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchSub<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_fetch_sub)
{
return test_atomic_fetch_sub_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_fetch_sub)
{
return test_atomic_fetch_sub_generic(device, context, queue, num_elements,
true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchOr
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
StartValue(0);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
cl_uint numBits = DataType().Size(deviceID) * 8;
return (threadCount + numBits - 1) / numBits;
}
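    // Each thread sets exactly one bit: bit (tid % numBits) of word
    // (tid / numBits). After all threads have run, every word must be
    // all-ones except possibly the last, which ExpectedValue masks
    // accordingly.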
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return std::string(" size_t numBits = sizeof(")
+ DataType().RegularTypeName()
+ ") * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - (whichResult * numBits);\n"
"\n"
" oldValues[tid] = atomic_fetch_or"
+ postfix + "(&destMemory[whichResult], (("
+ DataType().RegularTypeName() + ")1 << bitIndex) "
+ memoryOrderScope + ");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
size_t numBits = sizeof(HostDataType) * 8;
size_t whichResult = tid / numBits;
size_t bitIndex = tid - (whichResult * numBits);
oldValues[tid] =
host_atomic_fetch_or(&destMemory[whichResult],
((HostDataType)1 << bitIndex), MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue)
{
cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1))
/ (sizeof(HostDataType) * 8);
if (whichDestValue < numValues - 1)
{
expected = ~(HostDataType)0;
return true;
}
// Last item doesn't get or'ed on every bit, so we have to mask away
cl_uint numBits =
threadCount - whichDestValue * (sizeof(HostDataType) * 8);
expected = StartValue();
for (cl_uint i = 0; i < numBits; i++)
expected |= ((HostDataType)1 << i);
return true;
}
};
static int test_atomic_fetch_or_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchOr<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchOr<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOr<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_fetch_or)
{
return test_atomic_fetch_or_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_fetch_or)
{
return test_atomic_fetch_or_generic(device, context, queue, num_elements,
true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchXor
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
StartValue((HostDataType)0x2f08ab418ba0541LL);
}
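    // All threads XOR into a single word; thread tid toggles bit
    // (numBits - 1) * (tid + 1) / threadCount, so several threads may map
    // to the same bit. XOR is commutative and associative, so the expected
    // final value is obtained by replaying the same toggles in any order.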
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return std::string(" int numBits = sizeof(")
+ DataType().RegularTypeName()
+ ") * 8;\n"
" int bitIndex = (numBits-1)*(tid+1)/threadCount;\n"
"\n"
" oldValues[tid] = atomic_fetch_xor"
+ postfix + "(&destMemory[0], ((" + DataType().RegularTypeName()
+ ")1 << bitIndex) " + memoryOrderScope + ");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
int numBits = sizeof(HostDataType) * 8;
int bitIndex = (numBits - 1) * (tid + 1) / threadCount;
oldValues[tid] = host_atomic_fetch_xor(
&destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue)
{
int numBits = sizeof(HostDataType) * 8;
expected = StartValue();
for (cl_uint i = 0; i < threadCount; i++)
{
int bitIndex = (numBits - 1) * (i + 1) / threadCount;
expected ^= ((HostDataType)1 << bitIndex);
}
return true;
}
};
static int test_atomic_fetch_xor_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchXor<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchXor<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_fetch_xor)
{
return test_atomic_fetch_xor_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_fetch_xor)
{
return test_atomic_fetch_xor_generic(device, context, queue, num_elements,
true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAnd
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
StartValue(~(HostDataType)0);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
cl_uint numBits = DataType().Size(deviceID) * 8;
return (threadCount + numBits - 1) / numBits;
}
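    // Dual of the fetch_or test: the destination starts as all-ones and
    // each thread clears exactly one bit, so every fully covered word must
    // end up zero.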
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return std::string(" size_t numBits = sizeof(")
+ DataType().RegularTypeName()
+ ") * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - (whichResult * numBits);\n"
"\n"
" oldValues[tid] = atomic_fetch_and"
+ postfix + "(&destMemory[whichResult], ~(("
+ DataType().RegularTypeName() + ")1 << bitIndex) "
+ memoryOrderScope + ");\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
size_t numBits = sizeof(HostDataType) * 8;
size_t whichResult = tid / numBits;
size_t bitIndex = tid - (whichResult * numBits);
oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult],
~((HostDataType)1 << bitIndex),
MemoryOrder());
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue)
{
cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1))
/ (sizeof(HostDataType) * 8);
if (whichDestValue < numValues - 1)
{
expected = 0;
return true;
}
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits =
threadCount - whichDestValue * (sizeof(HostDataType) * 8);
expected = StartValue();
for (size_t i = 0; i < numBits; i++)
expected &= ~((HostDataType)1 << i);
return true;
}
};
static int test_atomic_fetch_and_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_fetch_and)
{
return test_atomic_fetch_and_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_fetch_and)
{
return test_atomic_fetch_and_generic(device, context, queue, num_elements,
true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchOrAnd
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
StartValue(0);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8);
}
    // Each thread modifies (with OR and AND operations) and verifies only
    // one bit of the atomic variable; other bits are modified concurrently
    // by other threads, but this must not affect the current thread's
    // operations.
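    // For example, with a 32-bit type thread 37 owns bit 5 of
    // destMemory[1]: each iteration ORs the bit in (the returned old value
    // must have it cleared), then ANDs it back out (the returned old value
    // must have it set); any violation increments oldValues[tid].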
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return std::string(" int bits = sizeof(")
+ DataType().RegularTypeName() + ")*8;\n"
+ " size_t valueInd = tid/bits;\n"
" "
+ DataType().RegularTypeName() + " value, bitMask = ("
+ DataType().RegularTypeName()
+ ")1 << tid%bits;\n"
" oldValues[tid] = 0;\n"
" for(int i = 0; i < "
+ IterationsStr()
+ "; i++)\n"
" {\n"
" value = atomic_fetch_or"
+ postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
+ ");\n"
" if(value & bitMask) // bit should be set to 0\n"
" oldValues[tid]++;\n"
" value = atomic_fetch_and"
+ postfix + "(destMemory+valueInd, ~bitMask" + memoryOrderScope
+ ");\n"
" if(!(value & bitMask)) // bit should be set to 1\n"
" oldValues[tid]++;\n"
" }\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
int bits = sizeof(HostDataType) * 8;
size_t valueInd = tid / bits;
HostDataType value, bitMask = (HostDataType)1 << tid % bits;
oldValues[tid] = 0;
for (int i = 0; i < Iterations(); i++)
{
value = host_atomic_fetch_or(destMemory + valueInd, bitMask,
MemoryOrder());
if (value & bitMask) // bit should be set to 0
oldValues[tid]++;
value = host_atomic_fetch_and(destMemory + valueInd, ~bitMask,
MemoryOrder());
if (!(value & bitMask)) // bit should be set to 1
oldValues[tid]++;
}
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue)
{
expected = 0;
return true;
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
HostDataType *refValues,
HostAtomicType *finalValues)
{
correct = true;
for (cl_uint i = 0; i < threadCount; i++)
{
if (refValues[i] > 0)
{
log_error("Thread %d found %d mismatch(es)\n", i,
(cl_uint)refValues[i]);
correct = false;
}
}
return true;
}
};
static int test_atomic_fetch_orand_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchOrAnd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(
TYPE_ATOMIC_UINT, useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_LONG, HOST_LONG> test_long(
TYPE_ATOMIC_LONG, useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchOrAnd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchOrAnd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_fetch_orand)
{
return test_atomic_fetch_orand_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_fetch_orand)
{
return test_atomic_fetch_orand_generic(device, context, queue, num_elements,
true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchXor2
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::Iterations;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::IterationsStr;
CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
StartValue(0);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8);
}
    // Each thread modifies (with XOR operations) and verifies only one bit
    // of the atomic variable; other bits are modified concurrently by
    // other threads, but this must not affect the current thread's
    // operations.
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return std::string(" int bits = sizeof(")
+ DataType().RegularTypeName() + ")*8;\n"
+ " size_t valueInd = tid/bits;\n"
" "
+ DataType().RegularTypeName() + " value, bitMask = ("
+ DataType().RegularTypeName()
+ ")1 << tid%bits;\n"
" oldValues[tid] = 0;\n"
" for(int i = 0; i < "
+ IterationsStr()
+ "; i++)\n"
" {\n"
" value = atomic_fetch_xor"
+ postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
+ ");\n"
" if(value & bitMask) // bit should be set to 0\n"
" oldValues[tid]++;\n"
" value = atomic_fetch_xor"
+ postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope
+ ");\n"
" if(!(value & bitMask)) // bit should be set to 1\n"
" oldValues[tid]++;\n"
" }\n";
}
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
int bits = sizeof(HostDataType) * 8;
size_t valueInd = tid / bits;
HostDataType value, bitMask = (HostDataType)1 << tid % bits;
oldValues[tid] = 0;
for (int i = 0; i < Iterations(); i++)
{
value = host_atomic_fetch_xor(destMemory + valueInd, bitMask,
MemoryOrder());
if (value & bitMask) // bit should be set to 0
oldValues[tid]++;
value = host_atomic_fetch_xor(destMemory + valueInd, bitMask,
MemoryOrder());
if (!(value & bitMask)) // bit should be set to 1
oldValues[tid]++;
}
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue)
{
expected = 0;
return true;
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
HostDataType *refValues,
HostAtomicType *finalValues)
{
correct = true;
for (cl_uint i = 0; i < threadCount; i++)
{
if (refValues[i] > 0)
{
log_error("Thread %d found %d mismatches\n", i,
(cl_uint)refValues[i]);
correct = false;
}
}
return true;
}
};
static int test_atomic_fetch_xor2_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchXor2<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchXor2<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchXor2<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_fetch_xor2)
{
return test_atomic_fetch_xor2_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_fetch_xor2)
{
return test_atomic_fetch_xor2_generic(device, context, queue, num_elements,
true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMin
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
double min_range;
double max_range;
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM),
min_range(-999.0), max_range(999.0)
{
StartValue(DataType().MaxValue());
if constexpr (is_host_fp_v<HostDataType>)
{
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
}
}
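    // For floating-point types all candidates are funnelled through
    // destMemory[0] with atomic_fetch_min; the second fetch_min with an
    // operand of 0 is issued only for its return value, which must still
    // equal StartValue (the type's maximum) for every slot other than 0.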
std::string ProgramCore() override
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
if constexpr (is_host_fp_v<HostDataType>)
{
return " atomic_fetch_min" + postfix
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
+ " oldValues[tid] = atomic_fetch_min" + postfix
+ "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
+ ")0" + memoryOrderScope + ");\n";
}
else
{
return " oldValues[tid] = atomic_fetch_min" + postfix
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope
+ ");\n";
}
}
void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
host_atomic_fetch_min(&destMemory[0], oldValues[tid],
MemoryOrder());
oldValues[tid] = host_atomic_fetch_min(
&destMemory[tid], (HostDataType)0, MemoryOrder());
}
else
{
oldValues[tid] = host_atomic_fetch_min(
&destMemory[0], oldValues[tid], MemoryOrder());
}
}
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
for (cl_uint i = 0; i < threadCount; i++)
{
startRefValues[i] = cl_half_from_float(
get_random_float(min_range, max_range, d),
gHalfRoundingMode);
}
}
        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>
                           || std::is_same_v<HostDataType, HOST_DOUBLE>)
{
for (cl_uint i = 0; i < threadCount; i++)
{
startRefValues[i] = get_random_float(min_range, max_range, d);
}
}
else
{
for (cl_uint i = 0; i < threadCount; i++)
{
startRefValues[i] = genrand_int32(d);
if (sizeof(HostDataType) >= 8)
{
cl_ulong v = startRefValues[i];
v |= (cl_ulong)genrand_int32(d) << 16;
startRefValues[i] = v;
}
}
}
return true;
}
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (is_host_fp_v<HostDataType>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
if (startRefValues[i] < expected)
expected = startRefValues[i];
}
else
{
for (cl_uint i = 0; i < threadCount; i++)
{
if (startRefValues[i] < expected) expected = startRefValues[i];
}
}
return true;
}
bool IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
const std::vector<HostDataType> &startRefValues,
cl_uint whichDestValue) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
if (whichDestValue == 0)
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
IsTestNotAsExpected(expected, testValues, startRefValues,
whichDestValue);
return false; // ignore all but 0 which stores final result
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
testValues,
startRefValues,
whichDestValue);
}
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
correct = true;
for (cl_uint i = 1; i < threadCount; i++)
{
if (refValues[i] != StartValue())
{
log_error(
"Thread %d found %lf mismatch(es), start value=%lf\n",
i, (double)refValues[i], (double)StartValue());
correct = false;
}
}
return !correct;
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::VerifyRefs(correct,
threadCount,
refValues,
finalValues);
}
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
return threadCount;
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::NumResults(threadCount,
deviceID);
}
};
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMinSpecialFloats
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
std::vector<HostDataType> ref_vals;
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestFetchMinSpecialFloats(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
        // StartValue is used as an index divisor in the following test
        // logic. It is set to the number of special values, which allows
        // threads to be mapped deterministically onto the input data array.
        // This enables repeated min operations arranged so that every
        // special value is compared with every other one ("all-to-all").
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
auto spec_vals = GetSpecialValues();
StartValue(spec_vals.size());
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
}
}
static std::vector<HostDataType> &GetSpecialValues()
{
static std::vector<HostDataType> special_values;
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
const HostDataType test_value_zero =
static_cast<HostDataType>(0.0f);
const HostDataType test_value_minus_zero =
static_cast<HostDataType>(-0.0f);
const HostDataType test_value_without_fraction =
static_cast<HostDataType>(2.0f);
const HostDataType test_value_with_fraction =
static_cast<HostDataType>(2.2f);
if (special_values.empty())
{
                special_values = {
                    test_value_minus_zero,
                    test_value_zero,
                    test_value_without_fraction,
                    test_value_with_fraction,
std::numeric_limits<HostDataType>::infinity(),
std::numeric_limits<HostDataType>::quiet_NaN(),
std::numeric_limits<HostDataType>::signaling_NaN(),
-std::numeric_limits<HostDataType>::infinity(),
-std::numeric_limits<HostDataType>::quiet_NaN(),
-std::numeric_limits<HostDataType>::signaling_NaN(),
std::numeric_limits<HostDataType>::lowest(),
std::numeric_limits<HostDataType>::min(),
std::numeric_limits<HostDataType>::max(),
};
if (0 != (CL_FP_DENORM & gFloatFPConfig))
{
special_values.push_back(
std::numeric_limits<HostDataType>::denorm_min());
}
}
}
return special_values;
}
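    // GenerateRefs tiles the special-value table end to end across the
    // input array, so thread t receives spec_vals[t % spec_vals.size()]
    // and, combined with the divisor logic in ProgramCore, every special
    // value ends up paired with every other one.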
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (threadCount > ref_vals.size())
{
ref_vals.assign(threadCount, 0);
auto spec_vals = GetSpecialValues();
cl_uint total_cnt = 0;
while (total_cnt < threadCount)
{
cl_uint block_cnt =
std::min((cl_int)(threadCount - total_cnt),
(cl_int)spec_vals.size());
memcpy(&ref_vals.at(total_cnt), spec_vals.data(),
sizeof(HostDataType) * block_cnt);
total_cnt += block_cnt;
}
}
memcpy(startRefValues, ref_vals.data(),
sizeof(HostDataType) * threadCount);
return true;
}
return false;
}
std::string ProgramCore() override
{
        // The start_value variable (set by StartValue) is used
        // as a divisor of the thread index when selecting the operand for
        // atomic_fetch_min. This groups threads into blocks corresponding
        // to the number of special values and implements an "all-to-all"
        // comparison pattern. As a result, each destination element is
        // updated using different combinations of input values, enabling
        // consistent comparison between host and device execution.
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
return std::string(DataType().AddSubOperandTypeName())
+ " start_value = atomic_load_explicit(destMemory+tid, "
"memory_order_relaxed, memory_scope_work_group);\n"
" atomic_store_explicit(destMemory+tid, oldValues[tid], "
"memory_order_relaxed, memory_scope_work_group);\n"
" atomic_fetch_min"
+ postfix + "(&destMemory[tid], ("
+ DataType().AddSubOperandTypeName()
+ ")oldValues[tid/(int)start_value]" + memoryOrderScope + ");\n";
}
void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues) override
{
auto spec_vals = GetSpecialValues();
host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
MEMORY_ORDER_SEQ_CST);
host_atomic_fetch_min(&destMemory[tid],
(HostDataType)oldValues[tid / spec_vals.size()],
MemoryOrder());
}
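    // Reference result: destMemory[tid] is overwritten with oldValues[tid]
    // and then min'ed with the block representative
    // oldValues[tid / spec_vals.size()], so the expected value is simply
    // std::min of those two inputs. NaN and signed-zero corner cases are
    // tolerated separately in IsTestNotAsExpected below.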
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
auto spec_vals = GetSpecialValues();
expected =
std::min(startRefValues[whichDestValue],
startRefValues[whichDestValue / spec_vals.size()]);
}
return true;
}
bool IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
const std::vector<HostDataType> &startRefValues,
cl_uint whichDestValue) override
{
if (testValues[whichDestValue] != expected)
{
auto spec_vals = GetSpecialValues();
// special cases
// min(-0, +0) = min(+0, -0) = +0 or -0,
if (((startRefValues[whichDestValue] == -0.f)
&& (startRefValues[whichDestValue / spec_vals.size()] == 0.f))
|| ((startRefValues[whichDestValue] == 0.f)
&& (startRefValues[whichDestValue / spec_vals.size()]
== -0.f)))
return false;
else if (is_qnan(startRefValues[whichDestValue / spec_vals.size()])
|| is_qnan(startRefValues[whichDestValue]))
{
// min(x, qNaN) = min(qNaN, x) = x,
// min(qNaN, qNaN) = qNaN,
if (is_qnan(startRefValues[whichDestValue / spec_vals.size()])
&& is_qnan(startRefValues[whichDestValue]))
return !is_qnan(testValues[whichDestValue]);
else if (is_qnan(
startRefValues[whichDestValue / spec_vals.size()]))
return !std::isnan(testValues[whichDestValue])
&& testValues[whichDestValue]
!= startRefValues[whichDestValue]; // NaN != NaN always
// true
else
return !std::isnan(testValues[whichDestValue])
&& testValues[whichDestValue]
!= startRefValues[whichDestValue / spec_vals.size()];
}
else if (is_snan(startRefValues[whichDestValue / spec_vals.size()])
|| is_snan(startRefValues[whichDestValue]))
{
// min(x, sNaN) = min(sNaN, x) = NaN or x, and
// min(NaN, sNaN) = min(sNaN, NaN) = NaN
if (std::isnan(testValues[whichDestValue])
|| testValues[whichDestValue]
== startRefValues[whichDestValue]
|| testValues[whichDestValue]
== startRefValues[whichDestValue / spec_vals.size()])
return false;
}
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
testValues,
startRefValues,
whichDestValue);
}
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
if (!CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::LocalMemory()
&& CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::DeclaredInProgram())
{
if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0;
}
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
return threadCount;
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::NumResults(threadCount,
deviceID);
}
};
static int test_atomic_fetch_min_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchMin<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestFetchMinSpecialFloats<HOST_ATOMIC_FLOAT, HOST_FLOAT>
test_spec_float(TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error,
test_spec_float.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchMin<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMin<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_fetch_min)
{
return test_atomic_fetch_min_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_fetch_min)
{
return test_atomic_fetch_min_generic(device, context, queue, num_elements,
true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchMax
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
double min_range;
double max_range;
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM),
min_range(-999.0), max_range(999.0)
{
StartValue(DataType().MinValue());
if constexpr (is_host_fp_v<HostDataType>)
{
CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::OldValueCheck(false);
}
}
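    // Mirror image of the fetch_min test: destMemory[0] accumulates the
    // maximum of all random inputs, and a fetch_max with an operand of 0
    // reads back the per-thread slot, whose old value must still equal
    // StartValue (the type's minimum).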
std::string ProgramCore() override
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        if constexpr (is_host_fp_v<HostDataType>)
{
return " atomic_fetch_max" + postfix
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
+ " oldValues[tid] = atomic_fetch_max" + postfix
+ "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
+ ")0" + memoryOrderScope + ");\n";
}
else
{
return " oldValues[tid] = atomic_fetch_max" + postfix
+ "(&destMemory[0], oldValues[tid] " + memoryOrderScope
+ ");\n";
}
}
void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
host_atomic_fetch_max(&destMemory[0], oldValues[tid],
MemoryOrder());
oldValues[tid] = host_atomic_fetch_max(
&destMemory[tid], (HostDataType)0, MemoryOrder());
}
else
{
oldValues[tid] = host_atomic_fetch_max(
&destMemory[0], oldValues[tid], MemoryOrder());
}
}
bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d) override
{
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            // Encode halves properly, as in the fetch_min test above.
            for (cl_uint i = 0; i < threadCount; i++)
                startRefValues[i] = cl_half_from_float(
                    get_random_float(min_range, max_range, d),
                    gHalfRoundingMode);
        }
        else if constexpr (is_host_fp_v<HostDataType>)
        {
            for (cl_uint i = 0; i < threadCount; i++)
                startRefValues[i] = static_cast<HostDataType>(
                    get_random_float(min_range, max_range, d));
        }
else
{
for (cl_uint i = 0; i < threadCount; i++)
{
startRefValues[i] = genrand_int32(d);
if (sizeof(HostDataType) >= 8)
{
cl_ulong v = startRefValues[i];
v |= (cl_ulong)genrand_int32(d) << 16;
startRefValues[i] = v;
}
}
}
return true;
}
bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue) override
{
expected = StartValue();
if constexpr (is_host_fp_v<HostDataType>)
{
if (whichDestValue == 0)
for (cl_uint i = 0; i < threadCount; i++)
if (startRefValues[i] > expected)
expected = startRefValues[i];
}
else
{
for (cl_uint i = 0; i < threadCount; i++)
{
if (startRefValues[i] > expected) expected = startRefValues[i];
}
}
return true;
}
bool IsTestNotAsExpected(const HostDataType &expected,
const std::vector<HostAtomicType> &testValues,
const std::vector<HostDataType> &startRefValues,
cl_uint whichDestValue) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
if (whichDestValue == 0)
return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
IsTestNotAsExpected(expected, testValues, startRefValues,
whichDestValue);
return false; // ignore all but 0 which stores final result
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
testValues,
startRefValues,
whichDestValue);
}
bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
HostAtomicType *finalValues) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
correct = true;
for (cl_uint i = 1; i < threadCount; i++)
{
if (refValues[i] != StartValue())
{
log_error(
"Thread %d found %lf mismatch(es), start value=%lf\n",
i, (double)refValues[i], (double)StartValue());
correct = false;
}
}
return !correct;
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::VerifyRefs(correct,
threadCount,
refValues,
finalValues);
}
int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue) override
{
if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
{
if (LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
{
if (LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
{
if (LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0; // skip test - not applicable
if (!LocalMemory()
&& (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
== 0)
return 0;
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
{
if constexpr (is_host_fp_v<HostDataType>)
{
return threadCount;
}
return CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::NumResults(threadCount,
deviceID);
}
};
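// C11/C++11 provide no native fetch-max for floating-point types, so the
// host-side helper host_atomic_fetch_max() (host_atomics.h) presumably falls
// back to a compare-exchange loop for them. A minimal sketch of that idiom,
// using std::atomic from <atomic> for illustration; fetch_max_float is a
// hypothetical name, not part of the harness:
//
//   float fetch_max_float(std::atomic<float> &dest, float operand)
//   {
//       float expected = dest.load(std::memory_order_relaxed);
//       while (expected < operand
//              && !dest.compare_exchange_weak(expected, operand))
//           ; // on failure, 'expected' is refreshed with the current value
//       return expected;
//   }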
static int test_atomic_fetch_max_generic(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements, bool useSVM)
{
int error = 0;
CBasicTestFetchMax<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
TYPE_ATOMIC_ULONG, useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (gFloatAtomicsSupported)
{
CBasicTestFetchMax<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
TYPE_ATOMIC_DOUBLE, useSVM);
EXECUTE_TEST(
error, test_double.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_HALF, HOST_HALF> test_half(
TYPE_ATOMIC_HALF, useSVM);
EXECUTE_TEST(error,
test_half.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
TYPE_ATOMIC_FLOAT, useSVM);
EXECUTE_TEST(
error, test_float.Execute(deviceID, context, queue, num_elements));
}
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFetchMax<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFetchMax<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_fetch_max)
{
return test_atomic_fetch_max_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_fetch_max)
{
return test_atomic_fetch_max_generic(device, context, queue, num_elements,
true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFlag
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000;
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
StartValue(0);
OldValueCheck(false);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return threadCount;
}
TExplicitMemoryOrderType MemoryOrderForClear()
{
// Memory ordering for atomic_flag_clear function
// ("shall not be memory_order_acquire nor memory_order_acq_rel")
if (MemoryOrder() == MEMORY_ORDER_ACQUIRE) return MEMORY_ORDER_RELAXED;
if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) return MEMORY_ORDER_RELEASE;
return MemoryOrder();
}
std::string MemoryOrderScopeStrForClear()
{
std::string orderStr;
if (MemoryOrder() != MEMORY_ORDER_EMPTY)
orderStr = std::string(", ")
+ get_memory_order_type_name(MemoryOrderForClear());
return orderStr + MemoryScopeStr();
}
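    // Example: for MemoryOrder() == MEMORY_ORDER_ACQ_REL with a work-group
    // scope, this yields ", memory_order_release, memory_scope_work_group",
    // keeping the clear operation within the orderings the spec permits.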
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue)
{
// This test assumes support for the memory_scope_device scope in the
// case that LocalMemory() == false. Therefore we should skip this test
// in that configuration on a 3.0 driver since supporting the
        // memory_scope_device scope is optional.
if (get_device_cl_version(deviceID) >= Version{ 3, 0 })
{
if (!LocalMemory()
&& !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE))
{
log_info("Skipping atomic_flag test due to use of "
"atomic_scope_device "
"which is optionally not supported on this device\n");
return 0; // skip test - not applicable
}
}
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
virtual std::string ProgramCore()
{
std::string memoryOrderScope = MemoryOrderScopeStr();
std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
std::string program =
" uint cnt, stop = 0;\n"
" for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread "
"must find critical section where it is the first visitor\n"
" {\n"
" bool set = atomic_flag_test_and_set"
+ postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n";
if (MemoryOrder() == MEMORY_ORDER_RELAXED
|| MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory())
program += " atomic_work_item_fence("
+ std::string(
LocalMemory()
? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, "
: "CLK_GLOBAL_MEM_FENCE, ")
+ "memory_order_acquire,"
+ std::string(LocalMemory()
? "memory_scope_work_group"
: (UseSVM() ? "memory_scope_all_svm_devices"
: "memory_scope_device"))
+ ");\n";
program += " if (!set)\n"
" {\n";
if (LocalMemory())
program += " uint csIndex = "
"get_enqueued_local_size(0)*get_group_id(0)+cnt;\n";
else
program += " uint csIndex = cnt;\n";
std::ostringstream csNotVisited;
csNotVisited << CRITICAL_SECTION_NOT_VISITED;
program += " // verify that thread is the first visitor\n"
" if(oldValues[csIndex] == "
+ csNotVisited.str()
+ ")\n"
" {\n"
" oldValues[csIndex] = tid; // set the winner id for this "
"critical section\n"
" stop = 1;\n"
" }\n";
if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
|| MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory())
program += " atomic_work_item_fence("
+ std::string(
LocalMemory()
? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, "
: "CLK_GLOBAL_MEM_FENCE, ")
+ "memory_order_release,"
+ std::string(LocalMemory()
? "memory_scope_work_group"
: (UseSVM() ? "memory_scope_all_svm_devices"
: "memory_scope_device"))
+ ");\n";
program += " atomic_flag_clear" + postfix + "(&destMemory[cnt]"
+ MemoryOrderScopeStrForClear()
+ ");\n"
" }\n"
" }\n";
return program;
}
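    // For illustration only: with memory_order_seq_cst, device scope and
    // global memory (no extra fences are emitted), the generated loop is
    // roughly:
    //
    //   uint cnt, stop = 0;
    //   for(cnt = 0; !stop && cnt < threadCount; cnt++)
    //   {
    //     bool set = atomic_flag_test_and_set_explicit(&destMemory[cnt],
    //                    memory_order_seq_cst, memory_scope_device);
    //     if (!set)
    //     {
    //       uint csIndex = cnt;
    //       if(oldValues[csIndex] == 1000000000) // not yet visited
    //       {
    //         oldValues[csIndex] = tid; // claim this critical section
    //         stop = 1;
    //       }
    //       atomic_flag_clear_explicit(&destMemory[cnt],
    //                    memory_order_seq_cst, memory_scope_device);
    //     }
    //   }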
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
cl_uint cnt, stop = 0;
for (cnt = 0; !stop && cnt < threadCount;
             cnt++) // each thread must find a critical section where it is
                    // the first visitor
{
if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder()))
{
cl_uint csIndex = cnt;
                // verify that thread is the first visitor
if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED)
{
oldValues[csIndex] =
                        tid; // set the winner id for this critical section
stop = 1;
}
host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear());
}
}
}
virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
HostDataType *startRefValues,
cl_uint whichDestValue)
{
expected = StartValue();
return true;
}
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d)
{
for (cl_uint i = 0; i < threadCount; i++)
startRefValues[i] = CRITICAL_SECTION_NOT_VISITED;
return true;
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
HostDataType *refValues,
HostAtomicType *finalValues)
{
correct = true;
/* We are expecting unique values from 0 to threadCount-1 (each critical
* section must be visited) */
/* These values must be distributed across refValues array */
std::vector<bool> tidFound(threadCount);
cl_uint i;
for (i = 0; i < threadCount; i++)
{
cl_uint value = (cl_uint)refValues[i];
if (value == CRITICAL_SECTION_NOT_VISITED)
{
// Special initial value
log_error("ERROR: Critical section %u not visited\n", i);
correct = false;
return true;
}
if (value >= threadCount)
{
                log_error("ERROR: Reference value #%u outside of valid "
                          "range! (%u)\n",
                          i, value);
correct = false;
return true;
}
if (tidFound[value])
{
log_error("ERROR: Value (%u) occurred more thane once\n",
value);
correct = false;
return true;
}
tidFound[value] = true;
}
return true;
}
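    // Example: with threadCount == 4, a passing run leaves refValues holding
    // some permutation of {0, 1, 2, 3}; a duplicated winner id or a leftover
    // CRITICAL_SECTION_NOT_VISITED entry means the flag failed to provide
    // mutual exclusion.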
};
static int test_atomic_flag_generic(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements,
bool useSVM)
{
int error = 0;
CBasicTestFlag<HOST_ATOMIC_FLAG, HOST_FLAG> test_flag(TYPE_ATOMIC_FLAG,
useSVM);
EXECUTE_TEST(error,
test_flag.Execute(deviceID, context, queue, num_elements));
return error;
}
REGISTER_TEST(atomic_flag)
{
return test_atomic_flag_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_flag)
{
return test_atomic_flag_generic(device, context, queue, num_elements, true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFence
: public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
struct TestDefinition
{
bool op1IsFence;
TExplicitMemoryOrderType op1MemOrder;
bool op2IsFence;
TExplicitMemoryOrderType op2MemOrder;
};
public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::DeclaredInProgram;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UsedInFunction;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
using CBasicTestMemOrderScope<HostAtomicType,
HostDataType>::CurrentGroupSize;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalRefValues;
CBasicTestFence(TExplicitAtomicType dataType, bool useSVM)
: CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
useSVM)
{
StartValue(0);
OldValueCheck(false);
}
virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
{
return threadCount;
}
virtual cl_uint NumNonAtomicVariablesPerThread()
{
if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) return 1;
if (LocalMemory())
{
if (gIsEmbedded)
{
if (CurrentGroupSize() > 512) CurrentGroupSize(512);
return 2; // 1KB of local memory required by spec. Clamp group
// size to 512 and allow 2 variables per thread
}
else
return 32 * 1024 / 8 / CurrentGroupSize()
- 1; // 32KB of local memory required by spec
}
return 256;
}
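    // Example: in a local-memory, non-seq_cst configuration on a
    // full-profile device with CurrentGroupSize() == 256, the spec-mandated
    // 32KB budget gives 32 * 1024 / 8 / 256 - 1 == 15 non-atomic variables
    // per thread.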
virtual std::string SingleTestName()
{
std::string testName;
if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
testName += "seq_cst fence, ";
else
testName +=
std::string(get_memory_order_type_name(_subCase.op1MemOrder))
.substr(sizeof("memory_order"))
+ (_subCase.op1IsFence ? " fence" : " atomic")
+ " synchronizes-with "
+ std::string(get_memory_order_type_name(_subCase.op2MemOrder))
.substr(sizeof("memory_order"))
+ (_subCase.op2IsFence ? " fence" : " atomic") + ", ";
testName += CBasicTest<HostAtomicType, HostDataType>::SingleTestName();
testName += std::string(", ")
+ std::string(get_memory_scope_type_name(MemoryScope()))
.substr(sizeof("memory"));
return testName;
}
virtual bool SVMDataBufferAllSVMConsistent()
{
        // Although memory_scope_all_devices doesn't mention SVM, it is just
        // an alias for memory_scope_all_svm_devices; on devices that support
        // SVM, both scopes interact with SVM allocations in the same way.
return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES
|| MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
}
virtual int ExecuteForEachParameterSet(cl_device_id deviceID,
cl_context context,
cl_command_queue queue)
{
int error = 0;
// execute 3 (maximum) sub cases for each memory order
for (_subCaseId = 0; _subCaseId < 3; _subCaseId++)
{
EXECUTE_TEST(
error,
(CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
ExecuteForEachParameterSet(deviceID, context, queue)));
}
return error;
}
virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
cl_command_queue queue)
{
if (DeclaredInProgram() || UsedInFunction())
return 0; // skip test - not applicable - no overloaded fence
// functions for different address spaces
if (MemoryOrder() == MEMORY_ORDER_EMPTY
|| MemoryScope()
== MEMORY_SCOPE_EMPTY) // empty 'scope' not required since
// opencl20-openclc-rev15
return 0; // skip test - not applicable
if ((UseSVM() || gHost) && LocalMemory())
return 0; // skip test - not applicable for SVM and local memory
struct TestDefinition acqTests[] = {
// {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder}
{ false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
{ true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
{ true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE }
};
struct TestDefinition relTests[] = {
{ true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE },
{ true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }
};
struct TestDefinition arTests[] = {
{ false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL },
{ true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE },
{ true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL }
};
switch (MemoryOrder())
{
case MEMORY_ORDER_ACQUIRE:
if (_subCaseId
>= sizeof(acqTests) / sizeof(struct TestDefinition))
return 0;
_subCase = acqTests[_subCaseId];
break;
case MEMORY_ORDER_RELEASE:
if (_subCaseId
>= sizeof(relTests) / sizeof(struct TestDefinition))
return 0;
_subCase = relTests[_subCaseId];
break;
case MEMORY_ORDER_ACQ_REL:
if (_subCaseId
>= sizeof(arTests) / sizeof(struct TestDefinition))
return 0;
_subCase = arTests[_subCaseId];
break;
case MEMORY_ORDER_SEQ_CST:
if (_subCaseId != 0) // one special case only
return 0;
break;
default: return 0;
}
LocalRefValues(LocalMemory());
return CBasicTestMemOrderScope<
HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
queue);
}
virtual std::string ProgramHeader(cl_uint maxNumDestItems)
{
std::string header;
if (gOldAPI)
{
if (MemoryScope() == MEMORY_SCOPE_EMPTY)
{
header += "#define atomic_work_item_fence(x,y) "
" mem_fence(x)\n";
}
else
{
header += "#define atomic_work_item_fence(x,y,z) "
" mem_fence(x)\n";
}
}
return header
+ CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
ProgramHeader(maxNumDestItems);
}
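    // Example: under gOldAPI with an explicit scope, a generated call such
    // as
    //
    //   atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_acquire,
    //                          memory_scope_device);
    //
    // preprocesses down to the pre-OpenCL-2.0 built-in
    //
    //   mem_fence(CLK_GLOBAL_MEM_FENCE);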
virtual std::string ProgramCore()
{
std::ostringstream naValues;
naValues << NumNonAtomicVariablesPerThread();
std::string program, fenceType, nonAtomic;
if (LocalMemory())
{
program = " size_t myId = get_local_id(0), hisId = "
"get_local_size(0)-1-myId;\n";
fenceType = "CLK_LOCAL_MEM_FENCE";
nonAtomic = "localValues";
}
else
{
program = " size_t myId = tid, hisId = threadCount-1-tid;\n";
fenceType = "CLK_GLOBAL_MEM_FENCE";
nonAtomic = "oldValues";
}
if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
{
            // All threads are divided into pairs.
            // Each thread has its own atomic variable and performs the
            // following actions:
            // - increments its own variable
            // - performs a fence operation to propagate its value and to
            //   see the value from the other thread
            // - reads the value of the other thread's variable
            // - repeats the above steps while both values are the same
            //   (and less than 500000)
            // - stores the last value read from the other thread in an
            //   additional variable
            // At the end of execution at least one thread should know the
            // last value written by the other thread.
program += std::string("") + " " + DataType().RegularTypeName()
+ " myValue = 0, hisValue; \n"
" do {\n"
" myValue++;\n"
" atomic_store_explicit(&destMemory[myId], myValue, "
"memory_order_relaxed"
+ MemoryScopeStr()
+ ");\n"
" atomic_work_item_fence("
+ fenceType + ", memory_order_seq_cst" + MemoryScopeStr()
+ "); \n"
" hisValue = atomic_load_explicit(&destMemory[hisId], "
"memory_order_relaxed"
+ MemoryScopeStr()
+ ");\n"
" } while(myValue == hisValue && myValue < 500000);\n"
" "
+ nonAtomic + "[myId] = hisValue; \n";
}
else
{
            // Each thread modifies one of its non-atomic variables,
            // increments the value of its atomic variable and reads values
            // from another thread in a typical synchronizes-with scenario:
            // - non-atomic variable (at index A) modification (value change
            //   from 0 to A)
            // - release operation (standalone fence or within the atomic) +
            //   atomic variable modification (value A)
            // - atomic variable read (value B) + acquire operation
            //   (standalone fence or within the atomic)
            // - non-atomic variable (at index B) read (value C)
            // Each thread verifies the dependency between the atomic and
            // non-atomic values read from another thread; the condition
            // B == C must hold.
program += std::string("") + " " + DataType().RegularTypeName()
+ " myValue = 0, hisAtomicValue, hisValue; \n"
" do {\n"
" myValue++;\n"
" "
+ nonAtomic + "[myId*" + naValues.str()
+ "+myValue] = myValue;\n";
if (_subCase.op1IsFence)
program += std::string("") + " atomic_work_item_fence("
+ fenceType + ", "
+ get_memory_order_type_name(_subCase.op1MemOrder)
+ MemoryScopeStr()
+ "); \n"
" atomic_store_explicit(&destMemory[myId], myValue, "
"memory_order_relaxed"
+ MemoryScopeStr() + ");\n";
else
program += std::string("")
+ " atomic_store_explicit(&destMemory[myId], myValue, "
+ get_memory_order_type_name(_subCase.op1MemOrder)
+ MemoryScopeStr() + ");\n";
if (_subCase.op2IsFence)
program += std::string("")
+ " hisAtomicValue = "
"atomic_load_explicit(&destMemory[hisId], "
"memory_order_relaxed"
+ MemoryScopeStr()
+ ");\n"
" atomic_work_item_fence("
+ fenceType + ", "
+ get_memory_order_type_name(_subCase.op2MemOrder)
+ MemoryScopeStr() + "); \n";
else
program += std::string("")
+ " hisAtomicValue = "
"atomic_load_explicit(&destMemory[hisId], "
+ get_memory_order_type_name(_subCase.op2MemOrder)
+ MemoryScopeStr() + ");\n";
program += " hisValue = " + nonAtomic + "[hisId*"
+ naValues.str() + "+hisAtomicValue]; \n";
if (LocalMemory())
program += " hisId = (hisId+1)%get_local_size(0);\n";
else
program += " hisId = (hisId+1)%threadCount;\n";
program += " } while(hisAtomicValue == hisValue && myValue < "
+ naValues.str()
+ "-1);\n"
" if(hisAtomicValue != hisValue)\n"
" { // fail\n"
" atomic_store_explicit(&destMemory[myId], myValue-1,"
" memory_order_relaxed, memory_scope_work_group);\n";
if (LocalMemory())
program += " hisId = "
"(hisId+get_local_size(0)-1)%get_local_size(0);\n";
else
program += " hisId = (hisId+threadCount-1)%threadCount;\n";
program += " if(myValue+1 < " + naValues.str()
+ ")\n"
" "
+ nonAtomic + "[myId*" + naValues.str()
+ "+myValue+1] = hisId;\n"
" if(myValue+2 < "
+ naValues.str()
+ ")\n"
" "
+ nonAtomic + "[myId*" + naValues.str()
+ "+myValue+2] = hisAtomicValue;\n"
" if(myValue+3 < "
+ naValues.str()
+ ")\n"
" "
+ nonAtomic + "[myId*" + naValues.str()
+ "+myValue+3] = hisValue;\n";
if (gDebug)
{
program += " printf(\"WI %d: atomic value (%d) at index %d "
"is different than non-atomic value (%d)\\n\", tid, "
"hisAtomicValue, hisId, hisValue);\n";
}
program += " }\n";
}
return program;
}
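    // For illustration only: for the sub-case { false, MEMORY_ORDER_RELEASE,
    // true, MEMORY_ORDER_ACQUIRE } on global memory with device scope, and
    // writing N for NumNonAtomicVariablesPerThread(), the generated core is
    // roughly (failure-reporting tail omitted):
    //
    //   size_t myId = tid, hisId = threadCount-1-tid;
    //   int myValue = 0, hisAtomicValue, hisValue;
    //   do {
    //     myValue++;
    //     oldValues[myId*N+myValue] = myValue;
    //     atomic_store_explicit(&destMemory[myId], myValue,
    //         memory_order_release, memory_scope_device);
    //     hisAtomicValue = atomic_load_explicit(&destMemory[hisId],
    //         memory_order_relaxed, memory_scope_device);
    //     atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE,
    //         memory_order_acquire, memory_scope_device);
    //     hisValue = oldValues[hisId*N+hisAtomicValue];
    //     hisId = (hisId+1)%threadCount;
    //   } while(hisAtomicValue == hisValue && myValue < N-1);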
virtual void HostFunction(cl_uint tid, cl_uint threadCount,
volatile HostAtomicType *destMemory,
HostDataType *oldValues)
{
size_t myId = tid, hisId = threadCount - 1 - tid;
if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
{
HostDataType myValue = 0, hisValue;
// CPU thread typically starts faster - wait for GPU thread
myValue++;
host_atomic_store<HostAtomicType, HostDataType>(
&destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST);
while (host_atomic_load<HostAtomicType, HostDataType>(
&destMemory[hisId], MEMORY_ORDER_SEQ_CST)
== 0)
;
do
{
myValue++;
host_atomic_store<HostAtomicType, HostDataType>(
&destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
host_atomic_thread_fence(MemoryOrder());
hisValue = host_atomic_load<HostAtomicType, HostDataType>(
&destMemory[hisId], MEMORY_ORDER_RELAXED);
} while (myValue == hisValue && hisValue < 500000);
oldValues[tid] = hisValue;
}
else
{
HostDataType myValue = 0, hisAtomicValue, hisValue;
do
{
myValue++;
oldValues[myId * NumNonAtomicVariablesPerThread() + myValue] =
myValue;
if (_subCase.op1IsFence)
{
host_atomic_thread_fence(_subCase.op1MemOrder);
host_atomic_store<HostAtomicType, HostDataType>(
&destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
}
else
host_atomic_store<HostAtomicType, HostDataType>(
&destMemory[myId], myValue, _subCase.op1MemOrder);
if (_subCase.op2IsFence)
{
hisAtomicValue =
host_atomic_load<HostAtomicType, HostDataType>(
&destMemory[hisId], MEMORY_ORDER_RELAXED);
host_atomic_thread_fence(_subCase.op2MemOrder);
}
else
hisAtomicValue =
host_atomic_load<HostAtomicType, HostDataType>(
&destMemory[hisId], _subCase.op2MemOrder);
hisValue = oldValues[hisId * NumNonAtomicVariablesPerThread()
+ hisAtomicValue];
hisId = (hisId + 1) % threadCount;
} while (hisAtomicValue == hisValue
&& myValue
< (HostDataType)NumNonAtomicVariablesPerThread() - 1);
if (hisAtomicValue != hisValue)
{ // fail
host_atomic_store<HostAtomicType, HostDataType>(
&destMemory[myId], myValue - 1, MEMORY_ORDER_SEQ_CST);
if (gDebug)
{
hisId = (hisId + threadCount - 1) % threadCount;
printf("WI %d: atomic value (%d) at index %d is different "
"than non-atomic value (%d)\n",
tid, hisAtomicValue, hisId, hisValue);
}
}
}
}
virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
MTdata d)
{
for (cl_uint i = 0; i < threadCount * NumNonAtomicVariablesPerThread();
i++)
startRefValues[i] = 0;
return true;
}
virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
HostDataType *refValues,
HostAtomicType *finalValues)
{
correct = true;
cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount;
for (cl_uint workOffset = 0; workOffset < threadCount;
workOffset += workSize)
{
if (workOffset + workSize > threadCount)
// last workgroup (host threads)
workSize = threadCount - workOffset;
for (cl_uint i = 0; i < workSize && workOffset + i < threadCount;
i++)
{
HostAtomicType myValue = finalValues[workOffset + i];
if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
{
HostDataType hisValue = refValues[workOffset + i];
if (myValue == hisValue)
{
// a draw - both threads should reach final value
// 500000
if (myValue != 500000)
{
log_error("ERROR: Invalid reference value #%u (%d "
"instead of 500000)\n",
workOffset + i, myValue);
correct = false;
return true;
}
}
else
{
// slower thread (in total order of seq_cst operations)
// must know last value written by faster thread
HostAtomicType hisRealValue =
finalValues[workOffset + workSize - 1 - i];
HostDataType myValueReadByHim =
refValues[workOffset + workSize - 1 - i];
// who is the winner? - thread with lower private
// counter value
if (myValue == hisRealValue) // forbidden result - fence
// doesn't work
{
log_error("ERROR: Atomic counter values #%u and "
"#%u are the same (%u)\n",
workOffset + i,
workOffset + workSize - 1 - i, myValue);
log_error(
"ERROR: Both threads have outdated values read "
"from another thread (%u and %u)\n",
hisValue, myValueReadByHim);
correct = false;
return true;
}
if (myValue > hisRealValue) // I'm slower
{
if (hisRealValue != hisValue)
{
log_error("ERROR: Invalid reference value #%u "
"(%d instead of %d)\n",
workOffset + i, hisValue,
hisRealValue);
log_error(
"ERROR: Slower thread #%u should know "
"value written by faster thread #%u\n",
workOffset + i,
workOffset + workSize - 1 - i);
correct = false;
return true;
}
}
else // I'm faster
{
if (myValueReadByHim != myValue)
{
log_error("ERROR: Invalid reference value #%u "
"(%d instead of %d)\n",
workOffset + workSize - 1 - i,
myValueReadByHim, myValue);
log_error(
"ERROR: Slower thread #%u should know "
"value written by faster thread #%u\n",
workOffset + workSize - 1 - i,
workOffset + i);
correct = false;
return true;
}
}
}
}
else
{
if (myValue != NumNonAtomicVariablesPerThread() - 1)
{
log_error("ERROR: Invalid atomic value #%u (%d instead "
"of %d)\n",
workOffset + i, myValue,
NumNonAtomicVariablesPerThread() - 1);
log_error("ERROR: Thread #%u observed invalid values "
"in other thread's variables\n",
workOffset + i);
correct = false;
return true;
}
}
}
}
return true;
}
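    // Worked example for the seq_cst branch: if a pair of threads finishes
    // with atomic counters 9 and 7, the thread that stopped at 9 is the
    // slower one in the seq_cst total order, so the value it read (its
    // refValue) must be 7, the faster thread's final value. Equal counters
    // are only legal at the 500000 cap.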
private:
size_t _subCaseId;
struct TestDefinition _subCase;
};
#if 0
// The tests below are likely incorrect and have been disabled.
// See https://github.com/KhronosGroup/OpenCL-CTS/issues/2544
static int test_atomic_fence_generic(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements,
bool useSVM)
{
int error = 0;
CBasicTestFence<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
useSVM);
EXECUTE_TEST(error,
test_int.Execute(deviceID, context, queue, num_elements));
CBasicTestFence<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
useSVM);
EXECUTE_TEST(error,
test_uint.Execute(deviceID, context, queue, num_elements));
CBasicTestFence<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
useSVM);
EXECUTE_TEST(error,
test_long.Execute(deviceID, context, queue, num_elements));
CBasicTestFence<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG,
useSVM);
EXECUTE_TEST(error,
test_ulong.Execute(deviceID, context, queue, num_elements));
if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
{
CBasicTestFence<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFence<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFence<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFence<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
else
{
CBasicTestFence<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
TYPE_ATOMIC_INTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_intptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFence<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
EXECUTE_TEST(
error,
test_uintptr_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFence<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
TYPE_ATOMIC_SIZE_T, useSVM);
EXECUTE_TEST(
error, test_size_t.Execute(deviceID, context, queue, num_elements));
CBasicTestFence<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
EXECUTE_TEST(
error,
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
}
return error;
}
REGISTER_TEST(atomic_fence)
{
return test_atomic_fence_generic(device, context, queue, num_elements,
false);
}
REGISTER_TEST(svm_atomic_fence)
{
return test_atomic_fence_generic(device, context, queue, num_elements,
true);
}
#endif