//
// Copyright (c) 2024 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "harness/testHarness.h"
#include "harness/kernelHelpers.h"
#include "harness/typeWrappers.h"
#include "harness/conversions.h"
#include "common.h"
#include "host_atomics.h"

#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>

template <typename HostAtomicType, typename HostDataType>
class CBasicTestStore
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestStore(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        if (MemoryOrder() == MEMORY_ORDER_ACQUIRE
            || MemoryOrder() == MEMORY_ORDER_ACQ_REL)
            return 0; // skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder())
            == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
                ._type
            == TYPE_ATOMIC_HALF)
        {
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT) == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT) == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(
            deviceID, context, queue);
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return "  atomic_store" + postfix + "(&destMemory[tid], tid"
            + memoryOrderScope + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = static_cast<HostDataType>(whichDestValue);
        return true;
    }
};

static int test_atomic_store_generic(cl_device_id deviceID, cl_context context,
                                     cl_command_queue queue, int num_elements,
                                     bool useSVM)
{
    int error = 0;
    CBasicTestStore<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
    EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestStore<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
        TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
    if (gFloatAtomicsSupported)
    {
        CBasicTestStore<HOST_ATOMIC_HALF, HOST_HALF> test_half(TYPE_ATOMIC_HALF, useSVM);
        EXECUTE_TEST(error,
                     test_half.Execute(deviceID, context, queue, num_elements));
    }
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestStore<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestStore<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestStore<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_store)
{
    return test_atomic_store_generic(device, context, queue, num_elements, false);
}

REGISTER_TEST(svm_atomic_store)
{
    return test_atomic_store_generic(device, context, queue, num_elements, true);
}
template <typename HostAtomicType, typename HostDataType>
class CBasicTestInit : public CBasicTest<HostAtomicType, HostDataType>
{
public:
    using CBasicTest<HostAtomicType, HostDataType>::OldValueCheck;
    CBasicTestInit(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTest<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual std::string ProgramCore()
    {
        return "  atomic_init(&destMemory[tid], tid);\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        host_atomic_init(&destMemory[tid], (HostDataType)tid);
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = (HostDataType)whichDestValue;
        return true;
    }
};

static int test_atomic_init_generic(cl_device_id deviceID, cl_context context,
                                    cl_command_queue queue, int num_elements,
                                    bool useSVM)
{
    int error = 0;
    CBasicTestInit<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
    EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestInit<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
        TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestInit<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestInit<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestInit<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_init)
{
    return test_atomic_init_generic(device, context, queue, num_elements, false);
}

REGISTER_TEST(svm_atomic_init)
{
    return test_atomic_init_generic(device, context, queue, num_elements, true);
}
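// atomic_load: every thread stores its id and loads it back. A load cannot
// carry memory_order_release, so those orderings are skipped; and when the
// load under test uses memory_order_acquire, ProgramCore pairs it with a
// memory_order_release store so the two form a valid synchronizing pair.
// Illustrative expansion of the generated kernel lines (scope suffix varies):
//   atomic_store_explicit(&destMemory[tid], tid, memory_order_release, ...);
//   oldValues[tid] = atomic_load_explicit(&destMemory[tid], memory_order_acquire, ...);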
template <typename HostAtomicType, typename HostDataType>
class CBasicTestLoad
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScope;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
    using CBasicTest<HostAtomicType, HostDataType>::CheckCapabilities;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        OldValueCheck(false);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                  cl_command_queue queue)
    {
        if (MemoryOrder() == MEMORY_ORDER_RELEASE
            || MemoryOrder() == MEMORY_ORDER_ACQ_REL)
            return 0; // skip test - not applicable

        if (CheckCapabilities(MemoryScope(), MemoryOrder())
            == TEST_SKIPPED_ITSELF)
            return 0; // skip test - not applicable

        if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
                ._type
            == TYPE_ATOMIC_HALF)
        {
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT) == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT) == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(
            deviceID, context, queue);
    }
    virtual std::string ProgramCore()
    {
        // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store
        // should be MEMORY_ORDER_RELEASE
        std::string memoryOrderScopeLoad = MemoryOrderScopeStr();
        std::string memoryOrderScopeStore =
            (MemoryOrder() == MEMORY_ORDER_ACQUIRE)
            ? (", memory_order_release" + MemoryScopeStr())
            : memoryOrderScopeLoad;
        std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit");
        return "  atomic_store" + postfix + "(&destMemory[tid], tid"
            + memoryOrderScopeStore + ");\n"
              "  oldValues[tid] = atomic_load" + postfix + "(&destMemory[tid]"
            + memoryOrderScopeLoad + ");\n";
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        host_atomic_store(&destMemory[tid], (HostDataType)tid,
                          MEMORY_ORDER_SEQ_CST);
        oldValues[tid] = host_atomic_load<HostAtomicType, HostDataType>(
            &destMemory[tid], MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                               HostDataType *startRefValues,
                               cl_uint whichDestValue)
    {
        expected = static_cast<HostDataType>(whichDestValue);
        return true;
    }
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        for (cl_uint i = 0; i < threadCount; i++)
        {
            if (refValues[i] != (HostDataType)i)
            {
                log_error("Invalid value for thread %u\n", (cl_uint)i);
                correct = false;
                return true;
            }
        }
        return true;
    }
};

static int test_atomic_load_generic(cl_device_id deviceID, cl_context context,
                                    cl_command_queue queue, int num_elements,
                                    bool useSVM)
{
    int error = 0;
    CBasicTestLoad<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(TYPE_ATOMIC_FLOAT, useSVM);
    EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements));
    CBasicTestLoad<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
        TYPE_ATOMIC_DOUBLE, useSVM);
    EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements));
    if (gFloatAtomicsSupported)
    {
        CBasicTestLoad<HOST_ATOMIC_HALF, HOST_HALF> test_half(TYPE_ATOMIC_HALF, useSVM);
        EXECUTE_TEST(error,
                     test_half.Execute(deviceID, context, queue, num_elements));
    }
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestLoad<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestLoad<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestLoad<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_load)
{
    return test_atomic_load_generic(device, context, queue, num_elements, false);
}

REGISTER_TEST(svm_atomic_load)
{
    return test_atomic_load_generic(device, context, queue, num_elements, true);
}
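// atomic_exchange: all threads funnel their values through the single cell
// destMemory[0], each keeping whatever value it displaced. Afterwards the
// start value (1234) and every tid must each occur exactly once across the
// oldValues array plus the final destMemory[0]; VerifyRefs below rejects
// any duplicate or out-of-range value.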
"" : "_explicit"); return " oldValues[tid] = atomic_exchange" + postfix + "(&destMemory[0], tid" + memoryOrderScope + ");\n" " for(int i = 0; i < " + IterationsStr() + "; i++)\n" " oldValues[tid] = atomic_exchange" + postfix + "(&destMemory[0], oldValues[tid]" + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, MemoryOrder()); for (int i = 0; i < Iterations(); i++) oldValues[tid] = host_atomic_exchange( &destMemory[0], oldValues[tid], MemoryOrder()); } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { OldValueCheck( Iterations() % 2 == 0); // check is valid for even number of iterations only correct = true; /* We are expecting values from 0 to size-1 and initial value from * atomic variable */ /* These values must be distributed across refValues array and atomic * variable finalVaue[0] */ /* Any repeated value is treated as an error */ std::vector tidFound(threadCount); bool startValueFound = false; cl_uint startVal = static_cast(StartValue()); for (cl_uint i = 0; i <= threadCount; i++) { cl_uint value = 0; if (i == threadCount) { value = static_cast( static_cast(finalValues[0])); } else { value = static_cast(refValues[i]); } if (value == startVal) { // Special initial value if (startValueFound) { log_error("ERROR: Starting reference value (%u) occurred " "more thane once\n", (cl_uint)StartValue()); correct = false; return true; } startValueFound = true; continue; } if (value >= threadCount) { log_error( "ERROR: Reference value %u outside of valid range! (%u)\n", i, value); correct = false; return true; } if (tidFound[value]) { log_error("ERROR: Value (%u) occurred more thane once\n", value); correct = false; return true; } tidFound[value] = true; } return true; } }; static int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestExchange test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_float( TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); if (gFloatAtomicsSupported) { CBasicTestExchange test_half( TYPE_ATOMIC_HALF, useSVM); EXECUTE_TEST(error, test_half.Execute(deviceID, context, queue, num_elements)); } if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, 
test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_exchange) { return test_atomic_exchange_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_exchange) { return test_atomic_exchange_generic(device, context, queue, num_elements, true); } template class CBasicTestCompareStrong : public CBasicTestMemOrder2Scope { public: using CBasicTestMemOrder2Scope::StartValue; using CBasicTestMemOrder2Scope::OldValueCheck; using CBasicTestMemOrder2Scope::MemoryOrder; using CBasicTestMemOrder2Scope::MemoryOrder2; using CBasicTestMemOrder2Scope::MemoryOrderScope; using CBasicTestMemOrder2Scope::MemoryScope; using CBasicTestMemOrder2Scope::DataType; using CBasicTestMemOrder2Scope::Iterations; using CBasicTestMemOrder2Scope::IterationsStr; using CBasicTest::CheckCapabilities; CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope(dataType, useSVM) { StartValue(123456); OldValueCheck(false); } virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) { if (MemoryOrder2() == MEMORY_ORDER_RELEASE || MemoryOrder2() == MEMORY_ORDER_ACQ_REL) return 0; // not allowed as 'failure' argument if ((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) || (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST)) return 0; // failure argument shall be no stronger than the success if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) return 0; // skip test - not applicable if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF) return 0; // skip test - not applicable return CBasicTestMemOrder2Scope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScope(); std::string postfix(memoryOrderScope.empty() ? 
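// atomic_compare_exchange_strong: iterated over pairs of success/failure
// memory orders. Combinations the specification forbids are skipped up
// front: the failure order may not be memory_order_release or
// memory_order_acq_rel and shall be no stronger than the success order.
// A 'strong' CAS must never fail spuriously, so a failed exchange that
// leaves 'expected' unchanged is flagged as an error.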
"" : "_explicit"); return std::string(" ") + DataType().RegularTypeName() + " expected, previous;\n" " int successCount = 0;\n" " oldValues[tid] = tid;\n" " expected = tid; // force failure at the beginning\n" " if(atomic_compare_exchange_strong" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + ") || expected == tid)\n" " oldValues[tid] = threadCount+1; //mark unexpected success " "with invalid value\n" " else\n" " {\n" " for(int i = 0; i < " + IterationsStr() + " || successCount == 0; i++)\n" " {\n" " previous = expected;\n" " if(atomic_compare_exchange_strong" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + "))\n" " {\n" " oldValues[tid] = expected;\n" " successCount++;\n" " }\n" " else\n" " {\n" " if(previous == expected) // spurious failure - " "shouldn't occur for 'strong'\n" " {\n" " oldValues[tid] = threadCount; //mark fail with " "invalid value\n" " break;\n" " }\n" " }\n" " }\n" " }\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { HostDataType expected = (HostDataType)StartValue(), previous; oldValues[tid] = (HostDataType)tid; for (int i = 0; i < Iterations(); i++) { previous = expected; if (host_atomic_compare_exchange(&destMemory[0], &expected, oldValues[tid], MemoryOrder(), MemoryOrder2())) oldValues[tid] = expected; else { if (previous == expected) // shouldn't occur for 'strong' { oldValues[tid] = threadCount; // mark fail with invalid // value } } } } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; /* We are expecting values from 0 to size-1 and initial value from * atomic variable */ /* These values must be distributed across refValues array and atomic * variable finalVaue[0] */ /* Any repeated value is treated as an error */ std::vector tidFound(threadCount); bool startValueFound = false; cl_uint i; for (i = 0; i <= threadCount; i++) { cl_uint value; if (i == threadCount) value = (cl_uint)finalValues[0]; // additional value from atomic // variable (last written) else value = (cl_uint)refValues[i]; if (value == (cl_uint)StartValue()) { // Special initial value if (startValueFound) { log_error("ERROR: Starting reference value (%u) occurred " "more thane once\n", (cl_uint)StartValue()); correct = false; return true; } startValueFound = true; continue; } if (value >= threadCount) { if (value == threadCount) log_error("ERROR: Spurious failure detected for " "atomic_compare_exchange_strong\n"); log_error( "ERROR: Reference value %u outside of valid range! 
(%u)\n", i, value); correct = false; return true; } if (tidFound[value]) { log_error("ERROR: Value (%u) occurred more thane once\n", value); correct = false; return true; } tidFound[value] = true; } return true; } }; static int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestCompareStrong test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_uint( TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_long( TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_compare_exchange_strong) { return test_atomic_compare_exchange_strong_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_compare_exchange_strong) { return test_atomic_compare_exchange_strong_generic(device, context, queue, num_elements, true); } template class CBasicTestCompareWeak : public CBasicTestCompareStrong { public: using CBasicTestCompareStrong::StartValue; using CBasicTestCompareStrong::MemoryOrderScope; using CBasicTestCompareStrong::DataType; using CBasicTestCompareStrong::Iterations; using CBasicTestCompareStrong::IterationsStr; CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) : CBasicTestCompareStrong(dataType, useSVM) {} virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScope(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); return std::string(" ") + DataType().RegularTypeName() + " expected , previous;\n" " int successCount = 0;\n" " oldValues[tid] = tid;\n" " expected = tid; // force failure at the beginning\n" " if(atomic_compare_exchange_weak" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + ") || expected == tid)\n" " oldValues[tid] = threadCount+1; //mark unexpected success " "with invalid value\n" " else\n" " {\n" " for(int i = 0; i < " + IterationsStr() + " || successCount == 0; i++)\n" " {\n" " previous = expected;\n" " if(atomic_compare_exchange_weak" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + "))\n" " {\n" " oldValues[tid] = expected;\n" " successCount++;\n" " }\n" " }\n" " }\n"; } }; static int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestCompareWeak test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_uint( TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_long( TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_compare_exchange_weak) { return test_atomic_compare_exchange_weak_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_compare_exchange_weak) { return test_atomic_compare_exchange_weak_generic(device, context, queue, num_elements, true); } template double kahan_sum(const std::vector &nums) { return 0.0; } template <> double kahan_sum(const std::vector &nums) { double sum = 0.0; double compensation = 0.0; for (double num : nums) { double y = num - compensation; double t = sum + y; compensation = (t - sum) - y; sum = t; } return sum; } template class CBasicTestFetchAdd : public CBasicTestMemOrderScope { double min_range; double max_range; double max_error; std::vector ref_vals; public: using 
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAdd
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
    double min_range;
    double max_range;
    double max_error;
    std::vector<HostDataType> ref_vals;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM),
          min_range(-999.0), max_range(999.0), max_error(0.0)
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            StartValue((HostDataType)0.0);
            CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck(
                false);
            // Narrow down range for half to avoid overflow to infinity
            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
            {
                min_range = -50.0;
                max_range = 50.0;
            }
        }
    }
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (threadCount > ref_vals.size())
            {
                ref_vals.resize(threadCount);
                for (cl_uint i = 0; i < threadCount; i++)
                    if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
                        ref_vals[i] = get_random_double(min_range, max_range, d);
                    else
                        ref_vals[i] = get_random_float(min_range, max_range, d);
                memcpy(startRefValues, ref_vals.data(),
                       sizeof(HostDataType) * ref_vals.size());

                // Estimate highest possible summation error for given set.
                std::vector<HostDataType> sums;
                std::sort(ref_vals.begin(), ref_vals.end());
                sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
                                               static_cast<HostDataType>(0.f)));
                sums.push_back(std::accumulate(ref_vals.rbegin(), ref_vals.rend(),
                                               static_cast<HostDataType>(0.f)));
                std::sort(ref_vals.begin(), ref_vals.end(),
                          [](HostDataType a, HostDataType b) {
                              return std::abs(a) < std::abs(b);
                          });
                double precise = 0.0;
                if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
                    precise = kahan_sum(ref_vals);
                else
                    for (auto elem : ref_vals) precise += double(elem);
                sums.push_back(precise);
                sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
                                               static_cast<HostDataType>(0.f)));
                sums.push_back(std::accumulate(ref_vals.rbegin(), ref_vals.rend(),
                                               static_cast<HostDataType>(0.f)));
                std::sort(sums.begin(), sums.end());
                assert(std::all_of(sums.begin(), sums.end(),
                                   [](const HostDataType &val) {
                                       return std::isfinite(
                                           static_cast<double>(val));
                                   })
                       && "Infinite summation value detected!");
                max_error = std::abs(sums.front() - sums.back());
                log_info("Max allowed error for %u elements: %.10f\n",
                         threadCount, max_error);
                // restore unsorted order
                memcpy(ref_vals.data(), startRefValues,
                       sizeof(HostDataType) * ref_vals.size());
            }
            else
            {
                memcpy(startRefValues, ref_vals.data(),
                       sizeof(HostDataType) * threadCount);
            }
            return true;
        }
        return false;
    }
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        if constexpr (is_host_fp_v<HostDataType>)
        {
            return "  atomic_fetch_add" + postfix + "(&destMemory[0], ("
                + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
                + memoryOrderScope + ");\n"
                + "  oldValues[tid] = atomic_fetch_add" + postfix
                + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
                + ")0" + memoryOrderScope + ");\n";
        }
        else
        {
            return "  oldValues[tid] = atomic_fetch_add" + postfix
                + "(&destMemory[0], (" + DataType().AddSubOperandTypeName()
                + ")tid + 3" + memoryOrderScope + ");\n"
                + "  atomic_fetch_add" + postfix + "(&destMemory[0], ("
                + DataType().AddSubOperandTypeName() + ")tid + 3"
                + memoryOrderScope + ");\n"
                  "  atomic_fetch_add" + postfix + "(&destMemory[0], ("
                + DataType().AddSubOperandTypeName() + ")tid + 3"
                + memoryOrderScope + ");\n"
                  "  atomic_fetch_add" + postfix + "(&destMemory[0], (("
                + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
                + DataType().AddSubOperandTypeName() + ")-1)*8"
                + memoryOrderScope + ");\n";
        }
    }
    void HostFunction(cl_uint tid, cl_uint threadCount,
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
                                  MemoryOrder());
            oldValues[tid] = host_atomic_fetch_add(
                &destMemory[tid], (HostDataType)0, MemoryOrder());
        }
        else
        {
            oldValues[tid] = host_atomic_fetch_add(
                &destMemory[0], (HostDataType)tid + 3, MemoryOrder());
            host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
                                  MemoryOrder());
            host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3,
                                  MemoryOrder());
            host_atomic_fetch_add(
                &destMemory[0],
                (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
                MemoryOrder());
        }
    }
    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                       HostDataType *startRefValues,
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
                    expected += startRefValues[i];
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
                expected += ((HostDataType)i + 3) * 3
                    + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
        }
        return true;
    }
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             const std::vector<HostDataType> &startRefValues,
                             cl_uint whichDestValue) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                return std::abs(static_cast<double>(
                           expected
                           - static_cast<HostDataType>(
                               testValues[whichDestValue])))
                    > max_error;
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
            IsTestNotAsExpected(expected, testValues, startRefValues,
                                whichDestValue);
    }
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            correct = true;
            for (cl_uint i = 1; i < threadCount; i++)
            {
                if (refValues[i] != StartValue())
                {
                    log_error("Thread %d found %d mismatch(es)\n", i,
                              (cl_uint)refValues[i]);
                    correct = false;
                }
            }
            return !correct;
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::VerifyRefs(
            correct, threadCount, refValues, finalValues);
    }
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            if (LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(
            deviceID, context, queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::NumResults(
            threadCount, deviceID);
    }
};
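// Special-value coverage for floating-point atomic_fetch_add: +/-0, +/-inf,
// quiet and signaling NaNs, max/min/lowest and (where CL_FP_DENORM is
// supported) denormals are combined in an all-to-all pattern, and each
// device result is checked element-wise against the host's arithmetic,
// with NaN results accepted against any expected NaN for float and double.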
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchAddSpecialFloats
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
    std::vector<HostDataType> ref_vals;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DeclaredInProgram;
    CBasicTestFetchAddSpecialFloats(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
    {
        // StartValue is used as an index divisor in the following test
        // logic. It is set to the number of special values, which allows
        // threads to be mapped deterministically onto the input data array.
        // This enables repeated add operations arranged so that every
        // special value is added to every other one ("all-to-all").
        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>
                      || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            auto spec_vals = GetSpecialValues();
            StartValue(spec_vals.size());
            CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck(
                false);
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            auto spec_vals = GetSpecialValues();
            StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode));
            CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck(
                false);
        }
    }
    static std::vector<HostDataType> &GetSpecialValues()
    {
        static std::vector<HostDataType> special_values;
        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>
                      || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            const HostDataType test_value_zero = static_cast<HostDataType>(0.0f);
            const HostDataType test_value_minus_zero =
                static_cast<HostDataType>(-0.0f);
            const HostDataType test_value_without_fraction =
                static_cast<HostDataType>(2.0f);
            const HostDataType test_value_with_fraction =
                static_cast<HostDataType>(2.2f);
            if (special_values.empty())
            {
                special_values = {
                    static_cast<HostDataType>(test_value_minus_zero),
                    static_cast<HostDataType>(test_value_zero),
                    static_cast<HostDataType>(test_value_without_fraction),
                    static_cast<HostDataType>(test_value_with_fraction),
                    std::numeric_limits<HostDataType>::infinity(),
                    std::numeric_limits<HostDataType>::quiet_NaN(),
                    std::numeric_limits<HostDataType>::signaling_NaN(),
                    -std::numeric_limits<HostDataType>::infinity(),
                    -std::numeric_limits<HostDataType>::quiet_NaN(),
                    -std::numeric_limits<HostDataType>::signaling_NaN(),
                    std::numeric_limits<HostDataType>::lowest(),
                    std::numeric_limits<HostDataType>::min(),
                    std::numeric_limits<HostDataType>::max(),
                };
                if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
                {
                    if (0 != (CL_FP_DENORM & gDoubleFPConfig))
                    {
                        special_values.push_back(
                            std::numeric_limits<HostDataType>::denorm_min());
                    }
                }
                else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
                {
                    if (0 != (CL_FP_DENORM & gFloatFPConfig))
                    {
                        special_values.push_back(
                            std::numeric_limits<HostDataType>::denorm_min());
                    }
                }
            }
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            if (special_values.empty())
            {
                special_values = {
                    0xffff,
                    0x0000,
                    0x7c00, /*INFINITY*/
                    0xfc00, /*-INFINITY*/
                    0x8000, /*-0*/
                    0x7bff, /*HALF_MAX*/
                    0x0400, /*HALF_MIN*/
                    0x3c00, /* 1 */
                    0xbc00, /* -1 */
                    0x3555, /*nearest value to 1/3*/
                    0x3bff, /*largest number less than one*/
                    0xc000, /* -2 */
                    0xfbff, /* -HALF_MAX */
                    0x8400, /* -HALF_MIN */
                    0x4248, /* M_PI_H */
                    0xc248, /* -M_PI_H */
                    0xbbff, /* Largest negative fraction */
                };
                if (0 != (CL_FP_DENORM & gHalfFPConfig))
                {
                    special_values.push_back(0x0001 /* Smallest denormal */);
                    special_values.push_back(0x03ff /* Largest denormal */);
                }
            }
        }
        return special_values;
    }
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>
                      || std::is_same_v<HostDataType, HOST_FLOAT>
                      || std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            if (threadCount > ref_vals.size())
            {
                ref_vals.assign(threadCount, 0);
                auto spec_vals = GetSpecialValues();
                cl_uint total_cnt = 0;
                while (total_cnt < threadCount)
                {
                    cl_uint block_cnt =
                        std::min((cl_int)(threadCount - total_cnt),
                                 (cl_int)spec_vals.size());
                    memcpy(&ref_vals.at(total_cnt), spec_vals.data(),
                           sizeof(HostDataType) * block_cnt);
                    total_cnt += block_cnt;
                }
            }
            memcpy(startRefValues, ref_vals.data(),
                   sizeof(HostDataType) * threadCount);
            return true;
        }
        return false;
    }
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>
                      || std::is_same_v<HostDataType, HOST_FLOAT>
                      || std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            // The start_value variable (set by StartValue) is used
            // as a divisor of the thread index when selecting the operand for
            // atomic_fetch_add. This groups threads into blocks corresponding
            // to the number of special values and implements an "all-to-all"
            // addition pattern. As a result, each destination element is
            // updated using different combinations of input values, enabling
            // consistent comparison between host and device execution.
            return std::string(DataType().AddSubOperandTypeName())
                + " start_value = atomic_load_explicit(destMemory+tid, "
                  "memory_order_relaxed, memory_scope_work_group);\n"
                  "  atomic_store_explicit(destMemory+tid, oldValues[tid], "
                  "memory_order_relaxed, memory_scope_work_group);\n"
                  "  atomic_fetch_add" + postfix + "(&destMemory[tid], ("
                + DataType().AddSubOperandTypeName()
                + ")oldValues[tid/(int)start_value]" + memoryOrderScope
                + ");\n";
        }
    }
    void HostFunction(cl_uint tid, cl_uint threadCount,
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>
                      || std::is_same_v<HostDataType, HOST_FLOAT>
                      || std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            auto spec_vals = GetSpecialValues();
            host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
                              MEMORY_ORDER_SEQ_CST);
            host_atomic_fetch_add(&destMemory[tid],
                                  (HostDataType)oldValues[tid / spec_vals.size()],
                                  MemoryOrder());
        }
    }
    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                       HostDataType *startRefValues,
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>
                      || std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            auto spec_vals = GetSpecialValues();
            expected = startRefValues[whichDestValue]
                + startRefValues[whichDestValue / spec_vals.size()];
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            auto spec_vals = GetSpecialValues();
            expected = cl_half_from_float(
                cl_half_to_float(startRefValues[whichDestValue])
                    + cl_half_to_float(
                        startRefValues[whichDestValue / spec_vals.size()]),
                gHalfRoundingMode);
        }
        return true;
    }
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             const std::vector<HostDataType> &startRefValues,
                             cl_uint whichDestValue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            return static_cast<HostAtomicType>(expected)
                != testValues[whichDestValue];
        }
        else
        {
            if (std::isnan(testValues[whichDestValue]) && std::isnan(expected))
                return false;
            else
                return expected != testValues[whichDestValue];
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
            IsTestNotAsExpected(expected, testValues, startRefValues,
                                whichDestValue);
    }
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            if (LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
            if (!CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory()
                && CBasicTestMemOrderScope<HostAtomicType,
                                           HostDataType>::DeclaredInProgram())
            {
                if ((gDoubleFPConfig & CL_FP_INF_NAN) == 0) return 0;
            }
        }
        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
            if (!CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory()
                && CBasicTestMemOrderScope<HostAtomicType,
                                           HostDataType>::DeclaredInProgram())
            {
                if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0;
            }
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            if (DeclaredInProgram()) return 0; // skip test - not applicable
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
            if (!CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory()
                && CBasicTestMemOrderScope<HostAtomicType,
                                           HostDataType>::DeclaredInProgram())
            {
                if ((gHalfFPConfig & CL_FP_INF_NAN) == 0) return 0;
            }
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(
            deviceID, context, queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_HALF>
                      || std::is_same_v<HostDataType, HOST_FLOAT>
                      || std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::NumResults(
            threadCount, deviceID);
    }
};
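// Driver: the integer fetch_add variants always run; the floating-point
// variants, including the special-value tests above, are gated on
// gFloatAtomicsSupported, i.e. on the device's cl_ext_float_atomics caps.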
static int test_atomic_fetch_add_generic(cl_device_id deviceID,
                                         cl_context context,
                                         cl_command_queue queue,
                                         int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchAdd<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAdd<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    if (gFloatAtomicsSupported)
    {
        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_DOUBLE, HOST_DOUBLE>
            test_spec_double(TYPE_ATOMIC_DOUBLE, useSVM);
        EXECUTE_TEST(
            error, test_spec_double.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT, HOST_FLOAT>
            test_spec_float(TYPE_ATOMIC_FLOAT, useSVM);
        EXECUTE_TEST(
            error, test_spec_float.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF, HOST_HALF>
            test_spec_half(TYPE_ATOMIC_HALF, useSVM);
        EXECUTE_TEST(
            error, test_spec_half.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
            TYPE_ATOMIC_HALF, useSVM);
        EXECUTE_TEST(error,
                     test_half.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
            TYPE_ATOMIC_DOUBLE, useSVM);
        EXECUTE_TEST(
            error, test_double.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
            TYPE_ATOMIC_FLOAT, useSVM);
        EXECUTE_TEST(
            error, test_float.Execute(deviceID, context, queue, num_elements));
    }
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchAdd<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64>
            test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAdd<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fetch_add)
{
    return test_atomic_fetch_add_generic(device, context, queue, num_elements,
                                         false);
}

REGISTER_TEST(svm_atomic_fetch_add)
{
    return test_atomic_fetch_add_generic(device, context, queue, num_elements,
                                         true);
}

template <typename T> double kahan_sub(const std::vector<T> &nums)
{
    return 0.0;
}

template <> double kahan_sub(const std::vector<double> &nums)
{
    double sum = 0.0;
    double compensation = 0.0;
    for (double num : nums)
    {
        double y = -num - compensation;
        double t = sum + y;
        compensation = (t - sum) - y;
        sum = t;
    }
    return sum;
}
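// atomic_fetch_sub mirrors the fetch_add test. The integer variant subtracts
// (tid + 3) mirrored into the top byte as well as the low bits, so corruption
// of the upper bytes of wide types is caught; the floating-point variants
// reuse the multiple-summation-order trick (kahan_sub above) to bound the
// acceptable rounding error.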
template <typename HostAtomicType, typename HostDataType>
class CBasicTestFetchSub
    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType>
{
    double min_range;
    double max_range;
    double max_error;
    std::vector<HostDataType> ref_vals;

public:
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
    CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM)
        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM),
          min_range(-999.0), max_range(999.0), max_error(0.0)
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            StartValue(0);
            CBasicTestMemOrderScope<HostAtomicType, HostDataType>::OldValueCheck(
                false);
            // Narrow down range for half to avoid overflow to infinity
            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
            {
                min_range = -50.0;
                max_range = 50.0;
            }
        }
    }
    template <typename Iterator>
    HostDataType subtract(Iterator begin, Iterator end)
    {
        HostDataType res = 0;
        for (auto it = begin; it != end; ++it) res = res - *it;
        return res;
    }
    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                      MTdata d) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (threadCount > ref_vals.size())
            {
                ref_vals.resize(threadCount);
                for (cl_uint i = 0; i < threadCount; i++)
                    ref_vals[i] =
                        (HostDataType)get_random_double(min_range, max_range, d);
                memcpy(startRefValues, ref_vals.data(),
                       sizeof(HostDataType) * ref_vals.size());

                // Estimate highest possible subtraction error for given set.
                std::vector<HostDataType> sums;
                std::sort(ref_vals.begin(), ref_vals.end());
                sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
                sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
                std::sort(ref_vals.begin(), ref_vals.end(),
                          [](HostDataType a, HostDataType b) {
                              return std::abs(a) < std::abs(b);
                          });
                double precise = 0.0;
                if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
                    precise = kahan_sub(ref_vals);
                else
                    // accumulate negated values to mirror kahan_sub
                    for (auto elem : ref_vals) precise -= double(elem);
                sums.push_back(precise);
                sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
                sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
                std::sort(sums.begin(), sums.end());
                assert(std::all_of(sums.begin(), sums.end(),
                                   [](const HostDataType &val) {
                                       return std::isfinite(
                                           static_cast<double>(val));
                                   })
                       && "Infinite subtraction value detected!");
                max_error = std::abs(sums.front() - sums.back());
                log_info("Max allowed error for %u elements: %.10f\n",
                         threadCount, max_error);
                // restore unsorted order
                memcpy(ref_vals.data(), startRefValues,
                       sizeof(HostDataType) * ref_vals.size());
            }
            else
            {
                memcpy(startRefValues, ref_vals.data(),
                       sizeof(HostDataType) * threadCount);
            }
            return true;
        }
        return false;
    }
    std::string ProgramCore() override
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        if constexpr (is_host_fp_v<HostDataType>)
        {
            return "  atomic_fetch_sub" + postfix + "(&destMemory[0], ("
                + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
                + memoryOrderScope + ");\n"
                + "  oldValues[tid] = atomic_fetch_sub" + postfix
                + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName()
                + ")0" + memoryOrderScope + ");\n";
        }
        else
        {
            return "  oldValues[tid] = atomic_fetch_sub" + postfix
                + "(&destMemory[0], tid + 3 +((("
                + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof("
                + DataType().AddSubOperandTypeName() + ")-1)*8)"
                + memoryOrderScope + ");\n";
        }
    }
    void HostFunction(cl_uint tid, cl_uint threadCount,
                      volatile HostAtomicType *destMemory,
                      HostDataType *oldValues) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
                                  MemoryOrder());
            oldValues[tid] = host_atomic_fetch_sub(
                &destMemory[tid], (HostDataType)0, MemoryOrder());
        }
        else
        {
            oldValues[tid] = host_atomic_fetch_sub(
                &destMemory[0],
                (HostDataType)tid + 3
                    + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8),
                MemoryOrder());
        }
    }
    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
                       HostDataType *startRefValues,
                       cl_uint whichDestValue) override
    {
        expected = StartValue();
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                for (cl_uint i = 0; i < threadCount; i++)
                    expected -= startRefValues[i];
        }
        else
        {
            for (cl_uint i = 0; i < threadCount; i++)
                expected -= (HostDataType)i + 3
                    + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8);
        }
        return true;
    }
    bool IsTestNotAsExpected(const HostDataType &expected,
                             const std::vector<HostAtomicType> &testValues,
                             const std::vector<HostDataType> &startRefValues,
                             cl_uint whichDestValue) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            if (whichDestValue == 0)
                return std::abs(static_cast<double>(
                           expected
                           - static_cast<HostDataType>(
                               testValues[whichDestValue])))
                    > max_error;
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
            IsTestNotAsExpected(expected, testValues, startRefValues,
                                whichDestValue);
    }
    bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                    HostAtomicType *finalValues) override
    {
        if (std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            correct = true;
            for (cl_uint i = 1; i < threadCount; i++)
            {
                if (refValues[i] != StartValue())
                {
                    log_error("Thread %d found %d mismatch(es)\n", i,
                              (cl_uint)refValues[i]);
                    correct = false;
                }
            }
            return !correct;
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::VerifyRefs(
            correct, threadCount, refValues, finalValues);
    }
    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue) override
    {
        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
        {
            if (LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
        {
            if (LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
        }
        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
        {
            if (LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0; // skip test - not applicable
            if (!LocalMemory()
                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
                return 0;
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::ExecuteSingleTest(
            deviceID, context, queue);
    }
    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
    {
        if constexpr (is_host_fp_v<HostDataType>)
        {
            return threadCount;
        }
        return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::NumResults(
            threadCount, deviceID);
    }
};
test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_sub) { return test_atomic_fetch_sub_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_sub) { return test_atomic_fetch_sub_generic(device, context, queue, num_elements, true); } template class CBasicTestFetchOr : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(0); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { cl_uint numBits = DataType().Size(deviceID) * 8; return (threadCount + numBits - 1) / numBits; } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" size_t numBits = sizeof(") + DataType().RegularTypeName() + ") * 8;\n" " int whichResult = tid / numBits;\n" " int bitIndex = tid - (whichResult * numBits);\n" "\n" " oldValues[tid] = atomic_fetch_or" + postfix + "(&destMemory[whichResult], ((" + DataType().RegularTypeName() + ")1 << bitIndex) " + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { size_t numBits = sizeof(HostDataType) * 8; size_t whichResult = tid / numBits; size_t bitIndex = tid - (whichResult * numBits); oldValues[tid] = host_atomic_fetch_or(&destMemory[whichResult], ((HostDataType)1 << bitIndex), MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) / (sizeof(HostDataType) * 8); if (whichDestValue < numValues - 1) { expected = ~(HostDataType)0; return true; } // Last item doesn't get or'ed on every bit, so we have to mask away cl_uint numBits = threadCount - whichDestValue * (sizeof(HostDataType) * 8); expected = StartValue(); for (cl_uint i = 0; i < numBits; i++) expected |= ((HostDataType)1 << i); return true; } }; static int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchOr test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_size_t( 
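// (Worked example for the masking in CBasicTestFetchOr::ExpectedValue above,
// using hypothetical values threadCount == 40 and a 32-bit type: NumResults
// yields (40 + 31) / 32 == 2 destination values. Threads 0..31 each OR one
// bit into destMemory[0], so its expected value is ~0 (all bits set);
// threads 32..39 only set bits 0..7 of destMemory[1], so its expected value
// is 0xFF -- hence the "mask away" loop for the last item.)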
TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_or) { return test_atomic_fetch_or_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_or) { return test_atomic_fetch_or_generic(device, context, queue, num_elements, true); } template class CBasicTestFetchXor : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::DataType; CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue((HostDataType)0x2f08ab418ba0541LL); } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" int numBits = sizeof(") + DataType().RegularTypeName() + ") * 8;\n" " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n" "\n" " oldValues[tid] = atomic_fetch_xor" + postfix + "(&destMemory[0], ((" + DataType().RegularTypeName() + ")1 << bitIndex) " + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { int numBits = sizeof(HostDataType) * 8; int bitIndex = (numBits - 1) * (tid + 1) / threadCount; oldValues[tid] = host_atomic_fetch_xor( &destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { int numBits = sizeof(HostDataType) * 8; expected = StartValue(); for (cl_uint i = 0; i < threadCount; i++) { int bitIndex = (numBits - 1) * (i + 1) / threadCount; expected ^= ((HostDataType)1 << bitIndex); } return true; } }; static int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchXor test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, 
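// (Worked example for the bitIndex formula in CBasicTestFetchXor above, with
// hypothetical values numBits == 32 and threadCount == 4: threads 0..3 get
// bitIndex = 31 * (tid + 1) / 4 = 7, 15, 23, 31, so each toggles a distinct
// bit of destMemory[0]. With a larger threadCount several threads can map to
// the same bit; the XOR fold in ExpectedValue replays every toggle in order,
// so repeated toggles of one bit correctly cancel out.)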
context, queue, num_elements)); CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_xor) { return test_atomic_fetch_xor_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_xor) { return test_atomic_fetch_xor_generic(device, context, queue, num_elements, true); } template class CBasicTestFetchAnd : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(~(HostDataType)0); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { cl_uint numBits = DataType().Size(deviceID) * 8; return (threadCount + numBits - 1) / numBits; } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); return std::string(" size_t numBits = sizeof(") + DataType().RegularTypeName() + ") * 8;\n" " int whichResult = tid / numBits;\n" " int bitIndex = tid - (whichResult * numBits);\n" "\n" " oldValues[tid] = atomic_fetch_and" + postfix + "(&destMemory[whichResult], ~((" + DataType().RegularTypeName() + ")1 << bitIndex) " + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { size_t numBits = sizeof(HostDataType) * 8; size_t whichResult = tid / numBits; size_t bitIndex = tid - (whichResult * numBits); oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], ~((HostDataType)1 << bitIndex), MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) / (sizeof(HostDataType) * 8); if (whichDestValue < numValues - 1) { expected = 0; return true; } // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = threadCount - whichDestValue * (sizeof(HostDataType) * 8); expected = StartValue(); for (size_t i = 0; i < numBits; i++) expected &= ~((HostDataType)1 << i); return true; } }; static int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchAnd test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_and) { return test_atomic_fetch_and_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_and) { return test_atomic_fetch_and_generic(device, context, queue, num_elements, true); } template class CBasicTestFetchOrAnd : 
public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::Iterations; using CBasicTestMemOrderScope::IterationsStr; CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(0); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8); } // each thread modifies (with OR and AND operations) and verifies // only one bit in atomic variable // other bits are modified by other threads but it must not affect current // thread operation virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" int bits = sizeof(") + DataType().RegularTypeName() + ")*8;\n" + " size_t valueInd = tid/bits;\n" " " + DataType().RegularTypeName() + " value, bitMask = (" + DataType().RegularTypeName() + ")1 << tid%bits;\n" " oldValues[tid] = 0;\n" " for(int i = 0; i < " + IterationsStr() + "; i++)\n" " {\n" " value = atomic_fetch_or" + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + ");\n" " if(value & bitMask) // bit should be set to 0\n" " oldValues[tid]++;\n" " value = atomic_fetch_and" + postfix + "(destMemory+valueInd, ~bitMask" + memoryOrderScope + ");\n" " if(!(value & bitMask)) // bit should be set to 1\n" " oldValues[tid]++;\n" " }\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { int bits = sizeof(HostDataType) * 8; size_t valueInd = tid / bits; HostDataType value, bitMask = (HostDataType)1 << tid % bits; oldValues[tid] = 0; for (int i = 0; i < Iterations(); i++) { value = host_atomic_fetch_or(destMemory + valueInd, bitMask, MemoryOrder()); if (value & bitMask) // bit should be set to 0 oldValues[tid]++; value = host_atomic_fetch_and(destMemory + valueInd, ~bitMask, MemoryOrder()); if (!(value & bitMask)) // bit should be set to 1 oldValues[tid]++; } } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = 0; return true; } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; for (cl_uint i = 0; i < threadCount; i++) { if (refValues[i] > 0) { log_error("Thread %d found %d mismatch(es)\n", i, (cl_uint)refValues[i]); correct = false; } } return true; } }; static int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchOrAnd test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_uint( TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_long( TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, 
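// (Sketch of one iteration of the per-bit protocol in
// CBasicTestFetchOrAnd::ProgramCore above, for a hypothetical thread that
// owns bit b of destMemory[v]:
//
//     old = atomic_fetch_or(&destMemory[v], (T)1 << b);    // expect bit b == 0
//     old = atomic_fetch_and(&destMemory[v], ~((T)1 << b)); // expect bit b == 1
//
// oldValues[tid] counts violations, so VerifyRefs treats any non-zero entry
// as a failure: concurrent OR/AND traffic on the other bits of the same word
// must never disturb the bit this thread owns.)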
test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_orand) { return test_atomic_fetch_orand_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_orand) { return test_atomic_fetch_orand_generic(device, context, queue, num_elements, true); } template class CBasicTestFetchXor2 : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::Iterations; using CBasicTestMemOrderScope::IterationsStr; CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(0); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8); } // each thread modifies (with XOR operation) and verifies // only one bit in atomic variable // other bits are modified by other threads but it must not affect current // thread operation virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
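// (CBasicTestFetchXor2 below is the same idea as CBasicTestFetchOrAnd, but
// both steps use XOR: toggling the owned bit twice must observe 0 and then 1.
// For a hypothetical owned bit 3, the first fetch_xor returns a value with
// bit 3 clear and the second returns one with bit 3 set, regardless of what
// other threads do to the remaining bits of the same word.)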
"" : "_explicit"); return std::string(" int bits = sizeof(") + DataType().RegularTypeName() + ")*8;\n" + " size_t valueInd = tid/bits;\n" " " + DataType().RegularTypeName() + " value, bitMask = (" + DataType().RegularTypeName() + ")1 << tid%bits;\n" " oldValues[tid] = 0;\n" " for(int i = 0; i < " + IterationsStr() + "; i++)\n" " {\n" " value = atomic_fetch_xor" + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + ");\n" " if(value & bitMask) // bit should be set to 0\n" " oldValues[tid]++;\n" " value = atomic_fetch_xor" + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + ");\n" " if(!(value & bitMask)) // bit should be set to 1\n" " oldValues[tid]++;\n" " }\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { int bits = sizeof(HostDataType) * 8; size_t valueInd = tid / bits; HostDataType value, bitMask = (HostDataType)1 << tid % bits; oldValues[tid] = 0; for (int i = 0; i < Iterations(); i++) { value = host_atomic_fetch_xor(destMemory + valueInd, bitMask, MemoryOrder()); if (value & bitMask) // bit should be set to 0 oldValues[tid]++; value = host_atomic_fetch_xor(destMemory + valueInd, bitMask, MemoryOrder()); if (!(value & bitMask)) // bit should be set to 1 oldValues[tid]++; } } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = 0; return true; } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; for (cl_uint i = 0; i < threadCount; i++) { if (refValues[i] > 0) { log_error("Thread %d found %d mismatches\n", i, (cl_uint)refValues[i]); correct = false; } } return true; } }; static int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchXor2 test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, 
test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor2 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_xor2) { return test_atomic_fetch_xor2_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_xor2) { return test_atomic_fetch_xor2_generic(device, context, queue, num_elements, true); } template class CBasicTestFetchMin : public CBasicTestMemOrderScope { double min_range; double max_range; public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM), min_range(-999.0), max_range(999.0) { StartValue(DataType().MaxValue()); if constexpr (is_host_fp_v) { CBasicTestMemOrderScope::OldValueCheck(false); } } std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); if constexpr (is_host_fp_v) { return " atomic_fetch_min" + postfix + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" + " oldValues[tid] = atomic_fetch_min" + postfix + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + ")0" + memoryOrderScope + ");\n"; } else { return " oldValues[tid] = atomic_fetch_min" + postfix + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; } } void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) override { if constexpr (is_host_fp_v) { host_atomic_fetch_min(&destMemory[0], oldValues[tid], MemoryOrder()); oldValues[tid] = host_atomic_fetch_min( &destMemory[tid], (HostDataType)0, MemoryOrder()); } else { oldValues[tid] = host_atomic_fetch_min( &destMemory[0], oldValues[tid], MemoryOrder()); } } bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { if constexpr (std::is_same_v) { for (cl_uint i = 0; i < threadCount; i++) { startRefValues[i] = cl_half_from_float( get_random_float(min_range, max_range, d), gHalfRoundingMode); } } else if constexpr ( std::is_same_v< HostDataType, HOST_FLOAT> || std::is_same_v) { for (cl_uint i = 0; i < threadCount; i++) { startRefValues[i] = get_random_float(min_range, max_range, d); } } else { for (cl_uint i = 0; i < threadCount; i++) { startRefValues[i] = genrand_int32(d); if (sizeof(HostDataType) >= 8) { cl_ulong v = startRefValues[i]; v |= (cl_ulong)genrand_int32(d) << 16; startRefValues[i] = v; } } } return true; } bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) override { expected = StartValue(); if constexpr (is_host_fp_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) if (startRefValues[i] < expected) expected = startRefValues[i]; } else { for (cl_uint i = 0; i < threadCount; i++) { if (startRefValues[i] < expected) expected = startRefValues[i]; } } return true; } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, const std::vector &startRefValues, cl_uint whichDestValue) override { if constexpr (is_host_fp_v) { if (whichDestValue == 0) return CBasicTestMemOrderScope:: IsTestNotAsExpected(expected, testValues, startRefValues, whichDestValue); return false; // 
ignore all but 0 which stores final result } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) override { if constexpr (is_host_fp_v) { correct = true; for (cl_uint i = 1; i < threadCount; i++) { if (refValues[i] != StartValue()) { log_error( "Thread %d found %lf mismatch(es), start value=%lf\n", i, (double)refValues[i], (double)StartValue()); correct = false; } } return !correct; } return CBasicTestMemOrderScope::VerifyRefs(correct, threadCount, refValues, finalValues); } int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { if constexpr (std::is_same_v) { if (LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; } else if constexpr (std::is_same_v) { if (LocalMemory() && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; } else if constexpr (std::is_same_v) { if (LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { if constexpr (is_host_fp_v) { return threadCount; } return CBasicTestMemOrderScope::NumResults(threadCount, deviceID); } }; template class CBasicTestFetchMinSpecialFloats : public CBasicTestMemOrderScope { std::vector ref_vals; public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchMinSpecialFloats(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { // StartValue is used as an index divisor in the following test // logic. It is set to the number of special values, which allows // threads to be mapped deterministically onto the input data array. // This enables repeated add operations arranged so that every // special value is added to every other one (“all-to-all”). 
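// (Illustration of the divisor scheme described above, with hypothetical
// sizes: GetSpecialValues() for float returns 13 values (14 when denormals
// are supported), so StartValue(13) makes thread tid use the operand
// oldValues[tid / 13]. Threads 0..12 all pair against special value 0,
// threads 13..25 against special value 1, and so on -- with enough threads,
// every special value has served as the fetch_min operand against every
// other one, which is the "all-to-all" pattern described above.)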
if constexpr (std::is_same_v) { auto spec_vals = GetSpecialValues(); StartValue(spec_vals.size()); CBasicTestMemOrderScope::OldValueCheck(false); } } static std::vector &GetSpecialValues() { static std::vector special_values; if constexpr (std::is_same_v) { const HostDataType test_value_zero = static_cast(0.0f); const HostDataType test_value_minus_zero = static_cast(-0.0f); const HostDataType test_value_without_fraction = static_cast(2.0f); const HostDataType test_value_with_fraction = static_cast(2.2f); if (special_values.empty()) { special_values = { static_cast(test_value_minus_zero), static_cast(test_value_zero), static_cast(test_value_without_fraction), static_cast(test_value_with_fraction), std::numeric_limits::infinity(), std::numeric_limits::quiet_NaN(), std::numeric_limits::signaling_NaN(), -std::numeric_limits::infinity(), -std::numeric_limits::quiet_NaN(), -std::numeric_limits::signaling_NaN(), std::numeric_limits::lowest(), std::numeric_limits::min(), std::numeric_limits::max(), }; if (0 != (CL_FP_DENORM & gFloatFPConfig)) { special_values.push_back( std::numeric_limits::denorm_min()); } } } return special_values; } bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { if constexpr (std::is_same_v) { if (threadCount > ref_vals.size()) { ref_vals.assign(threadCount, 0); auto spec_vals = GetSpecialValues(); cl_uint total_cnt = 0; while (total_cnt < threadCount) { cl_uint block_cnt = std::min((cl_int)(threadCount - total_cnt), (cl_int)spec_vals.size()); memcpy(&ref_vals.at(total_cnt), spec_vals.data(), sizeof(HostDataType) * block_cnt); total_cnt += block_cnt; } } memcpy(startRefValues, ref_vals.data(), sizeof(HostDataType) * threadCount); return true; } return false; } std::string ProgramCore() override { // The start_value variable (set by StartValue) is used // as a divisor of the thread index when selecting the operand for // atomic_fetch_add. This groups threads into blocks corresponding // to the number of special values and implements an “all-to-all” // addition pattern. As a result, each destination element is // updated using different combinations of input values, enabling // consistent comparison between host and device execution. std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); return std::string(DataType().AddSubOperandTypeName()) + " start_value = atomic_load_explicit(destMemory+tid, " "memory_order_relaxed, memory_scope_work_group);\n" " atomic_store_explicit(destMemory+tid, oldValues[tid], " "memory_order_relaxed, memory_scope_work_group);\n" " atomic_fetch_min" + postfix + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + ")oldValues[tid/(int)start_value]" + memoryOrderScope + ");\n"; } void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) override { auto spec_vals = GetSpecialValues(); host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid], MEMORY_ORDER_SEQ_CST); host_atomic_fetch_min(&destMemory[tid], (HostDataType)oldValues[tid / spec_vals.size()], MemoryOrder()); } bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) override { expected = StartValue(); if constexpr (std::is_same_v) { auto spec_vals = GetSpecialValues(); expected = std::min(startRefValues[whichDestValue], startRefValues[whichDestValue / spec_vals.size()]); } return true; } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, const std::vector &startRefValues, cl_uint whichDestValue) override { if (testValues[whichDestValue] != expected) { auto spec_vals = GetSpecialValues(); // special cases // min(-0, +0) = min(+0, -0) = +0 or -0, if (((startRefValues[whichDestValue] == -0.f) && (startRefValues[whichDestValue / spec_vals.size()] == 0.f)) || ((startRefValues[whichDestValue] == 0.f) && (startRefValues[whichDestValue / spec_vals.size()] == -0.f))) return false; else if (is_qnan(startRefValues[whichDestValue / spec_vals.size()]) || is_qnan(startRefValues[whichDestValue])) { // min(x, qNaN) = min(qNaN, x) = x, // min(qNaN, qNaN) = qNaN, if (is_qnan(startRefValues[whichDestValue / spec_vals.size()]) && is_qnan(startRefValues[whichDestValue])) return !is_qnan(testValues[whichDestValue]); else if (is_qnan( startRefValues[whichDestValue / spec_vals.size()])) return !std::isnan(testValues[whichDestValue]) && testValues[whichDestValue] != startRefValues[whichDestValue]; // NaN != NaN always // true else return !std::isnan(testValues[whichDestValue]) && testValues[whichDestValue] != startRefValues[whichDestValue / spec_vals.size()]; } else if (is_snan(startRefValues[whichDestValue / spec_vals.size()]) || is_snan(startRefValues[whichDestValue])) { // min(x, sNaN) = min(sNaN, x) = NaN or x, and // min(NaN, sNaN) = min(sNaN, NaN) = NaN if (std::isnan(testValues[whichDestValue]) || testValues[whichDestValue] == startRefValues[whichDestValue] || testValues[whichDestValue] == startRefValues[whichDestValue / spec_vals.size()]) return false; } } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, startRefValues, whichDestValue); } int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { if constexpr (std::is_same_v) { if (LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; if (!CBasicTestMemOrderScope::LocalMemory() && CBasicTestMemOrderScope::DeclaredInProgram()) { if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0; } } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } cl_uint 
NumResults(cl_uint threadCount, cl_device_id deviceID) override { if constexpr (std::is_same_v) { return threadCount; } return CBasicTestMemOrderScope::NumResults(threadCount, deviceID); } }; static int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchMin test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (gFloatAtomicsSupported) { CBasicTestFetchMinSpecialFloats test_spec_float(TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST( error, test_spec_float.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST( error, test_double.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_half( TYPE_ATOMIC_HALF, useSVM); EXECUTE_TEST(error, test_half.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_float( TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST( error, test_float.Execute(deviceID, context, queue, num_elements)); } if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchMin test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchMin test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMin test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_min) { return test_atomic_fetch_min_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_min) { return test_atomic_fetch_min_generic(device, context, queue, num_elements, true); } template class CBasicTestFetchMax : public CBasicTestMemOrderScope { double min_range; double max_range; public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM), min_range(-999.0), max_range(999.0) { StartValue(DataType().MinValue()); if 
constexpr (is_host_fp_v) { CBasicTestMemOrderScope::OldValueCheck(false); } } std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); if constexpr ( std::is_same_v< HostDataType, HOST_HALF> || std::is_same_v || std::is_same_v) { return " atomic_fetch_max" + postfix + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n" + " oldValues[tid] = atomic_fetch_max" + postfix + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + ")0" + memoryOrderScope + ");\n"; } else { return " oldValues[tid] = atomic_fetch_max" + postfix + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; } } void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) override { if constexpr (is_host_fp_v) { host_atomic_fetch_max(&destMemory[0], oldValues[tid], MemoryOrder()); oldValues[tid] = host_atomic_fetch_max( &destMemory[tid], (HostDataType)0, MemoryOrder()); } else { oldValues[tid] = host_atomic_fetch_max( &destMemory[0], oldValues[tid], MemoryOrder()); } } bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { if constexpr (is_host_fp_v) { for (cl_uint i = 0; i < threadCount; i++) { startRefValues[i] = static_cast( get_random_float(min_range, max_range, d)); } } else { for (cl_uint i = 0; i < threadCount; i++) { startRefValues[i] = genrand_int32(d); if (sizeof(HostDataType) >= 8) { cl_ulong v = startRefValues[i]; v |= (cl_ulong)genrand_int32(d) << 16; startRefValues[i] = v; } } } return true; } bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) override { expected = StartValue(); if constexpr (is_host_fp_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) if (startRefValues[i] > expected) expected = startRefValues[i]; } else { for (cl_uint i = 0; i < threadCount; i++) { if (startRefValues[i] > expected) expected = startRefValues[i]; } } return true; } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, const std::vector &startRefValues, cl_uint whichDestValue) override { if constexpr (is_host_fp_v) { if (whichDestValue == 0) return CBasicTestMemOrderScope:: IsTestNotAsExpected(expected, testValues, startRefValues, whichDestValue); return false; // ignore all but 0 which stores final result } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) override { if constexpr (is_host_fp_v) { correct = true; for (cl_uint i = 1; i < threadCount; i++) { if (refValues[i] != StartValue()) { log_error( "Thread %d found %lf mismatch(es), start value=%lf\n", i, (double)refValues[i], (double)StartValue()); correct = false; } } return !correct; } return CBasicTestMemOrderScope::VerifyRefs(correct, threadCount, refValues, finalValues); } int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { if constexpr (std::is_same_v) { if (LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; } else if constexpr (std::is_same_v) { if (LocalMemory() && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) == 0) 
return 0; // skip test - not applicable if (!LocalMemory() && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; } else if constexpr (std::is_same_v) { if (LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT) == 0) return 0; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { if constexpr (is_host_fp_v) { return threadCount; } return CBasicTestMemOrderScope::NumResults(threadCount, deviceID); } }; static int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchMax test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMax test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMax test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMax test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (gFloatAtomicsSupported) { CBasicTestFetchMax test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST( error, test_double.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMax test_half( TYPE_ATOMIC_HALF, useSVM); EXECUTE_TEST(error, test_half.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMax test_float( TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST( error, test_float.Execute(deviceID, context, queue, num_elements)); } if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchMax test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMax test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMax test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMax test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchMax test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMax test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMax test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchMax test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_max) { return test_atomic_fetch_max_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_max) { return test_atomic_fetch_max_generic(device, context, queue, num_elements, true); } template class CBasicTestFlag : public CBasicTestMemOrderScope { static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000; public: using 
CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::OldValueCheck; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryScopeStr; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::UseSVM; using CBasicTestMemOrderScope::LocalMemory; CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(0); OldValueCheck(false); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return threadCount; } TExplicitMemoryOrderType MemoryOrderForClear() { // Memory ordering for atomic_flag_clear function // ("shall not be memory_order_acquire nor memory_order_acq_rel") if (MemoryOrder() == MEMORY_ORDER_ACQUIRE) return MEMORY_ORDER_RELAXED; if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) return MEMORY_ORDER_RELEASE; return MemoryOrder(); } std::string MemoryOrderScopeStrForClear() { std::string orderStr; if (MemoryOrder() != MEMORY_ORDER_EMPTY) orderStr = std::string(", ") + get_memory_order_type_name(MemoryOrderForClear()); return orderStr + MemoryScopeStr(); } virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) { // This test assumes support for the memory_scope_device scope in the // case that LocalMemory() == false. Therefore we should skip this test // in that configuration on a 3.0 driver, since support for the // memory_scope_device scope is optional. if (get_device_cl_version(deviceID) >= Version{ 3, 0 }) { if (!LocalMemory() && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE)) { log_info("Skipping atomic_flag test due to use of " "atomic_scope_device, " "which is optional and not supported on this device\n"); return 0; // skip test - not applicable } } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); std::string program = " uint cnt, stop = 0;\n" " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread " "must find critical section where it is the first visitor\n" " {\n" " bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n"; if (MemoryOrder() == MEMORY_ORDER_RELAXED || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory()) program += " atomic_work_item_fence(" + std::string( LocalMemory() ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + "memory_order_acquire," + std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device")) + ");\n"; program += " if (!set)\n" " {\n"; if (LocalMemory()) program += " uint csIndex = " "get_enqueued_local_size(0)*get_group_id(0)+cnt;\n"; else program += " uint csIndex = cnt;\n"; std::ostringstream csNotVisited; csNotVisited << CRITICAL_SECTION_NOT_VISITED; program += " // verify that thread is the first visitor\n" " if(oldValues[csIndex] == " + csNotVisited.str() + ")\n" " {\n" " oldValues[csIndex] = tid; // set the winner id for this " "critical section\n" " stop = 1;\n" " }\n"; if (MemoryOrder() == MEMORY_ORDER_ACQUIRE || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory()) program += " atomic_work_item_fence(" + std::string( LocalMemory() ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + "memory_order_release," + std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ?
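// (The mapping in MemoryOrderForClear() above follows the OpenCL C rule that
// atomic_flag_clear "shall not be memory_order_acquire nor
// memory_order_acq_rel": acquire is demoted to relaxed, and acq_rel keeps
// only its release half. For example, a sub-test running with
// memory_order_acq_rel emits atomic_flag_test_and_set(...,
// memory_order_acq_rel) but atomic_flag_clear(..., memory_order_release).)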
"memory_scope_all_svm_devices" : "memory_scope_device")) + ");\n"; program += " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n" " }\n" " }\n"; return program; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { cl_uint cnt, stop = 0; for (cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the // first visitor\n" { if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder())) { cl_uint csIndex = cnt; // verify that thread is the first visitor\n" if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED) { oldValues[csIndex] = tid; // set the winner id for this critical section\n" stop = 1; } host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear()); } } } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = StartValue(); return true; } virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) { for (cl_uint i = 0; i < threadCount; i++) startRefValues[i] = CRITICAL_SECTION_NOT_VISITED; return true; } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; /* We are expecting unique values from 0 to threadCount-1 (each critical * section must be visited) */ /* These values must be distributed across refValues array */ std::vector tidFound(threadCount); cl_uint i; for (i = 0; i < threadCount; i++) { cl_uint value = (cl_uint)refValues[i]; if (value == CRITICAL_SECTION_NOT_VISITED) { // Special initial value log_error("ERROR: Critical section %u not visited\n", i); correct = false; return true; } if (value >= threadCount) { log_error( "ERROR: Reference value %u outside of valid range! 
(%u)\n", i, value); correct = false; return true; } if (tidFound[value]) { log_error("ERROR: Value (%u) occurred more thane once\n", value); correct = false; return true; } tidFound[value] = true; } return true; } }; static int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFlag test_flag(TYPE_ATOMIC_FLAG, useSVM); EXECUTE_TEST(error, test_flag.Execute(deviceID, context, queue, num_elements)); return error; } REGISTER_TEST(atomic_flag) { return test_atomic_flag_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_flag) { return test_atomic_flag_generic(device, context, queue, num_elements, true); } template class CBasicTestFence : public CBasicTestMemOrderScope { struct TestDefinition { bool op1IsFence; TExplicitMemoryOrderType op1MemOrder; bool op2IsFence; TExplicitMemoryOrderType op2MemOrder; }; public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::OldValueCheck; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryScope; using CBasicTestMemOrderScope::MemoryScopeStr; using CBasicTestMemOrderScope::DeclaredInProgram; using CBasicTestMemOrderScope::UsedInFunction; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::CurrentGroupSize; using CBasicTestMemOrderScope::UseSVM; using CBasicTestMemOrderScope::LocalMemory; using CBasicTestMemOrderScope::LocalRefValues; CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(0); OldValueCheck(false); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return threadCount; } virtual cl_uint NumNonAtomicVariablesPerThread() { if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) return 1; if (LocalMemory()) { if (gIsEmbedded) { if (CurrentGroupSize() > 512) CurrentGroupSize(512); return 2; // 1KB of local memory required by spec. Clamp group // size to 512 and allow 2 variables per thread } else return 32 * 1024 / 8 / CurrentGroupSize() - 1; // 32KB of local memory required by spec } return 256; } virtual std::string SingleTestName() { std::string testName; if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) testName += "seq_cst fence, "; else testName += std::string(get_memory_order_type_name(_subCase.op1MemOrder)) .substr(sizeof("memory_order")) + (_subCase.op1IsFence ? " fence" : " atomic") + " synchronizes-with " + std::string(get_memory_order_type_name(_subCase.op2MemOrder)) .substr(sizeof("memory_order")) + (_subCase.op2IsFence ? " fence" : " atomic") + ", "; testName += CBasicTest::SingleTestName(); testName += std::string(", ") + std::string(get_memory_scope_type_name(MemoryScope())) .substr(sizeof("memory")); return testName; } virtual bool SVMDataBufferAllSVMConsistent() { // Although memory_scope_all_devices doesn't mention SVM it is just an // alias for memory_scope_all_svm_devices. So both scopes interact with // SVM allocations, on devices that support those, just the same. 
return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES; } virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) { int error = 0; // execute 3 (maximum) sub cases for each memory order for (_subCaseId = 0; _subCaseId < 3; _subCaseId++) { EXECUTE_TEST( error, (CBasicTestMemOrderScope:: ExecuteForEachParameterSet(deviceID, context, queue))); } return error; } virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) { if (DeclaredInProgram() || UsedInFunction()) return 0; // skip test - not applicable - no overloaded fence // functions for different address spaces if (MemoryOrder() == MEMORY_ORDER_EMPTY || MemoryScope() == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since // opencl20-openclc-rev15 return 0; // skip test - not applicable if ((UseSVM() || gHost) && LocalMemory()) return 0; // skip test - not applicable for SVM and local memory struct TestDefinition acqTests[] = { // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder} { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE } }; struct TestDefinition relTests[] = { { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE }, { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL } }; struct TestDefinition arTests[] = { { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }, { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE }, { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL } }; switch (MemoryOrder()) { case MEMORY_ORDER_ACQUIRE: if (_subCaseId >= sizeof(acqTests) / sizeof(struct TestDefinition)) return 0; _subCase = acqTests[_subCaseId]; break; case MEMORY_ORDER_RELEASE: if (_subCaseId >= sizeof(relTests) / sizeof(struct TestDefinition)) return 0; _subCase = relTests[_subCaseId]; break; case MEMORY_ORDER_ACQ_REL: if (_subCaseId >= sizeof(arTests) / sizeof(struct TestDefinition)) return 0; _subCase = arTests[_subCaseId]; break; case MEMORY_ORDER_SEQ_CST: if (_subCaseId != 0) // one special case only return 0; break; default: return 0; } LocalRefValues(LocalMemory()); return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } virtual std::string ProgramHeader(cl_uint maxNumDestItems) { std::string header; if (gOldAPI) { if (MemoryScope() == MEMORY_SCOPE_EMPTY) { header += "#define atomic_work_item_fence(x,y) " " mem_fence(x)\n"; } else { header += "#define atomic_work_item_fence(x,y,z) " " mem_fence(x)\n"; } } return header + CBasicTestMemOrderScope:: ProgramHeader(maxNumDestItems); } virtual std::string ProgramCore() { std::ostringstream naValues; naValues << NumNonAtomicVariablesPerThread(); std::string program, fenceType, nonAtomic; if (LocalMemory()) { program = " size_t myId = get_local_id(0), hisId = " "get_local_size(0)-1-myId;\n"; fenceType = "CLK_LOCAL_MEM_FENCE"; nonAtomic = "localValues"; } else { program = " size_t myId = tid, hisId = threadCount-1-tid;\n"; fenceType = "CLK_GLOBAL_MEM_FENCE"; nonAtomic = "oldValues"; } if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) { // All threads are divided into pairs. 
// Each thread has its own atomic variable and performs the // following actions: // - increments its own variable // - performs fence operation to propagate its value and to see // value from other thread // - reads value from other thread's variable // - repeats the above steps when both values are the same (and less // than 500000) // - stores the last value read from other thread (in additional // variable) At the end of execution at least one thread should know // the last value from other thread program += std::string("") + " " + DataType().RegularTypeName() + " myValue = 0, hisValue; \n" " do {\n" " myValue++;\n" " atomic_store_explicit(&destMemory[myId], myValue, " "memory_order_relaxed" + MemoryScopeStr() + ");\n" " atomic_work_item_fence(" + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + "); \n" " hisValue = atomic_load_explicit(&destMemory[hisId], " "memory_order_relaxed" + MemoryScopeStr() + ");\n" " } while(myValue == hisValue && myValue < 500000);\n" " " + nonAtomic + "[myId] = hisValue; \n"; } else { // Each thread modifies one of its non-atomic variables, increments // value of its atomic variable and reads values from another thread // in typical synchronizes-with scenario with: // - non-atomic variable (at index A) modification (value change // from 0 to A) // - release operation (additional fence or within atomic) + atomic // variable modification (value A) // - atomic variable read (value B) + acquire operation (additional // fence or within atomic) // - non-atomic variable (at index B) read (value C) // Each thread verifies dependency between atomic and non-atomic // value read from another thread The following condition must be // true: B == C program += std::string("") + " " + DataType().RegularTypeName() + " myValue = 0, hisAtomicValue, hisValue; \n" " do {\n" " myValue++;\n" " " + nonAtomic + "[myId*" + naValues.str() + "+myValue] = myValue;\n"; if (_subCase.op1IsFence) program += std::string("") + " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + "); \n" " atomic_store_explicit(&destMemory[myId], myValue, " "memory_order_relaxed" + MemoryScopeStr() + ");\n"; else program += std::string("") + " atomic_store_explicit(&destMemory[myId], myValue, " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + ");\n"; if (_subCase.op2IsFence) program += std::string("") + " hisAtomicValue = " "atomic_load_explicit(&destMemory[hisId], " "memory_order_relaxed" + MemoryScopeStr() + ");\n" " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + "); \n"; else program += std::string("") + " hisAtomicValue = " "atomic_load_explicit(&destMemory[hisId], " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + ");\n"; program += " hisValue = " + nonAtomic + "[hisId*" + naValues.str() + "+hisAtomicValue]; \n"; if (LocalMemory()) program += " hisId = (hisId+1)%get_local_size(0);\n"; else program += " hisId = (hisId+1)%threadCount;\n"; program += " } while(hisAtomicValue == hisValue && myValue < " + naValues.str() + "-1);\n" " if(hisAtomicValue != hisValue)\n" " { // fail\n" " atomic_store_explicit(&destMemory[myId], myValue-1," " memory_order_relaxed, memory_scope_work_group);\n"; if (LocalMemory()) program += " hisId = " "(hisId+get_local_size(0)-1)%get_local_size(0);\n"; else program += " hisId = (hisId+threadCount-1)%threadCount;\n"; program += " if(myValue+1 < " + naValues.str() + ")\n" " " + nonAtomic + "[myId*" 
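// (A minimal sketch of the synchronizes-with pattern described in the
// comment above, reduced to one pair of threads; all names are illustrative
// only, not the generated kernel:
//
//     // thread A                        // thread B
//     values[A] = x;      // non-atomic
//     fence(release);     // or a release store instead of the fence
//     store(&flag[A], x, relaxed);
//                                        y = load(&flag[A], relaxed);
//                                        fence(acquire);
//                                        c = values[y];
//
// If the release/acquire edge is honored, B's non-atomic read c (value "C"
// in the comment above) must equal the atomic value y it loaded ("B == C").)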
            if (gDebug)
            {
                program +=
                    "    printf(\"WI %d: atomic value (%d) at index %d "
                    "is different than non-atomic value (%d)\\n\", tid, "
                    "hisAtomicValue, hisId, hisValue);\n";
            }
            program += "  }\n";
        }
        return program;
    }
    virtual void HostFunction(cl_uint tid, cl_uint threadCount,
                              volatile HostAtomicType *destMemory,
                              HostDataType *oldValues)
    {
        size_t myId = tid, hisId = threadCount - 1 - tid;
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
        {
            HostDataType myValue = 0, hisValue;
            // the CPU thread typically starts faster - wait for the GPU
            // thread
            myValue++;
            host_atomic_store<HostAtomicType, HostDataType>(
                &destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST);
            while (host_atomic_load<HostAtomicType, HostDataType>(
                       &destMemory[hisId], MEMORY_ORDER_SEQ_CST)
                   == 0)
                ;
            do
            {
                myValue++;
                host_atomic_store<HostAtomicType, HostDataType>(
                    &destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
                host_atomic_thread_fence(MemoryOrder());
                hisValue = host_atomic_load<HostAtomicType, HostDataType>(
                    &destMemory[hisId], MEMORY_ORDER_RELAXED);
            } while (myValue == hisValue && hisValue < 500000);
            oldValues[tid] = hisValue;
        }
        else
        {
            HostDataType myValue = 0, hisAtomicValue, hisValue;
            do
            {
                myValue++;
                oldValues[myId * NumNonAtomicVariablesPerThread() + myValue] =
                    myValue;
                if (_subCase.op1IsFence)
                {
                    host_atomic_thread_fence(_subCase.op1MemOrder);
                    host_atomic_store<HostAtomicType, HostDataType>(
                        &destMemory[myId], myValue, MEMORY_ORDER_RELAXED);
                }
                else
                    host_atomic_store<HostAtomicType, HostDataType>(
                        &destMemory[myId], myValue, _subCase.op1MemOrder);
                if (_subCase.op2IsFence)
                {
                    hisAtomicValue =
                        host_atomic_load<HostAtomicType, HostDataType>(
                            &destMemory[hisId], MEMORY_ORDER_RELAXED);
                    host_atomic_thread_fence(_subCase.op2MemOrder);
                }
                else
                    hisAtomicValue =
                        host_atomic_load<HostAtomicType, HostDataType>(
                            &destMemory[hisId], _subCase.op2MemOrder);
                hisValue = oldValues[hisId * NumNonAtomicVariablesPerThread()
                                     + hisAtomicValue];
                hisId = (hisId + 1) % threadCount;
            } while (hisAtomicValue == hisValue
                     && myValue
                         < (HostDataType)NumNonAtomicVariablesPerThread() - 1);
            if (hisAtomicValue != hisValue)
            { // fail
                host_atomic_store<HostAtomicType, HostDataType>(
                    &destMemory[myId], myValue - 1, MEMORY_ORDER_SEQ_CST);
                if (gDebug)
                {
                    hisId = (hisId + threadCount - 1) % threadCount;
                    printf("WI %d: atomic value (%d) at index %d is different "
                           "than non-atomic value (%d)\n",
                           tid, hisAtomicValue, hisId, hisValue);
                }
            }
        }
    }
    virtual bool GenerateRefs(cl_uint threadCount,
                              HostDataType *startRefValues, MTdata d)
    {
        for (cl_uint i = 0;
             i < threadCount * NumNonAtomicVariablesPerThread(); i++)
            startRefValues[i] = 0;
        return true;
    }
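    // Verification strategy: in the seq_cst fence subcase, threads i and
    // workSize-1-i within a group ran the ping-pong against each other, so
    // either both counters reached 500000 (a draw) or the slower thread
    // must have seen the faster thread's final value. In the
    // release/acquire subcases every work-item must have walked through
    // all of its non-atomic slots without ever observing B != C.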
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
                            HostDataType *refValues,
                            HostAtomicType *finalValues)
    {
        correct = true;
        cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount;
        for (cl_uint workOffset = 0; workOffset < threadCount;
             workOffset += workSize)
        {
            if (workOffset + workSize > threadCount)
                // last workgroup (host threads)
                workSize = threadCount - workOffset;
            for (cl_uint i = 0; i < workSize && workOffset + i < threadCount;
                 i++)
            {
                HostAtomicType myValue = finalValues[workOffset + i];
                if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
                {
                    HostDataType hisValue = refValues[workOffset + i];
                    if (myValue == hisValue)
                    {
                        // a draw - both threads should reach the final
                        // value 500000
                        if (myValue != 500000)
                        {
                            log_error("ERROR: Invalid reference value #%u "
                                      "(%d instead of 500000)\n",
                                      workOffset + i, myValue);
                            correct = false;
                            return true;
                        }
                    }
                    else
                    {
                        // the slower thread (in the total order of seq_cst
                        // operations) must know the last value written by
                        // the faster thread
                        HostAtomicType hisRealValue =
                            finalValues[workOffset + workSize - 1 - i];
                        HostDataType myValueReadByHim =
                            refValues[workOffset + workSize - 1 - i];
                        // who is the winner? - the thread with the lower
                        // private counter value
                        if (myValue == hisRealValue)
                        { // forbidden result - the fence doesn't work
                            log_error("ERROR: Atomic counter values #%u and "
                                      "#%u are the same (%u)\n",
                                      workOffset + i,
                                      workOffset + workSize - 1 - i, myValue);
                            log_error(
                                "ERROR: Both threads have outdated values "
                                "read from another thread (%u and %u)\n",
                                hisValue, myValueReadByHim);
                            correct = false;
                            return true;
                        }
                        if (myValue > hisRealValue) // I'm slower
                        {
                            if (hisRealValue != hisValue)
                            {
                                log_error("ERROR: Invalid reference value "
                                          "#%u (%d instead of %d)\n",
                                          workOffset + i, hisValue,
                                          hisRealValue);
                                log_error(
                                    "ERROR: Slower thread #%u should know "
                                    "the value written by faster thread "
                                    "#%u\n",
                                    workOffset + i,
                                    workOffset + workSize - 1 - i);
                                correct = false;
                                return true;
                            }
                        }
                        else // I'm faster
                        {
                            if (myValueReadByHim != myValue)
                            {
                                log_error("ERROR: Invalid reference value "
                                          "#%u (%d instead of %d)\n",
                                          workOffset + workSize - 1 - i,
                                          myValueReadByHim, myValue);
                                log_error(
                                    "ERROR: Slower thread #%u should know "
                                    "the value written by faster thread "
                                    "#%u\n",
                                    workOffset + workSize - 1 - i,
                                    workOffset + i);
                                correct = false;
                                return true;
                            }
                        }
                    }
                }
                else
                {
                    if (myValue != NumNonAtomicVariablesPerThread() - 1)
                    {
                        log_error("ERROR: Invalid atomic value #%u (%d "
                                  "instead of %d)\n",
                                  workOffset + i, myValue,
                                  NumNonAtomicVariablesPerThread() - 1);
                        log_error("ERROR: Thread #%u observed invalid "
                                  "values in other threads' variables\n",
                                  workOffset + i);
                        correct = false;
                        return true;
                    }
                }
            }
        }
        return true;
    }

private:
    size_t _subCaseId;
    struct TestDefinition _subCase;
};
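// For reference only: a minimal host-side sketch of the synchronizes-with
// pattern that the non-seq_cst subcases above generate, reduced to two
// C++11 threads. This is an illustration, not part of the test suite; the
// identifiers (kNumValues, nonAtomicValues, atomicFlag) are made up for
// the sketch. A release store that is read by an acquire load guarantees
// that the writer's earlier non-atomic write at index A is visible to the
// reader, so reading index B (the loaded value) must yield C == B.
#if 0
#include <atomic>
#include <cassert>
#include <thread>

static void synchronizes_with_sketch()
{
    constexpr int kNumValues = 1000; // hypothetical stand-in for naValues
    static int nonAtomicValues[kNumValues] = {}; // index 0 stays unused
    static std::atomic<int> atomicFlag(0);

    std::thread writer([] {
        for (int a = 1; a < kNumValues; a++)
        {
            nonAtomicValues[a] = a; // non-atomic write at index A
            atomicFlag.store(a, std::memory_order_release); // publish A
        }
    });
    std::thread reader([] {
        int b = atomicFlag.load(std::memory_order_acquire); // read B
        if (b != 0) // 0 means the writer has not published anything yet
        {
            int c = nonAtomicValues[b]; // non-atomic read at index B
            assert(c == b); // must hold after the release/acquire pairing
        }
    });
    writer.join();
    reader.join();
}
#endif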
#if 0 // The tests below are likely incorrect and have been disabled.
// See https://github.com/KhronosGroup/OpenCL-CTS/issues/2544
static int test_atomic_fence_generic(cl_device_id deviceID, cl_context context,
                                     cl_command_queue queue, int num_elements,
                                     bool useSVM)
{
    int error = 0;
    CBasicTestFence<HOST_ATOMIC_INT, HOST_INT> test_int(TYPE_ATOMIC_INT,
                                                        useSVM);
    EXECUTE_TEST(error,
                 test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_UINT, HOST_UINT> test_uint(TYPE_ATOMIC_UINT,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_LONG, HOST_LONG> test_long(TYPE_ATOMIC_LONG,
                                                           useSVM);
    EXECUTE_TEST(error,
                 test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFence<HOST_ATOMIC_ULONG, HOST_ULONG> test_ulong(
        TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error,
                 test_ulong.Execute(deviceID, context, queue, num_elements));
    if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFence<HOST_ATOMIC_INTPTR_T32, HOST_INTPTR_T32> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_UINTPTR_T32, HOST_UINTPTR_T32>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_SIZE_T32, HOST_SIZE_T32> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_PTRDIFF_T32, HOST_PTRDIFF_T32>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFence<HOST_ATOMIC_INTPTR_T64, HOST_INTPTR_T64> test_intptr_t(
            TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_UINTPTR_T64, HOST_UINTPTR_T64>
            test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(
            error,
            test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_SIZE_T64, HOST_SIZE_T64> test_size_t(
            TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(
            error,
            test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFence<HOST_ATOMIC_PTRDIFF_T64, HOST_PTRDIFF_T64>
            test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(
            error,
            test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

REGISTER_TEST(atomic_fence)
{
    return test_atomic_fence_generic(device, context, queue, num_elements,
                                     false);
}
REGISTER_TEST(svm_atomic_fence)
{
    return test_atomic_fence_generic(device, context, queue, num_elements,
                                     true);
}
#endif
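// For reference only: the seq_cst fence "ping-pong" from the
// MEMORY_ORDER_SEQ_CST path of CBasicTestFence, reduced to two C++11 host
// threads. This is an illustration, not part of the test suite; the
// identifiers (counters, lastSeen, kLimit) are made up for the sketch.
// Each thread bumps its own relaxed counter, issues a seq_cst fence, then
// reads the other thread's counter. The test's premise - disputed in
// issue 2544 above, hence the surrounding #if 0 - is that the fences
// order the relaxed accesses so that at least one thread leaves the loop
// knowing the other thread's final value.
#if 0
#include <atomic>
#include <thread>

static std::atomic<int> counters[2] = { { 0 }, { 0 } };
static int lastSeen[2];

static void fence_ping_pong(int myId)
{
    const int hisId = 1 - myId;
    const int kLimit = 500000; // same bound the generated kernel uses
    int myValue = 0, hisValue;
    do
    {
        myValue++;
        counters[myId].store(myValue, std::memory_order_relaxed);
        std::atomic_thread_fence(std::memory_order_seq_cst);
        hisValue = counters[hisId].load(std::memory_order_relaxed);
    } while (myValue == hisValue && myValue < kLimit);
    lastSeen[myId] = hisValue; // checked afterwards, as VerifyRefs does
}

static void fence_ping_pong_sketch()
{
    std::thread t0(fence_ping_pong, 0), t1(fence_ping_pong, 1);
    t0.join();
    t1.join();
}
#endif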