// // Copyright (c) 2024 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "harness/testHarness.h" #include "harness/kernelHelpers.h" #include "harness/typeWrappers.h" #include "harness/conversions.h" #include "common.h" #include "host_atomics.h" #include #include #include #include template class CBasicTestStore : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::OldValueCheck; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryScope; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTest::CheckCapabilities; using CBasicTestMemOrderScope::LocalMemory; CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { OldValueCheck(false); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return threadCount; } virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) { if (MemoryOrder() == MEMORY_ORDER_ACQUIRE || MemoryOrder() == MEMORY_ORDER_ACQ_REL) return 0; // skip test - not applicable if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) return 0; // skip test - not applicable if (CBasicTestMemOrderScope::DataType() ._type == TYPE_ATOMIC_HALF) { if (LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT) == 0) return 0; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return " atomic_store" + postfix + "(&destMemory[tid], tid" + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = static_cast(whichDestValue); return true; } }; static int test_atomic_store_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestStore test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_ulong(TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_float(TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); if (gFloatAtomicsSupported) { CBasicTestStore test_half(TYPE_ATOMIC_HALF, useSVM); EXECUTE_TEST(error, test_half.Execute(deviceID, context, queue, num_elements)); } if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestStore test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestStore test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_store) { return test_atomic_store_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_store) { return test_atomic_store_generic(device, context, queue, num_elements, true); } template class CBasicTestInit : public CBasicTest { public: using CBasicTest::OldValueCheck; CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) : CBasicTest(dataType, useSVM) { OldValueCheck(false); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return threadCount; } virtual std::string ProgramCore() { return " atomic_init(&destMemory[tid], tid);\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { host_atomic_init(&destMemory[tid], (HostDataType)tid); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = (HostDataType)whichDestValue; return true; } }; static int test_atomic_init_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestInit test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_ulong(TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_float(TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestInit test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestInit test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_init) { return test_atomic_init_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_init) { return test_atomic_init_generic(device, context, queue, num_elements, true); } template class CBasicTestLoad : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::OldValueCheck; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryScope; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::MemoryScopeStr; using CBasicTest::CheckCapabilities; using CBasicTestMemOrderScope::LocalMemory; CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { OldValueCheck(false); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return threadCount; } virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) { if (MemoryOrder() == MEMORY_ORDER_RELEASE || MemoryOrder() == MEMORY_ORDER_ACQ_REL) return 0; // skip test - not applicable if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) return 0; // skip test - not applicable if (CBasicTestMemOrderScope::DataType() ._type == TYPE_ATOMIC_HALF) { if (LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT) == 0) return 0; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } virtual std::string ProgramCore() { // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store // should be MEMORY_ORDER_RELEASE std::string memoryOrderScopeLoad = MemoryOrderScopeStr(); std::string memoryOrderScopeStore = (MemoryOrder() == MEMORY_ORDER_ACQUIRE) ? (", memory_order_release" + MemoryScopeStr()) : memoryOrderScopeLoad; std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit"); return " atomic_store" + postfix + "(&destMemory[tid], tid" + memoryOrderScopeStore + ");\n" " oldValues[tid] = atomic_load" + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { host_atomic_store(&destMemory[tid], (HostDataType)tid, MEMORY_ORDER_SEQ_CST); oldValues[tid] = host_atomic_load( &destMemory[tid], MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = static_cast(whichDestValue); return true; } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; for (cl_uint i = 0; i < threadCount; i++) { if (refValues[i] != (HostDataType)i) { log_error("Invalid value for thread %u\n", (cl_uint)i); correct = false; return true; } } return true; } }; static int test_atomic_load_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestLoad test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_ulong(TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_float(TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); if (gFloatAtomicsSupported) { CBasicTestLoad test_half(TYPE_ATOMIC_HALF, useSVM); EXECUTE_TEST(error, test_half.Execute(deviceID, context, queue, num_elements)); } if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestLoad test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestLoad test_intptr_t( TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_load) { return test_atomic_load_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_load) { return test_atomic_load_generic(device, context, queue, num_elements, true); } template class CBasicTestExchange : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::OldValueCheck; using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::Iterations; using CBasicTestMemOrderScope::IterationsStr; using CBasicTestMemOrderScope::LocalMemory; CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(1234); } virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) { if constexpr (std::is_same_v) { if (LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT) == 0) return 0; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return " oldValues[tid] = atomic_exchange" + postfix + "(&destMemory[0], tid" + memoryOrderScope + ");\n" " for(int i = 0; i < " + IterationsStr() + "; i++)\n" " oldValues[tid] = atomic_exchange" + postfix + "(&destMemory[0], oldValues[tid]" + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, MemoryOrder()); for (int i = 0; i < Iterations(); i++) oldValues[tid] = host_atomic_exchange( &destMemory[0], oldValues[tid], MemoryOrder()); } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { OldValueCheck( Iterations() % 2 == 0); // check is valid for even number of iterations only correct = true; /* We are expecting values from 0 to size-1 and initial value from * atomic variable */ /* These values must be distributed across refValues array and atomic * variable finalVaue[0] */ /* Any repeated value is treated as an error */ std::vector tidFound(threadCount); bool startValueFound = false; cl_uint startVal = static_cast(StartValue()); for (cl_uint i = 0; i <= threadCount; i++) { cl_uint value = 0; if (i == threadCount) { value = static_cast( static_cast(finalValues[0])); } else { value = static_cast(refValues[i]); } if (value == startVal) { // Special initial value if (startValueFound) { log_error("ERROR: Starting reference value (%u) occurred " "more thane once\n", (cl_uint)StartValue()); correct = false; return true; } startValueFound = true; continue; } if (value >= threadCount) { log_error( "ERROR: Reference value %u outside of valid range! (%u)\n", i, value); correct = false; return true; } if (tidFound[value]) { log_error("ERROR: Value (%u) occurred more thane once\n", value); correct = false; return true; } tidFound[value] = true; } return true; } }; static int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestExchange test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_float( TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); if (gFloatAtomicsSupported) { CBasicTestExchange test_half( TYPE_ATOMIC_HALF, useSVM); EXECUTE_TEST(error, test_half.Execute(deviceID, context, queue, num_elements)); } if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_exchange) { return test_atomic_exchange_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_exchange) { return test_atomic_exchange_generic(device, context, queue, num_elements, true); } template class CBasicTestCompareStrong : public CBasicTestMemOrder2Scope { public: using CBasicTestMemOrder2Scope::StartValue; using CBasicTestMemOrder2Scope::OldValueCheck; using CBasicTestMemOrder2Scope::MemoryOrder; using CBasicTestMemOrder2Scope::MemoryOrder2; using CBasicTestMemOrder2Scope::MemoryOrderScope; using CBasicTestMemOrder2Scope::MemoryScope; using CBasicTestMemOrder2Scope::DataType; using CBasicTestMemOrder2Scope::Iterations; using CBasicTestMemOrder2Scope::IterationsStr; using CBasicTest::CheckCapabilities; CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope(dataType, useSVM) { StartValue(123456); OldValueCheck(false); } virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) { if (MemoryOrder2() == MEMORY_ORDER_RELEASE || MemoryOrder2() == MEMORY_ORDER_ACQ_REL) return 0; // not allowed as 'failure' argument if ((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) || (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST)) return 0; // failure argument shall be no stronger than the success if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) return 0; // skip test - not applicable if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF) return 0; // skip test - not applicable return CBasicTestMemOrder2Scope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScope(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" ") + DataType().RegularTypeName() + " expected, previous;\n" " int successCount = 0;\n" " oldValues[tid] = tid;\n" " expected = tid; // force failure at the beginning\n" " if(atomic_compare_exchange_strong" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + ") || expected == tid)\n" " oldValues[tid] = threadCount+1; //mark unexpected success " "with invalid value\n" " else\n" " {\n" " for(int i = 0; i < " + IterationsStr() + " || successCount == 0; i++)\n" " {\n" " previous = expected;\n" " if(atomic_compare_exchange_strong" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + "))\n" " {\n" " oldValues[tid] = expected;\n" " successCount++;\n" " }\n" " else\n" " {\n" " if(previous == expected) // spurious failure - " "shouldn't occur for 'strong'\n" " {\n" " oldValues[tid] = threadCount; //mark fail with " "invalid value\n" " break;\n" " }\n" " }\n" " }\n" " }\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { HostDataType expected = (HostDataType)StartValue(), previous; oldValues[tid] = (HostDataType)tid; for (int i = 0; i < Iterations(); i++) { previous = expected; if (host_atomic_compare_exchange(&destMemory[0], &expected, oldValues[tid], MemoryOrder(), MemoryOrder2())) oldValues[tid] = expected; else { if (previous == expected) // shouldn't occur for 'strong' { oldValues[tid] = threadCount; // mark fail with invalid // value } } } } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; /* We are expecting values from 0 to size-1 and initial value from * atomic variable */ /* These values must be distributed across refValues array and atomic * variable finalVaue[0] */ /* Any repeated value is treated as an error */ std::vector tidFound(threadCount); bool startValueFound = false; cl_uint i; for (i = 0; i <= threadCount; i++) { cl_uint value; if (i == threadCount) value = (cl_uint)finalValues[0]; // additional value from atomic // variable (last written) else value = (cl_uint)refValues[i]; if (value == (cl_uint)StartValue()) { // Special initial value if (startValueFound) { log_error("ERROR: Starting reference value (%u) occurred " "more thane once\n", (cl_uint)StartValue()); correct = false; return true; } startValueFound = true; continue; } if (value >= threadCount) { if (value == threadCount) log_error("ERROR: Spurious failure detected for " "atomic_compare_exchange_strong\n"); log_error( "ERROR: Reference value %u outside of valid range! (%u)\n", i, value); correct = false; return true; } if (tidFound[value]) { log_error("ERROR: Value (%u) occurred more thane once\n", value); correct = false; return true; } tidFound[value] = true; } return true; } }; static int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestCompareStrong test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_uint( TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_long( TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_compare_exchange_strong) { return test_atomic_compare_exchange_strong_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_compare_exchange_strong) { return test_atomic_compare_exchange_strong_generic(device, context, queue, num_elements, true); } template class CBasicTestCompareWeak : public CBasicTestCompareStrong { public: using CBasicTestCompareStrong::StartValue; using CBasicTestCompareStrong::MemoryOrderScope; using CBasicTestCompareStrong::DataType; using CBasicTestCompareStrong::Iterations; using CBasicTestCompareStrong::IterationsStr; CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) : CBasicTestCompareStrong(dataType, useSVM) {} virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScope(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" ") + DataType().RegularTypeName() + " expected , previous;\n" " int successCount = 0;\n" " oldValues[tid] = tid;\n" " expected = tid; // force failure at the beginning\n" " if(atomic_compare_exchange_weak" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + ") || expected == tid)\n" " oldValues[tid] = threadCount+1; //mark unexpected success " "with invalid value\n" " else\n" " {\n" " for(int i = 0; i < " + IterationsStr() + " || successCount == 0; i++)\n" " {\n" " previous = expected;\n" " if(atomic_compare_exchange_weak" + postfix + "(&destMemory[0], &expected, oldValues[tid]" + memoryOrderScope + "))\n" " {\n" " oldValues[tid] = expected;\n" " successCount++;\n" " }\n" " }\n" " }\n"; } }; static int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestCompareWeak test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_uint( TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_long( TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_compare_exchange_weak) { return test_atomic_compare_exchange_weak_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_compare_exchange_weak) { return test_atomic_compare_exchange_weak_generic(device, context, queue, num_elements, true); } template double kahan_sum(const std::vector &nums) { return 0.0; } template <> double kahan_sum(const std::vector &nums) { double sum = 0.0; double compensation = 0.0; for (double num : nums) { double y = num - compensation; double t = sum + y; compensation = (t - sum) - y; sum = t; } return sum; } template class CBasicTestFetchAdd : public CBasicTestMemOrderScope { double min_range; double max_range; double max_error; std::vector ref_vals; public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM), min_range(-999.0), max_range(999.0), max_error(0.0) { if constexpr (is_host_fp_v) { StartValue((HostDataType)0.0); CBasicTestMemOrderScope::OldValueCheck(false); // Narrow down range for half to avoid overflow to infinity if constexpr (std::is_same_v) { min_range = -50.0; max_range = 50.0; } } } bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { if constexpr (is_host_fp_v) { if (threadCount > ref_vals.size()) { ref_vals.resize(threadCount); for (cl_uint i = 0; i < threadCount; i++) if constexpr (std::is_same_v) ref_vals[i] = get_random_double(min_range, max_range, d); else ref_vals[i] = get_random_float(min_range, max_range, d); memcpy(startRefValues, ref_vals.data(), sizeof(HostDataType) * ref_vals.size()); // Estimate highest possible summation error for given set. std::vector sums; std::sort(ref_vals.begin(), ref_vals.end()); sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(), static_cast(0.f))); sums.push_back(std::accumulate(ref_vals.rbegin(), ref_vals.rend(), static_cast(0.f))); std::sort(ref_vals.begin(), ref_vals.end(), [](HostDataType a, HostDataType b) { return std::abs(a) < std::abs(b); }); double precise = 0.0; if constexpr (std::is_same_v) precise = kahan_sum(ref_vals); else for (auto elem : ref_vals) precise += double(elem); sums.push_back(precise); sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(), static_cast(0.f))); sums.push_back(std::accumulate(ref_vals.rbegin(), ref_vals.rend(), static_cast(0.f))); std::sort(sums.begin(), sums.end()); assert(std::all_of(sums.begin(), sums.end(), [](const HostDataType &val) { return std::isfinite( static_cast(val)); }) && "Infinite summation value detected!"); max_error = std::abs(sums.front() - sums.back()); log_info("Max allowed error for %u elements: %.10f\n", threadCount, max_error); // restore unsorted order memcpy(ref_vals.data(), startRefValues, sizeof(HostDataType) * ref_vals.size()); } else { memcpy(startRefValues, ref_vals.data(), sizeof(HostDataType) * threadCount); } return true; } return false; } std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); if constexpr (is_host_fp_v) { return " atomic_fetch_add" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")oldValues[tid]" + memoryOrderScope + ");\n" + " oldValues[tid] = atomic_fetch_add" + postfix + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + ")0" + memoryOrderScope + ");\n"; } else { return " oldValues[tid] = atomic_fetch_add" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + ");\n" " atomic_fetch_add" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + ");\n" " atomic_fetch_add" + postfix + "(&destMemory[0], ((" + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope + ");\n"; } } void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) override { if constexpr (is_host_fp_v) { host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid], MemoryOrder()); oldValues[tid] = host_atomic_fetch_add( &destMemory[tid], (HostDataType)0, MemoryOrder()); } else { oldValues[tid] = host_atomic_fetch_add( &destMemory[0], (HostDataType)tid + 3, MemoryOrder()); host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); host_atomic_fetch_add( &destMemory[0], (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8), MemoryOrder()); } } bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) override { expected = StartValue(); if constexpr (is_host_fp_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) expected += startRefValues[i]; } else { for (cl_uint i = 0; i < threadCount; i++) expected += ((HostDataType)i + 3) * 3 + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); } return true; } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, const std::vector &startRefValues, cl_uint whichDestValue) override { if constexpr (is_host_fp_v) { if (whichDestValue == 0) return std::abs( static_cast(expected - static_cast( testValues[whichDestValue]))) > max_error; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) override { if constexpr (is_host_fp_v) { correct = true; for (cl_uint i = 1; i < threadCount; i++) { if (refValues[i] != StartValue()) { log_error("Thread %d found %d mismatch(es)\n", i, (cl_uint)refValues[i]); correct = false; } } return !correct; } return CBasicTestMemOrderScope::VerifyRefs(correct, threadCount, refValues, finalValues); } int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { if constexpr (std::is_same_v) { if (LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) return 0; } else if constexpr (std::is_same_v) { if (LocalMemory() && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) return 0; } else if constexpr (std::is_same_v) { if (LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) return 0; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { if constexpr (is_host_fp_v) { return threadCount; } return CBasicTestMemOrderScope::NumResults(threadCount, deviceID); } }; template class CBasicTestFetchAddSpecialFloats : public CBasicTestMemOrderScope { std::vector ref_vals; public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::LocalMemory; using CBasicTestMemOrderScope::DeclaredInProgram; CBasicTestFetchAddSpecialFloats(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { // StartValue is used as an index divisor in the following test // logic. It is set to the number of special values, which allows // threads to be mapped deterministically onto the input data array. // This enables repeated add operations arranged so that every // special value is added to every other one (“all-to-all”). if constexpr ( std::is_same_v< HostDataType, HOST_DOUBLE> || std::is_same_v) { auto spec_vals = GetSpecialValues(); StartValue(spec_vals.size()); CBasicTestMemOrderScope::OldValueCheck(false); } else if constexpr (std::is_same_v) { auto spec_vals = GetSpecialValues(); StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode)); CBasicTestMemOrderScope::OldValueCheck(false); } } static std::vector &GetSpecialValues() { static std::vector special_values; if constexpr ( std::is_same_v< HostDataType, HOST_DOUBLE> || std::is_same_v) { const HostDataType test_value_zero = static_cast(0.0f); const HostDataType test_value_minus_zero = static_cast(-0.0f); const HostDataType test_value_without_fraction = static_cast(2.0f); const HostDataType test_value_with_fraction = static_cast(2.2f); if (special_values.empty()) { special_values = { static_cast(test_value_minus_zero), static_cast(test_value_zero), static_cast(test_value_without_fraction), static_cast(test_value_with_fraction), std::numeric_limits::infinity(), std::numeric_limits::quiet_NaN(), std::numeric_limits::signaling_NaN(), -std::numeric_limits::infinity(), -std::numeric_limits::quiet_NaN(), -std::numeric_limits::signaling_NaN(), std::numeric_limits::lowest(), std::numeric_limits::min(), std::numeric_limits::max(), }; if constexpr (std::is_same_v) { if (0 != (CL_FP_DENORM & gDoubleFPConfig)) { special_values.push_back( std::numeric_limits::denorm_min()); } } else if constexpr (std::is_same_v) { if (0 != (CL_FP_DENORM & gFloatFPConfig)) { special_values.push_back( std::numeric_limits::denorm_min()); } } } } else if constexpr (std::is_same_v) { if (special_values.empty()) { special_values = { 0xffff, 0x0000, 0x7c00, /*INFINITY*/ 0xfc00, /*-INFINITY*/ 0x8000, /*-0*/ 0x7bff, /*HALF_MAX*/ 0x0400, /*HALF_MIN*/ 0x3c00, /* 1 */ 0xbc00, /* -1 */ 0x3555, /*nearest value to 1/3*/ 0x3bff, /*largest number less than one*/ 0xc000, /* -2 */ 0xfbff, /* -HALF_MAX */ 0x8400, /* -HALF_MIN */ 0x4248, /* M_PI_H */ 0xc248, /* -M_PI_H */ 0xbbff, /* Largest negative fraction */ }; if (0 != (CL_FP_DENORM & gHalfFPConfig)) { special_values.push_back(0x0001 /* Smallest denormal */); special_values.push_back(0x03ff /* Largest denormal */); } } } return special_values; } bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { if constexpr ( std::is_same_v< HostDataType, HOST_HALF> || std::is_same_v || std::is_same_v) { if (threadCount > ref_vals.size()) { ref_vals.assign(threadCount, 0); auto spec_vals = GetSpecialValues(); cl_uint total_cnt = 0; while (total_cnt < threadCount) { cl_uint block_cnt = std::min((cl_int)(threadCount - total_cnt), (cl_int)spec_vals.size()); memcpy(&ref_vals.at(total_cnt), spec_vals.data(), sizeof(HostDataType) * block_cnt); total_cnt += block_cnt; } } memcpy(startRefValues, ref_vals.data(), sizeof(HostDataType) * threadCount); return true; } return false; } std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); if constexpr ( std::is_same_v< HostDataType, HOST_HALF> || std::is_same_v || std::is_same_v) { // The start_value variable (set by StartValue) is used // as a divisor of the thread index when selecting the operand for // atomic_fetch_add. This groups threads into blocks corresponding // to the number of special values and implements an “all-to-all” // addition pattern. As a result, each destination element is // updated using different combinations of input values, enabling // consistent comparison between host and device execution. return std::string(DataType().AddSubOperandTypeName()) + " start_value = atomic_load_explicit(destMemory+tid, " "memory_order_relaxed, memory_scope_work_group);\n" " atomic_store_explicit(destMemory+tid, oldValues[tid], " "memory_order_relaxed, memory_scope_work_group);\n" " atomic_fetch_add" + postfix + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + ")oldValues[tid/(int)start_value]" + memoryOrderScope + ");\n"; } } void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) override { if constexpr ( std::is_same_v< HostDataType, HOST_HALF> || std::is_same_v || std::is_same_v) { auto spec_vals = GetSpecialValues(); host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid], MEMORY_ORDER_SEQ_CST); host_atomic_fetch_add( &destMemory[tid], (HostDataType)oldValues[tid / spec_vals.size()], MemoryOrder()); } } bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) override { expected = StartValue(); if constexpr ( std::is_same_v< HostDataType, HOST_DOUBLE> || std::is_same_v) { auto spec_vals = GetSpecialValues(); expected = startRefValues[whichDestValue] + startRefValues[whichDestValue / spec_vals.size()]; } else if constexpr (std::is_same_v) { auto spec_vals = GetSpecialValues(); expected = cl_half_from_float( cl_half_to_float(startRefValues[whichDestValue]) + cl_half_to_float( startRefValues[whichDestValue / spec_vals.size()]), gHalfRoundingMode); } return true; } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, const std::vector &startRefValues, cl_uint whichDestValue) override { if constexpr (std::is_same_v) { return static_cast(expected) != testValues[whichDestValue]; } else { if (std::isnan(testValues[whichDestValue]) && std::isnan(expected)) return false; else return expected != testValues[whichDestValue]; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, startRefValues, whichDestValue); } int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { if constexpr (std::is_same_v) { if (LocalMemory() && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) return 0; if (!CBasicTestMemOrderScope::LocalMemory() && CBasicTestMemOrderScope::DeclaredInProgram()) { if ((gDoubleFPConfig & CL_FP_INF_NAN) == 0) return 0; } } if constexpr (std::is_same_v) { if (LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) return 0; if (!CBasicTestMemOrderScope::LocalMemory() && CBasicTestMemOrderScope::DeclaredInProgram()) { if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0; } } else if constexpr (std::is_same_v) { if (DeclaredInProgram()) return 0; // skip test - not applicable if (LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) return 0; if (!CBasicTestMemOrderScope::LocalMemory() && CBasicTestMemOrderScope::DeclaredInProgram()) { if ((gHalfFPConfig & CL_FP_INF_NAN) == 0) return 0; } } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { if constexpr ( std::is_same_v< HostDataType, HOST_HALF> || std::is_same_v || std::is_same_v) { return threadCount; } return CBasicTestMemOrderScope::NumResults(threadCount, deviceID); } }; static int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchAdd test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (gFloatAtomicsSupported) { CBasicTestFetchAddSpecialFloats test_spec_double(TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST( error, test_spec_double.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAddSpecialFloats test_spec_float(TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST( error, test_spec_float.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAddSpecialFloats test_spec_half(TYPE_ATOMIC_HALF, useSVM); EXECUTE_TEST( error, test_spec_half.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_half( TYPE_ATOMIC_HALF, useSVM); EXECUTE_TEST(error, test_half.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST( error, test_double.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_float( TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST( error, test_float.Execute(deviceID, context, queue, num_elements)); } if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_add) { return test_atomic_fetch_add_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_add) { return test_atomic_fetch_add_generic(device, context, queue, num_elements, true); } template double kahan_sub(const std::vector &nums) { return 0.0; } template <> double kahan_sub(const std::vector &nums) { double sum = 0.0; double compensation = 0.0; for (double num : nums) { double y = -num - compensation; double t = sum + y; compensation = (t - sum) - y; sum = t; } return sum; } template class CBasicTestFetchSub : public CBasicTestMemOrderScope { double min_range; double max_range; double max_error; std::vector ref_vals; public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::LocalMemory; CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM), min_range(-999.0), max_range(999.0), max_error(0.0) { if constexpr (is_host_fp_v) { StartValue(0); CBasicTestMemOrderScope::OldValueCheck(false); // Narrow down range for half to avoid overflow to infinity if constexpr (std::is_same_v) { min_range = -50.0; max_range = 50.0; } } } template HostDataType subtract(Iterator begin, Iterator end) { HostDataType res = 0; for (auto it = begin; it != end; ++it) res = res - *it; return res; } bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) override { if constexpr (is_host_fp_v) { if (threadCount > ref_vals.size()) { ref_vals.resize(threadCount); for (cl_uint i = 0; i < threadCount; i++) ref_vals[i] = (HostDataType)get_random_double(min_range, max_range, d); memcpy(startRefValues, ref_vals.data(), sizeof(HostDataType) * ref_vals.size()); // Estimate highest possible subtraction error for given set. std::vector sums; std::sort(ref_vals.begin(), ref_vals.end()); sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend())); std::sort(ref_vals.begin(), ref_vals.end(), [](HostDataType a, HostDataType b) { return std::abs(a) < std::abs(b); }); double precise = 0.0; if constexpr (std::is_same_v) precise = kahan_sub(ref_vals); else for (auto elem : ref_vals) precise += double(elem); sums.push_back(precise); sums.push_back(subtract(ref_vals.begin(), ref_vals.end())); sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend())); std::sort(sums.begin(), sums.end()); assert(std::all_of(sums.begin(), sums.end(), [](const HostDataType &val) { return std::isfinite( static_cast(val)); }) && "Infinite subtraction value detected!"); max_error = std::abs(sums.front() - sums.back()); log_info("Max allowed error for %u elements: %.10f\n", threadCount, max_error); // restore unsorted order memcpy(ref_vals.data(), startRefValues, sizeof(HostDataType) * ref_vals.size()); } else { memcpy(startRefValues, ref_vals.data(), sizeof(HostDataType) * threadCount); } return true; } return false; } std::string ProgramCore() override { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); if constexpr (is_host_fp_v) { return " atomic_fetch_sub" + postfix + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + ")oldValues[tid]" + memoryOrderScope + ");\n" + " oldValues[tid] = atomic_fetch_sub" + postfix + "(&destMemory[tid], (" + DataType().AddSubOperandTypeName() + ")0" + memoryOrderScope + ");\n"; } else { return " oldValues[tid] = atomic_fetch_sub" + postfix + "(&destMemory[0], tid + 3 +(((" + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope + ");\n"; } } void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) override { if constexpr (is_host_fp_v) { host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid], MemoryOrder()); oldValues[tid] = host_atomic_fetch_sub( &destMemory[tid], (HostDataType)0, MemoryOrder()); } else { oldValues[tid] = host_atomic_fetch_sub(&destMemory[0], (HostDataType)tid + 3 + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8), MemoryOrder()); } } bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) override { expected = StartValue(); if constexpr (is_host_fp_v) { if (whichDestValue == 0) for (cl_uint i = 0; i < threadCount; i++) expected -= startRefValues[i]; } else { for (cl_uint i = 0; i < threadCount; i++) expected -= (HostDataType)i + 3 + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); } return true; } bool IsTestNotAsExpected(const HostDataType &expected, const std::vector &testValues, const std::vector &startRefValues, cl_uint whichDestValue) override { if constexpr (is_host_fp_v) { if (whichDestValue == 0) return std::abs( static_cast(expected - static_cast( testValues[whichDestValue]))) > max_error; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::IsTestNotAsExpected(expected, testValues, startRefValues, whichDestValue); } bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) override { if (std::is_same_v) { correct = true; for (cl_uint i = 1; i < threadCount; i++) { if (refValues[i] != StartValue()) { log_error("Thread %d found %d mismatch(es)\n", i, (cl_uint)refValues[i]); correct = false; } } return !correct; } return CBasicTestMemOrderScope::VerifyRefs(correct, threadCount, refValues, finalValues); } int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) override { if constexpr (std::is_same_v) { if (LocalMemory() && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) return 0; } else if constexpr (std::is_same_v) { if (LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) return 0; } else if constexpr (std::is_same_v) { if (LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0) return 0; // skip test - not applicable if (!LocalMemory() && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0) return 0; } return CBasicTestMemOrderScope< HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, queue); } cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override { if constexpr (is_host_fp_v) { return threadCount; } return CBasicTestMemOrderScope::NumResults(threadCount, deviceID); } }; static int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchSub test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (gFloatAtomicsSupported) { CBasicTestFetchSub test_double( TYPE_ATOMIC_DOUBLE, useSVM); EXECUTE_TEST( error, test_double.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_float( TYPE_ATOMIC_FLOAT, useSVM); EXECUTE_TEST( error, test_float.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_half( TYPE_ATOMIC_HALF, useSVM); EXECUTE_TEST(error, test_half.Execute(deviceID, context, queue, num_elements)); } if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_sub) { return test_atomic_fetch_sub_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_sub) { return test_atomic_fetch_sub_generic(device, context, queue, num_elements, true); } template class CBasicTestFetchOr : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(0); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { cl_uint numBits = DataType().Size(deviceID) * 8; return (threadCount + numBits - 1) / numBits; } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" size_t numBits = sizeof(") + DataType().RegularTypeName() + ") * 8;\n" " int whichResult = tid / numBits;\n" " int bitIndex = tid - (whichResult * numBits);\n" "\n" " oldValues[tid] = atomic_fetch_or" + postfix + "(&destMemory[whichResult], ((" + DataType().RegularTypeName() + ")1 << bitIndex) " + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { size_t numBits = sizeof(HostDataType) * 8; size_t whichResult = tid / numBits; size_t bitIndex = tid - (whichResult * numBits); oldValues[tid] = host_atomic_fetch_or(&destMemory[whichResult], ((HostDataType)1 << bitIndex), MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) / (sizeof(HostDataType) * 8); if (whichDestValue < numValues - 1) { expected = ~(HostDataType)0; return true; } // Last item doesn't get or'ed on every bit, so we have to mask away cl_uint numBits = threadCount - whichDestValue * (sizeof(HostDataType) * 8); expected = StartValue(); for (cl_uint i = 0; i < numBits; i++) expected |= ((HostDataType)1 << i); return true; } }; static int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchOr test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_or) { return test_atomic_fetch_or_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_or) { return test_atomic_fetch_or_generic(device, context, queue, num_elements, true); } template class CBasicTestFetchXor : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::DataType; CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue((HostDataType)0x2f08ab418ba0541LL); } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" int numBits = sizeof(") + DataType().RegularTypeName() + ") * 8;\n" " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n" "\n" " oldValues[tid] = atomic_fetch_xor" + postfix + "(&destMemory[0], ((" + DataType().RegularTypeName() + ")1 << bitIndex) " + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { int numBits = sizeof(HostDataType) * 8; int bitIndex = (numBits - 1) * (tid + 1) / threadCount; oldValues[tid] = host_atomic_fetch_xor( &destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { int numBits = sizeof(HostDataType) * 8; expected = StartValue(); for (cl_uint i = 0; i < threadCount; i++) { int bitIndex = (numBits - 1) * (i + 1) / threadCount; expected ^= ((HostDataType)1 << bitIndex); } return true; } }; static int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchXor test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_xor) { return test_atomic_fetch_xor_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_xor) { return test_atomic_fetch_xor_generic(device, context, queue, num_elements, true); } template class CBasicTestFetchAnd : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(~(HostDataType)0); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { cl_uint numBits = DataType().Size(deviceID) * 8; return (threadCount + numBits - 1) / numBits; } virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" size_t numBits = sizeof(") + DataType().RegularTypeName() + ") * 8;\n" " int whichResult = tid / numBits;\n" " int bitIndex = tid - (whichResult * numBits);\n" "\n" " oldValues[tid] = atomic_fetch_and" + postfix + "(&destMemory[whichResult], ~((" + DataType().RegularTypeName() + ")1 << bitIndex) " + memoryOrderScope + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { size_t numBits = sizeof(HostDataType) * 8; size_t whichResult = tid / numBits; size_t bitIndex = tid - (whichResult * numBits); oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], ~((HostDataType)1 << bitIndex), MemoryOrder()); } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) / (sizeof(HostDataType) * 8); if (whichDestValue < numValues - 1) { expected = 0; return true; } // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = threadCount - whichDestValue * (sizeof(HostDataType) * 8); expected = StartValue(); for (size_t i = 0; i < numBits; i++) expected &= ~((HostDataType)1 << i); return true; } }; static int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchAnd test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_uint(TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_long(TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; } REGISTER_TEST(atomic_fetch_and) { return test_atomic_fetch_and_generic(device, context, queue, num_elements, false); } REGISTER_TEST(svm_atomic_fetch_and) { return test_atomic_fetch_and_generic(device, context, queue, num_elements, true); } template class CBasicTestFetchOrAnd : public CBasicTestMemOrderScope { public: using CBasicTestMemOrderScope::StartValue; using CBasicTestMemOrderScope::DataType; using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryOrderScopeStr; using CBasicTestMemOrderScope::Iterations; using CBasicTestMemOrderScope::IterationsStr; CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { StartValue(0); } virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8); } // each thread modifies (with OR and AND operations) and verifies // only one bit in atomic variable // other bits are modified by other threads but it must not affect current // thread operation virtual std::string ProgramCore() { std::string memoryOrderScope = MemoryOrderScopeStr(); std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); return std::string(" int bits = sizeof(") + DataType().RegularTypeName() + ")*8;\n" + " size_t valueInd = tid/bits;\n" " " + DataType().RegularTypeName() + " value, bitMask = (" + DataType().RegularTypeName() + ")1 << tid%bits;\n" " oldValues[tid] = 0;\n" " for(int i = 0; i < " + IterationsStr() + "; i++)\n" " {\n" " value = atomic_fetch_or" + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + ");\n" " if(value & bitMask) // bit should be set to 0\n" " oldValues[tid]++;\n" " value = atomic_fetch_and" + postfix + "(destMemory+valueInd, ~bitMask" + memoryOrderScope + ");\n" " if(!(value & bitMask)) // bit should be set to 1\n" " oldValues[tid]++;\n" " }\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) { int bits = sizeof(HostDataType) * 8; size_t valueInd = tid / bits; HostDataType value, bitMask = (HostDataType)1 << tid % bits; oldValues[tid] = 0; for (int i = 0; i < Iterations(); i++) { value = host_atomic_fetch_or(destMemory + valueInd, bitMask, MemoryOrder()); if (value & bitMask) // bit should be set to 0 oldValues[tid]++; value = host_atomic_fetch_and(destMemory + valueInd, ~bitMask, MemoryOrder()); if (!(value & bitMask)) // bit should be set to 1 oldValues[tid]++; } } virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) { expected = 0; return true; } virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) { correct = true; for (cl_uint i = 0; i < threadCount; i++) { if (refValues[i] > 0) { log_error("Thread %d found %d mismatch(es)\n", i, (cl_uint)refValues[i]); correct = false; } } return true; } }; static int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) { int error = 0; CBasicTestFetchOrAnd test_int(TYPE_ATOMIC_INT, useSVM); EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_uint( TYPE_ATOMIC_UINT, useSVM); EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_long( TYPE_ATOMIC_LONG, useSVM); EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_ulong( TYPE_ATOMIC_ULONG, useSVM); EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); EXECUTE_TEST( error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_size_t( TYPE_ATOMIC_SIZE_T, useSVM); EXECUTE_TEST( error, test_size_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); EXECUTE_TEST( error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } else { CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); EXECUTE_TEST( error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); CBasicTestFetchOrAnd