From 388944c01cbfc4272d11b3a9d520e2eed2d1288d Mon Sep 17 00:00:00 2001 From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com> Date: Tue, 6 Sep 2022 17:53:12 +0100 Subject: [PATCH] Minimum 2 non atomic variables per thread for the c11 atomic fence test for embedded profile devices. (#1452) * Minimum 2 Non atomic variables per thread for an embedded profile device - https://github.com/KhronosGroup/OpenCL-CTS/issues/1274 * Formatting --- test_conformance/c11_atomics/common.h | 5 +- test_conformance/c11_atomics/test_atomics.cpp | 5359 ++++++++++------- 2 files changed, 3186 insertions(+), 2178 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 5bb9e5b7..6c7d0b12 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -1361,9 +1361,8 @@ int CBasicTest::ExecuteSingleTest( error = clSetKernelArg(kernel, argInd++, LocalRefValues() ? typeSize - * ((CurrentGroupSize() - * NumNonAtomicVariablesPerThread()) - + 4) + * (CurrentGroupSize() + * NumNonAtomicVariablesPerThread()) : 1, NULL); test_error(error, "Unable to set indexed kernel argument"); diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 38b4e9a7..09c14ed1 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -23,2200 +23,3209 @@ #include #include -template -class CBasicTestStore : public CBasicTestMemOrderScope -{ +template +class CBasicTestStore + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTest::CheckCapabilities; - CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(MemoryOrder() == MEMORY_ORDER_ACQUIRE || - MemoryOrder() == MEMORY_ORDER_ACQ_REL) - return 0; //skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " atomic_store"+postfix+"(&destMemory[tid], tid"+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = (HostDataType)whichDestValue; - return true; - } -}; - -int test_atomic_store_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestStore test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestStore test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestStore test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_store_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_store_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestInit : public CBasicTest -{ -public: - using CBasicTest::OldValueCheck; - CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) : CBasicTest(dataType, useSVM) - { - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual std::string ProgramCore() - { - return - " atomic_init(&destMemory[tid], tid);\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - host_atomic_init(&destMemory[tid], (HostDataType)tid); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = (HostDataType)whichDestValue; - return true; - } -}; - -int test_atomic_init_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestInit test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestInit test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestInit test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_init_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_init_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestLoad : public CBasicTestMemOrderScope -{ -public: - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTest::CheckCapabilities; - CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(MemoryOrder() == MEMORY_ORDER_RELEASE || - MemoryOrder() == MEMORY_ORDER_ACQ_REL) - return 0; //skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramCore() - { - // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store - // should be MEMORY_ORDER_RELEASE - std::string memoryOrderScopeLoad = MemoryOrderScopeStr(); - std::string memoryOrderScopeStore = - (MemoryOrder() == MEMORY_ORDER_ACQUIRE) - ? (", memory_order_release" + MemoryScopeStr()) - : memoryOrderScopeLoad; - std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit"); - return " atomic_store" + postfix + "(&destMemory[tid], tid" - + memoryOrderScopeStore - + ");\n" - " oldValues[tid] = atomic_load" - + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - host_atomic_store(&destMemory[tid], (HostDataType)tid, MEMORY_ORDER_SEQ_CST); - oldValues[tid] = host_atomic_load(&destMemory[tid], MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = (HostDataType)whichDestValue; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - for(cl_uint i = 0; i < threadCount; i++ ) + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTest::CheckCapabilities; + CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) { - if(refValues[i] != (HostDataType)i) - { - log_error("Invalid value for thread %u\n", (cl_uint)i); - correct = false; + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE + || MemoryOrder() == MEMORY_ORDER_ACQ_REL) + return 0; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " atomic_store" + postfix + "(&destMemory[tid], tid" + + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; return true; - } } - return true; - } }; -int test_atomic_load_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_store_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestLoad test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestLoad test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestLoad test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_load_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_load_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestExchange : public CBasicTestMemOrderScope -{ -public: - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::Iterations; - using CBasicTestMemOrderScope::IterationsStr; - CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(123456); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], tid"+memoryOrderScope+");\n" - " for(int i = 0; i < "+IterationsStr()+"; i++)\n" - " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], oldValues[tid]"+memoryOrderScope+");\n"; - } - - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, MemoryOrder()); - for(int i = 0; i < Iterations(); i++) - oldValues[tid] = host_atomic_exchange(&destMemory[0], oldValues[tid], MemoryOrder()); - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - OldValueCheck(Iterations()%2 == 0); //check is valid for even number of iterations only - correct = true; - /* We are expecting values from 0 to size-1 and initial value from atomic variable */ - /* These values must be distributed across refValues array and atomic variable finalVaue[0] */ - /* Any repeated value is treated as an error */ - std::vector tidFound(threadCount); - bool startValueFound = false; - cl_uint i; - - for(i = 0; i <= threadCount; i++) - { - cl_uint value; - if(i == threadCount) - value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written) - else - value = (cl_uint)refValues[i]; - if(value == (cl_uint)StartValue()) - { - // Special initial value - if(startValueFound) - { - log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue()); - correct = false; - return true; - } - startValueFound = true; - continue; - } - if(value >= threadCount) - { - log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value); - correct = false; - return true; - } - if(tidFound[value]) - { - log_error("ERROR: Value (%u) occurred more thane once\n", value); - correct = false; - return true; - } - tidFound[value] = true; - } - return true; - } -}; - -int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestExchange test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_exchange_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_exchange_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestCompareStrong : public CBasicTestMemOrder2Scope -{ -public: - using CBasicTestMemOrder2Scope::StartValue; - using CBasicTestMemOrder2Scope::OldValueCheck; - using CBasicTestMemOrder2Scope::MemoryOrder; - using CBasicTestMemOrder2Scope::MemoryOrder2; - using CBasicTestMemOrder2Scope::MemoryOrderScope; - using CBasicTestMemOrder2Scope::MemoryScope; - using CBasicTestMemOrder2Scope::DataType; - using CBasicTestMemOrder2Scope::Iterations; - using CBasicTestMemOrder2Scope::IterationsStr; - using CBasicTest::CheckCapabilities; - CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope(dataType, useSVM) - { - StartValue(123456); - OldValueCheck(false); - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(MemoryOrder2() == MEMORY_ORDER_RELEASE || - MemoryOrder2() == MEMORY_ORDER_ACQ_REL) - return 0; // not allowed as 'failure' argument - if((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) || - (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST)) - return 0; // failure argument shall be no stronger than the success - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - return CBasicTestMemOrder2Scope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScope(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" ")+DataType().RegularTypeName()+" expected, previous;\n" - " int successCount = 0;\n" - " oldValues[tid] = tid;\n" - " expected = tid; // force failure at the beginning\n" - " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n" - " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n" - " else\n" - " {\n" - " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n" - " {\n" - " previous = expected;\n" - " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n" - " {\n" - " oldValues[tid] = expected;\n" - " successCount++;\n" - " }\n" - " else\n" - " {\n" - " if(previous == expected) // spurious failure - shouldn't occur for 'strong'\n" - " {\n" - " oldValues[tid] = threadCount; //mark fail with invalid value\n" - " break;\n" - " }\n" - " }\n" - " }\n" - " }\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - HostDataType expected = (HostDataType)StartValue(), previous; - oldValues[tid] = (HostDataType)tid; - for(int i = 0; i < Iterations(); i++) - { - previous = expected; - if(host_atomic_compare_exchange(&destMemory[0], &expected, oldValues[tid], MemoryOrder(), MemoryOrder2())) - oldValues[tid] = expected; - else - { - if(previous == expected) // shouldn't occur for 'strong' - { - oldValues[tid] = threadCount; //mark fail with invalid value - } - } - } - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - /* We are expecting values from 0 to size-1 and initial value from atomic variable */ - /* These values must be distributed across refValues array and atomic variable finalVaue[0] */ - /* Any repeated value is treated as an error */ - std::vector tidFound(threadCount); - bool startValueFound = false; - cl_uint i; - - for(i = 0; i <= threadCount; i++) - { - cl_uint value; - if(i == threadCount) - value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written) - else - value = (cl_uint)refValues[i]; - if(value == (cl_uint)StartValue()) - { - // Special initial value - if(startValueFound) - { - log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue()); - correct = false; - return true; - } - startValueFound = true; - continue; - } - if(value >= threadCount) - { - if(value == threadCount) - log_error("ERROR: Spurious failure detected for atomic_compare_exchange_strong\n"); - log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value); - correct = false; - return true; - } - if(tidFound[value]) - { - log_error("ERROR: Value (%u) occurred more thane once\n", value); - correct = false; - return true; - } - tidFound[value] = true; - } - return true; - } -}; - -int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestCompareStrong test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestCompareWeak : public CBasicTestCompareStrong -{ -public: - using CBasicTestCompareStrong::StartValue; - using CBasicTestCompareStrong::MemoryOrderScope; - using CBasicTestCompareStrong::DataType; - using CBasicTestCompareStrong::Iterations; - using CBasicTestCompareStrong::IterationsStr; - CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) : CBasicTestCompareStrong(dataType, useSVM) - { - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScope(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" ")+DataType().RegularTypeName()+" expected , previous;\n" - " int successCount = 0;\n" - " oldValues[tid] = tid;\n" - " expected = tid; // force failure at the beginning\n" - " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n" - " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n" - " else\n" - " {\n" - " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n" - " {\n" - " previous = expected;\n" - " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n" - " {\n" - " oldValues[tid] = expected;\n" - " successCount++;\n" - " }\n" - " }\n" - " }\n"; - } -}; - -int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestCompareWeak test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestFetchAdd : public CBasicTestMemOrderScope -{ -public: - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"+ - " atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n" - " atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n" - " atomic_fetch_add"+postfix+"(&destMemory[0], (("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8"+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], ((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8, MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - expected += ((HostDataType)i+3)*3+(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8); - return true; - } -}; - -int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestFetchAdd test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestFetchSub : public CBasicTestMemOrderScope -{ -public: - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_sub"+postfix+"(&destMemory[0], tid + 3 +((("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8)"+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_sub(&destMemory[0], (HostDataType)tid + 3+(((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - expected -= (HostDataType)i + 3 +(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8); - return true; - } -}; - -int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestFetchSub test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestFetchOr : public CBasicTestMemOrderScope -{ -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - cl_uint numBits = DataType().Size(deviceID) * 8; - - return (threadCount + numBits - 1) / numBits; - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n" - " int whichResult = tid / numBits;\n" - " int bitIndex = tid - (whichResult * numBits);\n" - "\n" - " oldValues[tid] = atomic_fetch_or"+postfix+"(&destMemory[whichResult], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - size_t numBits = sizeof(HostDataType) * 8; - size_t whichResult = tid / numBits; - size_t bitIndex = tid - (whichResult * numBits); - - oldValues[tid] = host_atomic_fetch_or(&destMemory[whichResult], ((HostDataType)1 << bitIndex), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8); - if(whichDestValue < numValues - 1) - { - expected = ~(HostDataType)0; - return true; - } - // Last item doesn't get or'ed on every bit, so we have to mask away - cl_uint numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8); - expected = StartValue(); - for(cl_uint i = 0; i < numBits; i++) - expected |= ((HostDataType)1 << i); - return true; - } -}; - -int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestFetchOr test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestFetchXor : public CBasicTestMemOrderScope -{ -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::DataType; - CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue((HostDataType)0x2f08ab418ba0541LL); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" int numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n" - " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n" - "\n" - " oldValues[tid] = atomic_fetch_xor"+postfix+"(&destMemory[0], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - int numBits = sizeof(HostDataType) * 8; - int bitIndex = (numBits-1)*(tid+1)/threadCount; - - oldValues[tid] = host_atomic_fetch_xor(&destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - int numBits = sizeof(HostDataType)*8; - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - { - int bitIndex = (numBits-1)*(i+1)/threadCount; - expected ^= ((HostDataType)1 << bitIndex); - } - return true; - } -}; - -int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestFetchXor test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestFetchAnd : public CBasicTestMemOrderScope -{ -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(~(HostDataType)0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - cl_uint numBits = DataType().Size(deviceID) * 8; - - return (threadCount + numBits - 1) / numBits; - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n" - " int whichResult = tid / numBits;\n" - " int bitIndex = tid - (whichResult * numBits);\n" - "\n" - " oldValues[tid] = atomic_fetch_and"+postfix+"(&destMemory[whichResult], ~(("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - size_t numBits = sizeof(HostDataType) * 8; - size_t whichResult = tid / numBits; - size_t bitIndex = tid - (whichResult * numBits); - - oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], ~((HostDataType)1 << bitIndex), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8); - if(whichDestValue < numValues - 1) - { - expected = 0; - return true; - } - // Last item doesn't get and'ed on every bit, so we have to mask away - size_t numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8); - expected = StartValue(); - for(size_t i = 0; i < numBits; i++) - expected &= ~((HostDataType)1 << i); - return true; - } -}; - -int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestFetchAnd test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestFetchOrAnd : public CBasicTestMemOrderScope -{ -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::Iterations; - using CBasicTestMemOrderScope::IterationsStr; - CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return 1+(threadCount-1)/(DataType().Size(deviceID)*8); - } - // each thread modifies (with OR and AND operations) and verifies - // only one bit in atomic variable - // other bits are modified by other threads but it must not affect current thread operation - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+ - " size_t valueInd = tid/bits;\n" - " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n" - " oldValues[tid] = 0;\n" - " for(int i = 0; i < "+IterationsStr()+"; i++)\n" - " {\n" - " value = atomic_fetch_or"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n" - " if(value & bitMask) // bit should be set to 0\n" - " oldValues[tid]++;\n" - " value = atomic_fetch_and"+postfix+"(destMemory+valueInd, ~bitMask"+memoryOrderScope+");\n" - " if(!(value & bitMask)) // bit should be set to 1\n" - " oldValues[tid]++;\n" - " }\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - int bits = sizeof(HostDataType)*8; - size_t valueInd = tid/bits; - HostDataType value, bitMask = (HostDataType)1 << tid%bits; - oldValues[tid] = 0; - for(int i = 0; i < Iterations(); i++) - { - value = host_atomic_fetch_or(destMemory+valueInd, bitMask, MemoryOrder()); - if(value & bitMask) // bit should be set to 0 - oldValues[tid]++; - value = host_atomic_fetch_and(destMemory+valueInd, ~bitMask, MemoryOrder()); - if(!(value & bitMask)) // bit should be set to 1 - oldValues[tid]++; - } - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = 0; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - for(cl_uint i = 0; i < threadCount; i++) - { - if(refValues[i] > 0) - { - log_error("Thread %d found %d mismatch(es)\n", i, (cl_uint)refValues[i]); - correct = false; - } - } - return true; - } -}; - -int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestFetchOrAnd test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestFetchXor2 : public CBasicTestMemOrderScope -{ -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::Iterations; - using CBasicTestMemOrderScope::IterationsStr; - CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return 1+(threadCount-1)/(DataType().Size(deviceID)*8); - } - // each thread modifies (with XOR operation) and verifies - // only one bit in atomic variable - // other bits are modified by other threads but it must not affect current thread operation - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+ - " size_t valueInd = tid/bits;\n" - " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n" - " oldValues[tid] = 0;\n" - " for(int i = 0; i < "+IterationsStr()+"; i++)\n" - " {\n" - " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n" - " if(value & bitMask) // bit should be set to 0\n" - " oldValues[tid]++;\n" - " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n" - " if(!(value & bitMask)) // bit should be set to 1\n" - " oldValues[tid]++;\n" - " }\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - int bits = sizeof(HostDataType)*8; - size_t valueInd = tid/bits; - HostDataType value, bitMask = (HostDataType)1 << tid%bits; - oldValues[tid] = 0; - for(int i = 0; i < Iterations(); i++) - { - value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder()); - if(value & bitMask) // bit should be set to 0 - oldValues[tid]++; - value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder()); - if(!(value & bitMask)) // bit should be set to 1 - oldValues[tid]++; - } - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = 0; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - for(cl_uint i = 0; i < threadCount; i++) - { - if(refValues[i] > 0) - { - log_error("Thread %d found %d mismatches\n", i, (cl_uint)refValues[i]); - correct = false; - } - } - return true; - } -}; - -int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestFetchXor2 test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestFetchMin : public CBasicTestMemOrderScope -{ -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(DataType().MaxValue()); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_min"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0; i < threadCount; i++) - { - startRefValues[i] = genrand_int32(d); - if(sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; - } - return true; - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - { - if(startRefValues[ i ] < expected) - expected = startRefValues[ i ]; - } - return true; - } -}; - -int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestFetchMin test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchMin test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchMin test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestFetchMax : public CBasicTestMemOrderScope -{ -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(DataType().MinValue()); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_max"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0; i < threadCount; i++) - { - startRefValues[i] = genrand_int32(d); - if(sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; - } - return true; - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - { - if(startRefValues[ i ] > expected) - expected = startRefValues[ i ]; - } - return true; - } -}; - -int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestFetchMax test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchMax test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchMax test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; -} - -int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestFlag : public CBasicTestMemOrderScope -{ - static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000; -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::UseSVM; - using CBasicTestMemOrderScope::LocalMemory; - CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - TExplicitMemoryOrderType MemoryOrderForClear() - { - // Memory ordering for atomic_flag_clear function - // ("shall not be memory_order_acquire nor memory_order_acq_rel") - if(MemoryOrder() == MEMORY_ORDER_ACQUIRE) - return MEMORY_ORDER_RELAXED; - if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) - return MEMORY_ORDER_RELEASE; - return MemoryOrder(); - } - std::string MemoryOrderScopeStrForClear() - { - std::string orderStr; - if (MemoryOrder() != MEMORY_ORDER_EMPTY) - orderStr = std::string(", ") + get_memory_order_type_name(MemoryOrderForClear()); - return orderStr + MemoryScopeStr(); - } - - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, - cl_command_queue queue) - { - // This test assumes support for the memory_scope_device scope in the case - // that LocalMemory() == false. Therefore we should skip this test in that - // configuration on a 3.0 driver since supporting the memory_scope_device - // scope is optionaly. - if (get_device_cl_version(deviceID) >= Version{ 3, 0 }) - { - if (!LocalMemory() - && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE)) - { - log_info( - "Skipping atomic_flag test due to use of atomic_scope_device " - "which is optionally not supported on this device\n"); - return 0; // skip test - not applicable - } - } - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, - context, - queue); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - std::string program = - " uint cnt, stop = 0;\n" - " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n" - " {\n" - " bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n"; - if (MemoryOrder() == MEMORY_ORDER_RELAXED - || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory()) - program += " atomic_work_item_fence(" - + std::string(LocalMemory() - ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " - : "CLK_GLOBAL_MEM_FENCE, ") - + "memory_order_acquire," - + std::string(LocalMemory() - ? "memory_scope_work_group" - : (UseSVM() ? "memory_scope_all_svm_devices" - : "memory_scope_device")) - + ");\n"; - - program += - " if (!set)\n" - " {\n"; - - if (LocalMemory()) - program += " uint csIndex = get_enqueued_local_size(0)*get_group_id(0)+cnt;\n"; - else - program += " uint csIndex = cnt;\n"; - - std::ostringstream csNotVisited; - csNotVisited << CRITICAL_SECTION_NOT_VISITED; - program += - " // verify that thread is the first visitor\n" - " if(oldValues[csIndex] == "+csNotVisited.str()+")\n" - " {\n" - " oldValues[csIndex] = tid; // set the winner id for this critical section\n" - " stop = 1;\n" - " }\n"; - - if (MemoryOrder() == MEMORY_ORDER_ACQUIRE - || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory()) - program += " atomic_work_item_fence(" - + std::string(LocalMemory() - ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " - : "CLK_GLOBAL_MEM_FENCE, ") - + "memory_order_release," - + std::string(LocalMemory() - ? "memory_scope_work_group" - : (UseSVM() ? "memory_scope_all_svm_devices" - : "memory_scope_device")) - + ");\n"; - - program += - " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n" - " }\n" - " }\n"; - return program; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - cl_uint cnt, stop = 0; - for (cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n" - { - if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder())) - { - cl_uint csIndex = cnt; - // verify that thread is the first visitor\n" - if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED) - { - oldValues[csIndex] = tid; // set the winner id for this critical section\n" - stop = 1; - } - host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear()); - } - } - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - return true; - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0 ; i < threadCount; i++) - startRefValues[i] = CRITICAL_SECTION_NOT_VISITED; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - /* We are expecting unique values from 0 to threadCount-1 (each critical section must be visited) */ - /* These values must be distributed across refValues array */ - std::vector tidFound(threadCount); - cl_uint i; - - for (i = 0; i < threadCount; i++) - { - cl_uint value = (cl_uint)refValues[i]; - if (value == CRITICAL_SECTION_NOT_VISITED) - { - // Special initial value - log_error("ERROR: Critical section %u not visited\n", i); - correct = false; - return true; - } - if (value >= threadCount) - { - log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value); - correct = false; - return true; - } - if (tidFound[value]) - { - log_error("ERROR: Value (%u) occurred more thane once\n", value); - correct = false; - return true; - } - tidFound[value] = true; - } - return true; - } -}; - -int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) -{ - int error = 0; - CBasicTestFlag test_flag(TYPE_ATOMIC_FLAG, useSVM); - EXECUTE_TEST(error, test_flag.Execute(deviceID, context, queue, num_elements)); - return error; -} - -int test_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_flag_generic(deviceID, context, queue, num_elements, false); -} - -int test_svm_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_atomic_flag_generic(deviceID, context, queue, num_elements, true); -} - -template -class CBasicTestFence : public CBasicTestMemOrderScope -{ - struct TestDefinition { - bool op1IsFence; - TExplicitMemoryOrderType op1MemOrder; - bool op2IsFence; - TExplicitMemoryOrderType op2MemOrder; - }; -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTestMemOrderScope::DeclaredInProgram; - using CBasicTestMemOrderScope::UsedInFunction; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::CurrentGroupSize; - using CBasicTestMemOrderScope::UseSVM; - using CBasicTestMemOrderScope::LocalMemory; - using CBasicTestMemOrderScope::LocalRefValues; - CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual cl_uint NumNonAtomicVariablesPerThread() - { - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - return 1; - if (LocalMemory()) - { - if (gIsEmbedded) - { - if (CurrentGroupSize() > 1024) - CurrentGroupSize(1024); - return 1; //1KB of local memory required by spec. Clamp group size to 1k and allow 1 variable per thread - } - else - return 32 * 1024 / 8 / CurrentGroupSize() - 1; //32KB of local memory required by spec - } - return 256; - } - virtual std::string SingleTestName() - { - std::string testName; - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - testName += "seq_cst fence, "; - else - testName += std::string(get_memory_order_type_name(_subCase.op1MemOrder)).substr(sizeof("memory_order")) - + (_subCase.op1IsFence ? " fence" : " atomic") + " synchronizes-with " - + std::string(get_memory_order_type_name(_subCase.op2MemOrder)).substr(sizeof("memory_order")) - + (_subCase.op2IsFence ? " fence" : " atomic") + ", "; - testName += CBasicTest::SingleTestName(); - testName += std::string(", ") + std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory")); - return testName; - } - virtual bool SVMDataBufferAllSVMConsistent() - { - // Although memory_scope_all_devices doesn't mention SVM it is just an - // alias for memory_scope_all_svm_devices. So both scopes interact with - // SVM allocations, on devices that support those, just the same. - return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES - || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES; - } - virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { int error = 0; - // execute 3 (maximum) sub cases for each memory order - for (_subCaseId = 0; _subCaseId < 3; _subCaseId++) + CBasicTestStore test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_float(TYPE_ATOMIC_FLOAT, + useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) { - EXECUTE_TEST(error, (CBasicTestMemOrderScope::ExecuteForEachParameterSet(deviceID, context, queue))); + CBasicTestStore test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestStore test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); } return error; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(DeclaredInProgram() || UsedInFunction()) - return 0; //skip test - not applicable - no overloaded fence functions for different address spaces - if(MemoryOrder() == MEMORY_ORDER_EMPTY || - MemoryScope() == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since opencl20-openclc-rev15 - return 0; //skip test - not applicable - if((UseSVM() || gHost) - && LocalMemory()) - return 0; // skip test - not applicable for SVM and local memory - struct TestDefinition acqTests[] = { - // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder} - { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, - { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, - { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE } - }; - struct TestDefinition relTests[] = { - { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE }, - { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL } - }; - struct TestDefinition arTests[] = { - { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }, - { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE }, - { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL } - }; - switch (MemoryOrder()) - { - case MEMORY_ORDER_ACQUIRE: - if (_subCaseId >= sizeof(acqTests) / sizeof(struct TestDefinition)) - return 0; - _subCase = acqTests[_subCaseId]; - break; - case MEMORY_ORDER_RELEASE: - if (_subCaseId >= sizeof(relTests) / sizeof(struct TestDefinition)) - return 0; - _subCase = relTests[_subCaseId]; - break; - case MEMORY_ORDER_ACQ_REL: - if (_subCaseId >= sizeof(arTests) / sizeof(struct TestDefinition)) - return 0; - _subCase = arTests[_subCaseId]; - break; - case MEMORY_ORDER_SEQ_CST: - if (_subCaseId != 0) // one special case only - return 0; - break; - default: - return 0; - } - LocalRefValues(LocalMemory()); - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramHeader(cl_uint maxNumDestItems) - { - std::string header; - if(gOldAPI) - { - if(MemoryScope() == MEMORY_SCOPE_EMPTY) - { - header += "#define atomic_work_item_fence(x,y) mem_fence(x)\n"; - } - else - { - header += "#define atomic_work_item_fence(x,y,z) mem_fence(x)\n"; - } - } - return header+CBasicTestMemOrderScope::ProgramHeader(maxNumDestItems); - } - virtual std::string ProgramCore() - { - std::ostringstream naValues; - naValues << NumNonAtomicVariablesPerThread(); - std::string program, fenceType, nonAtomic; - if (LocalMemory()) - { - program = " size_t myId = get_local_id(0), hisId = get_local_size(0)-1-myId;\n"; - fenceType = "CLK_LOCAL_MEM_FENCE"; - nonAtomic = "localValues"; - } - else - { - program = " size_t myId = tid, hisId = threadCount-1-tid;\n"; - fenceType = "CLK_GLOBAL_MEM_FENCE"; - nonAtomic = "oldValues"; - } - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - { - // All threads are divided into pairs. - // Each thread has its own atomic variable and performs the following actions: - // - increments its own variable - // - performs fence operation to propagate its value and to see value from other thread - // - reads value from other thread's variable - // - repeats the above steps when both values are the same (and less than 1000000) - // - stores the last value read from other thread (in additional variable) - // At the end of execution at least one thread should know the last value from other thread - program += std::string("") + - " " + DataType().RegularTypeName() + " myValue = 0, hisValue; \n" - " do {\n" - " myValue++;\n" - " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n" - " atomic_work_item_fence(" + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + "); \n" - " hisValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n" - " } while(myValue == hisValue && myValue < 1000000);\n" - " " + nonAtomic + "[myId] = hisValue; \n"; - } - else - { - // Each thread modifies one of its non-atomic variables, increments value of its atomic variable - // and reads values from another thread in typical synchronizes-with scenario with: - // - non-atomic variable (at index A) modification (value change from 0 to A) - // - release operation (additional fence or within atomic) + atomic variable modification (value A) - // - atomic variable read (value B) + acquire operation (additional fence or within atomic) - // - non-atomic variable (at index B) read (value C) - // Each thread verifies dependency between atomic and non-atomic value read from another thread - // The following condition must be true: B == C - program += std::string("") + - " " + DataType().RegularTypeName() + " myValue = 0, hisAtomicValue, hisValue; \n" - " do {\n" - " myValue++;\n" - " " + nonAtomic + "[myId*" + naValues.str() +"+myValue] = myValue;\n"; - if (_subCase.op1IsFence) - program += std::string("") + - " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + "); \n" - " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n"; - else - program += std::string("") + - " atomic_store_explicit(&destMemory[myId], myValue, " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + ");\n"; - if (_subCase.op2IsFence) - program += std::string("") + - " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n" - " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + "); \n"; - else - program += std::string("") + - " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + ");\n"; - program += - " hisValue = " + nonAtomic + "[hisId*" + naValues.str() + "+hisAtomicValue]; \n"; - if (LocalMemory()) - program += " hisId = (hisId+1)%get_local_size(0);\n"; - else - program += " hisId = (hisId+1)%threadCount;\n"; - program += - " } while(hisAtomicValue == hisValue && myValue < "+naValues.str()+"-1);\n" - " if(hisAtomicValue != hisValue)\n" - " { // fail\n" - " atomic_store(&destMemory[myId], myValue-1);\n"; - if (LocalMemory()) - program += " hisId = (hisId+get_local_size(0)-1)%get_local_size(0);\n"; - else - program += " hisId = (hisId+threadCount-1)%threadCount;\n"; - program += - " if(myValue+1 < " + naValues.str() + ")\n" - " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+1] = hisId;\n" - " if(myValue+2 < " + naValues.str() + ")\n" - " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+2] = hisAtomicValue;\n" - " if(myValue+3 < " + naValues.str() + ")\n" - " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+3] = hisValue;\n"; - if (gDebug) - { - program += - " printf(\"WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\\n\", tid, hisAtomicValue, hisId, hisValue);\n"; - } - program += - " }\n"; - } - return program; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - size_t myId = tid, hisId = threadCount - 1 - tid; - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - { - HostDataType myValue = 0, hisValue; - // CPU thread typically starts faster - wait for GPU thread - myValue++; - host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST); - while (host_atomic_load(&destMemory[hisId], MEMORY_ORDER_SEQ_CST) == 0); - do { - myValue++; - host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED); - host_atomic_thread_fence(MemoryOrder()); - hisValue = host_atomic_load(&destMemory[hisId], MEMORY_ORDER_RELAXED); - } while (myValue == hisValue && hisValue < 1000000); - oldValues[tid] = hisValue; - } - else - { - HostDataType myValue = 0, hisAtomicValue, hisValue; - do { - myValue++; - oldValues[myId*NumNonAtomicVariablesPerThread()+myValue] = myValue; - if (_subCase.op1IsFence) - { - host_atomic_thread_fence(_subCase.op1MemOrder); - host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED); - } - else - host_atomic_store(&destMemory[myId], myValue, _subCase.op1MemOrder); - if (_subCase.op2IsFence) - { - hisAtomicValue = host_atomic_load(&destMemory[hisId], MEMORY_ORDER_RELAXED); - host_atomic_thread_fence(_subCase.op2MemOrder); - } - else - hisAtomicValue = host_atomic_load(&destMemory[hisId], _subCase.op2MemOrder); - hisValue = oldValues[hisId*NumNonAtomicVariablesPerThread() + hisAtomicValue]; - hisId = (hisId + 1) % threadCount; - } while(hisAtomicValue == hisValue && myValue < (HostDataType)NumNonAtomicVariablesPerThread()-1); - if(hisAtomicValue != hisValue) - { // fail - host_atomic_store(&destMemory[myId], myValue-1, MEMORY_ORDER_SEQ_CST); - if (gDebug) - { - hisId = (hisId + threadCount - 1) % threadCount; - printf("WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\n", tid, hisAtomicValue, hisId, hisValue); - } - } - } - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0 ; i < threadCount*NumNonAtomicVariablesPerThread(); i++) - startRefValues[i] = 0; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount; - for(cl_uint workOffset = 0; workOffset < threadCount; workOffset+= workSize) - { - if(workOffset+workSize > threadCount) - // last workgroup (host threads) - workSize = threadCount-workOffset; - for(cl_uint i = 0 ; i < workSize && workOffset+i < threadCount; i++) - { - HostAtomicType myValue = finalValues[workOffset + i]; - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - { - HostDataType hisValue = refValues[workOffset + i]; - if (myValue == hisValue) - { - // a draw - both threads should reach final value 1000000 - if (myValue != 1000000) - { - log_error("ERROR: Invalid reference value #%u (%d instead of 1000000)\n", workOffset + i, myValue); - correct = false; - return true; - } - } - else - { - //slower thread (in total order of seq_cst operations) must know last value written by faster thread - HostAtomicType hisRealValue = finalValues[workOffset + workSize - 1 - i]; - HostDataType myValueReadByHim = refValues[workOffset + workSize - 1 - i]; +} - // who is the winner? - thread with lower private counter value - if (myValue == hisRealValue) // forbidden result - fence doesn't work - { - log_error("ERROR: Atomic counter values #%u and #%u are the same (%u)\n", workOffset + i, workOffset + workSize - 1 - i, myValue); - log_error("ERROR: Both threads have outdated values read from another thread (%u and %u)\n", hisValue, myValueReadByHim); - correct = false; - return true; - } - if (myValue > hisRealValue) // I'm slower - { - if (hisRealValue != hisValue) - { - log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + i, hisValue, hisRealValue); - log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + i, workOffset + workSize - 1 - i); - correct = false; - return true; - } - } - else // I'm faster - { - if (myValueReadByHim != myValue) - { - log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + workSize - 1 - i, myValueReadByHim, myValue); - log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + workSize - 1 - i, workOffset + i); - correct = false; - return true; - } - } - } - } - else - { - if (myValue != NumNonAtomicVariablesPerThread()-1) - { - log_error("ERROR: Invalid atomic value #%u (%d instead of %d)\n", workOffset + i, myValue, NumNonAtomicVariablesPerThread()-1); - log_error("ERROR: Thread #%u observed invalid values in other thread's variables\n", workOffset + i, myValue); - correct = false; - return true; - } - } - } +int test_atomic_store(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_store_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_store(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_store_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestInit : public CBasicTest { +public: + using CBasicTest::OldValueCheck; + CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) + : CBasicTest(dataType, useSVM) + { + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + virtual std::string ProgramCore() + { + return " atomic_init(&destMemory[tid], tid);\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + host_atomic_init(&destMemory[tid], (HostDataType)tid); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; + return true; } - return true; - } -private: - int _subCaseId; - struct TestDefinition _subCase; }; -int test_atomic_fence_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_init_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFence test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFence test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFence test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestInit test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_float(TYPE_ATOMIC_FLOAT, + useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestInit test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestInit test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_init(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fence_generic(deviceID, context, queue, num_elements, false); + return test_atomic_init_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_init(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fence_generic(deviceID, context, queue, num_elements, true); + return test_atomic_init_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestLoad + : public CBasicTestMemOrderScope { +public: + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTest::CheckCapabilities; + CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (MemoryOrder() == MEMORY_ORDER_RELEASE + || MemoryOrder() == MEMORY_ORDER_ACQ_REL) + return 0; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store + // should be MEMORY_ORDER_RELEASE + std::string memoryOrderScopeLoad = MemoryOrderScopeStr(); + std::string memoryOrderScopeStore = + (MemoryOrder() == MEMORY_ORDER_ACQUIRE) + ? (", memory_order_release" + MemoryScopeStr()) + : memoryOrderScopeLoad; + std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit"); + return " atomic_store" + postfix + "(&destMemory[tid], tid" + + memoryOrderScopeStore + + ");\n" + " oldValues[tid] = atomic_load" + + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + host_atomic_store(&destMemory[tid], (HostDataType)tid, + MEMORY_ORDER_SEQ_CST); + oldValues[tid] = host_atomic_load( + &destMemory[tid], MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + for (cl_uint i = 0; i < threadCount; i++) + { + if (refValues[i] != (HostDataType)i) + { + log_error("Invalid value for thread %u\n", (cl_uint)i); + correct = false; + return true; + } + } + return true; + } +}; + +int test_atomic_load_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestLoad test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_float(TYPE_ATOMIC_FLOAT, + useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestLoad test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestLoad test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_load(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_load_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_load(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_load_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestExchange + : public CBasicTestMemOrderScope { +public: + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::Iterations; + using CBasicTestMemOrderScope::IterationsStr; + CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(123456); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " oldValues[tid] = atomic_exchange" + postfix + + "(&destMemory[0], tid" + memoryOrderScope + + ");\n" + " for(int i = 0; i < " + + IterationsStr() + + "; i++)\n" + " oldValues[tid] = atomic_exchange" + + postfix + "(&destMemory[0], oldValues[tid]" + memoryOrderScope + + ");\n"; + } + + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, + MemoryOrder()); + for (int i = 0; i < Iterations(); i++) + oldValues[tid] = host_atomic_exchange( + &destMemory[0], oldValues[tid], MemoryOrder()); + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + OldValueCheck( + Iterations() % 2 + == 0); // check is valid for even number of iterations only + correct = true; + /* We are expecting values from 0 to size-1 and initial value from + * atomic variable */ + /* These values must be distributed across refValues array and atomic + * variable finalVaue[0] */ + /* Any repeated value is treated as an error */ + std::vector tidFound(threadCount); + bool startValueFound = false; + cl_uint i; + + for (i = 0; i <= threadCount; i++) + { + cl_uint value; + if (i == threadCount) + value = (cl_uint)finalValues[0]; // additional value from atomic + // variable (last written) + else + value = (cl_uint)refValues[i]; + if (value == (cl_uint)StartValue()) + { + // Special initial value + if (startValueFound) + { + log_error("ERROR: Starting reference value (%u) occurred " + "more thane once\n", + (cl_uint)StartValue()); + correct = false; + return true; + } + startValueFound = true; + continue; + } + if (value >= threadCount) + { + log_error( + "ERROR: Reference value %u outside of valid range! (%u)\n", + i, value); + correct = false; + return true; + } + if (tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", + value); + correct = false; + return true; + } + tidFound[value] = true; + } + return true; + } +}; + +int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestExchange test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestExchange + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestExchange + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_exchange(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_exchange_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_exchange_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestCompareStrong + : public CBasicTestMemOrder2Scope { +public: + using CBasicTestMemOrder2Scope::StartValue; + using CBasicTestMemOrder2Scope::OldValueCheck; + using CBasicTestMemOrder2Scope::MemoryOrder; + using CBasicTestMemOrder2Scope::MemoryOrder2; + using CBasicTestMemOrder2Scope::MemoryOrderScope; + using CBasicTestMemOrder2Scope::MemoryScope; + using CBasicTestMemOrder2Scope::DataType; + using CBasicTestMemOrder2Scope::Iterations; + using CBasicTestMemOrder2Scope::IterationsStr; + using CBasicTest::CheckCapabilities; + CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrder2Scope(dataType, + useSVM) + { + StartValue(123456); + OldValueCheck(false); + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (MemoryOrder2() == MEMORY_ORDER_RELEASE + || MemoryOrder2() == MEMORY_ORDER_ACQ_REL) + return 0; // not allowed as 'failure' argument + if ((MemoryOrder() == MEMORY_ORDER_RELAXED + && MemoryOrder2() != MEMORY_ORDER_RELAXED) + || (MemoryOrder() != MEMORY_ORDER_SEQ_CST + && MemoryOrder2() == MEMORY_ORDER_SEQ_CST)) + return 0; // failure argument shall be no stronger than the success + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder2()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTestMemOrder2Scope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScope(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" ") + DataType().RegularTypeName() + + " expected, previous;\n" + " int successCount = 0;\n" + " oldValues[tid] = tid;\n" + " expected = tid; // force failure at the beginning\n" + " if(atomic_compare_exchange_strong" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + ") || expected == tid)\n" + " oldValues[tid] = threadCount+1; //mark unexpected success " + "with invalid value\n" + " else\n" + " {\n" + " for(int i = 0; i < " + + IterationsStr() + + " || successCount == 0; i++)\n" + " {\n" + " previous = expected;\n" + " if(atomic_compare_exchange_strong" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + "))\n" + " {\n" + " oldValues[tid] = expected;\n" + " successCount++;\n" + " }\n" + " else\n" + " {\n" + " if(previous == expected) // spurious failure - " + "shouldn't occur for 'strong'\n" + " {\n" + " oldValues[tid] = threadCount; //mark fail with " + "invalid value\n" + " break;\n" + " }\n" + " }\n" + " }\n" + " }\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + HostDataType expected = (HostDataType)StartValue(), previous; + oldValues[tid] = (HostDataType)tid; + for (int i = 0; i < Iterations(); i++) + { + previous = expected; + if (host_atomic_compare_exchange(&destMemory[0], &expected, + oldValues[tid], MemoryOrder(), + MemoryOrder2())) + oldValues[tid] = expected; + else + { + if (previous == expected) // shouldn't occur for 'strong' + { + oldValues[tid] = threadCount; // mark fail with invalid + // value + } + } + } + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + /* We are expecting values from 0 to size-1 and initial value from + * atomic variable */ + /* These values must be distributed across refValues array and atomic + * variable finalVaue[0] */ + /* Any repeated value is treated as an error */ + std::vector tidFound(threadCount); + bool startValueFound = false; + cl_uint i; + + for (i = 0; i <= threadCount; i++) + { + cl_uint value; + if (i == threadCount) + value = (cl_uint)finalValues[0]; // additional value from atomic + // variable (last written) + else + value = (cl_uint)refValues[i]; + if (value == (cl_uint)StartValue()) + { + // Special initial value + if (startValueFound) + { + log_error("ERROR: Starting reference value (%u) occurred " + "more thane once\n", + (cl_uint)StartValue()); + correct = false; + return true; + } + startValueFound = true; + continue; + } + if (value >= threadCount) + { + if (value == threadCount) + log_error("ERROR: Spurious failure detected for " + "atomic_compare_exchange_strong\n"); + log_error( + "ERROR: Reference value %u outside of valid range! (%u)\n", + i, value); + correct = false; + return true; + } + if (tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", + value); + correct = false; + return true; + } + tidFound[value] = true; + } + return true; + } +}; + +int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestCompareStrong test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_uint( + TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_long( + TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestCompareStrong + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestCompareStrong + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_compare_exchange_strong(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, + num_elements, false); +} + +int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, + num_elements, true); +} + +template +class CBasicTestCompareWeak + : public CBasicTestCompareStrong { +public: + using CBasicTestCompareStrong::StartValue; + using CBasicTestCompareStrong::MemoryOrderScope; + using CBasicTestCompareStrong::DataType; + using CBasicTestCompareStrong::Iterations; + using CBasicTestCompareStrong::IterationsStr; + CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestCompareStrong(dataType, + useSVM) + {} + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScope(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" ") + DataType().RegularTypeName() + + " expected , previous;\n" + " int successCount = 0;\n" + " oldValues[tid] = tid;\n" + " expected = tid; // force failure at the beginning\n" + " if(atomic_compare_exchange_weak" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + ") || expected == tid)\n" + " oldValues[tid] = threadCount+1; //mark unexpected success " + "with invalid value\n" + " else\n" + " {\n" + " for(int i = 0; i < " + + IterationsStr() + + " || successCount == 0; i++)\n" + " {\n" + " previous = expected;\n" + " if(atomic_compare_exchange_weak" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + "))\n" + " {\n" + " oldValues[tid] = expected;\n" + " successCount++;\n" + " }\n" + " }\n" + " }\n"; + } +}; + +int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestCompareWeak test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_uint( + TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_long( + TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestCompareWeak + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestCompareWeak + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, + num_elements, false); +} + +int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, + num_elements, true); +} + +template +class CBasicTestFetchAdd + : public CBasicTestMemOrderScope { +public: + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + {} + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " oldValues[tid] = atomic_fetch_add" + postfix + + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], ((" + + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + + DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope + + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_add( + &destMemory[0], (HostDataType)tid + 3, MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, + MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, + MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], + ((HostDataType)tid + 3) + << (sizeof(HostDataType) - 1) * 8, + MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + expected += ((HostDataType)i + 3) * 3 + + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + return true; + } +}; + +int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestFetchAdd test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchAdd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchAdd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestFetchSub + : public CBasicTestMemOrderScope { +public: + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + {} + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " oldValues[tid] = atomic_fetch_sub" + postfix + + "(&destMemory[0], tid + 3 +(((" + + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + + DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope + + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_sub( + &destMemory[0], + (HostDataType)tid + 3 + + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8), + MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + expected -= (HostDataType)i + 3 + + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + return true; + } +}; + +int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestFetchSub test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchSub + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchSub + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestFetchOr + : public CBasicTestMemOrderScope { +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + cl_uint numBits = DataType().Size(deviceID) * 8; + + return (threadCount + numBits - 1) / numBits; + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" size_t numBits = sizeof(") + + DataType().RegularTypeName() + + ") * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - (whichResult * numBits);\n" + "\n" + " oldValues[tid] = atomic_fetch_or" + + postfix + "(&destMemory[whichResult], ((" + + DataType().RegularTypeName() + ")1 << bitIndex) " + + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + size_t numBits = sizeof(HostDataType) * 8; + size_t whichResult = tid / numBits; + size_t bitIndex = tid - (whichResult * numBits); + + oldValues[tid] = + host_atomic_fetch_or(&destMemory[whichResult], + ((HostDataType)1 << bitIndex), MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) + / (sizeof(HostDataType) * 8); + if (whichDestValue < numValues - 1) + { + expected = ~(HostDataType)0; + return true; + } + // Last item doesn't get or'ed on every bit, so we have to mask away + cl_uint numBits = + threadCount - whichDestValue * (sizeof(HostDataType) * 8); + expected = StartValue(); + for (cl_uint i = 0; i < numBits; i++) + expected |= ((HostDataType)1 << i); + return true; + } +}; + +int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestFetchOr test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchOr + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchOr + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestFetchXor + : public CBasicTestMemOrderScope { +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::DataType; + CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue((HostDataType)0x2f08ab418ba0541LL); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" int numBits = sizeof(") + + DataType().RegularTypeName() + + ") * 8;\n" + " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n" + "\n" + " oldValues[tid] = atomic_fetch_xor" + + postfix + "(&destMemory[0], ((" + DataType().RegularTypeName() + + ")1 << bitIndex) " + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + int numBits = sizeof(HostDataType) * 8; + int bitIndex = (numBits - 1) * (tid + 1) / threadCount; + + oldValues[tid] = host_atomic_fetch_xor( + &destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + int numBits = sizeof(HostDataType) * 8; + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + { + int bitIndex = (numBits - 1) * (i + 1) / threadCount; + expected ^= ((HostDataType)1 << bitIndex); + } + return true; + } +}; + +int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestFetchXor test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchXor + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchXor + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestFetchAnd + : public CBasicTestMemOrderScope { +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(~(HostDataType)0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + cl_uint numBits = DataType().Size(deviceID) * 8; + + return (threadCount + numBits - 1) / numBits; + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" size_t numBits = sizeof(") + + DataType().RegularTypeName() + + ") * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - (whichResult * numBits);\n" + "\n" + " oldValues[tid] = atomic_fetch_and" + + postfix + "(&destMemory[whichResult], ~((" + + DataType().RegularTypeName() + ")1 << bitIndex) " + + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + size_t numBits = sizeof(HostDataType) * 8; + size_t whichResult = tid / numBits; + size_t bitIndex = tid - (whichResult * numBits); + + oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], + ~((HostDataType)1 << bitIndex), + MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) + / (sizeof(HostDataType) * 8); + if (whichDestValue < numValues - 1) + { + expected = 0; + return true; + } + // Last item doesn't get and'ed on every bit, so we have to mask away + size_t numBits = + threadCount - whichDestValue * (sizeof(HostDataType) * 8); + expected = StartValue(); + for (size_t i = 0; i < numBits; i++) + expected &= ~((HostDataType)1 << i); + return true; + } +}; + +int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestFetchAnd test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestFetchOrAnd + : public CBasicTestMemOrderScope { +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::Iterations; + using CBasicTestMemOrderScope::IterationsStr; + CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8); + } + // each thread modifies (with OR and AND operations) and verifies + // only one bit in atomic variable + // other bits are modified by other threads but it must not affect current + // thread operation + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" int bits = sizeof(") + + DataType().RegularTypeName() + ")*8;\n" + + " size_t valueInd = tid/bits;\n" + " " + + DataType().RegularTypeName() + " value, bitMask = (" + + DataType().RegularTypeName() + + ")1 << tid%bits;\n" + " oldValues[tid] = 0;\n" + " for(int i = 0; i < " + + IterationsStr() + + "; i++)\n" + " {\n" + " value = atomic_fetch_or" + + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + + ");\n" + " if(value & bitMask) // bit should be set to 0\n" + " oldValues[tid]++;\n" + " value = atomic_fetch_and" + + postfix + "(destMemory+valueInd, ~bitMask" + memoryOrderScope + + ");\n" + " if(!(value & bitMask)) // bit should be set to 1\n" + " oldValues[tid]++;\n" + " }\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + int bits = sizeof(HostDataType) * 8; + size_t valueInd = tid / bits; + HostDataType value, bitMask = (HostDataType)1 << tid % bits; + oldValues[tid] = 0; + for (int i = 0; i < Iterations(); i++) + { + value = host_atomic_fetch_or(destMemory + valueInd, bitMask, + MemoryOrder()); + if (value & bitMask) // bit should be set to 0 + oldValues[tid]++; + value = host_atomic_fetch_and(destMemory + valueInd, ~bitMask, + MemoryOrder()); + if (!(value & bitMask)) // bit should be set to 1 + oldValues[tid]++; + } + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = 0; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + for (cl_uint i = 0; i < threadCount; i++) + { + if (refValues[i] > 0) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return true; + } +}; + +int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestFetchOrAnd test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_uint( + TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_long( + TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchOrAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchOrAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_orand_generic(deviceID, context, queue, + num_elements, false); +} + +int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_orand_generic(deviceID, context, queue, + num_elements, true); +} + +template +class CBasicTestFetchXor2 + : public CBasicTestMemOrderScope { +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::Iterations; + using CBasicTestMemOrderScope::IterationsStr; + CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8); + } + // each thread modifies (with XOR operation) and verifies + // only one bit in atomic variable + // other bits are modified by other threads but it must not affect current + // thread operation + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" int bits = sizeof(") + + DataType().RegularTypeName() + ")*8;\n" + + " size_t valueInd = tid/bits;\n" + " " + + DataType().RegularTypeName() + " value, bitMask = (" + + DataType().RegularTypeName() + + ")1 << tid%bits;\n" + " oldValues[tid] = 0;\n" + " for(int i = 0; i < " + + IterationsStr() + + "; i++)\n" + " {\n" + " value = atomic_fetch_xor" + + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + + ");\n" + " if(value & bitMask) // bit should be set to 0\n" + " oldValues[tid]++;\n" + " value = atomic_fetch_xor" + + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + + ");\n" + " if(!(value & bitMask)) // bit should be set to 1\n" + " oldValues[tid]++;\n" + " }\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + int bits = sizeof(HostDataType) * 8; + size_t valueInd = tid / bits; + HostDataType value, bitMask = (HostDataType)1 << tid % bits; + oldValues[tid] = 0; + for (int i = 0; i < Iterations(); i++) + { + value = host_atomic_fetch_xor(destMemory + valueInd, bitMask, + MemoryOrder()); + if (value & bitMask) // bit should be set to 0 + oldValues[tid]++; + value = host_atomic_fetch_xor(destMemory + valueInd, bitMask, + MemoryOrder()); + if (!(value & bitMask)) // bit should be set to 1 + oldValues[tid]++; + } + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = 0; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + for (cl_uint i = 0; i < threadCount; i++) + { + if (refValues[i] > 0) + { + log_error("Thread %d found %d mismatches\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return true; + } +}; + +int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestFetchXor2 test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchXor2 + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchXor2 + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_xor2_generic(deviceID, context, queue, + num_elements, false); +} + +int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_xor2_generic(deviceID, context, queue, + num_elements, true); +} + +template +class CBasicTestFetchMin + : public CBasicTestMemOrderScope { +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(DataType().MaxValue()); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " oldValues[tid] = atomic_fetch_min" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], + MemoryOrder()); + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if (sizeof(HostDataType) >= 8) + startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + } + return true; + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] < expected) expected = startRefValues[i]; + } + return true; + } +}; + +int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestFetchMin test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchMin + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchMin + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestFetchMax + : public CBasicTestMemOrderScope { +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(DataType().MinValue()); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " oldValues[tid] = atomic_fetch_max" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], + MemoryOrder()); + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if (sizeof(HostDataType) >= 8) + startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + } + return true; + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] > expected) expected = startRefValues[i]; + } + return true; + } +}; + +int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestFetchMax test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchMax + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchMax + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestFlag + : public CBasicTestMemOrderScope { + static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000; + +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::UseSVM; + using CBasicTestMemOrderScope::LocalMemory; + CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + TExplicitMemoryOrderType MemoryOrderForClear() + { + // Memory ordering for atomic_flag_clear function + // ("shall not be memory_order_acquire nor memory_order_acq_rel") + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE) return MEMORY_ORDER_RELAXED; + if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) return MEMORY_ORDER_RELEASE; + return MemoryOrder(); + } + std::string MemoryOrderScopeStrForClear() + { + std::string orderStr; + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + orderStr = std::string(", ") + + get_memory_order_type_name(MemoryOrderForClear()); + return orderStr + MemoryScopeStr(); + } + + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + // This test assumes support for the memory_scope_device scope in the + // case that LocalMemory() == false. Therefore we should skip this test + // in that configuration on a 3.0 driver since supporting the + // memory_scope_device scope is optionaly. + if (get_device_cl_version(deviceID) >= Version{ 3, 0 }) + { + if (!LocalMemory() + && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE)) + { + log_info("Skipping atomic_flag test due to use of " + "atomic_scope_device " + "which is optionally not supported on this device\n"); + return 0; // skip test - not applicable + } + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + std::string program = + " uint cnt, stop = 0;\n" + " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread " + "must find critical section where it is the first visitor\n" + " {\n" + " bool set = atomic_flag_test_and_set" + + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n"; + if (MemoryOrder() == MEMORY_ORDER_RELAXED + || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory()) + program += " atomic_work_item_fence(" + + std::string( + LocalMemory() + ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " + : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_acquire," + + std::string(LocalMemory() + ? "memory_scope_work_group" + : (UseSVM() ? "memory_scope_all_svm_devices" + : "memory_scope_device")) + + ");\n"; + + program += " if (!set)\n" + " {\n"; + + if (LocalMemory()) + program += " uint csIndex = " + "get_enqueued_local_size(0)*get_group_id(0)+cnt;\n"; + else + program += " uint csIndex = cnt;\n"; + + std::ostringstream csNotVisited; + csNotVisited << CRITICAL_SECTION_NOT_VISITED; + program += " // verify that thread is the first visitor\n" + " if(oldValues[csIndex] == " + + csNotVisited.str() + + ")\n" + " {\n" + " oldValues[csIndex] = tid; // set the winner id for this " + "critical section\n" + " stop = 1;\n" + " }\n"; + + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE + || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory()) + program += " atomic_work_item_fence(" + + std::string( + LocalMemory() + ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " + : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_release," + + std::string(LocalMemory() + ? "memory_scope_work_group" + : (UseSVM() ? "memory_scope_all_svm_devices" + : "memory_scope_device")) + + ");\n"; + + program += " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + + MemoryOrderScopeStrForClear() + + ");\n" + " }\n" + " }\n"; + return program; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + cl_uint cnt, stop = 0; + for (cnt = 0; !stop && cnt < threadCount; + cnt++) // each thread must find critical section where it is the + // first visitor\n" + { + if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder())) + { + cl_uint csIndex = cnt; + // verify that thread is the first visitor\n" + if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED) + { + oldValues[csIndex] = + tid; // set the winner id for this critical section\n" + stop = 1; + } + host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear()); + } + } + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + return true; + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount; i++) + startRefValues[i] = CRITICAL_SECTION_NOT_VISITED; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + /* We are expecting unique values from 0 to threadCount-1 (each critical + * section must be visited) */ + /* These values must be distributed across refValues array */ + std::vector tidFound(threadCount); + cl_uint i; + + for (i = 0; i < threadCount; i++) + { + cl_uint value = (cl_uint)refValues[i]; + if (value == CRITICAL_SECTION_NOT_VISITED) + { + // Special initial value + log_error("ERROR: Critical section %u not visited\n", i); + correct = false; + return true; + } + if (value >= threadCount) + { + log_error( + "ERROR: Reference value %u outside of valid range! (%u)\n", + i, value); + correct = false; + return true; + } + if (tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", + value); + correct = false; + return true; + } + tidFound[value] = true; + } + return true; + } +}; + +int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestFlag test_flag(TYPE_ATOMIC_FLAG, + useSVM); + EXECUTE_TEST(error, + test_flag.Execute(deviceID, context, queue, num_elements)); + return error; +} + +int test_atomic_flag(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_flag_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_flag(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_flag_generic(deviceID, context, queue, num_elements, + true); +} + +template +class CBasicTestFence + : public CBasicTestMemOrderScope { + struct TestDefinition + { + bool op1IsFence; + TExplicitMemoryOrderType op1MemOrder; + bool op2IsFence; + TExplicitMemoryOrderType op2MemOrder; + }; + +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTestMemOrderScope::DeclaredInProgram; + using CBasicTestMemOrderScope::UsedInFunction; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::CurrentGroupSize; + using CBasicTestMemOrderScope::UseSVM; + using CBasicTestMemOrderScope::LocalMemory; + using CBasicTestMemOrderScope::LocalRefValues; + CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + virtual cl_uint NumNonAtomicVariablesPerThread() + { + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) return 1; + if (LocalMemory()) + { + if (gIsEmbedded) + { + if (CurrentGroupSize() > 512) CurrentGroupSize(512); + return 2; // 1KB of local memory required by spec. Clamp group + // size to 512 and allow 2 variables per thread + } + else + return 32 * 1024 / 8 / CurrentGroupSize() + - 1; // 32KB of local memory required by spec + } + return 256; + } + virtual std::string SingleTestName() + { + std::string testName; + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + testName += "seq_cst fence, "; + else + testName += + std::string(get_memory_order_type_name(_subCase.op1MemOrder)) + .substr(sizeof("memory_order")) + + (_subCase.op1IsFence ? " fence" : " atomic") + + " synchronizes-with " + + std::string(get_memory_order_type_name(_subCase.op2MemOrder)) + .substr(sizeof("memory_order")) + + (_subCase.op2IsFence ? " fence" : " atomic") + ", "; + testName += CBasicTest::SingleTestName(); + testName += std::string(", ") + + std::string(get_memory_scope_type_name(MemoryScope())) + .substr(sizeof("memory")); + return testName; + } + virtual bool SVMDataBufferAllSVMConsistent() + { + // Although memory_scope_all_devices doesn't mention SVM it is just an + // alias for memory_scope_all_svm_devices. So both scopes interact with + // SVM allocations, on devices that support those, just the same. + return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES + || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES; + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + int error = 0; + // execute 3 (maximum) sub cases for each memory order + for (_subCaseId = 0; _subCaseId < 3; _subCaseId++) + { + EXECUTE_TEST( + error, + (CBasicTestMemOrderScope:: + ExecuteForEachParameterSet(deviceID, context, queue))); + } + return error; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (DeclaredInProgram() || UsedInFunction()) + return 0; // skip test - not applicable - no overloaded fence + // functions for different address spaces + if (MemoryOrder() == MEMORY_ORDER_EMPTY + || MemoryScope() + == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since + // opencl20-openclc-rev15 + return 0; // skip test - not applicable + if ((UseSVM() || gHost) && LocalMemory()) + return 0; // skip test - not applicable for SVM and local memory + struct TestDefinition acqTests[] = { + // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder} + { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, + { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, + { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE } + }; + struct TestDefinition relTests[] = { + { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE }, + { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL } + }; + struct TestDefinition arTests[] = { + { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }, + { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE }, + { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL } + }; + switch (MemoryOrder()) + { + case MEMORY_ORDER_ACQUIRE: + if (_subCaseId + >= sizeof(acqTests) / sizeof(struct TestDefinition)) + return 0; + _subCase = acqTests[_subCaseId]; + break; + case MEMORY_ORDER_RELEASE: + if (_subCaseId + >= sizeof(relTests) / sizeof(struct TestDefinition)) + return 0; + _subCase = relTests[_subCaseId]; + break; + case MEMORY_ORDER_ACQ_REL: + if (_subCaseId + >= sizeof(arTests) / sizeof(struct TestDefinition)) + return 0; + _subCase = arTests[_subCaseId]; + break; + case MEMORY_ORDER_SEQ_CST: + if (_subCaseId != 0) // one special case only + return 0; + break; + default: return 0; + } + LocalRefValues(LocalMemory()); + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramHeader(cl_uint maxNumDestItems) + { + std::string header; + if (gOldAPI) + { + if (MemoryScope() == MEMORY_SCOPE_EMPTY) + { + header += "#define atomic_work_item_fence(x,y) " + " mem_fence(x)\n"; + } + else + { + header += "#define atomic_work_item_fence(x,y,z) " + " mem_fence(x)\n"; + } + } + return header + + CBasicTestMemOrderScope:: + ProgramHeader(maxNumDestItems); + } + virtual std::string ProgramCore() + { + std::ostringstream naValues; + naValues << NumNonAtomicVariablesPerThread(); + std::string program, fenceType, nonAtomic; + if (LocalMemory()) + { + program = " size_t myId = get_local_id(0), hisId = " + "get_local_size(0)-1-myId;\n"; + fenceType = "CLK_LOCAL_MEM_FENCE"; + nonAtomic = "localValues"; + } + else + { + program = " size_t myId = tid, hisId = threadCount-1-tid;\n"; + fenceType = "CLK_GLOBAL_MEM_FENCE"; + nonAtomic = "oldValues"; + } + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + { + // All threads are divided into pairs. + // Each thread has its own atomic variable and performs the + // following actions: + // - increments its own variable + // - performs fence operation to propagate its value and to see + // value from other thread + // - reads value from other thread's variable + // - repeats the above steps when both values are the same (and less + // than 1000000) + // - stores the last value read from other thread (in additional + // variable) At the end of execution at least one thread should know + // the last value from other thread + program += std::string("") + " " + DataType().RegularTypeName() + + " myValue = 0, hisValue; \n" + " do {\n" + " myValue++;\n" + " atomic_store_explicit(&destMemory[myId], myValue, " + "memory_order_relaxed" + + MemoryScopeStr() + + ");\n" + " atomic_work_item_fence(" + + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + + "); \n" + " hisValue = atomic_load_explicit(&destMemory[hisId], " + "memory_order_relaxed" + + MemoryScopeStr() + + ");\n" + " } while(myValue == hisValue && myValue < 1000000);\n" + " " + + nonAtomic + "[myId] = hisValue; \n"; + } + else + { + // Each thread modifies one of its non-atomic variables, increments + // value of its atomic variable and reads values from another thread + // in typical synchronizes-with scenario with: + // - non-atomic variable (at index A) modification (value change + // from 0 to A) + // - release operation (additional fence or within atomic) + atomic + // variable modification (value A) + // - atomic variable read (value B) + acquire operation (additional + // fence or within atomic) + // - non-atomic variable (at index B) read (value C) + // Each thread verifies dependency between atomic and non-atomic + // value read from another thread The following condition must be + // true: B == C + program += std::string("") + " " + DataType().RegularTypeName() + + " myValue = 0, hisAtomicValue, hisValue; \n" + " do {\n" + " myValue++;\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + "+myValue] = myValue;\n"; + if (_subCase.op1IsFence) + program += std::string("") + " atomic_work_item_fence(" + + fenceType + ", " + + get_memory_order_type_name(_subCase.op1MemOrder) + + MemoryScopeStr() + + "); \n" + " atomic_store_explicit(&destMemory[myId], myValue, " + "memory_order_relaxed" + + MemoryScopeStr() + ");\n"; + else + program += std::string("") + + " atomic_store_explicit(&destMemory[myId], myValue, " + + get_memory_order_type_name(_subCase.op1MemOrder) + + MemoryScopeStr() + ");\n"; + if (_subCase.op2IsFence) + program += std::string("") + + " hisAtomicValue = " + "atomic_load_explicit(&destMemory[hisId], " + "memory_order_relaxed" + + MemoryScopeStr() + + ");\n" + " atomic_work_item_fence(" + + fenceType + ", " + + get_memory_order_type_name(_subCase.op2MemOrder) + + MemoryScopeStr() + "); \n"; + else + program += std::string("") + + " hisAtomicValue = " + "atomic_load_explicit(&destMemory[hisId], " + + get_memory_order_type_name(_subCase.op2MemOrder) + + MemoryScopeStr() + ");\n"; + program += " hisValue = " + nonAtomic + "[hisId*" + + naValues.str() + "+hisAtomicValue]; \n"; + if (LocalMemory()) + program += " hisId = (hisId+1)%get_local_size(0);\n"; + else + program += " hisId = (hisId+1)%threadCount;\n"; + program += " } while(hisAtomicValue == hisValue && myValue < " + + naValues.str() + + "-1);\n" + " if(hisAtomicValue != hisValue)\n" + " { // fail\n" + " atomic_store(&destMemory[myId], myValue-1);\n"; + if (LocalMemory()) + program += " hisId = " + "(hisId+get_local_size(0)-1)%get_local_size(0);\n"; + else + program += " hisId = (hisId+threadCount-1)%threadCount;\n"; + program += " if(myValue+1 < " + naValues.str() + + ")\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + "+myValue+1] = hisId;\n" + " if(myValue+2 < " + + naValues.str() + + ")\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + "+myValue+2] = hisAtomicValue;\n" + " if(myValue+3 < " + + naValues.str() + + ")\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + "+myValue+3] = hisValue;\n"; + if (gDebug) + { + program += " printf(\"WI %d: atomic value (%d) at index %d " + "is different than non-atomic value (%d)\\n\", tid, " + "hisAtomicValue, hisId, hisValue);\n"; + } + program += " }\n"; + } + return program; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + size_t myId = tid, hisId = threadCount - 1 - tid; + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + { + HostDataType myValue = 0, hisValue; + // CPU thread typically starts faster - wait for GPU thread + myValue++; + host_atomic_store( + &destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST); + while (host_atomic_load( + &destMemory[hisId], MEMORY_ORDER_SEQ_CST) + == 0) + ; + do + { + myValue++; + host_atomic_store( + &destMemory[myId], myValue, MEMORY_ORDER_RELAXED); + host_atomic_thread_fence(MemoryOrder()); + hisValue = host_atomic_load( + &destMemory[hisId], MEMORY_ORDER_RELAXED); + } while (myValue == hisValue && hisValue < 1000000); + oldValues[tid] = hisValue; + } + else + { + HostDataType myValue = 0, hisAtomicValue, hisValue; + do + { + myValue++; + oldValues[myId * NumNonAtomicVariablesPerThread() + myValue] = + myValue; + if (_subCase.op1IsFence) + { + host_atomic_thread_fence(_subCase.op1MemOrder); + host_atomic_store( + &destMemory[myId], myValue, MEMORY_ORDER_RELAXED); + } + else + host_atomic_store( + &destMemory[myId], myValue, _subCase.op1MemOrder); + if (_subCase.op2IsFence) + { + hisAtomicValue = + host_atomic_load( + &destMemory[hisId], MEMORY_ORDER_RELAXED); + host_atomic_thread_fence(_subCase.op2MemOrder); + } + else + hisAtomicValue = + host_atomic_load( + &destMemory[hisId], _subCase.op2MemOrder); + hisValue = oldValues[hisId * NumNonAtomicVariablesPerThread() + + hisAtomicValue]; + hisId = (hisId + 1) % threadCount; + } while (hisAtomicValue == hisValue + && myValue + < (HostDataType)NumNonAtomicVariablesPerThread() - 1); + if (hisAtomicValue != hisValue) + { // fail + host_atomic_store( + &destMemory[myId], myValue - 1, MEMORY_ORDER_SEQ_CST); + if (gDebug) + { + hisId = (hisId + threadCount - 1) % threadCount; + printf("WI %d: atomic value (%d) at index %d is different " + "than non-atomic value (%d)\n", + tid, hisAtomicValue, hisId, hisValue); + } + } + } + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount * NumNonAtomicVariablesPerThread(); + i++) + startRefValues[i] = 0; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount; + for (cl_uint workOffset = 0; workOffset < threadCount; + workOffset += workSize) + { + if (workOffset + workSize > threadCount) + // last workgroup (host threads) + workSize = threadCount - workOffset; + for (cl_uint i = 0; i < workSize && workOffset + i < threadCount; + i++) + { + HostAtomicType myValue = finalValues[workOffset + i]; + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + { + HostDataType hisValue = refValues[workOffset + i]; + if (myValue == hisValue) + { + // a draw - both threads should reach final value + // 1000000 + if (myValue != 1000000) + { + log_error("ERROR: Invalid reference value #%u (%d " + "instead of 1000000)\n", + workOffset + i, myValue); + correct = false; + return true; + } + } + else + { + // slower thread (in total order of seq_cst operations) + // must know last value written by faster thread + HostAtomicType hisRealValue = + finalValues[workOffset + workSize - 1 - i]; + HostDataType myValueReadByHim = + refValues[workOffset + workSize - 1 - i]; + + // who is the winner? - thread with lower private + // counter value + if (myValue == hisRealValue) // forbidden result - fence + // doesn't work + { + log_error("ERROR: Atomic counter values #%u and " + "#%u are the same (%u)\n", + workOffset + i, + workOffset + workSize - 1 - i, myValue); + log_error( + "ERROR: Both threads have outdated values read " + "from another thread (%u and %u)\n", + hisValue, myValueReadByHim); + correct = false; + return true; + } + if (myValue > hisRealValue) // I'm slower + { + if (hisRealValue != hisValue) + { + log_error("ERROR: Invalid reference value #%u " + "(%d instead of %d)\n", + workOffset + i, hisValue, + hisRealValue); + log_error( + "ERROR: Slower thread #%u should know " + "value written by faster thread #%u\n", + workOffset + i, + workOffset + workSize - 1 - i); + correct = false; + return true; + } + } + else // I'm faster + { + if (myValueReadByHim != myValue) + { + log_error("ERROR: Invalid reference value #%u " + "(%d instead of %d)\n", + workOffset + workSize - 1 - i, + myValueReadByHim, myValue); + log_error( + "ERROR: Slower thread #%u should know " + "value written by faster thread #%u\n", + workOffset + workSize - 1 - i, + workOffset + i); + correct = false; + return true; + } + } + } + } + else + { + if (myValue != NumNonAtomicVariablesPerThread() - 1) + { + log_error("ERROR: Invalid atomic value #%u (%d instead " + "of %d)\n", + workOffset + i, myValue, + NumNonAtomicVariablesPerThread() - 1); + log_error("ERROR: Thread #%u observed invalid values " + "in other thread's variables\n", + workOffset + i, myValue); + correct = false; + return true; + } + } + } + } + return true; + } + +private: + int _subCaseId; + struct TestDefinition _subCase; +}; + +int test_atomic_fence_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) +{ + int error = 0; + CBasicTestFence test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFence test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFence test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fence(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fence_generic(deviceID, context, queue, num_elements, + false); +} + +int test_svm_atomic_fence(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_atomic_fence_generic(deviceID, context, queue, num_elements, + true); }