From 9178524d028b632b548383a9e4116b46305c765e Mon Sep 17 00:00:00 2001 From: Jack Frankland <30410009+FranklandJack@users.noreply.github.com> Date: Tue, 22 Sep 2020 18:08:32 +0200 Subject: [PATCH] Change Behaviour of C11 Atomic Tests for OpenCL-3.0 (#944) * Change setup code in `KernelCode()` to use `_explicit` builtin variants that are common to both OpenCL-2.X and OpenCL-3.0. * Only test optional supported builtin variants (`_explicit` signature memory_order/scope) for OpenCL-3.0. * Disable program scope global variable and generic address space tests for a OpenCL-3.0 driver which does not optionally support these features. --- test_common/harness/errorHelpers.h | 10 +- test_conformance/c11_atomics/common.cpp | 74 ++++++++++++++ test_conformance/c11_atomics/common.h | 98 +++++++++++-------- test_conformance/c11_atomics/main.cpp | 26 +++++ test_conformance/c11_atomics/test_atomics.cpp | 19 +++- 5 files changed, 179 insertions(+), 48 deletions(-) diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h index 3238a956..0b083dd5 100644 --- a/test_common/harness/errorHelpers.h +++ b/test_common/harness/errorHelpers.h @@ -62,7 +62,15 @@ return TEST_FAIL; \ } #define test_error(errCode,msg) test_error_ret(errCode,msg,errCode) -#define test_error_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } } +#define test_error_ret(errCode, msg, retValue) \ + { \ + auto errCodeResult = errCode; \ + if (errCodeResult != CL_SUCCESS) \ + { \ + print_error(errCodeResult, msg); \ + return retValue; \ + } \ + } #define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ ); #define test_missing_feature(errCode, msg) test_missing_feature_ret(errCode, msg, errCode) diff --git a/test_conformance/c11_atomics/common.cpp b/test_conformance/c11_atomics/common.cpp index bebad895..c0cd265a 100644 --- a/test_conformance/c11_atomics/common.cpp +++ b/test_conformance/c11_atomics/common.cpp @@ -206,3 +206,77 @@ template<> cl_long AtomicTypeExtendedInfo::MaxValue() {return CL_LONG_M template<> cl_ulong AtomicTypeExtendedInfo::MaxValue() {return CL_ULONG_MAX;} template<> cl_float AtomicTypeExtendedInfo::MaxValue() {return CL_FLT_MAX;} template<> cl_double AtomicTypeExtendedInfo::MaxValue() {return CL_DBL_MAX;} + +cl_int getSupportedMemoryOrdersAndScopes( + cl_device_id device, std::vector &memoryOrders, + std::vector &memoryScopes) +{ + // The CL_DEVICE_ATOMIC_MEMORY_CAPABILITES is missing before 3.0, but since + // all orderings and scopes are required for 2.X devices and this test is + // skipped before 2.0 we can safely return all orderings and scopes if the + // device is 2.X. Query device for the supported orders. + if (get_device_cl_version(device) < Version{ 3, 0 }) + { + memoryOrders.push_back(MEMORY_ORDER_EMPTY); + memoryOrders.push_back(MEMORY_ORDER_RELAXED); + memoryOrders.push_back(MEMORY_ORDER_ACQUIRE); + memoryOrders.push_back(MEMORY_ORDER_RELEASE); + memoryOrders.push_back(MEMORY_ORDER_ACQ_REL); + memoryOrders.push_back(MEMORY_ORDER_SEQ_CST); + memoryScopes.push_back(MEMORY_SCOPE_EMPTY); + memoryScopes.push_back(MEMORY_SCOPE_WORK_GROUP); + memoryScopes.push_back(MEMORY_SCOPE_DEVICE); + memoryScopes.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES); + return CL_SUCCESS; + } + + // For a 3.0 device we can query the supported orderings and scopes + // directly. + cl_device_atomic_capabilities atomic_capabilities{}; + test_error( + clGetDeviceInfo(device, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + sizeof(atomic_capabilities), &atomic_capabilities, + nullptr), + "clGetDeviceInfo failed for CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES\n"); + + // Provided we succeeded, we can start filling the vectors. + if (atomic_capabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) + { + memoryOrders.push_back(MEMORY_ORDER_RELAXED); + } + + if (atomic_capabilities & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) + { + memoryOrders.push_back(MEMORY_ORDER_ACQUIRE); + memoryOrders.push_back(MEMORY_ORDER_RELEASE); + memoryOrders.push_back(MEMORY_ORDER_ACQ_REL); + } + + if (atomic_capabilities & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) + { + // The functions not ending in explicit have the same semantics as the + // corresponding explicit function with memory_order_seq_cst for the + // memory_order argument. + memoryOrders.push_back(MEMORY_ORDER_EMPTY); + memoryOrders.push_back(MEMORY_ORDER_SEQ_CST); + } + + if (atomic_capabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) + { + memoryScopes.push_back(MEMORY_SCOPE_WORK_GROUP); + } + + if (atomic_capabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) + { + // The functions that do not have memory_scope argument have the same + // semantics as the corresponding functions with the memory_scope + // argument set to memory_scope_device. + memoryScopes.push_back(MEMORY_SCOPE_EMPTY); + memoryScopes.push_back(MEMORY_SCOPE_DEVICE); + } + if (atomic_capabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) + { + memoryScopes.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES); + } + return CL_SUCCESS; +} diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index a69feb06..360ab45e 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -71,6 +71,10 @@ extern cl_device_atomic_capabilities gAtomicMemCap, extern const char *get_memory_order_type_name(TExplicitMemoryOrderType orderType); extern const char *get_memory_scope_type_name(TExplicitMemoryScopeType scopeType); +extern cl_int getSupportedMemoryOrdersAndScopes( + cl_device_id device, std::vector &memoryOrders, + std::vector &memoryScopes); + class AtomicTypeInfo { public: @@ -487,16 +491,11 @@ public: std::vector memoryScope; int error = 0; - memoryOrder.push_back(MEMORY_ORDER_EMPTY); - memoryOrder.push_back(MEMORY_ORDER_RELAXED); - memoryOrder.push_back(MEMORY_ORDER_ACQUIRE); - memoryOrder.push_back(MEMORY_ORDER_RELEASE); - memoryOrder.push_back(MEMORY_ORDER_ACQ_REL); - memoryOrder.push_back(MEMORY_ORDER_SEQ_CST); - memoryScope.push_back(MEMORY_SCOPE_EMPTY); - memoryScope.push_back(MEMORY_SCOPE_WORK_GROUP); - memoryScope.push_back(MEMORY_SCOPE_DEVICE); - memoryScope.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES); + // For OpenCL-3.0 and later some orderings and scopes are optional, so here + // we query for the supported ones. + test_error_ret( + getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope), + "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); for(unsigned oi = 0; oi < memoryOrder.size(); oi++) { @@ -582,16 +581,11 @@ public: std::vector memoryScope; int error = 0; - memoryOrder.push_back(MEMORY_ORDER_EMPTY); - memoryOrder.push_back(MEMORY_ORDER_RELAXED); - memoryOrder.push_back(MEMORY_ORDER_ACQUIRE); - memoryOrder.push_back(MEMORY_ORDER_RELEASE); - memoryOrder.push_back(MEMORY_ORDER_ACQ_REL); - memoryOrder.push_back(MEMORY_ORDER_SEQ_CST); - memoryScope.push_back(MEMORY_SCOPE_EMPTY); - memoryScope.push_back(MEMORY_SCOPE_WORK_GROUP); - memoryScope.push_back(MEMORY_SCOPE_DEVICE); - memoryScope.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES); + // For OpenCL-3.0 and later some orderings and scopes are optional, so here + // we query for the supported ones. + test_error_ret( + getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope), + "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); for(unsigned oi = 0; oi < memoryOrder.size(); oi++) { @@ -800,23 +794,35 @@ std::string CBasicTest::KernelCode(cl_uint maxNumD "\n"; if(LocalMemory()) { - code += - " // initialize atomics not reachable from host (first thread is doing this, other threads are waiting on barrier)\n" - " if(get_local_id(0) == 0)\n" - " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n" - " {\n"; - if(aTypeName == "atomic_flag") - { - code += - " if(finalDest[dstItemIdx])\n" - " atomic_flag_test_and_set(destMemory+dstItemIdx);\n" - " else\n" - " atomic_flag_clear(destMemory+dstItemIdx);\n"; - } + // memory_order_relaxed is sufficient for these initialization operations + // as the barrier below will act as a fence, providing an order to the + // operations. memory_scope_work_group is sufficient as local memory is + // only visible within the work-group. + code += R"( + // initialize atomics not reachable from host (first thread + // is doing this, other threads are waiting on barrier) + if(get_local_id(0) == 0) + for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++) + {)"; + if (aTypeName == "atomic_flag") + { + code += R"( + if(finalDest[dstItemIdx]) + atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, + memory_order_relaxed, + memory_scope_work_group); + else + atomic_flag_clear_explicit(destMemory+dstItemIdx, + memory_order_relaxed, + memory_scope_work_group);)"; + } else { - code += - " atomic_store(destMemory+dstItemIdx, finalDest[dstItemIdx]);\n"; + code += R"( + atomic_store_explicit(destMemory+dstItemIdx, + finalDest[dstItemIdx], + memory_order_relaxed, + memory_scope_work_group);)"; } code += " }\n" @@ -873,20 +879,28 @@ std::string CBasicTest::KernelCode(cl_uint maxNumD " if(get_local_id(0) == 0) // first thread in workgroup\n"; else // global atomics declared in program scope - code += - " if(atomic_fetch_add(&finishedThreads, 1) == get_global_size(0)-1)\n" - " // last finished thread\n"; + code += R"( + if(atomic_fetch_add_explicit(&finishedThreads, 1, + memory_order_relaxed, + memory_scope_work_group) + == get_global_size(0)-1) // last finished thread)"; code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n"; if(aTypeName == "atomic_flag") { - code += - " finalDest[dstItemIdx] = atomic_flag_test_and_set(destMemory+dstItemIdx);\n"; + code += R"( + finalDest[dstItemIdx] = + atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, + memory_order_relaxed, + memory_scope_work_group);)"; } else { - code += - " finalDest[dstItemIdx] = atomic_load(destMemory+dstItemIdx);\n"; + code += R"( + finalDest[dstItemIdx] = + atomic_load_explicit(destMemory+dstItemIdx, + memory_order_relaxed, + memory_scope_work_group);)"; } } code += "}\n" diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp index 41b253a0..3132c40d 100644 --- a/test_conformance/c11_atomics/main.cpp +++ b/test_conformance/c11_atomics/main.cpp @@ -159,6 +159,32 @@ test_status InitCL(cl_device_id device) { "Minimum atomic memory capabilities unsupported by device\n"); return TEST_FAIL; } + + // Disable program scope global variable testing in the case that it is + // not supported on an OpenCL-3.0 driver. + size_t max_global_variable_size{}; + test_error_ret(clGetDeviceInfo(device, + CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, + sizeof(max_global_variable_size), + &max_global_variable_size, nullptr), + "Unable to get max global variable size\n", TEST_FAIL); + if (0 == max_global_variable_size) + { + gNoGlobalVariables = true; + } + + // Disable generic address space testing in the case that it is not + // supported on an OpenCL-3.0 driver. + cl_bool generic_address_space_support{}; + test_error_ret( + clGetDeviceInfo(device, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT, + sizeof(generic_address_space_support), + &generic_address_space_support, nullptr), + "Unable to get generic address space support\n", TEST_FAIL); + if (CL_FALSE == generic_address_space_support) + { + gNoGenericAddressSpace = true; + } } else { diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index c1e153be..6aff4214 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -206,6 +206,7 @@ public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryScope; using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::MemoryScopeStr; using CBasicTest::CheckCapabilities; CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { @@ -228,11 +229,19 @@ public: } virtual std::string ProgramCore() { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " atomic_store(&destMemory[tid], tid);\n" - " oldValues[tid] = atomic_load"+postfix+"(&destMemory[tid]"+memoryOrderScope+");\n"; + // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store + // should be MEMORY_ORDER_RELEASE + std::string memoryOrderScopeLoad = MemoryOrderScopeStr(); + std::string memoryOrderScopeStore = + (MemoryOrder() == MEMORY_ORDER_ACQUIRE) + ? (", memory_order_release" + MemoryScopeStr()) + : memoryOrderScopeLoad; + std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit"); + return " atomic_store" + postfix + "(&destMemory[tid], tid" + + memoryOrderScopeStore + + ");\n" + " oldValues[tid] = atomic_load" + + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n"; } virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) {