From 5d6ca3e9d1374ef32644847c1eefeb503a27b732 Mon Sep 17 00:00:00 2001 From: Jeremy Kemp Date: Thu, 28 Apr 2022 23:34:08 +0100 Subject: [PATCH] Change memory order and scope for atomics that gate final results being stored. (#1377) * Change memory order and scope for atomics that gate final results being stored. memory_order_acq_rel with memory_scope_device is now used to guarantee that the correct memory consistency is observed before final results are stored. Previously it was possible for kernels to be generated that all used relaxed memory ordering, which could lead to false-positive failures. Fixes #1370 * Disable atomics tests with global, in-program atomics. If the device does not support `memory_order_relaxed` or `memory_scope_device`, disable atomics tests that declare their atomics in-program with global memory. There is now an implicit requirement to support `memory_order_relaxed` and `memory_scope_device` for these tests. * Fix misplaced parentheses. * Change memory scope for atomic fetch and load calls in kernel Change the memory scope from memory_scope_work_group to memory_scope_device so the ordering applies across all work items Co-authored-by: Sreelakshmi Haridas --- test_conformance/c11_atomics/common.h | 53 +++++++++++++++++++++------ 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index d30259f0..42fe32b6 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -1031,20 +1031,11 @@ CBasicTest::KernelCode(cl_uint maxNumDestItems) } code += "\n"; } - if (LocalMemory() || DeclaredInProgram()) + if (LocalMemory()) { code += " // Copy final values to host reachable buffer\n"; - if (LocalMemory()) - code += " barrier(CLK_LOCAL_MEM_FENCE);\n" - " if(get_local_id(0) == 0) // first thread in workgroup\n"; - else - // global atomics declared in program scope - code += R"( - if(atomic_fetch_add_explicit(&finishedThreads, 1u, - memory_order_relaxed, - memory_scope_work_group) - == get_global_size(0)-1) // last finished thread - )"; + code += " barrier(CLK_LOCAL_MEM_FENCE);\n" + " if(get_local_id(0) == 0) // first thread in workgroup\n"; code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; " "dstItemIdx++)\n"; if (aTypeName == "atomic_flag") @@ -1064,6 +1055,35 @@ CBasicTest::KernelCode(cl_uint maxNumDestItems) memory_scope_work_group);)"; } } + else if (DeclaredInProgram()) + { + // global atomics declared in program scope + code += " // Copy final values to host reachable buffer\n"; + code += R"( + if(atomic_fetch_add_explicit(&finishedThreads, 1u, + memory_order_acq_rel, + memory_scope_device) + == get_global_size(0)-1) // last finished thread + )"; + code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; " + "dstItemIdx++)\n"; + if (aTypeName == "atomic_flag") + { + code += R"( + finalDest[dstItemIdx] = + atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, + memory_order_relaxed, + memory_scope_device);)"; + } + else + { + code += R"( + finalDest[dstItemIdx] = + atomic_load_explicit(destMemory+dstItemIdx, + memory_order_relaxed, + memory_scope_device);)"; + } + } code += "}\n" "\n"; return code; @@ -1108,6 +1128,15 @@ int CBasicTest::ExecuteSingleTest( log_info("\t\tTest disabled\n"); return 0; } + if (!LocalMemory() && DeclaredInProgram()) + { + if (((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0) + || ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0)) + { + log_info("\t\tTest disabled\n"); + return 0; + } + } // set up work sizes based on device capabilities and test configuration error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE,