mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Change memory order and scope for atomics that gate final results being stored. (#1377)
* Change memory order and scope for atomics that gate final results being stored.

  memory_order_acq_rel with memory_scope_device is now used to guarantee that the correct memory consistency is observed before final results are stored. Previously it was possible for kernels to be generated that all used relaxed memory ordering, which could lead to false-positive failures.

  Fixes #1370

* Disable atomics tests with global, in-program atomics.

  If the device does not support `memory_order_relaxed` or `memory_scope_device`, disable atomics tests that declare their atomics in-program with global memory. There is now an implicit requirement to support `memory_order_relaxed` and `memory_scope_device` for these tests.

* Fix misplaced parentheses.

* Change memory scope for atomic fetch and load calls in kernel

  Change the memory scope from memory_scope_work_group to memory_scope_device so the ordering applies across all work items.

Co-authored-by: Sreelakshmi Haridas <sharidas@quicinc.com>
This commit is contained in:
@@ -1031,20 +1031,11 @@ CBasicTest<HostAtomicType, HostDataType>::KernelCode(cl_uint maxNumDestItems)
|
||||
}
|
||||
code += "\n";
|
||||
}
|
||||
if (LocalMemory() || DeclaredInProgram())
|
||||
if (LocalMemory())
|
||||
{
|
||||
code += " // Copy final values to host reachable buffer\n";
|
||||
if (LocalMemory())
|
||||
code += " barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
" if(get_local_id(0) == 0) // first thread in workgroup\n";
|
||||
else
|
||||
// global atomics declared in program scope
|
||||
code += R"(
|
||||
if(atomic_fetch_add_explicit(&finishedThreads, 1u,
|
||||
memory_order_relaxed,
|
||||
memory_scope_work_group)
|
||||
== get_global_size(0)-1) // last finished thread
|
||||
)";
|
||||
code += " barrier(CLK_LOCAL_MEM_FENCE);\n"
|
||||
" if(get_local_id(0) == 0) // first thread in workgroup\n";
|
||||
code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; "
|
||||
"dstItemIdx++)\n";
|
||||
if (aTypeName == "atomic_flag")
|
||||
@@ -1064,6 +1055,35 @@ CBasicTest<HostAtomicType, HostDataType>::KernelCode(cl_uint maxNumDestItems)
|
||||
memory_scope_work_group);)";
|
||||
}
|
||||
}
|
||||
else if (DeclaredInProgram())
|
||||
{
|
||||
// global atomics declared in program scope
|
||||
code += " // Copy final values to host reachable buffer\n";
|
||||
code += R"(
|
||||
if(atomic_fetch_add_explicit(&finishedThreads, 1u,
|
||||
memory_order_acq_rel,
|
||||
memory_scope_device)
|
||||
== get_global_size(0)-1) // last finished thread
|
||||
)";
|
||||
code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; "
|
||||
"dstItemIdx++)\n";
|
||||
if (aTypeName == "atomic_flag")
|
||||
{
|
||||
code += R"(
|
||||
finalDest[dstItemIdx] =
|
||||
atomic_flag_test_and_set_explicit(destMemory+dstItemIdx,
|
||||
memory_order_relaxed,
|
||||
memory_scope_device);)";
|
||||
}
|
||||
else
|
||||
{
|
||||
code += R"(
|
||||
finalDest[dstItemIdx] =
|
||||
atomic_load_explicit(destMemory+dstItemIdx,
|
||||
memory_order_relaxed,
|
||||
memory_scope_device);)";
|
||||
}
|
||||
}
|
||||
code += "}\n"
|
||||
"\n";
|
||||
return code;
|
||||
@@ -1108,6 +1128,15 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
|
||||
log_info("\t\tTest disabled\n");
|
||||
return 0;
|
||||
}
|
||||
if (!LocalMemory() && DeclaredInProgram())
|
||||
{
|
||||
if (((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0)
|
||||
|| ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0))
|
||||
{
|
||||
log_info("\t\tTest disabled\n");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// set up work sizes based on device capabilities and test configuration
|
||||
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE,
|
||||
|
||||
Reference in New Issue
Block a user