Specify memory scope and memory order for the atomic operations in generic_address_space generic_atomics_variant generic_atomics_invariant (#2550)

Use the explicit version of the atomic_load/store and atomic_fetch_add
with memory order relaxed and memory scope workgroup to allow devices
that only support the minimum CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES which
are (CL_DEVICE_ATOMIC_ORDER_RELAXED | CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP)
to run the tests.

The test should only require the relaxed ordering and memory scope
workgroup anyway.

From the specification:

"The non-explicit atomic_store function requires support for OpenCL C
2.0, or OpenCL C 3.0 or newer and both the
__opencl_c_atomic_order_seq_cst and __opencl_c_atomic_scope_device
features"

"The non-explicit atomic_load function requires support for OpenCL C 2.0
or OpenCL C 3.0 or newer and both the __opencl_c_atomic_order_seq_cst
and __opencl_c_atomic_scope_device features."

"The non-explicit atomic_fetch_key functions require support for OpenCL
C 2.0, or OpenCL C 3.0 or newer and both the
__opencl_c_atomic_order_seq_cst and __opencl_c_atomic_scope_device
features."
This commit is contained in:
Ahmed
2025-10-28 16:04:51 +00:00
committed by GitHub
parent d63cc8ce5d
commit c6e0f416e7

View File

@@ -35,7 +35,7 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
int wgid = get_group_id(0); int wgid = get_group_id(0);
int wgsize = get_local_size(0); int wgsize = get_local_size(0);
if (tid == 0) atomic_store(localPtr, 0); if (tid == 0) atomic_store_explicit(localPtr, 0, memory_order_relaxed, memory_scope_work_group);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
@@ -47,12 +47,12 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
if ((wgid % 2) == 0) if ((wgid % 2) == 0)
ptr = localPtr; ptr = localPtr;
int inc = atomic_fetch_add(ptr, 1); int inc = atomic_fetch_add_explicit(ptr, 1, memory_order_relaxed, memory_scope_work_group);
// In the cases where the local memory ptr was used, // In the cases where the local memory ptr was used,
// save off the final value. // save off the final value.
if ((wgid % 2) == 0 && inc == (wgsize-1)) if ((wgid % 2) == 0 && inc == (wgsize-1))
atomic_store(&globalPtr[wgid], inc); atomic_store_explicit(&globalPtr[wgid], inc, memory_order_relaxed, memory_scope_work_group);
} }
)OpenCLC"; )OpenCLC";
@@ -67,7 +67,7 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
int wgid = get_group_id(0); int wgid = get_group_id(0);
int wgsize = get_local_size(0); int wgsize = get_local_size(0);
if (tid == 0) atomic_store(localPtr, 0); if (tid == 0) atomic_store_explicit(localPtr, 0, memory_order_relaxed, memory_scope_work_group);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
@@ -79,14 +79,17 @@ kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr)
if ((tid % 2) == 0) if ((tid % 2) == 0)
ptr = localPtr; ptr = localPtr;
atomic_fetch_add(ptr, 1); atomic_fetch_add_explicit(ptr, 1, memory_order_relaxed, memory_scope_work_group);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
// In the cases where the local memory ptr was used, // In the cases where the local memory ptr was used,
// save off the final value. // save off the final value.
if (tid == 0) if (tid == 0)
atomic_store(&globalPtr[(wgid * 2) + 1], atomic_load(localPtr)); atomic_store_explicit(&globalPtr[(wgid * 2) + 1],
atomic_load_explicit(localPtr, memory_order_relaxed, memory_scope_work_group),
memory_order_relaxed,
memory_scope_work_group);
} }
)OpenCLC"; )OpenCLC";
} }