Files
OpenCL-CTS/test_conformance/SVM/test_fine_grain_sync_buffers.cpp
Ben Ashbaugh d99b302f90 switch SVM tests to the new test registration framework (#2168)
Switches the SVM tests to the new test registration framework.

The first commit is the best to review and contains the actual changes.
The second commit purely has formatting changes.

Note that several of these changes were a bit more than mechanical
because many of the SVM tests create a new context vs. using the context
provided by the harness and passed to each test function. The previous
code named the context provided by the harness differently, and hence
could use the name "context" in each test function, but with the new
test registration framework this is no longer possible. Instead, I am
creating the new context using the name "contextWrapper" and then
assigning it to the "context" passed to the test function, which seems
like the best way to avoid using the wrong context unintentionally. I am
open to suggestions to do this differently.

I have verified that the same calls are made before and after these
changes, and specifically that there are no context leaks.
2024-12-03 14:51:23 -08:00

132 lines
4.9 KiB
C++

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "common.h"
// OpenCL C source for the "find_targets" kernel, stored as a single
// concatenated string (one array element).
//
// Kernel arguments:
//   image            - input values, one per work-item
//   target           - value to search for
//   numTargetsFound  - atomic counter of matches found so far
//   targetLocations  - atomic array that receives the index of each match
//
// Each work-item compares image[get_global_id(0)] against target. On a match
// it reserves a slot by atomically incrementing numTargetsFound and then
// atomically stores its own global id into targetLocations at that slot.
// NOTE: the kernel does not check the reserved slot index against the size
// of targetLocations.
const char *find_targets_kernel[] = {
"__kernel void find_targets(__global uint* image, uint target, volatile "
"__global atomic_uint *numTargetsFound, volatile __global atomic_uint "
"*targetLocations)\n"
"{\n"
" size_t i = get_global_id(0);\n"
" uint index;\n"
" if(image[i] == target) {\n"
// Reserve the next free slot (relaxed ordering, device scope).
" index = atomic_fetch_add_explicit(numTargetsFound, 1u, "
"memory_order_relaxed, memory_scope_device); \n"
// Publish this work-item's index into the reserved slot (relaxed ordering,
// all-SVM-devices scope).
" atomic_exchange_explicit(&targetLocations[index], i, "
"memory_order_relaxed, memory_scope_all_svm_devices); \n"
" }\n"
"}\n"
};
// Placeholder for the host-side follow-up work on a target that was found.
// The test calls this once for every target location it observes; the body
// is deliberately a no-op.
//
// location: the value read from the target-location slot.
void spawnAnalysisTask(int location)
{
    (void)location; // intentionally unused: nothing is analyzed here
}
// Number of slots in the target-location array that the host allocates,
// initializes to -1 and polls.
#define MAX_TARGETS 1024
// Goal: demonstrate the use of SVM atomics for fine-grained synchronization
// between the device and the host.
//
// Concept: a device kernel searches an input image for entries that match a
// target value. The device notifies the host as soon as it finds a target
// (via an atomic operation that works across host and devices), so the host
// can spawn a task to analyze that target while the device keeps searching.
//
// Returns 0 when the test passes, or when no device supports the requested
// SVM capabilities (the test is then counted as passing without running).
// Returns a non-zero value on any failure.
REGISTER_TEST(svm_fine_grain_sync_buffers)
{
    // Frees an SVM allocation on scope exit, so that every early return
    // (including the ones hidden inside test_error) releases the memory.
    struct ScopedSVM
    {
        cl_context ctx;
        void *ptr;

        ScopedSVM(cl_context c, void *p): ctx(c), ptr(p) {}
        ~ScopedSVM()
        {
            if (ptr != NULL) clSVMFree(ctx, ptr);
        }

        ScopedSVM(const ScopedSVM &) = delete;
        ScopedSVM &operator=(const ScopedSVM &) = delete;
    };

    clContextWrapper contextWrapper = NULL;
    clProgramWrapper program = NULL;
    cl_uint num_devices = 0;
    cl_int err = CL_SUCCESS;
    clCommandQueueWrapper queues[MAXQ];

    err = create_cl_objects(deviceID, &find_targets_kernel[0], &contextWrapper,
                            &program, &queues[0], &num_devices,
                            CL_DEVICE_SVM_FINE_GRAIN_BUFFER
                                | CL_DEVICE_SVM_ATOMICS);
    // From here on use the newly created context, not the harness one.
    context = contextWrapper;
    if (err == 1)
        return 0; // no devices capable of requested SVM level, so don't execute
                  // but count test as passing.
    if (err < 0) return -1; // fail test.

    clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
    test_error(err, "clCreateKernel failed");

    size_t num_pixels = num_elements;

    cl_uint *pInputImage = (cl_uint *)clSVMAlloc(
        context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
        sizeof(cl_uint) * num_pixels, 0);
    ScopedSVM inputImageGuard(context, pInputImage);

    cl_uint *pNumTargetsFound = (cl_uint *)clSVMAlloc(
        context,
        CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
        sizeof(cl_uint), 0);
    ScopedSVM numTargetsFoundGuard(context, pNumTargetsFound);

    cl_int *pTargetLocations = (cl_int *)clSVMAlloc(
        context,
        CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
        sizeof(cl_int) * MAX_TARGETS, 0);
    ScopedSVM targetLocationsGuard(context, pTargetLocations);

    // clSVMAlloc reports failure by returning NULL; never dereference that.
    if (pInputImage == NULL || pNumTargetsFound == NULL
        || pTargetLocations == NULL)
    {
        log_error("clSVMAlloc failed\n");
        return -1;
    }

    cl_uint targetDescriptor = 777;
    *pNumTargetsFound = 0;
    // -1 marks a slot that the device has not written yet.
    for (size_t slot = 0; slot < MAX_TARGETS; slot++)
        pTargetLocations[slot] = -1;
    for (size_t pixel = 0; pixel < num_pixels; pixel++) pInputImage[pixel] = 0;

    // Plant the targets. Positions outside the image are skipped and
    // coinciding positions are counted once, so the expected count stays
    // correct for small images (it is 3 whenever num_pixels >= 5).
    const size_t plantAt[] = { 0, 3, num_pixels - 1 };
    cl_uint expectedTargets = 0;
    for (size_t position : plantAt)
    {
        if (position < num_pixels && pInputImage[position] != targetDescriptor)
        {
            pInputImage[position] = targetDescriptor;
            expectedTargets++;
        }
    }

    err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
    err |=
        clSetKernelArg(kernel, 1, sizeof(cl_uint), (void *)&targetDescriptor);
    err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
    err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
    test_error(err, "clSetKernelArg failed");

    // Wrapper releases the event on every exit path.
    clEventWrapper done;
    err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL,
                                 0, NULL, &done);
    test_error(err, "clEnqueueNDRangeKernel failed");

    err = clFlush(queues[0]);
    test_error(err, "clFlush failed");

    // Check for new targets while the kernel runs; if one is found, spawn a
    // task to analyze it. Slots are consumed in order, starting at 0.
    cl_uint found = 0;
    cl_int status = CL_QUEUED;
    cl_int pollErr = CL_SUCCESS;
    for (;;)
    {
        pollErr = clGetEventInfo(done, CL_EVENT_COMMAND_EXECUTION_STATUS,
                                 sizeof(cl_int), &status, NULL);
        // A negative execution status means the command terminated
        // abnormally and will never reach CL_COMPLETE: stop polling.
        if (pollErr != CL_SUCCESS || status < 0) break;

        if (AtomicLoadExplicit(&pTargetLocations[found], memory_order_relaxed)
            != -1) // -1 indicates slot not used yet.
        {
            spawnAnalysisTask(pTargetLocations[found]);
            found++;
            // Never index past the end of the locations array.
            if (found >= MAX_TARGETS) break;
        }

        // Done once the kernel has completed and no unread slot remains.
        if (status == CL_COMPLETE
            && AtomicLoadExplicit(&pTargetLocations[found],
                                  memory_order_relaxed)
                == -1)
            break;
    }

    // Make sure the kernel no longer touches the SVM buffers before the
    // guards free them, whichever way the loop was left.
    cl_int finishErr = clFinish(queues[0]);

    test_error(pollErr, "clGetEventInfo failed");
    if (status < 0)
    {
        test_error(status, "find_targets kernel terminated abnormally");
    }
    test_error(finishErr, "clFinish failed");

    if (found != expectedTargets)
    {
        log_error("Host observed %u targets, expected %u\n", found,
                  expectedTargets);
        return -1;
    }
    return 0;
}