switch SVM tests to the new test registration framework (#2168)

Switches the SVM tests to the new test registration framework.

The first commit contains the actual changes and is the one to review.
The second commit contains only formatting changes.

Note that several of these changes were more than mechanical, because
many of the SVM tests create a new context instead of using the context
provided by the harness and passed to each test function. The previous
code gave the harness-provided context a different name, so each test
function was free to use the name "context" for its own context. With
the new test registration framework this is no longer possible, because
the harness-provided parameter is itself named "context". Instead, I am
creating the new context under the name "contextWrapper" and then
assigning it to the "context" passed to the test function, which seems
like the best way to avoid using the wrong context unintentionally. I am
open to suggestions to do this differently.

I have verified that the same calls are made before and after these
changes, and specifically that there are no context leaks.
This commit is contained in:
Ben Ashbaugh
2024-12-03 14:51:23 -08:00
committed by GitHub
parent e361b387d9
commit d99b302f90
16 changed files with 1066 additions and 854 deletions

View File

@@ -44,67 +44,88 @@ void spawnAnalysisTask(int location)
// Concept: a device kernel is used to search an input image for regions that match a target pattern.
// The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices).
// The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets.
// NOTE(review): this span is a rendered diff whose +/- markers were stripped.
// Each group of pre-change lines is immediately followed by its post-change
// counterpart (for example, the old explicit signature on the next line is
// followed by the new REGISTER_TEST form), so the text is not compilable as
// written. The notes below describe the post-change lines.
//
// Test: runs the "find_targets" kernel over num_elements work-items while the
// host polls fine-grain SVM for reported target locations.
// Returns 0 when exactly three targets were consumed, or when
// create_cl_objects returns 1 (no capable device); returns -1 when
// create_cl_objects returns a negative value or the target count is not 3.
// Presumably REGISTER_TEST declares the function with the harness parameters
// (deviceID, context, queue, num_elements) -- macro not visible here, confirm.
int test_svm_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
REGISTER_TEST(svm_fine_grain_sync_buffers)
{
clContextWrapper context = NULL;
clProgramWrapper program = NULL;
cl_uint num_devices = 0;
cl_int err = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ];
clContextWrapper contextWrapper = NULL;
clProgramWrapper program = NULL;
cl_uint num_devices = 0;
cl_int err = CL_SUCCESS;
clCommandQueueWrapper queues[MAXQ];
err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS);
if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing.
if(err < 0) return -1; // fail test.
err = create_cl_objects(deviceID, &find_targets_kernel[0], &contextWrapper,
&program, &queues[0], &num_devices,
CL_DEVICE_SVM_FINE_GRAIN_BUFFER
| CL_DEVICE_SVM_ATOMICS);
// Rebind the harness-supplied name "context" to the context created above so
// that every later use of "context" in this test refers to the new context.
context = contextWrapper;
if (err == 1)
return 0; // no devices capable of requested SVM level, so don't execute
// but count test as passing.
if (err < 0) return -1; // fail test.
clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
test_error(err, "clCreateKernel failed");
clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err);
test_error(err, "clCreateKernel failed");
size_t num_pixels = num_elements;
//cl_uint num_pixels = 1024*1024*32;
size_t num_pixels = num_elements;
// cl_uint num_pixels = 1024*1024*32;
cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0);
cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0);
cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0);
// NOTE(review): the three clSVMAlloc results below are dereferenced without a
// NULL check; per the OpenCL specification clSVMAlloc returns NULL on failure.
cl_uint *pInputImage = (cl_uint *)clSVMAlloc(
context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
sizeof(cl_uint) * num_pixels, 0);
cl_uint *pNumTargetsFound = (cl_uint *)clSVMAlloc(
context,
CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
sizeof(cl_uint), 0);
cl_int *pTargetLocations = (cl_int *)clSVMAlloc(
context,
CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
sizeof(cl_int) * MAX_TARGETS, 0);
cl_uint targetDescriptor = 777;
*pNumTargetsFound = 0;
cl_uint i;
for(i=0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1;
for(i=0; i < num_pixels; i++) pInputImage[i] = 0;
pInputImage[0] = targetDescriptor;
pInputImage[3] = targetDescriptor;
pInputImage[num_pixels - 1] = targetDescriptor;
cl_uint targetDescriptor = 777;
*pNumTargetsFound = 0;
cl_uint i;
// Mark every result slot as unused (-1) and clear the input image.
for (i = 0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1;
for (i = 0; i < num_pixels; i++) pInputImage[i] = 0;
// Plant the target value at three positions; the final check expects
// exactly three reported targets.
// NOTE(review): index 3 assumes num_pixels >= 4, and the three positions are
// only distinct when num_pixels >= 5 -- confirm the harness guarantees this.
pInputImage[0] = targetDescriptor;
pInputImage[3] = targetDescriptor;
pInputImage[num_pixels - 1] = targetDescriptor;
err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor);
err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
test_error(err, "clSetKernelArg failed");
err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage);
err |=
clSetKernelArg(kernel, 1, sizeof(cl_uint), (void *)&targetDescriptor);
err |= clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound);
err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations);
test_error(err, "clSetKernelArg failed");
cl_event done;
err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done);
test_error(err,"clEnqueueNDRangeKernel failed");
clFlush(queues[0]);
// NOTE(review): if test_error returns early on failure (macro not visible
// here), the SVM allocations above, and the event once created, are not
// released on that path -- confirm.
cl_event done;
err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL,
0, NULL, &done);
test_error(err, "clEnqueueNDRangeKernel failed");
clFlush(queues[0]);
i=0;
cl_int status;
// check for new targets, if found spawn a task to analyze target.
do {
err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL);
test_error(err,"clGetEventInfo failed");
if( AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet.
// From here on, i counts the target slots consumed so far.
i = 0;
cl_int status;
// check for new targets, if found spawn a task to analyze target.
do
{
spawnAnalysisTask(pTargetLocations[i]);
i++;
}
} while (status != CL_COMPLETE || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1);
// The event status is sampled before the slot is read, and the loop only
// exits once the event reported CL_COMPLETE and the next slot is still -1.
// NOTE(review): i is not bounded by MAX_TARGETS inside this loop.
err = clGetEventInfo(done, CL_EVENT_COMMAND_EXECUTION_STATUS,
sizeof(cl_int), &status, NULL);
test_error(err, "clGetEventInfo failed");
if (AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed)
!= -1) // -1 indicates slot not used yet.
{
spawnAnalysisTask(pTargetLocations[i]);
i++;
}
} while (status != CL_COMPLETE
|| AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed)
!= -1);
clReleaseEvent(done);
clSVMFree(context, pInputImage);
clSVMFree(context, pNumTargetsFound);
clSVMFree(context, pTargetLocations);
clReleaseEvent(done);
clSVMFree(context, pInputImage);
clSVMFree(context, pNumTargetsFound);
clSVMFree(context, pTargetLocations);
if(i != 3) return -1;
return 0;
// Pass only when exactly the three planted targets were consumed.
if (i != 3) return -1;
return 0;
}