Fix memory leaks in test_profiling execute_multipass (#2390)

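- Fixed leaks caused by missing clRelease calls (e.g. the sampler and the profiling event in run_kernel were never released) by switching to the harness wrapper types
- Improved error handling readability by replacing the hand-written log-and-release blocks with test_error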

Signed-off-by: Oskar Hubert Weber <oskar.hubert.weber@intel.com>

---------

Signed-off-by: Oskar Hubert Weber <oskar.hubert.weber@intel.com>
This commit is contained in:
Oskar Hubert Weber
2025-07-08 01:34:44 +02:00
committed by GitHub
parent c4b16940e4
commit 3065a62f77

View File

@@ -24,6 +24,7 @@
#include "procs.h" #include "procs.h"
#include "harness/testHarness.h" #include "harness/testHarness.h"
#include "harness/typeWrappers.h"
#include "harness/errorHelpers.h" #include "harness/errorHelpers.h"
static const char *read3d_kernel_code = static const char *read3d_kernel_code =
@@ -90,11 +91,11 @@ static int verifyImages( cl_uchar *ptr0, cl_uchar *ptr1, cl_uchar tolerance, int
static int run_kernel( cl_device_id device, cl_context context, cl_command_queue queue, static int run_kernel( cl_device_id device, cl_context context, cl_command_queue queue,
int w, int h, int d, int nChannels, cl_uchar *inptr, cl_uchar *outptr ) int w, int h, int d, int nChannels, cl_uchar *inptr, cl_uchar *outptr )
{ {
cl_program program[1]; clProgramWrapper program;
cl_kernel kernel[1]; clKernelWrapper kernel;
cl_mem memobjs[2]; clMemWrapper memobjs[2];
cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 }; cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
cl_event executeEvent = NULL; clEventWrapper executeEvent = NULL;
cl_ulong queueStart, submitStart, writeStart, writeEnd; cl_ulong queueStart, submitStart, writeStart, writeEnd;
size_t threads[3]; size_t threads[3];
size_t localThreads[3]; size_t localThreads[3];
@@ -108,18 +109,11 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
3 * sizeof(size_t), (size_t *)localThreads, NULL); 3 * sizeof(size_t), (size_t *)localThreads, NULL);
if (err) test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed");
{
log_error("clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed\n");
return -1;
}
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
&maxWorkgroupSize, NULL); &maxWorkgroupSize, NULL);
if (err) test_error(err, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");
{
log_error("clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed\n");
return -1;
}
localThreads[0] = localThreads[0] =
std::min({ localThreads[0], threads[0], maxWorkgroupSize }); std::min({ localThreads[0], threads[0], maxWorkgroupSize });
localThreads[1] = std::min( localThreads[1] = std::min(
@@ -128,121 +122,65 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
std::min({ localThreads[2], threads[2], std::min({ localThreads[2], threads[2],
maxWorkgroupSize / (localThreads[0] * localThreads[1]) }); maxWorkgroupSize / (localThreads[0] * localThreads[1]) });
cl_sampler sampler = clCreateSampler( context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err ); clSamplerWrapper sampler = clCreateSampler(
if( err ){ context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
log_error( " clCreateSampler failed.\n" ); test_error(err, "clCreateSampler failed");
return -1;
}
// allocate the input and output image memory objects // allocate the input and output image memory objects
memobjs[0] = memobjs[0] =
create_image_3d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, create_image_3d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
&image_format_desc, w, h, d, 0, 0, inptr, &err); &image_format_desc, w, h, d, 0, 0, inptr, &err);
if( memobjs[0] == (cl_mem)0 ){ test_error(err, "unable to create 3D image using create_image_3d");
log_error( " unable to create 2D image using create_image_2d\n" );
return -1;
}
// allocate an array memory object to load the filter weights // allocate an array memory object to load the filter weights
size_t outptr_size = sizeof(cl_uchar) * w * h * d * nChannels; size_t outptr_size = sizeof(cl_uchar) * w * h * d * nChannels;
memobjs[1] = memobjs[1] =
clCreateBuffer(context, CL_MEM_READ_WRITE, outptr_size, NULL, &err); clCreateBuffer(context, CL_MEM_READ_WRITE, outptr_size, NULL, &err);
if( memobjs[1] == (cl_mem)0 ){ test_error(err, "unable to create array using clCreateBuffer");
log_error( " unable to create array using clCreateBuffer\n" );
clReleaseMemObject( memobjs[0] );
return -1;
}
// create the compute program // create the compute program
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &read3d_kernel_code, "read3d" ); err = create_single_kernel_helper(context, &program, &kernel, 1,
if( err ){ &read3d_kernel_code, "read3d");
clReleaseMemObject( memobjs[1] ); test_error(err, "create_single_kernel_helper failed");
clReleaseMemObject( memobjs[0] );
return -1;
}
// create kernel args object and set arg values. // create kernel args object and set arg values.
// set the args values // set the args values
err |= clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[0] ); err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobjs[0]);
err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[1] ); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&memobjs[1]);
err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler); err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
test_error(err, "clSetKernelArg failed");
if( err != CL_SUCCESS ){ err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads, localThreads,
print_error( err, "clSetKernelArg failed\n" ); 0, NULL, &executeEvent);
clReleaseKernel( kernel[0] ); test_error(err, "clEnqueueNDRangeKernel failed");
clReleaseProgram( program[0] );
clReleaseMemObject( memobjs[1] );
clReleaseMemObject( memobjs[0] );
return -1;
}
err = clEnqueueNDRangeKernel( queue, kernel[0], 3, NULL, threads, localThreads, 0, NULL, &executeEvent );
if( err != CL_SUCCESS ){
print_error( err, "clEnqueueNDRangeKernel failed\n" );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
clReleaseMemObject( memobjs[1] );
clReleaseMemObject( memobjs[0] );
return -1;
}
if (executeEvent) { if (executeEvent) {
// This synchronization point is needed in order to assume the data is valid. // This synchronization point is needed in order to assume the data is valid.
// Getting profiling information is not a synchronization point. // Getting profiling information is not a synchronization point.
err = clWaitForEvents( 1, &executeEvent ); err = clWaitForEvents( 1, &executeEvent );
if( err != CL_SUCCESS ) test_error(err, "clWaitForEvents failed");
{
print_error( err, "clWaitForEvents failed\n" );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
clReleaseMemObject( memobjs[1] );
clReleaseMemObject( memobjs[0] );
return -1;
}
// test profiling // test profiling
while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE ); while ((err = clGetEventProfilingInfo(
if( err != CL_SUCCESS ){ executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong),
print_error( err, "clGetEventProfilingInfo failed" ); &queueStart, NULL))
clReleaseKernel( kernel[0] ); == CL_PROFILING_INFO_NOT_AVAILABLE)
clReleaseProgram( program[0] ); ;
clReleaseMemObject( memobjs[1] ); test_error(err, "clGetEventProfilingInfo failed");
clReleaseMemObject( memobjs[0] );
return -1;
}
while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE ); while ((err = clGetEventProfilingInfo(
if( err != CL_SUCCESS ){ executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong),
print_error( err, "clGetEventProfilingInfo failed" ); &submitStart, NULL))
clReleaseKernel( kernel[0] ); == CL_PROFILING_INFO_NOT_AVAILABLE)
clReleaseProgram( program[0] ); ;
clReleaseMemObject( memobjs[1] ); test_error(err, "clGetEventProfilingInfo failed");
clReleaseMemObject( memobjs[0] );
return -1;
}
err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL ); err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
if( err != CL_SUCCESS ){ test_error(err, "clGetEventProfilingInfo failed");
print_error( err, "clGetEventProfilingInfo failed" );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
clReleaseMemObject( memobjs[1] );
clReleaseMemObject( memobjs[0] );
return -1;
}
err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL ); err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
if( err != CL_SUCCESS ){ test_error(err, "clGetEventProfilingInfo failed");
print_error( err, "clGetEventProfilingInfo failed" );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
clReleaseMemObject( memobjs[1] );
clReleaseMemObject( memobjs[0] );
return -1;
}
log_info( "Profiling info:\n" ); log_info( "Profiling info:\n" );
log_info( "Time from queue to start of clEnqueueNDRangeKernel: %f seconds\n", (double)(writeStart - queueStart) / 1000000000000.f ); log_info( "Time from queue to start of clEnqueueNDRangeKernel: %f seconds\n", (double)(writeStart - queueStart) / 1000000000000.f );
@@ -252,23 +190,9 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
// read output image // read output image
err = clEnqueueReadBuffer(queue, memobjs[1], CL_TRUE, 0, outptr_size, err = clEnqueueReadBuffer(queue, memobjs[1], CL_TRUE, 0, outptr_size,
outptr, 0, NULL, NULL); outptr, 0, NULL, NULL);
if( err != CL_SUCCESS ){ test_error(err, "clReadImage failed");
print_error( err, "clReadImage failed\n" );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
clReleaseMemObject( memobjs[1] );
clReleaseMemObject( memobjs[0] );
return -1;
}
// release kernel, program, and memory objects
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
clReleaseMemObject( memobjs[1] );
clReleaseMemObject( memobjs[0] );
return err; return err;
} // end run_kernel() } // end run_kernel()