Mem-leaks from conformance pipes (#772)

Fix various memory leaks around events.
Convert test to use supplied typewrappers
to avoid memory leaks. Also use error helper
functions to reduce code size.
Use stringstreams to synthesize kernel sources, and
raw c+11 string literals.

Signed-off-by: John Kesapides <john.kesapides@arm.com>
This commit is contained in:
John Kesapides
2020-05-22 13:26:05 +01:00
committed by GitHub
parent ee2d0921dc
commit 094cc04e16
6 changed files with 790 additions and 1937 deletions

View File

@@ -21,29 +21,25 @@ const char* pipe_kernel_code = {
int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
cl_mem pipe;
clMemWrapper pipe;
cl_int err;
cl_uint pipe_width = 512;
cl_uint pipe_depth = 1024;
cl_uint returnVal;
cl_program program;
cl_kernel kernel;
clProgramWrapper program;
clKernelWrapper kernel;
pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, pipe_width, pipe_depth, NULL, &err);
pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, pipe_width, pipe_depth,
NULL, &err);
test_error(err, "clCreatePipe failed.");
err = clGetPipeInfo(pipe, CL_PIPE_PACKET_SIZE, sizeof(pipe_width), (void *)&returnVal, NULL);
if ( err )
{
log_error( "Error calling clGetPipeInfo(): %d\n", err );
clReleaseMemObject(pipe);
return -1;
}
err = clGetPipeInfo(pipe, CL_PIPE_PACKET_SIZE, sizeof(pipe_width),
(void *)&returnVal, NULL);
test_error(err, "clGetPipeInfo failed.");
if(pipe_width != returnVal)
if (pipe_width != returnVal)
{
log_error( "Error in clGetPipeInfo() check of pipe packet size\n" );
clReleaseMemObject(pipe);
log_error("Error in clGetPipeInfo() check of pipe packet size\n");
return -1;
}
else
@@ -52,17 +48,11 @@ int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue
}
err = clGetPipeInfo(pipe, CL_PIPE_MAX_PACKETS, sizeof(pipe_depth), (void *)&returnVal, NULL);
if ( err )
{
log_error( "Error calling clGetPipeInfo(): %d\n", err );
clReleaseMemObject(pipe);
return -1;
}
test_error(err, "clGetPipeInfo failed.");
if(pipe_depth != returnVal)
{
log_error( "Error in clGetPipeInfo() check of pipe max packets\n" );
clReleaseMemObject(pipe);
return -1;
}
else
@@ -71,39 +61,20 @@ int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue
}
err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, (const char**)&pipe_kernel_code, "pipe_kernel", "-cl-std=CL2.0 -cl-kernel-arg-info");
if(err)
{
clReleaseMemObject(pipe);
print_error(err, "Error creating program\n");
return -1;
}
test_error_ret(err, " Error creating program", -1);
cl_kernel_arg_type_qualifier arg_type_qualifier = 0;
cl_kernel_arg_type_qualifier expected_type_qualifier = CL_KERNEL_ARG_TYPE_PIPE;
err = clGetKernelArgInfo( kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(arg_type_qualifier), &arg_type_qualifier, NULL );
if(err)
{
clReleaseMemObject(pipe);
clReleaseKernel(kernel);
clReleaseProgram(program);
print_error(err, "clSetKernelArg failed\n");
return -1;
}
test_error_ret(err, " clSetKernelArgInfo failed", -1);
err = (arg_type_qualifier != expected_type_qualifier);
if(err)
{
clReleaseMemObject(pipe);
clReleaseKernel(kernel);
clReleaseProgram(program);
print_error(err, "ERROR: Bad type qualifier\n");
return -1;
}
// cleanup
clReleaseMemObject(pipe);
clReleaseKernel(kernel);
clReleaseProgram(program);
return err;
}

File diff suppressed because it is too large Load Diff

View File

@@ -79,28 +79,32 @@ static int verify_result(void *ptr1, void *ptr2, int n)
int test_pipe_query_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem pipe;
cl_mem buffers[4];
void *outptr1;
void *outptr2;
cl_int *inptr;
cl_program program;
cl_kernel kernel[3];
size_t global_work_size[3];
size_t half_global_work_size[3];
size_t global_work_size_pipe_query[3];
cl_int pipe_max_packets, pipe_num_packets;
cl_int err;
cl_int size;
cl_int i;
cl_event producer_sync_event = NULL;
cl_event consumer_sync_event = NULL;
cl_event pipe_query_sync_event = NULL;
cl_event pipe_read_sync_event = NULL;
MTdata d = init_genrand( gRandomSeed );
const char* kernelName[] = {"test_pipe_write", "test_pipe_read", "test_pipe_query_functions"};
clMemWrapper pipe;
clMemWrapper buffers[4];
void *outptr1;
void *outptr2;
cl_int *inptr;
clProgramWrapper program;
clKernelWrapper kernel[3];
size_t global_work_size[3];
size_t half_global_work_size[3];
size_t global_work_size_pipe_query[3];
cl_int pipe_max_packets, pipe_num_packets;
cl_int err;
cl_int size;
cl_int i;
clEventWrapper producer_sync_event = NULL;
clEventWrapper consumer_sync_event = NULL;
clEventWrapper pipe_query_sync_event = NULL;
clEventWrapper pipe_read_sync_event = NULL;
BufferOwningPtr<cl_int> BufferInPtr;
BufferOwningPtr<cl_int> BufferOutPtr1;
BufferOwningPtr<cl_int> BufferOutPtr2;
MTdataHolder d(gRandomSeed);
const char *kernelName[] = { "test_pipe_write", "test_pipe_read",
"test_pipe_query_functions" };
size_t min_alignment = get_min_alignment(context);
size_t min_alignment = get_min_alignment(context);
size = sizeof(int) * num_elements;
global_work_size[0] = (cl_uint)num_elements;
@@ -109,98 +113,43 @@ int test_pipe_query_functions(cl_device_id deviceID, cl_context context, cl_comm
inptr = (int *)align_malloc(size, min_alignment);
for(i = 0; i < num_elements; i++){
for (i = 0; i < num_elements; i++)
{
inptr[i] = TEST_PRIME_INT;
}
BufferInPtr.reset(inptr, nullptr, 0, size, true);
buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, inptr, &err);
if(err){
clReleaseMemObject(buffers[0]);
print_error(err, " clCreateBuffer failed\n");
return -1;
}
test_error_ret(err, " clCreateBuffer failed", -1);
outptr1 = align_malloc(size/2, min_alignment);
outptr2 = align_malloc(size, min_alignment);
BufferOutPtr1.reset(outptr1, nullptr, 0, size, true);
BufferOutPtr2.reset(outptr2, nullptr, 0, size, true);
buffers[1] = clCreateBuffer(context, CL_MEM_HOST_READ_ONLY, size, NULL, &err);
if ( err ){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
align_free( outptr1 );
print_error(err, " clCreateBuffer failed\n" );
return -1;
}
test_error_ret(err, " clCreateBuffer failed", -1);
buffers[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int), NULL, &err);
if ( err ){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
align_free( outptr1 );
print_error(err, " clCreateBuffer failed\n" );
return -1;
}
test_error_ret(err, " clCreateBuffer failed", -1);
buffers[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int), NULL, &err);
if ( err ){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
align_free( outptr1 );
print_error(err, " clCreateBuffer failed\n" );
return -1;
}
test_error_ret(err, " clCreateBuffer failed", -1);
pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(int), num_elements, NULL, &err);
if(err){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
align_free( outptr1 );
clReleaseMemObject(pipe);
print_error(err, " clCreatePipe failed\n");
return -1;
}
test_error_ret(err, " clCreatePipe failed", -1);
// Create producer kernel
err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_query_functions_kernel_code, kernelName[0], "-cl-std=CL2.0");
if(err){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
align_free(outptr1);
print_error(err, "Error creating program\n");
return -1;
}
test_error_ret(err, " Error creating program", -1);
//Create consumer kernel
kernel[1] = clCreateKernel(program, kernelName[1], &err);
if( kernel[1] == NULL || err != CL_SUCCESS)
{
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
align_free(outptr1);
print_error(err, "Error creating kernel\n");
return -1;
}
test_error_ret(err, " Error creating kernel", -1);
//Create pipe query functions kernel
kernel[2] = clCreateKernel(program, kernelName[2], &err);
if( kernel[1] == NULL || err != CL_SUCCESS)
{
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
align_free(outptr1);
print_error(err, "Error creating kernel\n");
return -1;
}
test_error_ret(err, " Error creating kernel", -1);
err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void*)&buffers[0]);
err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), (void*)&pipe);
@@ -209,104 +158,21 @@ int test_pipe_query_functions(cl_device_id deviceID, cl_context context, cl_comm
err |= clSetKernelArg(kernel[2], 0, sizeof(cl_mem), (void*)&pipe);
err |= clSetKernelArg(kernel[2], 1, sizeof(cl_mem), (void*)&buffers[2]);
err |= clSetKernelArg(kernel[2], 2, sizeof(cl_mem), (void*)&buffers[3]);
if ( err != CL_SUCCESS ){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseProgram(program);
align_free(outptr1);
print_error(err, " clSetKernelArg failed\n");
return -1;
}
test_error_ret(err, " clSetKernelArg failed", -1);
// Launch Producer kernel
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, &producer_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
// Launch Pipe query kernel
err = clEnqueueNDRangeKernel( queue, kernel[2], 1, NULL, global_work_size_pipe_query, NULL, 1, &producer_sync_event, &pipe_query_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(cl_int), &pipe_num_packets, 1, &pipe_query_sync_event, NULL);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
err = clEnqueueReadBuffer(queue, buffers[3], true, 0, sizeof(cl_int), &pipe_max_packets, 1, &pipe_query_sync_event, NULL);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
if(pipe_num_packets != num_elements || pipe_max_packets != num_elements)
{
@@ -316,85 +182,20 @@ int test_pipe_query_functions(cl_device_id deviceID, cl_context context, cl_comm
// Launch Consumer kernel with half the previous global size
err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, half_global_work_size, NULL, 1, &producer_sync_event, &consumer_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
err = clEnqueueReadBuffer(queue, buffers[1], true, 0, size / 2, outptr1, 1, &consumer_sync_event, NULL);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
// We will reuse this variable so release the previous referred event.
clReleaseEvent(pipe_query_sync_event);
// Launch Pipe query kernel
err = clEnqueueNDRangeKernel( queue, kernel[2], 1, NULL, global_work_size_pipe_query, NULL, 1, &consumer_sync_event, &pipe_query_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(cl_int), &pipe_num_packets, 1, &pipe_query_sync_event, &pipe_read_sync_event);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
// After consumer kernel consumes num_elements/2 from the pipe,
// there are (num_elements - num_elements/2) remaining package in the pipe.
@@ -404,68 +205,24 @@ int test_pipe_query_functions(cl_device_id deviceID, cl_context context, cl_comm
return -1;
}
// We will reuse this variable so release the previous referred event.
clReleaseEvent(producer_sync_event);
// Launch Producer kernel to fill the pipe again
global_work_size[0] = pipe_num_packets;
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 1, &pipe_read_sync_event, &producer_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
// We will reuse this variable so release the previous referred event.
clReleaseEvent(pipe_query_sync_event);
// Launch Pipe query kernel
err = clEnqueueNDRangeKernel( queue, kernel[2], 1, NULL, global_work_size_pipe_query, NULL, 1, &producer_sync_event, &pipe_query_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
// We will reuse this variable so release the previous referred event.
clReleaseEvent(pipe_read_sync_event);
err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(cl_int), &pipe_num_packets, 1, &pipe_query_sync_event, &pipe_read_sync_event);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
if(pipe_num_packets != num_elements)
{
@@ -473,48 +230,16 @@ int test_pipe_query_functions(cl_device_id deviceID, cl_context context, cl_comm
return -1;
}
// We will reuse this variable so release the previous referred event.
clReleaseEvent(consumer_sync_event);
// Launch Consumer kernel to consume all packets from pipe
global_work_size[0] = pipe_num_packets;
err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, NULL, 1, &pipe_read_sync_event, &consumer_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
err = clEnqueueReadBuffer(queue, buffers[1], true, 0, size, outptr2, 1, &consumer_sync_event, NULL);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed\n" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
if( verify_result(outptr1, outptr2, num_elements )){
log_error("test_pipe_query_functions failed\n");
@@ -523,22 +248,6 @@ int test_pipe_query_functions(cl_device_id deviceID, cl_context context, cl_comm
else {
log_info("test_pipe_query_functions passed\n");
}
//cleanup
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(buffers[3]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseKernel(kernel[2]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseEvent(pipe_query_sync_event);
clReleaseEvent(pipe_read_sync_event);
clReleaseProgram(program);
align_free(outptr1);
return 0;
}

View File

@@ -15,11 +15,15 @@
//
#include "harness/compat.h"
#include <assert.h>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <string>
#include <sys/stat.h>
#include <assert.h>
#include <sys/types.h>
#include "procs.h"
#include "kernels.h"
@@ -89,113 +93,139 @@ static const char* convenience_float_kernel_name[] = { "test_pipe_convenience_wr
static const char* convenience_half_kernel_name[] = { "test_pipe_convenience_write_half", "test_pipe_convenience_read_half", "test_pipe_convenience_write_half2", "test_pipe_convenience_read_half2", "test_pipe_convenience_write_half4", "test_pipe_convenience_read_half4", "test_pipe_convenience_write_half8", "test_pipe_convenience_read_half8", "test_pipe_convenience_write_half16", "test_pipe_convenience_read_half16" };
static const char* convenience_double_kernel_name[] = { "test_pipe_convenience_write_double", "test_pipe_convenience_read_double", "test_pipe_convenience_write_double2", "test_pipe_convenience_read_double2", "test_pipe_convenience_write_double4", "test_pipe_convenience_read_double4", "test_pipe_convenience_write_double8", "test_pipe_convenience_read_double8", "test_pipe_convenience_write_double16", "test_pipe_convenience_read_double16" };
static void insertPragmaForHalfType(char *source, char *type)
static void insertPragmaForHalfType(std::stringstream &stream, char *type)
{
source[0] = 0;
if(strncmp(type, "half",4) == 0)
if (strncmp(type, "half", 4) == 0)
{
strcat(source, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n");
stream << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
}
}
void createKernelSource(char *source, char *type)
void createKernelSource(std::stringstream &stream, char *type)
{
char str[512];
int str_length;
insertPragmaForHalfType(stream, type);
insertPragmaForHalfType(source, type);
// clang-format off
stream << R"(
__kernel void test_pipe_write_)" << type << "(__global " << type << " *src, __write_only pipe " << type << R"( out_pipe)
{
int gid = get_global_id(0);
reserve_id_t res_id;
sprintf(str, "__kernel void test_pipe_write_%s(__global %s *src, __write_only pipe %s out_pipe)\n", type, type, type);
strcat(source, str);
sprintf(str, "{\n int gid = get_global_id(0);\n reserve_id_t res_id; \n\n");
strcat(source, str);
sprintf(str, " res_id = reserve_write_pipe(out_pipe, 1);\n if(is_valid_reserve_id(res_id))\n {\n");
strcat(source, str);
sprintf(str, " write_pipe(out_pipe, res_id, 0, &src[gid]);\n commit_write_pipe(out_pipe, res_id);\n }\n}\n\n");
strcat(source, str);
sprintf(str, "__kernel void test_pipe_read_%s(__read_only pipe %s in_pipe, __global %s *dst)\n", type, type, type);
strcat(source, str);
sprintf(str, "{\n int gid = get_global_id(0);\n reserve_id_t res_id; \n\n");
strcat(source, str);
sprintf(str, " res_id = reserve_read_pipe(in_pipe, 1);\n if(is_valid_reserve_id(res_id))\n {\n");
strcat(source, str);
sprintf(str, " read_pipe(in_pipe, res_id, 0, &dst[gid]);\n commit_read_pipe(in_pipe, res_id);\n }\n}\n");
strcat(source, str);
str_length = strlen(source);
assert(str_length <= STRING_LENGTH);
res_id = reserve_write_pipe(out_pipe, 1);
if(is_valid_reserve_id(res_id))
{
write_pipe(out_pipe, res_id, 0, &src[gid]);
commit_write_pipe(out_pipe, res_id);
}
}
__kernel void test_pipe_read_)" << type << "(__read_only pipe " << type << " in_pipe, __global " << type << R"( *dst)
{
int gid = get_global_id(0);
reserve_id_t res_id;
res_id = reserve_read_pipe(in_pipe, 1);
if(is_valid_reserve_id(res_id))
{
read_pipe(in_pipe, res_id, 0, &dst[gid]);
commit_read_pipe(in_pipe, res_id);
}
}
)";
// clang-format on
}
void createKernelSourceWorkGroup(char *source, char *type)
void createKernelSourceWorkGroup(std::stringstream &stream, char *type)
{
char str[512];
int str_length;
insertPragmaForHalfType(stream, type);
insertPragmaForHalfType(source, type);
// clang-format off
stream << R"(
__kernel void test_pipe_workgroup_write_)" << type << "(__global " << type << " *src, __write_only pipe " << type << R"( out_pipe)
{
int gid = get_global_id(0);
__local reserve_id_t res_id;
sprintf(str, "__kernel void test_pipe_workgroup_write_%s(__global %s *src, __write_only pipe %s out_pipe)\n", type, type, type);
strcat(source, str);
sprintf(str, "{\n int gid = get_global_id(0);\n __local reserve_id_t res_id; \n\n");
strcat(source, str);
sprintf(str, " res_id = work_group_reserve_write_pipe(out_pipe, get_local_size(0));\n if(is_valid_reserve_id(res_id))\n {\n");
strcat(source, str);
sprintf(str, " write_pipe(out_pipe, res_id, get_local_id(0), &src[gid]);\n work_group_commit_write_pipe(out_pipe, res_id);\n }\n}\n\n");
strcat(source, str);
sprintf(str, "__kernel void test_pipe_workgroup_read_%s(__read_only pipe %s in_pipe, __global %s *dst)\n", type, type, type);
strcat(source, str);
sprintf(str, "{\n int gid = get_global_id(0);\n __local reserve_id_t res_id; \n\n");
strcat(source, str);
sprintf(str, " res_id = work_group_reserve_read_pipe(in_pipe, get_local_size(0));\n if(is_valid_reserve_id(res_id))\n {\n");
strcat(source, str);
sprintf(str, " read_pipe(in_pipe, res_id, get_local_id(0), &dst[gid]);\n work_group_commit_read_pipe(in_pipe, res_id);\n }\n}\n");
strcat(source, str);
str_length = strlen(source);
assert(str_length <= STRING_LENGTH);
res_id = work_group_reserve_write_pipe(out_pipe, get_local_size(0));
if(is_valid_reserve_id(res_id))
{
write_pipe(out_pipe, res_id, get_local_id(0), &src[gid]);
work_group_commit_write_pipe(out_pipe, res_id);
}
}
__kernel void test_pipe_workgroup_read_)" << type << "(__read_only pipe " << type << " in_pipe, __global " << type << R"( *dst)
{
int gid = get_global_id(0);
__local reserve_id_t res_id;
res_id = work_group_reserve_read_pipe(in_pipe, get_local_size(0));
if(is_valid_reserve_id(res_id))
{
read_pipe(in_pipe, res_id, get_local_id(0), &dst[gid]);
work_group_commit_read_pipe(in_pipe, res_id);
}
}
)";
// clang-format on
}
void createKernelSourceSubGroup(char *source, char *type)
void createKernelSourceSubGroup(std::stringstream &stream, char *type)
{
char str[512];
int str_length;
insertPragmaForHalfType(stream, type);
insertPragmaForHalfType(source, type);
// clang-format off
stream << R"(
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
__kernel void test_pipe_subgroup_write_)" << type << "(__global " << type << " *src, __write_only pipe " << type << R"( out_pipe)
{
int gid = get_global_id(0);
reserve_id_t res_id;
sprintf(str, "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n__kernel void test_pipe_subgroup_write_%s(__global %s *src, __write_only pipe %s out_pipe)\n", type, type, type);
strcat(source, str);
sprintf(str, "{\n int gid = get_global_id(0);\n reserve_id_t res_id; \n\n");
strcat(source, str);
sprintf(str, " res_id = sub_group_reserve_write_pipe(out_pipe, get_sub_group_size());\n if(is_valid_reserve_id(res_id))\n {\n");
strcat(source, str);
sprintf(str, " write_pipe(out_pipe, res_id, get_sub_group_local_id(), &src[gid]);\n sub_group_commit_write_pipe(out_pipe, res_id);\n }\n}\n\n");
strcat(source, str);
sprintf(str, "__kernel void test_pipe_subgroup_read_%s(__read_only pipe %s in_pipe, __global %s *dst)\n", type, type, type);
strcat(source, str);
sprintf(str, "{\n int gid = get_global_id(0);\n reserve_id_t res_id; \n\n");
strcat(source, str);
sprintf(str, " res_id = sub_group_reserve_read_pipe(in_pipe, get_sub_group_size());\n if(is_valid_reserve_id(res_id))\n {\n");
strcat(source, str);
sprintf(str, " read_pipe(in_pipe, res_id, get_sub_group_local_id(), &dst[gid]);\n sub_group_commit_read_pipe(in_pipe, res_id);\n }\n}\n");
strcat(source, str);
str_length = strlen(source);
assert(str_length <= STRING_LENGTH);
res_id = sub_group_reserve_write_pipe(out_pipe, get_sub_group_size());
if(is_valid_reserve_id(res_id))
{
write_pipe(out_pipe, res_id, get_sub_group_local_id(), &src[gid]);
sub_group_commit_write_pipe(out_pipe, res_id);
}
}
__kernel void test_pipe_subgroup_read_)" << type << "(__read_only pipe " << type << " in_pipe, __global " << type << R"( *dst)
{
int gid = get_global_id(0);
reserve_id_t res_id;
res_id = sub_group_reserve_read_pipe(in_pipe, get_sub_group_size());
if(is_valid_reserve_id(res_id))
{
read_pipe(in_pipe, res_id, get_sub_group_local_id(), &dst[gid]);
sub_group_commit_read_pipe(in_pipe, res_id);
}
}
)";
// clang-format on
}
void createKernelSourceConvenience(char *source, char *type)
void createKernelSourceConvenience(std::stringstream &stream, char *type)
{
char str[512];
int str_length;
insertPragmaForHalfType(stream, type);
insertPragmaForHalfType(source, type);
// clang-format off
stream << R"(
__kernel void test_pipe_convenience_write_)" << type << "(__global " << type << " *src, __write_only pipe " << type << R"( out_pipe)
{
int gid = get_global_id(0);
write_pipe(out_pipe, &src[gid]);
}
sprintf(str, "__kernel void test_pipe_convenience_write_%s(__global %s *src, __write_only pipe %s out_pipe)\n", type, type, type);
strcat(source, str);
sprintf(str, "{\n int gid = get_global_id(0);\n write_pipe(out_pipe, &src[gid]);\n}\n\n");
strcat(source, str);
sprintf(str, "__kernel void test_pipe_convenience_read_%s(__read_only pipe %s in_pipe, __global %s *dst)\n", type, type, type);
strcat(source, str);
sprintf(str, "{\n int gid = get_global_id(0);\n read_pipe(in_pipe, &dst[gid]);\n}\n");
strcat(source, str);
str_length = strlen(source);
assert(str_length <= STRING_LENGTH);
__kernel void test_pipe_convenience_read_)" << type << "(__read_only pipe " << type << " in_pipe, __global " << type << R"( *dst)
{
int gid = get_global_id(0);
read_pipe(in_pipe, &dst[gid]);
}
)";
// clang-format on
}
// verify functions
@@ -424,23 +454,24 @@ static int verify_readwrite_struct(void *ptr1, void *ptr2, int n)
int test_pipe_readwrite( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
void *inptr[5], const char *kernelName[], int (*fn)(void *, void *, int) )
{
cl_mem pipes[5];
cl_mem buffers[10];
void *outptr[5];
cl_program program[5];
cl_kernel kernel[10];
size_t global_work_size[3];
size_t local_work_size[3];
cl_int err;
int i, ii;
size_t ptrSizes[5];
int total_errors = 0;
cl_event producer_sync_event[5];
cl_event consumer_sync_event[5];
char *sourceCode[5];
char vector_type[10];
clMemWrapper pipes[5];
clMemWrapper buffers[10];
void *outptr[5];
BufferOwningPtr<cl_int> BufferOutPtr[5];
clProgramWrapper program[5];
clKernelWrapper kernel[10];
size_t global_work_size[3];
size_t local_work_size[3];
cl_int err;
int i, ii;
size_t ptrSizes[5];
int total_errors = 0;
clEventWrapper producer_sync_event[5];
clEventWrapper consumer_sync_event[5];
std::stringstream sourceCode[5];
char vector_type[10];
size_t min_alignment = get_min_alignment(context);
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
@@ -450,217 +481,133 @@ int test_pipe_readwrite( cl_device_id deviceID, cl_context context, cl_command_q
ptrSizes[3] = ptrSizes[2] << 1;
ptrSizes[4] = ptrSizes[3] << 1;
for( i = 0; i < loops; i++)
for (i = 0; i < loops; i++)
{
ii = i << 1;
sourceCode[i] = (char*) malloc(STRING_LENGTH * sizeof(char));
buffers[ii] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, ptrSizes[i] * num_elements, inptr[i], &err);
if(err){
clReleaseMemObject(buffers[ii]);
align_free( outptr[i] );
print_error(err, " clCreateBuffer failed\n");
return -1;
}
outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
buffers[ii+1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, ptrSizes[i] * num_elements, outptr[i], &err);
if ( err ){
clReleaseMemObject(buffers[ii]);
align_free( outptr[i] );
print_error(err, " clCreateBuffer failed\n" );
return -1;
}
buffers[ii] =
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
ptrSizes[i] * num_elements, inptr[i], &err);
test_error_ret(err, " clCreateBuffer failed", -1);
outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
BufferOutPtr[i].reset(outptr[i], nullptr, 0, size, true);
buffers[ii + 1] =
clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
ptrSizes[i] * num_elements, outptr[i], &err);
test_error_ret(err, " clCreateBuffer failed", -1);
// Creating pipe with non-power of 2 size
pipes[i] = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, ptrSizes[i], num_elements+3, NULL, &err);
if(err){
clReleaseMemObject(pipes[i]);
print_error(err, " clCreatePipe failed\n");
return -1;
}
pipes[i] = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, ptrSizes[i],
num_elements + 3, NULL, &err);
test_error_ret(err, " clCreatePipe failed", -1);
switch(i)
switch (i)
{
case 0:
sprintf(vector_type, "%s", type);
break;
case 1:
sprintf(vector_type, "%s%d", type, 2);
break;
case 2:
sprintf(vector_type, "%s%d", type, 4);
break;
case 3:
sprintf(vector_type, "%s%d", type, 8);
break;
case 4:
sprintf(vector_type, "%s%d", type, 16);
break;
case 0: sprintf(vector_type, "%s", type); break;
case 1: sprintf(vector_type, "%s%d", type, 2); break;
case 2: sprintf(vector_type, "%s%d", type, 4); break;
case 3: sprintf(vector_type, "%s%d", type, 8); break;
case 4: sprintf(vector_type, "%s%d", type, 16); break;
}
if(useWorkgroupReserve == 1){
if (useWorkgroupReserve == 1)
{
createKernelSourceWorkGroup(sourceCode[i], vector_type);
}
else if(useSubgroupReserve == 1){
else if (useSubgroupReserve == 1)
{
createKernelSourceSubGroup(sourceCode[i], vector_type);
}
else if(useConvenienceBuiltIn == 1){
else if (useConvenienceBuiltIn == 1)
{
createKernelSourceConvenience(sourceCode[i], vector_type);
}
else{
else
{
createKernelSource(sourceCode[i], vector_type);
}
std::string kernel_source = sourceCode[i].str();
const char *sources[] = { kernel_source.c_str() };
// Create producer kernel
err = create_single_kernel_helper_with_build_options(context, &program[i], &kernel[ii], 1, (const char**)&sourceCode[i], kernelName[ii], "-cl-std=CL2.0");
if(err){
clReleaseMemObject(buffers[ii]);
clReleaseMemObject(buffers[ii+1]);
clReleaseMemObject(pipes[i]);
align_free( outptr[i] );
print_error(err, "Error creating program\n");
return -1;
}
//Create consumer kernel
err = create_single_kernel_helper_with_build_options(
context, &program[i], &kernel[ii], 1, sources, kernelName[ii],
"-cl-std=CL2.0");
test_error_ret(err, " Error creating program", -1);
// Create consumer kernel
kernel[ii + 1] = clCreateKernel(program[i], kernelName[ii + 1], &err);
if( kernel[ii + 1] == NULL || err != CL_SUCCESS)
{
clReleaseMemObject(buffers[ii]);
clReleaseMemObject(buffers[ii+1]);
clReleaseMemObject(pipes[i]);
align_free( outptr[i] );
log_error("Creating program for %s\n", type);
print_error( err, "Unable to create kernel" );
return -1;
}
test_error_ret(err, " Error creating kernel", -1);
err = clSetKernelArg(kernel[ii], 0, sizeof(cl_mem), (void*)&buffers[ii]);
err |= clSetKernelArg(kernel[ii], 1, sizeof(cl_mem), (void*)&pipes[i]);
err |= clSetKernelArg(kernel[ii + 1], 0, sizeof(cl_mem), (void*)&pipes[i]);
err |= clSetKernelArg(kernel[ii + 1], 1, sizeof(cl_mem), (void*)&buffers[ii + 1]);
if ( err != CL_SUCCESS ){
clReleaseMemObject(buffers[ii]);
clReleaseMemObject(buffers[ii+1]);
clReleaseMemObject(pipes[i]);
clReleaseKernel(kernel[ii]);
clReleaseKernel(kernel[ii+1]);
clReleaseProgram(program[i]);
align_free(outptr[i]);
print_error(err, " clSetKernelArg failed");
return -1;
}
err =
clSetKernelArg(kernel[ii], 0, sizeof(cl_mem), (void *)&buffers[ii]);
err |= clSetKernelArg(kernel[ii], 1, sizeof(cl_mem), (void *)&pipes[i]);
err |= clSetKernelArg(kernel[ii + 1], 0, sizeof(cl_mem),
(void *)&pipes[i]);
err |= clSetKernelArg(kernel[ii + 1], 1, sizeof(cl_mem),
(void *)&buffers[ii + 1]);
test_error_ret(err, " clSetKernelArg failed", -1);
if(useWorkgroupReserve == 1 || useSubgroupReserve == 1)
if (useWorkgroupReserve == 1 || useSubgroupReserve == 1)
{
err = get_max_common_work_group_size( context, kernel[ii], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
err = get_max_common_work_group_size(
context, kernel[ii], global_work_size[0], &local_work_size[0]);
test_error(err, "Unable to get work group size to use");
// Launch Producer kernel
err = clEnqueueNDRangeKernel( queue, kernel[ii], 1, NULL, global_work_size, local_work_size, 0, NULL, &producer_sync_event[i] );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed" );
clReleaseMemObject(buffers[ii]);
clReleaseMemObject(buffers[ii+1]);
clReleaseMemObject(pipes[i]);
clReleaseKernel(kernel[ii]);
clReleaseKernel(kernel[ii+1]);
clReleaseEvent(producer_sync_event[i]);
clReleaseEvent(consumer_sync_event[i]);
clReleaseProgram(program[i]);
align_free(outptr[i]);
return -1;
}
err = clEnqueueNDRangeKernel(queue, kernel[ii], 1, NULL,
global_work_size, local_work_size, 0,
NULL, &producer_sync_event[i]);
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
}
else
{
// Launch Producer kernel
err = clEnqueueNDRangeKernel( queue, kernel[ii], 1, NULL, global_work_size, NULL, 0, NULL, &producer_sync_event[i] );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed" );
clReleaseMemObject(buffers[ii]);
clReleaseMemObject(buffers[ii+1]);
clReleaseMemObject(pipes[i]);
clReleaseKernel(kernel[ii]);
clReleaseKernel(kernel[ii+1]);
clReleaseEvent(producer_sync_event[i]);
clReleaseEvent(consumer_sync_event[i]);
clReleaseProgram(program[i]);
align_free(outptr[i]);
return -1;
}
err = clEnqueueNDRangeKernel(queue, kernel[ii], 1, NULL,
global_work_size, NULL, 0, NULL,
&producer_sync_event[i]);
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
}
if(useWorkgroupReserve == 1 || useSubgroupReserve == 1)
if (useWorkgroupReserve == 1 || useSubgroupReserve == 1)
{
err = get_max_common_work_group_size( context, kernel[ii + 1], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
err = get_max_common_work_group_size(context, kernel[ii + 1],
global_work_size[0],
&local_work_size[0]);
test_error(err, "Unable to get work group size to use");
// Launch Consumer kernel
err = clEnqueueNDRangeKernel( queue, kernel[ii + 1], 1, NULL, global_work_size, local_work_size, 1, &producer_sync_event[i], &consumer_sync_event[i] );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed" );
clReleaseMemObject(buffers[ii]);
clReleaseMemObject(buffers[ii+1]);
clReleaseMemObject(pipes[i]);
clReleaseKernel(kernel[ii]);
clReleaseKernel(kernel[ii+1]);
clReleaseEvent(producer_sync_event[i]);
clReleaseEvent(consumer_sync_event[i]);
clReleaseProgram(program[i]);
align_free(outptr[i]);
return -1;
}
err = clEnqueueNDRangeKernel(queue, kernel[ii + 1], 1, NULL,
global_work_size, local_work_size, 1,
&producer_sync_event[i],
&consumer_sync_event[i]);
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
}
else
{
// Launch Consumer kernel
err = clEnqueueNDRangeKernel( queue, kernel[ii + 1], 1, NULL, global_work_size, NULL, 1, &producer_sync_event[i], &consumer_sync_event[i] );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed" );
clReleaseMemObject(buffers[ii]);
clReleaseMemObject(buffers[ii+1]);
clReleaseMemObject(pipes[i]);
clReleaseKernel(kernel[ii]);
clReleaseKernel(kernel[ii+1]);
clReleaseEvent(producer_sync_event[i]);
clReleaseEvent(consumer_sync_event[i]);
clReleaseProgram(program[i]);
align_free(outptr[i]);
return -1;
}
err = clEnqueueNDRangeKernel(
queue, kernel[ii + 1], 1, NULL, global_work_size, NULL, 1,
&producer_sync_event[i], &consumer_sync_event[i]);
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
}
err = clEnqueueReadBuffer(queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 1, &consumer_sync_event[i], NULL);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed" );
clReleaseMemObject(buffers[ii]);
clReleaseMemObject(buffers[ii+1]);
clReleaseMemObject(pipes[i]);
clReleaseKernel(kernel[ii]);
clReleaseKernel(kernel[ii+1]);
clReleaseEvent(producer_sync_event[i]);
clReleaseEvent(consumer_sync_event[i]);
clReleaseProgram(program[i]);
align_free(outptr[i]);
return -1;
}
err = clEnqueueReadBuffer(queue, buffers[ii + 1], true, 0,
ptrSizes[i] * num_elements, outptr[i], 1,
&consumer_sync_event[i], NULL);
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
if( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]))){
log_error("%s%d test failed\n", type, 1<<i);
if (fn(inptr[i], outptr[i],
(int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0])))
{
log_error("%s%d test failed\n", type, 1 << i);
total_errors++;
}
else {
log_info("%s%d test passed\n", type, 1<<i);
else
{
log_info("%s%d test passed\n", type, 1 << i);
}
//cleanup
clReleaseMemObject(buffers[ii]);
clReleaseMemObject(buffers[ii+1]);
clReleaseMemObject(pipes[i]);
clReleaseKernel(kernel[ii]);
clReleaseKernel(kernel[ii+1]);
clReleaseEvent(producer_sync_event[i]);
clReleaseEvent(consumer_sync_event[i]);
clReleaseProgram(program[i]);
align_free(outptr[i]);
}
return total_errors;
@@ -669,166 +616,80 @@ int test_pipe_readwrite( cl_device_id deviceID, cl_context context, cl_command_q
int test_pipe_readwrite_struct_generic( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
const char *kernelCode, const char *kernelName[])
{
cl_mem buffers[2];
cl_mem pipe;
void *outptr;
TestStruct *inptr;
cl_program program;
cl_kernel kernel[2];
size_t size = sizeof(TestStruct);
size_t global_work_size[3];
cl_int err;
int total_errors = 0;
int i;
MTdata d = init_genrand( gRandomSeed );
cl_event producer_sync_event = NULL;
cl_event consumer_sync_event = NULL;
clMemWrapper buffers[2];
clMemWrapper pipe;
void *outptr;
TestStruct *inptr;
BufferOwningPtr<cl_int> BufferInPtr;
BufferOwningPtr<TestStruct> BufferOutPtr;
clProgramWrapper program;
clKernelWrapper kernel[2];
size_t size = sizeof(TestStruct);
size_t global_work_size[3];
cl_int err;
int total_errors = 0;
int i;
MTdataHolder d(gRandomSeed);
clEventWrapper producer_sync_event = NULL;
clEventWrapper consumer_sync_event = NULL;
size_t min_alignment = get_min_alignment(context);
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (size_t)num_elements;
inptr = (TestStruct *)align_malloc(size * num_elements, min_alignment);
for ( i = 0; i < num_elements; i++ ){
for (i = 0; i < num_elements; i++)
{
inptr[i].a = (char)genrand_int32(d);
inptr[i].b = genrand_int32(d);
}
BufferInPtr.reset(inptr, nullptr, 0, size, true);
buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size * num_elements, inptr, &err);
if(err){
clReleaseMemObject(buffers[0]);
print_error(err, " clCreateBuffer failed\n");
return -1;
}
test_error_ret(err, " clCreateBuffer failed", -1);
outptr = align_malloc( size * num_elements, min_alignment);
BufferOutPtr.reset(outptr, nullptr, 0, size, true);
buffers[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size * num_elements, outptr, &err);
if (err){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
align_free( outptr );
print_error(err, " clCreateBuffer failed\n" );
return -1;
}
test_error_ret(err, " clCreateBuffer failed", -1);
pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, size, num_elements, NULL, &err);
if(err){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
align_free( outptr );
clReleaseMemObject(pipe);
print_error(err, " clCreatePipe failed\n");
return -1;
}
test_error_ret(err, " clCreatePipe failed", -1);
// Create producer kernel
err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, &kernelCode, kernelName[0], "-cl-std=CL2.0");
if(err){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(pipe);
align_free(outptr);
log_error(" Error creating program for struct\n");
print_error(err, "Error creating program\n");
return -1;
}
test_error_ret(err, " Error creating program", -1);
//Create consumer kernel
kernel[1] = clCreateKernel(program, kernelName[1], &err);
if( kernel[1] == NULL || err != CL_SUCCESS)
{
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(pipe);
align_free(outptr);
print_error(err, "Error creating kernel\n");
return -1;
}
test_error_ret(err, " Error creating kernel", -1);
err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void*)&buffers[0]);
err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), (void*)&pipe);
err |= clSetKernelArg(kernel[1], 0, sizeof(cl_mem), (void*)&pipe);
err |= clSetKernelArg(kernel[1], 1, sizeof(cl_mem), (void*)&buffers[1]);
if (err != CL_SUCCESS){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
align_free(outptr);
print_error(err, " clSetKernelArg failed");
return -1;
}
test_error_ret(err, " clSetKernelArg failed", -1);
// Launch Producer kernel
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, &producer_sync_event );
if (err != CL_SUCCESS){
print_error( err, " clEnqueueNDRangeKernel failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
// Launch Consumer kernel
err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, NULL, 1, &producer_sync_event, &consumer_sync_event );
if (err != CL_SUCCESS){
print_error( err, " clEnqueueNDRangeKernel failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
err = clEnqueueReadBuffer(queue, buffers[1], true, 0, size*num_elements, outptr, 1, &consumer_sync_event, NULL);
if (err != CL_SUCCESS){
print_error( err, " clEnqueueReadBuffer failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
if( verify_readwrite_struct( inptr, outptr, num_elements)){
log_error("struct_readwrite test failed\n");
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
else {
log_info("struct_readwrite test passed\n");
}
//cleanup
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return 0;
}

View File

@@ -64,23 +64,26 @@ const char* pipe_readwrite_errors_kernel_code = {
int test_pipe_readwrite_errors(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem pipe;
cl_mem buffers[3];
void *outptr;
cl_int *inptr;
cl_program program;
cl_kernel kernel[2];
size_t global_work_size[3];
cl_int err;
cl_int size;
cl_int i;
cl_int status = 0;
cl_event producer_sync_event;
cl_event consumer_sync_event;
MTdata d = init_genrand( gRandomSeed );
const char* kernelName[] = {"test_pipe_write_error", "test_pipe_read_error"};
clMemWrapper pipe;
clMemWrapper buffers[3];
void *outptr;
cl_int *inptr;
clProgramWrapper program;
clKernelWrapper kernel[2];
size_t global_work_size[3];
cl_int err;
cl_int size;
cl_int i;
cl_int status = 0;
clEventWrapper producer_sync_event;
clEventWrapper consumer_sync_event;
BufferOwningPtr<cl_int> BufferInPtr;
BufferOwningPtr<cl_int> BufferOutPtr;
MTdataHolder d(gRandomSeed);
const char *kernelName[] = { "test_pipe_write_error",
"test_pipe_read_error" };
size_t min_alignment = get_min_alignment(context);
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = num_elements;
@@ -88,69 +91,36 @@ int test_pipe_readwrite_errors(cl_device_id deviceID, cl_context context, cl_com
inptr = (cl_int *)align_malloc(size, min_alignment);
for(i = 0; i < (cl_int)(size / sizeof(int)); i++){
for (i = 0; i < num_elements; i++)
{
inptr[i] = (int)genrand_int32(d);
}
BufferInPtr.reset(inptr, nullptr, 0, size, true);
buffers[0] =
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, inptr, &err);
test_error_ret(err, " clCreateBuffer failed", -1);
buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, inptr, &err);
if(err){
clReleaseMemObject(buffers[0]);
print_error(err, " clCreateBuffer failed\n");
return -1;
}
outptr = align_malloc(size, min_alignment);
BufferOutPtr.reset(outptr, nullptr, 0, size, true);
buffers[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, outptr, &err);
if ( err ){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
align_free( outptr );
print_error(err, " clCreateBuffer failed\n" );
return -1;
}
test_error_ret(err, " clCreateBuffer failed", -1);
buffers[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &status, &err);
if ( err ){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
align_free( outptr );
print_error(err, " clCreateBuffer failed\n" );
return -1;
}
test_error_ret(err, " clCreateBuffer failed", -1);
//Pipe created with max_packets less than global size
pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(int), num_elements - (num_elements/2), NULL, &err);
if(err){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
align_free( outptr );
clReleaseMemObject(pipe);
print_error(err, " clCreatePipe failed\n");
return -1;
}
test_error_ret(err, " clCreatePipe failed", -1);
// Create producer kernel
err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_readwrite_errors_kernel_code, kernelName[0], "-cl-std=CL2.0");
if(err){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
align_free(outptr);
print_error(err, "Error creating program\n");
return -1;
}
test_error_ret(err, " Error creating program", -1);
//Create consumer kernel
kernel[1] = clCreateKernel(program, kernelName[1], &err);
if( kernel[1] == NULL || err != CL_SUCCESS)
{
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
align_free(outptr);
print_error(err, "Error creating kernel\n");
return -1;
}
test_error_ret(err, " Error creating kernel", -1);
err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void*)&buffers[0]);
err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), (void*)&pipe);
@@ -158,49 +128,15 @@ int test_pipe_readwrite_errors(cl_device_id deviceID, cl_context context, cl_com
err |= clSetKernelArg(kernel[1], 0, sizeof(cl_mem), (void*)&pipe);
err |= clSetKernelArg(kernel[1], 1, sizeof(cl_mem), (void*)&buffers[1]);
err |= clSetKernelArg(kernel[1], 2, sizeof(cl_mem), (void*)&buffers[2]);
if ( err != CL_SUCCESS ){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
align_free(outptr);
print_error(err, " clSetKernelArg failed");
return -1;
}
test_error_ret(err, " clSetKernelArg failed", -1);
// Launch Consumer kernel for empty pipe
err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, NULL, 0, NULL, &consumer_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(status), &status, 1, &consumer_sync_event, NULL);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
if(status == 0){
log_error("test_pipe_readwrite_errors failed\n");
@@ -212,34 +148,13 @@ int test_pipe_readwrite_errors(cl_device_id deviceID, cl_context context, cl_com
// Launch Producer kernel
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, &producer_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(status), &status, 1, &producer_sync_event, NULL);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
if(status == 0){
if (status == 0)
{
log_error("test_pipe_readwrite_errors failed\n");
return -1;
}
@@ -247,66 +162,27 @@ int test_pipe_readwrite_errors(cl_device_id deviceID, cl_context context, cl_com
status = 0;
}
// We will reuse this variable so release the previous referred event.
clReleaseEvent(consumer_sync_event);
// Launch Consumer kernel
err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, NULL, 1, &producer_sync_event, &consumer_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(status), &status, 1, &consumer_sync_event, NULL);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
if(status == 0)
if (status == 0)
{
log_error("test_pipe_readwrite_errors failed\n");
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
else
{
status = 0;
}
log_info("test_pipe_readwrite_errors passed\n");
//cleanup
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return 0;
}

View File

@@ -88,30 +88,35 @@ static int verify_result(void *ptr1, void *ptr2, int n)
int test_pipe_subgroups_divergence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem pipe;
cl_mem buffers[3];
cl_int *outptr;
cl_int *inptr;
cl_int *active_work_item_buffer;
cl_program program;
cl_kernel kernel[2];
size_t global_work_size[3];
size_t local_work_size[3];
cl_int err;
cl_int size;
int i;
size_t subgroup_count;
cl_event producer_sync_event = NULL;
cl_event consumer_sync_event = NULL;
const char* kernelName[] = {"test_pipe_subgroups_divergence_write", "test_pipe_subgroups_divergence_read"};
clMemWrapper pipe;
clMemWrapper buffers[3];
cl_int *outptr;
cl_int *inptr;
cl_int *active_work_item_buffer;
clProgramWrapper program;
clKernelWrapper kernel[2];
size_t global_work_size[3];
size_t local_work_size[3];
cl_int err;
cl_int size;
int i;
size_t subgroup_count;
clEventWrapper producer_sync_event = NULL;
clEventWrapper consumer_sync_event = NULL;
BufferOwningPtr<cl_int> BufferInPtr;
BufferOwningPtr<cl_int> BufferOutPtr;
const char *kernelName[] = { "test_pipe_subgroups_divergence_write",
"test_pipe_subgroups_divergence_read" };
size_t min_alignment = get_min_alignment(context);
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
if (!is_extension_available(deviceID, "cl_khr_subgroups"))
{
log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
log_info(
"cl_khr_subgroups is not supported on this platoform. Skipping "
"test.\n");
return CL_SUCCESS;
}
@@ -125,215 +130,77 @@ int test_pipe_subgroups_divergence(cl_device_id deviceID, cl_context context, cl
outptr[i] = 0;
active_work_item_buffer[i] = 0;
}
BufferInPtr.reset(inptr, nullptr, 0, size, true);
BufferOutPtr.reset(outptr, nullptr, 0, size, true);
buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, inptr, &err);
if(err){
clReleaseMemObject(buffers[0]);
print_error(err, " clCreateBuffer failed\n");
return -1;
}
test_error_ret(err, " clCreateBuffer failed", -1);
buffers[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, outptr, &err);
if ( err ){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
align_free( outptr );
print_error(err, " clCreateBuffer failed\n" );
return -1;
}
test_error_ret(err, " clCreateBuffer failed", -1);
buffers[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, active_work_item_buffer, &err);
if ( err ){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
align_free( outptr );
print_error(err, " clCreateBuffer failed\n" );
return -1;
}
test_error_ret(err, " clCreateBuffer failed", -1);
pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(int), num_elements, NULL, &err);
if(err){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
align_free( outptr );
clReleaseMemObject(pipe);
print_error(err, " clCreatePipe failed\n");
return -1;
}
test_error_ret(err, " clCreatePipe failed", -1);
// Create producer kernel
err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_subgroups_kernel_code, kernelName[0], "-cl-std=CL2.0");
if(err){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
align_free(outptr);
print_error(err, "Error creating program\n");
return -1;
}
test_error_ret(err, " Error creating program", -1);
//Create consumer kernel
kernel[1] = clCreateKernel(program, kernelName[1], &err);
if( kernel[1] == NULL || err != CL_SUCCESS)
{
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
align_free(outptr);
print_error(err, "Error creating kernel\n");
return -1;
}
test_error_ret(err, " Error creating kernel", -1);
err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void*)&buffers[0]);
err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), (void*)&pipe);
err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), (void*)&buffers[2]);
err |= clSetKernelArg(kernel[1], 0, sizeof(cl_mem), (void*)&pipe);
err |= clSetKernelArg(kernel[1], 1, sizeof(cl_mem), (void*)&buffers[1]);
if ( err != CL_SUCCESS ){
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
align_free(outptr);
print_error(err, " clSetKernelArg failed");
return -1;
}
test_error_ret(err, " clSetKernelArg failed", -1);
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
if( err != CL_SUCCESS)
{
test_error( err, "Unable to get work group size to use" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " Unable to get work group size to use", -1);
cl_platform_id platform;
err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR = (clGetKernelSubGroupInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetKernelSubGroupInfoKHR");
cl_platform_id platform;
err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform),
&platform, NULL);
test_error_ret(err, " clGetDeviceInfo failed", -1);
clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR =
(clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(
platform, "clGetKernelSubGroupInfoKHR");
err = clGetKernelSubGroupInfoKHR(kernel[0], deviceID, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, sizeof(local_work_size[0]), &local_work_size[0], sizeof(subgroup_count), &subgroup_count, NULL);
test_error_ret(err, " clGetKernelSubGroupInfoKHR failed", -1);
if(subgroup_count <= 1)
{
log_info("Only 1 subgroup per workgroup for the kernel. Hence no divergence among subgroups possible. Skipping test.\n");
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
align_free(outptr);
return CL_SUCCESS;
}
// Launch Producer kernel
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, &producer_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
err = clEnqueueReadBuffer(queue, buffers[2], true, 0, size, active_work_item_buffer, 1, &producer_sync_event, NULL);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
// Launch Consumer kernel
err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, local_work_size, 1, &producer_sync_event, &consumer_sync_event );
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueNDRangeKernel failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueNDRangeKernel failed", -1);
err = clEnqueueReadBuffer(queue, buffers[1], true, 0, size, outptr, 1, &consumer_sync_event, NULL);
if ( err != CL_SUCCESS ){
print_error( err, " clEnqueueReadBuffer failed" );
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
test_error_ret(err, " clEnqueueReadBuffer failed", -1);
if( verify_result( active_work_item_buffer, outptr, num_elements)){
log_error("test_pipe_subgroups_divergence failed\n");
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return -1;
}
else {
log_info("test_pipe_subgroups_divergence passed\n");
}
//cleanup
clReleaseMemObject(buffers[0]);
clReleaseMemObject(buffers[1]);
clReleaseMemObject(buffers[2]);
clReleaseMemObject(pipe);
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseEvent(producer_sync_event);
clReleaseEvent(consumer_sync_event);
clReleaseProgram(program);
align_free(outptr);
return 0;
}