Reimplement buffer tests (#1007)

* Reimplement buffer tests

Reintegrated and fixed test code for buffer tests buffer_read_half and
buffer_write_half tests.

Added mem_alloc_ref_flags test code, as was previously non-existent,
to test CL_MEM_ALLOC_HOST_PTR. This flag was otherwise untested and
as similar tests within the suite are used to test other cl_mem_flags
it has been assumed that this was the purpose of the test.

Fixes #439

Change-Id: I5accf986be7436d09377d0bfd7afd5de2235c329
Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_read_write_flags to a common function

Code under mem_*_flags tests have a lot of duplication, this is
the first step of moving test code to a common function.

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_write_only_flags test code to a common function

Code under mem_*_flags tests have a lot of duplication

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_read_only_flags test code to a common function

Code under mem_*_flags tests have a lot of duplication

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_copy_host_flags test code to a common function

Code under mem_*_flags tests have a lot of duplication, moved
mem_copy_host_flags code and rearranged function where appropriate

mem_ref_alloc_flags test also uses common function.

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* Remove unused NOT_IMPLEMENTED_TEST macro

This define is not in use anymore, since tests have been
reimplemented in #439. Tests should be working and implemented
or not registered.

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>
This commit is contained in:
ellnor01
2020-11-06 11:33:36 +00:00
committed by GitHub
parent e8c55e59bc
commit 63f01be181
7 changed files with 260 additions and 586 deletions

View File

@@ -829,17 +829,8 @@ test_status callSingleTestFunction(test_definition test,
} }
else else
{ {
int ret = test.func( int ret = test.func(deviceToUse, context, queue, numElementsToUse);
deviceToUse, context, queue, if (ret == TEST_SKIPPED_ITSELF)
numElementsToUse); // test_threaded_function( ptr_basefn_list[i],
// group, context, num_elements);
if (ret == TEST_NOT_IMPLEMENTED)
{
/* Tests can also let us know they're not implemented yet */
log_info("%s test currently not implemented\n", test.name);
status = TEST_SKIP;
}
else if (ret == TEST_SKIPPED_ITSELF)
{ {
/* Tests can also let us know they're not supported by the /* Tests can also let us know they're not supported by the
* implementation */ * implementation */

View File

@@ -64,10 +64,6 @@ Version get_device_cl_version(cl_device_id device);
{ \ { \
test_##fn, #fn, ver \ test_##fn, #fn, ver \
} }
#define NOT_IMPLEMENTED_TEST(fn) \
{ \
NULL, #fn, Version(0, 0) \
}
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

View File

@@ -22,7 +22,6 @@
#include <CL/opencl.h> #include <CL/opencl.h>
#endif #endif
#define TEST_NOT_IMPLEMENTED -99
#define TEST_SKIPPED_ITSELF -100 #define TEST_SKIPPED_ITSELF -100
typedef int (*basefn)(cl_device_id deviceID, cl_context context, typedef int (*basefn)(cl_device_id deviceID, cl_context context,

View File

@@ -22,104 +22,104 @@
#include "harness/testHarness.h" #include "harness/testHarness.h"
test_definition test_list[] = { test_definition test_list[] = {
ADD_TEST( buffer_read_async_int ), ADD_TEST(buffer_read_async_int),
ADD_TEST( buffer_read_async_uint ), ADD_TEST(buffer_read_async_uint),
ADD_TEST( buffer_read_async_long ), ADD_TEST(buffer_read_async_long),
ADD_TEST( buffer_read_async_ulong ), ADD_TEST(buffer_read_async_ulong),
ADD_TEST( buffer_read_async_short ), ADD_TEST(buffer_read_async_short),
ADD_TEST( buffer_read_async_ushort ), ADD_TEST(buffer_read_async_ushort),
ADD_TEST( buffer_read_async_char ), ADD_TEST(buffer_read_async_char),
ADD_TEST( buffer_read_async_uchar ), ADD_TEST(buffer_read_async_uchar),
ADD_TEST( buffer_read_async_float ), ADD_TEST(buffer_read_async_float),
ADD_TEST( buffer_read_array_barrier_int ), ADD_TEST(buffer_read_array_barrier_int),
ADD_TEST( buffer_read_array_barrier_uint ), ADD_TEST(buffer_read_array_barrier_uint),
ADD_TEST( buffer_read_array_barrier_long ), ADD_TEST(buffer_read_array_barrier_long),
ADD_TEST( buffer_read_array_barrier_ulong ), ADD_TEST(buffer_read_array_barrier_ulong),
ADD_TEST( buffer_read_array_barrier_short ), ADD_TEST(buffer_read_array_barrier_short),
ADD_TEST( buffer_read_array_barrier_ushort ), ADD_TEST(buffer_read_array_barrier_ushort),
ADD_TEST( buffer_read_array_barrier_char ), ADD_TEST(buffer_read_array_barrier_char),
ADD_TEST( buffer_read_array_barrier_uchar ), ADD_TEST(buffer_read_array_barrier_uchar),
ADD_TEST( buffer_read_array_barrier_float ), ADD_TEST(buffer_read_array_barrier_float),
ADD_TEST( buffer_read_int ), ADD_TEST(buffer_read_int),
ADD_TEST( buffer_read_uint ), ADD_TEST(buffer_read_uint),
ADD_TEST( buffer_read_long ), ADD_TEST(buffer_read_long),
ADD_TEST( buffer_read_ulong ), ADD_TEST(buffer_read_ulong),
ADD_TEST( buffer_read_short ), ADD_TEST(buffer_read_short),
ADD_TEST( buffer_read_ushort ), ADD_TEST(buffer_read_ushort),
ADD_TEST( buffer_read_float ), ADD_TEST(buffer_read_float),
NOT_IMPLEMENTED_TEST( buffer_read_half ), ADD_TEST(buffer_read_half),
ADD_TEST( buffer_read_char ), ADD_TEST(buffer_read_char),
ADD_TEST( buffer_read_uchar ), ADD_TEST(buffer_read_uchar),
ADD_TEST( buffer_read_struct ), ADD_TEST(buffer_read_struct),
ADD_TEST( buffer_read_random_size ), ADD_TEST(buffer_read_random_size),
ADD_TEST( buffer_map_read_int ), ADD_TEST(buffer_map_read_int),
ADD_TEST( buffer_map_read_uint ), ADD_TEST(buffer_map_read_uint),
ADD_TEST( buffer_map_read_long ), ADD_TEST(buffer_map_read_long),
ADD_TEST( buffer_map_read_ulong ), ADD_TEST(buffer_map_read_ulong),
ADD_TEST( buffer_map_read_short ), ADD_TEST(buffer_map_read_short),
ADD_TEST( buffer_map_read_ushort ), ADD_TEST(buffer_map_read_ushort),
ADD_TEST( buffer_map_read_char ), ADD_TEST(buffer_map_read_char),
ADD_TEST( buffer_map_read_uchar ), ADD_TEST(buffer_map_read_uchar),
ADD_TEST( buffer_map_read_float ), ADD_TEST(buffer_map_read_float),
ADD_TEST( buffer_map_read_struct ), ADD_TEST(buffer_map_read_struct),
ADD_TEST( buffer_map_write_int ), ADD_TEST(buffer_map_write_int),
ADD_TEST( buffer_map_write_uint ), ADD_TEST(buffer_map_write_uint),
ADD_TEST( buffer_map_write_long ), ADD_TEST(buffer_map_write_long),
ADD_TEST( buffer_map_write_ulong ), ADD_TEST(buffer_map_write_ulong),
ADD_TEST( buffer_map_write_short ), ADD_TEST(buffer_map_write_short),
ADD_TEST( buffer_map_write_ushort ), ADD_TEST(buffer_map_write_ushort),
ADD_TEST( buffer_map_write_char ), ADD_TEST(buffer_map_write_char),
ADD_TEST( buffer_map_write_uchar ), ADD_TEST(buffer_map_write_uchar),
ADD_TEST( buffer_map_write_float ), ADD_TEST(buffer_map_write_float),
ADD_TEST( buffer_map_write_struct ), ADD_TEST(buffer_map_write_struct),
ADD_TEST( buffer_write_int ), ADD_TEST(buffer_write_int),
ADD_TEST( buffer_write_uint ), ADD_TEST(buffer_write_uint),
ADD_TEST( buffer_write_short ), ADD_TEST(buffer_write_short),
ADD_TEST( buffer_write_ushort ), ADD_TEST(buffer_write_ushort),
ADD_TEST( buffer_write_char ), ADD_TEST(buffer_write_char),
ADD_TEST( buffer_write_uchar ), ADD_TEST(buffer_write_uchar),
ADD_TEST( buffer_write_float ), ADD_TEST(buffer_write_float),
NOT_IMPLEMENTED_TEST( buffer_write_half ), ADD_TEST(buffer_write_half),
ADD_TEST( buffer_write_long ), ADD_TEST(buffer_write_long),
ADD_TEST( buffer_write_ulong ), ADD_TEST(buffer_write_ulong),
ADD_TEST( buffer_write_struct ), ADD_TEST(buffer_write_struct),
ADD_TEST( buffer_write_async_int ), ADD_TEST(buffer_write_async_int),
ADD_TEST( buffer_write_async_uint ), ADD_TEST(buffer_write_async_uint),
ADD_TEST( buffer_write_async_short ), ADD_TEST(buffer_write_async_short),
ADD_TEST( buffer_write_async_ushort ), ADD_TEST(buffer_write_async_ushort),
ADD_TEST( buffer_write_async_char ), ADD_TEST(buffer_write_async_char),
ADD_TEST( buffer_write_async_uchar ), ADD_TEST(buffer_write_async_uchar),
ADD_TEST( buffer_write_async_float ), ADD_TEST(buffer_write_async_float),
ADD_TEST( buffer_write_async_long ), ADD_TEST(buffer_write_async_long),
ADD_TEST( buffer_write_async_ulong ), ADD_TEST(buffer_write_async_ulong),
ADD_TEST( buffer_copy ), ADD_TEST(buffer_copy),
ADD_TEST( buffer_partial_copy ), ADD_TEST(buffer_partial_copy),
ADD_TEST( mem_read_write_flags ), ADD_TEST(mem_read_write_flags),
ADD_TEST( mem_write_only_flags ), ADD_TEST(mem_write_only_flags),
ADD_TEST( mem_read_only_flags ), ADD_TEST(mem_read_only_flags),
ADD_TEST( mem_copy_host_flags ), ADD_TEST(mem_copy_host_flags),
NOT_IMPLEMENTED_TEST( mem_alloc_ref_flags ), ADD_TEST(mem_alloc_ref_flags),
ADD_TEST( array_info_size ), ADD_TEST(array_info_size),
ADD_TEST( sub_buffers_read_write ), ADD_TEST(sub_buffers_read_write),
ADD_TEST( sub_buffers_read_write_dual_devices ), ADD_TEST(sub_buffers_read_write_dual_devices),
ADD_TEST( sub_buffers_overlapping ), ADD_TEST(sub_buffers_overlapping),
ADD_TEST( buffer_fill_int ), ADD_TEST(buffer_fill_int),
ADD_TEST( buffer_fill_uint ), ADD_TEST(buffer_fill_uint),
ADD_TEST( buffer_fill_short ), ADD_TEST(buffer_fill_short),
ADD_TEST( buffer_fill_ushort ), ADD_TEST(buffer_fill_ushort),
ADD_TEST( buffer_fill_char ), ADD_TEST(buffer_fill_char),
ADD_TEST( buffer_fill_uchar ), ADD_TEST(buffer_fill_uchar),
ADD_TEST( buffer_fill_long ), ADD_TEST(buffer_fill_long),
ADD_TEST( buffer_fill_ulong ), ADD_TEST(buffer_fill_ulong),
ADD_TEST( buffer_fill_float ), ADD_TEST(buffer_fill_float),
ADD_TEST( buffer_fill_struct ), ADD_TEST(buffer_fill_struct),
ADD_TEST( buffer_migrate ), ADD_TEST(buffer_migrate),
ADD_TEST( image_migrate ), ADD_TEST(image_migrate),
}; };
const int test_num = ARRAY_SIZE( test_list ); const int test_num = ARRAY_SIZE( test_list );

View File

@@ -39,12 +39,12 @@ const char *mem_read_write_kernel_code =
"}\n"; "}\n";
const char *mem_read_kernel_code = const char *mem_read_kernel_code =
"__kernel void test_mem_read(__global int *src, __global int *dst)\n" "__kernel void test_mem_read(__global int *dst, __global int *src)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = src[tid]+1;\n" " dst[tid] = src[tid]+1;\n"
"}\n"; "}\n";
const char *mem_write_kernel_code = const char *mem_write_kernel_code =
"__kernel void test_mem_write(__global int *dst)\n" "__kernel void test_mem_write(__global int *dst)\n"
@@ -68,13 +68,14 @@ static int verify_mem( int *outptr, int n )
} }
int test_mem_flags(cl_context context, cl_command_queue queue, int num_elements,
int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) cl_mem_flags flags, const char **kernel_program,
const char *kernel_name)
{ {
cl_mem buffers[1]; clMemWrapper buffers[2];
cl_int *inptr, *outptr; cl_int *inptr, *outptr;
cl_program program[1]; clProgramWrapper program;
cl_kernel kernel[1]; clKernelWrapper kernel;
size_t global_work_size[3]; size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP #ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3]; size_t local_work_size[3];
@@ -83,443 +84,177 @@ int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_com
int i; int i;
size_t min_alignment = get_min_alignment(context); size_t min_alignment = get_min_alignment(context);
bool test_read_only = (flags & CL_MEM_READ_ONLY) != 0;
bool test_write_only = (flags & CL_MEM_WRITE_ONLY) != 0;
bool copy_host_ptr = (flags & CL_MEM_COPY_HOST_PTR) != 0;
global_work_size[0] = (cl_uint)num_elements; global_work_size[0] = (cl_uint)num_elements;
inptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment); inptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
if (!inptr)
{
log_error(" unable to allocate %d bytes of memory\n",
(int)sizeof(cl_int) * num_elements);
return -1;
}
outptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment); outptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, NULL, &err); if (!outptr)
if (err != CL_SUCCESS) { {
print_error( err, "clCreateBuffer failed"); log_error(" unable to allocate %d bytes of memory\n",
align_free( (void *)outptr ); (int)sizeof(cl_int) * num_elements);
align_free( (void *)inptr ); align_free((void *)inptr);
return -1; return -1;
} }
for (i=0; i<num_elements; i++) for (i = 0; i < num_elements; i++) inptr[i] = i;
inptr[i] = i;
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL); buffers[0] = clCreateBuffer(context, flags, sizeof(cl_int) * num_elements,
if (err != CL_SUCCESS) { copy_host_ptr ? inptr : NULL, &err);
print_error( err, "clEnqueueWriteBuffer failed"); if (err != CL_SUCCESS)
clReleaseMemObject( buffers[0] ); {
align_free( (void *)outptr ); print_error(err, "clCreateBuffer failed");
align_free( (void *)inptr ); align_free((void *)outptr);
align_free((void *)inptr);
return -1; return -1;
} }
if (!copy_host_ptr)
{
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
sizeof(cl_int) * num_elements, (void *)inptr,
0, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err, "clEnqueueWriteBuffer failed");
align_free((void *)outptr);
align_free((void *)inptr);
return -1;
}
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" ); if (test_read_only)
{
/* The read only buffer for mem_read_only_flags should be created above
with the correct flags as in other tests. However to make later test
code simpler, the additional read_write buffer required is stored as
the first buffer */
buffers[1] = buffers[0];
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int) * num_elements, NULL, &err);
if (err != CL_SUCCESS)
{
print_error(err, " clCreateBuffer failed \n");
align_free((void *)inptr);
align_free((void *)outptr);
return -1;
}
}
err = create_single_kernel_helper(context, &program, &kernel, 1,
kernel_program, kernel_name);
if (err){ if (err){
clReleaseMemObject( buffers[0] ); print_error(err, "creating kernel failed");
align_free( (void *)outptr ); align_free( (void *)outptr );
align_free( (void *)inptr ); align_free( (void *)inptr );
return -1; return -1;
} }
#ifdef USE_LOCAL_WORK_GROUP #ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] ); err = get_max_common_work_group_size(context, kernel, global_work_size[0],
&local_work_size[0]);
test_error( err, "Unable to get work group size to use" ); test_error( err, "Unable to get work group size to use" );
#endif #endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] ); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&buffers[0]);
if (test_read_only && (err == CL_SUCCESS))
{
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&buffers[1]);
}
if ( err != CL_SUCCESS ){ if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArg failed" ); print_error( err, "clSetKernelArg failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr ); align_free( (void *)outptr );
align_free( (void *)inptr ); align_free( (void *)inptr );
return -1; return -1;
} }
#ifdef USE_LOCAL_WORK_GROUP #ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL ); err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
local_work_size, 0, NULL, NULL);
#else #else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL ); err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, NULL,
0, NULL, NULL);
#endif #endif
if (err != CL_SUCCESS){ if (err != CL_SUCCESS){
log_error("clEnqueueNDRangeKernel failed\n"); log_error("clEnqueueNDRangeKernel failed\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr ); align_free( (void *)outptr );
align_free( (void *)inptr ); align_free( (void *)inptr );
return -1; return -1;
} }
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL ); err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
sizeof(cl_int) * num_elements, (void *)outptr, 0,
NULL, NULL);
if ( err != CL_SUCCESS ){ if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueReadBuffer failed" ); print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr ); align_free( (void *)outptr );
align_free( (void *)inptr ); align_free( (void *)inptr );
return -1; return -1;
} }
if (verify_mem(outptr, num_elements)){ if (!test_write_only)
log_error("buffer_MEM_READ_WRITE test failed\n");
err = -1;
}
else{
log_info("buffer_MEM_READ_WRITE test passed\n");
err = 0;
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return err;
} // end test_mem_read_write()
int test_mem_write_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
cl_mem buffers[1];
int *inptr, *outptr;
cl_program program[1];
cl_kernel kernel[1];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! inptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
return -1;
}
outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! outptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
align_free( (void *)inptr );
return -1;
}
buffers[0] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
if (err != CL_SUCCESS)
{ {
print_error(err, "clCreateBuffer failed\n"); if (verify_mem(outptr, num_elements))
align_free( (void *)outptr ); {
align_free( (void *)inptr ); log_error("test failed\n");
return -1; err = -1;
} }
else
for (i=0; i<num_elements; i++) {
inptr[i] = i; log_info("test passed\n");
err = 0;
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL); }
if (err != CL_SUCCESS){
print_error( err, "clEnqueueWriteBuffer failed" );
clReleaseMemObject( buffers[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_write_kernel_code, "test_mem_write" );
if (err){
clReleaseMemObject( buffers[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
#endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArg failed");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueNDRangeKernel failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
if ( err != CL_SUCCESS ){
print_error( err, "Error reading array" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
} }
// cleanup // cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr ); align_free( (void *)outptr );
align_free( (void *)inptr ); align_free( (void *)inptr );
return err; return err;
} // end test_mem_write() } // end test_mem_flags()
int test_mem_read_write_flags(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
return test_mem_flags(context, queue, num_elements, CL_MEM_READ_WRITE,
&mem_read_write_kernel_code, "test_mem_read_write");
}
int test_mem_write_only_flags(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
return test_mem_flags(context, queue, num_elements, CL_MEM_WRITE_ONLY,
&mem_write_kernel_code, "test_mem_write");
}
int test_mem_read_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) int test_mem_read_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{ {
cl_mem buffers[2]; return test_mem_flags(context, queue, num_elements, CL_MEM_READ_ONLY,
int *inptr, *outptr; &mem_read_kernel_code, "test_mem_read");
cl_program program[1]; }
cl_kernel kernel[1];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! inptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
return -1;
}
outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! outptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
align_free( (void *)inptr );
return -1;
}
buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
if ( err != CL_SUCCESS ){
print_error(err, " clCreateBuffer failed to create READ_ONLY array\n" );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
for (i=0; i<num_elements; i++)
inptr[i] = i;
buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int) * num_elements, NULL, &err);
if ( err != CL_SUCCESS ){
print_error(err, " clCreateBuffer failed to create MEM_ALLOC_GLOBAL_POOL array\n" );
clReleaseMemObject( buffers[0]) ;
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueWriteBuffer() failed");
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_kernel_code, "test_mem_read" );
if ( err ){
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
#endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&buffers[1] );
if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArgs failed" );
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if (err != CL_SUCCESS){
print_error( err, "clEnqueueNDRangeKernel failed" );
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[1], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
if (verify_mem(outptr, num_elements)){
log_error( " CL_MEM_READ_ONLY test failed\n" );
err = -1;
}
else{
log_info( " CL_MEM_READ_ONLY test passed\n" );
err = 0;
}
// cleanup
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return err;
} // end test_mem_read()
int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{ {
cl_mem buffers[1]; return test_mem_flags(context, queue, num_elements,
int *ptr; CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
cl_program program[1]; &mem_read_write_kernel_code, "test_mem_read_write");
cl_kernel kernel[1]; }
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
ptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! ptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
return -1;
}
for (i=0; i<num_elements; i++)
ptr[i] = i;
buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, (void *)ptr, &err);
if (err != CL_SUCCESS){
print_error(err, "clCreateBuffer failed for CL_MEM_COPY_HOST_PTR\n");
align_free( (void *)ptr );
return -1;
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" );
if (err){
clReleaseMemObject( buffers[0] );
align_free( (void *)ptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
#endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
if (err != CL_SUCCESS){
log_error("clSetKernelArgs failed\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if (err != CL_SUCCESS){
log_error("clEnqueueNDRangeKernel failed\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)ptr, 0, NULL, NULL );
if (err != CL_SUCCESS){
log_error("CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_CONSTANT_POOL failed.\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return -1;
}
if ( verify_mem( ptr, num_elements ) ){
log_error("CL_MEM_COPY_HOST_PTR test failed\n");
err = -1;
}
else{
log_info("CL_MEM_COPY_HOST_PTR test passed\n");
err = 0;
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return err;
} // end test_mem_copy_host_flags()
int test_mem_alloc_ref_flags(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
return test_mem_flags(context, queue, num_elements,
CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
&mem_read_write_kernel_code, "test_mem_read_write");
}

View File

@@ -21,6 +21,7 @@
#include <time.h> #include <time.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <CL/cl_half.h>
#include "procs.h" #include "procs.h"
@@ -325,6 +326,7 @@ static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffe
static const char *buffer_read_half_kernel_code[] = { static const char *buffer_read_half_kernel_code[] = {
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half(__global half *dst)\n" "__kernel void test_buffer_read_half(__global half *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
@@ -332,6 +334,7 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n" " dst[tid] = (half)119;\n"
"}\n", "}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half2(__global half2 *dst)\n" "__kernel void test_buffer_read_half2(__global half2 *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
@@ -339,6 +342,7 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n" " dst[tid] = (half)119;\n"
"}\n", "}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half4(__global half4 *dst)\n" "__kernel void test_buffer_read_half4(__global half4 *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
@@ -346,6 +350,7 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n" " dst[tid] = (half)119;\n"
"}\n", "}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half8(__global half8 *dst)\n" "__kernel void test_buffer_read_half8(__global half8 *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
@@ -353,12 +358,14 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n" " dst[tid] = (half)119;\n"
"}\n", "}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half16(__global half16 *dst)\n" "__kernel void test_buffer_read_half16(__global half16 *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = (half)119;\n" " dst[tid] = (half)119;\n"
"}\n" }; "}\n"
};
static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" }; static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
@@ -557,11 +564,11 @@ static int verify_read_float( void *ptr, int n )
static int verify_read_half( void *ptr, int n ) static int verify_read_half( void *ptr, int n )
{ {
int i; int i;
float *outptr = (float *)ptr; // FIXME: should this be cl_half_float? cl_half *outptr = (cl_half *)ptr;
for ( i = 0; i < n / 2; i++ ){ for (i = 0; i < n; i++)
if ( outptr[i] != TEST_PRIME_HALF ) {
return -1; if (cl_half_to_float(outptr[i]) != TEST_PRIME_HALF) return -1;
} }
return 0; return 0;
@@ -1099,8 +1106,10 @@ DECLARE_READ_TEST(float, cl_float)
DECLARE_READ_TEST(char, cl_char) DECLARE_READ_TEST(char, cl_char)
DECLARE_READ_TEST(uchar, cl_uchar) DECLARE_READ_TEST(uchar, cl_uchar)
int test_buffer_half_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) int test_buffer_read_half(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{ {
PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5, return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
buffer_read_half_kernel_code, half_kernel_name, verify_read_half ); buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
} }
@@ -1141,76 +1150,6 @@ DECLARE_BARRIER_TEST(char, cl_char)
DECLARE_BARRIER_TEST(uchar, cl_uchar) DECLARE_BARRIER_TEST(uchar, cl_uchar)
DECLARE_BARRIER_TEST(float, cl_float) DECLARE_BARRIER_TEST(float, cl_float)
/*
int test_buffer_half_read(cl_device_group device, cl_device id, cl_context context, int num_elements)
{
cl_mem buffers[1];
float *outptr;
cl_program program[1];
cl_kernel kernel[1];
void *values[1];
size_t sizes[1] = { sizeof(cl_buffer) };
uint threads[1];
int err;
int i;
size_t ptrSize; // sizeof(half)
ptrSize = sizeof(cl_float)/2;
outptr = (float *)malloc(ptrSize * num_elements);
buffers[0] = clCreateBuffer(device, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSize * num_elements, NULL);
if( !buffers[0] ){
log_error("clCreateBuffer failed\n");
return -1;
}
err = create_program_and_kernel(device, buffer_read_half_kernel_code, "test_buffer_read_half", &program[0], &kernel[0]);
if( err ){
log_error( " Error creating program for half\n" );
clReleaseMemObject(buffers[0]);
free( (void *)outptr );
return -1;
}
values[0] = buffers[0];
err = clSetKernelArgs(context, kernel[0], 1, NULL, &(values[i]), sizes);
if( err != CL_SUCCESS ){
log_error("clSetKernelArgs failed\n");
return -1;
}
global_work_size[0] = (cl_uint)num_elements;
err = clEnqueueNDRangeKernel(queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
if( err != CL_SUCCESS ){
log_error("clEnqueueNDRangeKernel failed\n");
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, ptrSize*num_elements, (void *)outptr, 0, NULL, NULL );
if( err != CL_SUCCESS ){
log_error("clEnqueueReadBuffer failed: %d\n", err);
return -1;
}
if( verify_read_half( outptr, num_elements >> 1 ) ){
log_error( "buffer_READ half test failed\n" );
err = -1;
}
else{
log_info( "buffer_READ half test passed\n" );
err = 0;
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
free( (void *)outptr );
return err;
} // end test_buffer_half_read()
*/
int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
cl_mem buffers[1]; cl_mem buffers[1];

View File

@@ -315,40 +315,51 @@ static const char *float_kernel_name[] = { "test_buffer_write_float", "test_buff
const char *buffer_write_half_kernel_code[] = { const char *buffer_write_half_kernel_code[] = {
"__kernel void test_buffer_write_half(__global half *src, __global float *dst)\n" "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half(__global half *src, __global half "
"*dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = vload_half( tid * 2, src );\n" " dst[tid] = src[tid];\n"
"}\n", "}\n",
"__kernel void test_buffer_write_half2(__global half2 *src, __global float2 *dst)\n" "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half2(__global half2 *src, __global half2 "
"*dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = vload_half2( tid * 2, src );\n" " dst[tid] = src[tid];\n"
"}\n", "}\n",
"__kernel void test_buffer_write_half4(__global half4 *src, __global float4 *dst)\n" "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half4(__global half4 *src, __global half4 "
"*dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = vload_half4( tid * 2, src );\n" " dst[tid] = src[tid];\n"
"}\n", "}\n",
"__kernel void test_buffer_write_half8(__global half8 *src, __global float8 *dst)\n" "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half8(__global half8 *src, __global half8 "
"*dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = vload_half8( tid * 2, src );\n" " dst[tid] = src[tid];\n"
"}\n", "}\n",
"__kernel void test_buffer_write_half16(__global half16 *src, __global float16 *dst)\n" "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half16(__global half16 *src, __global "
"half16 *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = vload_half16( tid * 2, src );\n" " dst[tid] = src[tid];\n"
"}\n" }; "}\n"
};
static const char *half_kernel_name[] = { "test_buffer_write_half", "test_buffer_write_half2", "test_buffer_write_half4", "test_buffer_write_half8", "test_buffer_write_half16" }; static const char *half_kernel_name[] = { "test_buffer_write_half", "test_buffer_write_half2", "test_buffer_write_half4", "test_buffer_write_half8", "test_buffer_write_half16" };
@@ -1398,6 +1409,7 @@ int test_buffer_write_float( cl_device_id deviceID, cl_context context, cl_comma
int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{ {
PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
float *inptr[5]; float *inptr[5];
size_t ptrSizes[5]; size_t ptrSizes[5];
int i, err; int i, err;
@@ -1422,8 +1434,10 @@ int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_comman
inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d ); inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
} }
err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5, (void**)inptr, err = test_buffer_write(deviceID, context, queue, num_elements,
buffer_write_half_kernel_code, half_kernel_name, foo, d ); sizeof(cl_half), (char *)"half", 5, (void **)inptr,
buffer_write_half_kernel_code, half_kernel_name,
foo, d);
for ( i = 0; i < 5; i++ ){ for ( i = 0; i < 5; i++ ){
align_free( (void *)inptr[i] ); align_free( (void *)inptr[i] );