Reimplement buffer tests (#1007)

* Reimplement buffer tests

Reintegrated and fixed test code for buffer tests buffer_read_half and
buffer_write_half tests.

Added mem_alloc_ref_flags test code, as was previously non-existent,
to test CL_MEM_ALLOC_HOST_PTR. This flag was otherwise untested and
as similar tests within the suite are used to test other cl_mem_flags
it has been assumed that this was the purpose of the test.

Fixes #439

Change-Id: I5accf986be7436d09377d0bfd7afd5de2235c329
Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_read_write_flags to a common function

Code under mem_*_flags tests have a lot of duplication, this is
the first step of moving test code to a common function.

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_write_only_flags test code to a common function

Code under mem_*_flags tests have a lot of duplication

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_read_only_flags test code to a common function

Code under mem_*_flags tests have a lot of duplication

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_copy_host_flags test code to a common function

Code under mem_*_flags tests have a lot of duplication, moved
mem_copy_host_flags code and rearranged function where appropriate

mem_ref_alloc_flags test also uses common function.

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* Remove unused NOT_IMPLEMENTED_TEST macro

This define is not in use anymore, since tests have been
reimplemented in #439. Tests should be working and implemented
or not registered.

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>
This commit is contained in:
ellnor01
2020-11-06 11:33:36 +00:00
committed by GitHub
parent e8c55e59bc
commit 63f01be181
7 changed files with 260 additions and 586 deletions

View File

@@ -829,17 +829,8 @@ test_status callSingleTestFunction(test_definition test,
} }
else else
{ {
int ret = test.func( int ret = test.func(deviceToUse, context, queue, numElementsToUse);
deviceToUse, context, queue, if (ret == TEST_SKIPPED_ITSELF)
numElementsToUse); // test_threaded_function( ptr_basefn_list[i],
// group, context, num_elements);
if (ret == TEST_NOT_IMPLEMENTED)
{
/* Tests can also let us know they're not implemented yet */
log_info("%s test currently not implemented\n", test.name);
status = TEST_SKIP;
}
else if (ret == TEST_SKIPPED_ITSELF)
{ {
/* Tests can also let us know they're not supported by the /* Tests can also let us know they're not supported by the
* implementation */ * implementation */

View File

@@ -64,10 +64,6 @@ Version get_device_cl_version(cl_device_id device);
{ \ { \
test_##fn, #fn, ver \ test_##fn, #fn, ver \
} }
#define NOT_IMPLEMENTED_TEST(fn) \
{ \
NULL, #fn, Version(0, 0) \
}
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

View File

@@ -22,7 +22,6 @@
#include <CL/opencl.h> #include <CL/opencl.h>
#endif #endif
#define TEST_NOT_IMPLEMENTED -99
#define TEST_SKIPPED_ITSELF -100 #define TEST_SKIPPED_ITSELF -100
typedef int (*basefn)(cl_device_id deviceID, cl_context context, typedef int (*basefn)(cl_device_id deviceID, cl_context context,

View File

@@ -47,7 +47,7 @@ test_definition test_list[] = {
ADD_TEST(buffer_read_short), ADD_TEST(buffer_read_short),
ADD_TEST(buffer_read_ushort), ADD_TEST(buffer_read_ushort),
ADD_TEST(buffer_read_float), ADD_TEST(buffer_read_float),
NOT_IMPLEMENTED_TEST( buffer_read_half ), ADD_TEST(buffer_read_half),
ADD_TEST(buffer_read_char), ADD_TEST(buffer_read_char),
ADD_TEST(buffer_read_uchar), ADD_TEST(buffer_read_uchar),
ADD_TEST(buffer_read_struct), ADD_TEST(buffer_read_struct),
@@ -81,7 +81,7 @@ test_definition test_list[] = {
ADD_TEST(buffer_write_char), ADD_TEST(buffer_write_char),
ADD_TEST(buffer_write_uchar), ADD_TEST(buffer_write_uchar),
ADD_TEST(buffer_write_float), ADD_TEST(buffer_write_float),
NOT_IMPLEMENTED_TEST( buffer_write_half ), ADD_TEST(buffer_write_half),
ADD_TEST(buffer_write_long), ADD_TEST(buffer_write_long),
ADD_TEST(buffer_write_ulong), ADD_TEST(buffer_write_ulong),
ADD_TEST(buffer_write_struct), ADD_TEST(buffer_write_struct),
@@ -100,7 +100,7 @@ test_definition test_list[] = {
ADD_TEST(mem_write_only_flags), ADD_TEST(mem_write_only_flags),
ADD_TEST(mem_read_only_flags), ADD_TEST(mem_read_only_flags),
ADD_TEST(mem_copy_host_flags), ADD_TEST(mem_copy_host_flags),
NOT_IMPLEMENTED_TEST( mem_alloc_ref_flags ), ADD_TEST(mem_alloc_ref_flags),
ADD_TEST(array_info_size), ADD_TEST(array_info_size),
ADD_TEST(sub_buffers_read_write), ADD_TEST(sub_buffers_read_write),

View File

@@ -39,7 +39,7 @@ const char *mem_read_write_kernel_code =
"}\n"; "}\n";
const char *mem_read_kernel_code = const char *mem_read_kernel_code =
"__kernel void test_mem_read(__global int *src, __global int *dst)\n" "__kernel void test_mem_read(__global int *dst, __global int *src)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
@@ -68,13 +68,14 @@ static int verify_mem( int *outptr, int n )
} }
int test_mem_flags(cl_context context, cl_command_queue queue, int num_elements,
int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) cl_mem_flags flags, const char **kernel_program,
const char *kernel_name)
{ {
cl_mem buffers[1]; clMemWrapper buffers[2];
cl_int *inptr, *outptr; cl_int *inptr, *outptr;
cl_program program[1]; clProgramWrapper program;
cl_kernel kernel[1]; clKernelWrapper kernel;
size_t global_work_size[3]; size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP #ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3]; size_t local_work_size[3];
@@ -83,443 +84,177 @@ int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_com
int i; int i;
size_t min_alignment = get_min_alignment(context); size_t min_alignment = get_min_alignment(context);
bool test_read_only = (flags & CL_MEM_READ_ONLY) != 0;
bool test_write_only = (flags & CL_MEM_WRITE_ONLY) != 0;
bool copy_host_ptr = (flags & CL_MEM_COPY_HOST_PTR) != 0;
global_work_size[0] = (cl_uint)num_elements; global_work_size[0] = (cl_uint)num_elements;
inptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment); inptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
if (!inptr)
{
log_error(" unable to allocate %d bytes of memory\n",
(int)sizeof(cl_int) * num_elements);
return -1;
}
outptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment); outptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, NULL, &err); if (!outptr)
if (err != CL_SUCCESS) { {
log_error(" unable to allocate %d bytes of memory\n",
(int)sizeof(cl_int) * num_elements);
align_free((void *)inptr);
return -1;
}
for (i = 0; i < num_elements; i++) inptr[i] = i;
buffers[0] = clCreateBuffer(context, flags, sizeof(cl_int) * num_elements,
copy_host_ptr ? inptr : NULL, &err);
if (err != CL_SUCCESS)
{
print_error(err, "clCreateBuffer failed"); print_error(err, "clCreateBuffer failed");
align_free((void *)outptr); align_free((void *)outptr);
align_free((void *)inptr); align_free((void *)inptr);
return -1; return -1;
} }
if (!copy_host_ptr)
for (i=0; i<num_elements; i++)
inptr[i] = i;
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
if (err != CL_SUCCESS) {
print_error( err, "clEnqueueWriteBuffer failed");
clReleaseMemObject( buffers[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" );
if (err){
clReleaseMemObject( buffers[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
#endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArg failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if (err != CL_SUCCESS){
log_error("clEnqueueNDRangeKernel failed\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
if (verify_mem(outptr, num_elements)){
log_error("buffer_MEM_READ_WRITE test failed\n");
err = -1;
}
else{
log_info("buffer_MEM_READ_WRITE test passed\n");
err = 0;
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return err;
} // end test_mem_read_write()
int test_mem_write_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{ {
cl_mem buffers[1]; err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
int *inptr, *outptr; sizeof(cl_int) * num_elements, (void *)inptr,
cl_program program[1]; 0, NULL, NULL);
cl_kernel kernel[1]; if (err != CL_SUCCESS)
size_t global_work_size[3]; {
#ifdef USE_LOCAL_WORK_GROUP print_error(err, "clEnqueueWriteBuffer failed");
size_t local_work_size[3]; align_free((void *)outptr);
#endif
cl_int err;
int i;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! inptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
return -1;
}
outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! outptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
align_free((void *)inptr); align_free((void *)inptr);
return -1; return -1;
} }
buffers[0] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * num_elements, NULL, &err); }
if (test_read_only)
{
/* The read only buffer for mem_read_only_flags should be created above
with the correct flags as in other tests. However to make later test
code simpler, the additional read_write buffer required is stored as
the first buffer */
buffers[1] = buffers[0];
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int) * num_elements, NULL, &err);
if (err != CL_SUCCESS) if (err != CL_SUCCESS)
{ {
print_error(err, " clCreateBuffer failed \n"); print_error(err, " clCreateBuffer failed \n");
align_free( (void *)outptr );
align_free((void *)inptr); align_free((void *)inptr);
align_free((void *)outptr);
return -1; return -1;
} }
for (i=0; i<num_elements; i++)
inptr[i] = i;
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
if (err != CL_SUCCESS){
print_error( err, "clEnqueueWriteBuffer failed" );
clReleaseMemObject( buffers[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
} }
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_write_kernel_code, "test_mem_write" ); err = create_single_kernel_helper(context, &program, &kernel, 1,
kernel_program, kernel_name);
if (err){ if (err){
clReleaseMemObject( buffers[0] ); print_error(err, "creating kernel failed");
align_free( (void *)outptr ); align_free( (void *)outptr );
align_free( (void *)inptr ); align_free( (void *)inptr );
return -1; return -1;
} }
#ifdef USE_LOCAL_WORK_GROUP #ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] ); err = get_max_common_work_group_size(context, kernel, global_work_size[0],
&local_work_size[0]);
test_error( err, "Unable to get work group size to use" ); test_error( err, "Unable to get work group size to use" );
#endif #endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] ); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&buffers[0]);
if (test_read_only && (err == CL_SUCCESS))
{
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&buffers[1]);
}
if ( err != CL_SUCCESS ){ if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArg failed" ); print_error( err, "clSetKernelArg failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr ); align_free( (void *)outptr );
align_free( (void *)inptr ); align_free( (void *)inptr );
return -1; return -1;
} }
#ifdef USE_LOCAL_WORK_GROUP #ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL ); err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
local_work_size, 0, NULL, NULL);
#else #else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL ); err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, NULL,
0, NULL, NULL);
#endif #endif
if (err != CL_SUCCESS){ if (err != CL_SUCCESS){
print_error( err, "clEnqueueNDRangeKernel failed" ); log_error("clEnqueueNDRangeKernel failed\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr ); align_free( (void *)outptr );
align_free( (void *)inptr ); align_free( (void *)inptr );
return -1; return -1;
} }
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL ); err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
sizeof(cl_int) * num_elements, (void *)outptr, 0,
NULL, NULL);
if ( err != CL_SUCCESS ){ if ( err != CL_SUCCESS ){
print_error( err, "Error reading array" ); print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr ); align_free( (void *)outptr );
align_free( (void *)inptr ); align_free( (void *)inptr );
return -1; return -1;
} }
if (!test_write_only)
{
if (verify_mem(outptr, num_elements))
{
log_error("test failed\n");
err = -1;
}
else
{
log_info("test passed\n");
err = 0;
}
}
// cleanup // cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr ); align_free( (void *)outptr );
align_free( (void *)inptr ); align_free( (void *)inptr );
return err; return err;
} // end test_mem_write() } // end test_mem_flags()
int test_mem_read_write_flags(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
return test_mem_flags(context, queue, num_elements, CL_MEM_READ_WRITE,
&mem_read_write_kernel_code, "test_mem_read_write");
}
int test_mem_write_only_flags(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
return test_mem_flags(context, queue, num_elements, CL_MEM_WRITE_ONLY,
&mem_write_kernel_code, "test_mem_write");
}
int test_mem_read_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) int test_mem_read_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{ {
cl_mem buffers[2]; return test_mem_flags(context, queue, num_elements, CL_MEM_READ_ONLY,
int *inptr, *outptr; &mem_read_kernel_code, "test_mem_read");
cl_program program[1];
cl_kernel kernel[1];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! inptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
return -1;
} }
outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! outptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
align_free( (void *)inptr );
return -1;
}
buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
if ( err != CL_SUCCESS ){
print_error(err, " clCreateBuffer failed to create READ_ONLY array\n" );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
for (i=0; i<num_elements; i++)
inptr[i] = i;
buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int) * num_elements, NULL, &err);
if ( err != CL_SUCCESS ){
print_error(err, " clCreateBuffer failed to create MEM_ALLOC_GLOBAL_POOL array\n" );
clReleaseMemObject( buffers[0]) ;
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueWriteBuffer() failed");
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_kernel_code, "test_mem_read" );
if ( err ){
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
#endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&buffers[1] );
if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArgs failed" );
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if (err != CL_SUCCESS){
print_error( err, "clEnqueueNDRangeKernel failed" );
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[1], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
if (verify_mem(outptr, num_elements)){
log_error( " CL_MEM_READ_ONLY test failed\n" );
err = -1;
}
else{
log_info( " CL_MEM_READ_ONLY test passed\n" );
err = 0;
}
// cleanup
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return err;
} // end test_mem_read()
int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{ {
cl_mem buffers[1]; return test_mem_flags(context, queue, num_elements,
int *ptr; CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
cl_program program[1]; &mem_read_write_kernel_code, "test_mem_read_write");
cl_kernel kernel[1];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
ptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! ptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
return -1;
} }
for (i=0; i<num_elements; i++) int test_mem_alloc_ref_flags(cl_device_id deviceID, cl_context context,
ptr[i] = i; cl_command_queue queue, int num_elements)
{
buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, (void *)ptr, &err); return test_mem_flags(context, queue, num_elements,
if (err != CL_SUCCESS){ CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
print_error(err, "clCreateBuffer failed for CL_MEM_COPY_HOST_PTR\n"); &mem_read_write_kernel_code, "test_mem_read_write");
align_free( (void *)ptr );
return -1;
} }
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" );
if (err){
clReleaseMemObject( buffers[0] );
align_free( (void *)ptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
#endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
if (err != CL_SUCCESS){
log_error("clSetKernelArgs failed\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if (err != CL_SUCCESS){
log_error("clEnqueueNDRangeKernel failed\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)ptr, 0, NULL, NULL );
if (err != CL_SUCCESS){
log_error("CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_CONSTANT_POOL failed.\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return -1;
}
if ( verify_mem( ptr, num_elements ) ){
log_error("CL_MEM_COPY_HOST_PTR test failed\n");
err = -1;
}
else{
log_info("CL_MEM_COPY_HOST_PTR test passed\n");
err = 0;
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return err;
} // end test_mem_copy_host_flags()

View File

@@ -21,6 +21,7 @@
#include <time.h> #include <time.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <CL/cl_half.h>
#include "procs.h" #include "procs.h"
@@ -325,6 +326,7 @@ static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffe
static const char *buffer_read_half_kernel_code[] = { static const char *buffer_read_half_kernel_code[] = {
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half(__global half *dst)\n" "__kernel void test_buffer_read_half(__global half *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
@@ -332,6 +334,7 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n" " dst[tid] = (half)119;\n"
"}\n", "}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half2(__global half2 *dst)\n" "__kernel void test_buffer_read_half2(__global half2 *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
@@ -339,6 +342,7 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n" " dst[tid] = (half)119;\n"
"}\n", "}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half4(__global half4 *dst)\n" "__kernel void test_buffer_read_half4(__global half4 *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
@@ -346,6 +350,7 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n" " dst[tid] = (half)119;\n"
"}\n", "}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half8(__global half8 *dst)\n" "__kernel void test_buffer_read_half8(__global half8 *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
@@ -353,12 +358,14 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n" " dst[tid] = (half)119;\n"
"}\n", "}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half16(__global half16 *dst)\n" "__kernel void test_buffer_read_half16(__global half16 *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = (half)119;\n" " dst[tid] = (half)119;\n"
"}\n" }; "}\n"
};
static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" }; static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
@@ -557,11 +564,11 @@ static int verify_read_float( void *ptr, int n )
static int verify_read_half( void *ptr, int n ) static int verify_read_half( void *ptr, int n )
{ {
int i; int i;
float *outptr = (float *)ptr; // FIXME: should this be cl_half_float? cl_half *outptr = (cl_half *)ptr;
for ( i = 0; i < n / 2; i++ ){ for (i = 0; i < n; i++)
if ( outptr[i] != TEST_PRIME_HALF ) {
return -1; if (cl_half_to_float(outptr[i]) != TEST_PRIME_HALF) return -1;
} }
return 0; return 0;
@@ -1099,8 +1106,10 @@ DECLARE_READ_TEST(float, cl_float)
DECLARE_READ_TEST(char, cl_char) DECLARE_READ_TEST(char, cl_char)
DECLARE_READ_TEST(uchar, cl_uchar) DECLARE_READ_TEST(uchar, cl_uchar)
int test_buffer_half_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) int test_buffer_read_half(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{ {
PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5, return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
buffer_read_half_kernel_code, half_kernel_name, verify_read_half ); buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
} }
@@ -1141,76 +1150,6 @@ DECLARE_BARRIER_TEST(char, cl_char)
DECLARE_BARRIER_TEST(uchar, cl_uchar) DECLARE_BARRIER_TEST(uchar, cl_uchar)
DECLARE_BARRIER_TEST(float, cl_float) DECLARE_BARRIER_TEST(float, cl_float)
/*
int test_buffer_half_read(cl_device_group device, cl_device id, cl_context context, int num_elements)
{
cl_mem buffers[1];
float *outptr;
cl_program program[1];
cl_kernel kernel[1];
void *values[1];
size_t sizes[1] = { sizeof(cl_buffer) };
uint threads[1];
int err;
int i;
size_t ptrSize; // sizeof(half)
ptrSize = sizeof(cl_float)/2;
outptr = (float *)malloc(ptrSize * num_elements);
buffers[0] = clCreateBuffer(device, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSize * num_elements, NULL);
if( !buffers[0] ){
log_error("clCreateBuffer failed\n");
return -1;
}
err = create_program_and_kernel(device, buffer_read_half_kernel_code, "test_buffer_read_half", &program[0], &kernel[0]);
if( err ){
log_error( " Error creating program for half\n" );
clReleaseMemObject(buffers[0]);
free( (void *)outptr );
return -1;
}
values[0] = buffers[0];
err = clSetKernelArgs(context, kernel[0], 1, NULL, &(values[i]), sizes);
if( err != CL_SUCCESS ){
log_error("clSetKernelArgs failed\n");
return -1;
}
global_work_size[0] = (cl_uint)num_elements;
err = clEnqueueNDRangeKernel(queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
if( err != CL_SUCCESS ){
log_error("clEnqueueNDRangeKernel failed\n");
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, ptrSize*num_elements, (void *)outptr, 0, NULL, NULL );
if( err != CL_SUCCESS ){
log_error("clEnqueueReadBuffer failed: %d\n", err);
return -1;
}
if( verify_read_half( outptr, num_elements >> 1 ) ){
log_error( "buffer_READ half test failed\n" );
err = -1;
}
else{
log_info( "buffer_READ half test passed\n" );
err = 0;
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
free( (void *)outptr );
return err;
} // end test_buffer_half_read()
*/
int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
cl_mem buffers[1]; cl_mem buffers[1];

View File

@@ -315,40 +315,51 @@ static const char *float_kernel_name[] = { "test_buffer_write_float", "test_buff
const char *buffer_write_half_kernel_code[] = { const char *buffer_write_half_kernel_code[] = {
"__kernel void test_buffer_write_half(__global half *src, __global float *dst)\n" "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half(__global half *src, __global half "
"*dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = vload_half( tid * 2, src );\n" " dst[tid] = src[tid];\n"
"}\n", "}\n",
"__kernel void test_buffer_write_half2(__global half2 *src, __global float2 *dst)\n" "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half2(__global half2 *src, __global half2 "
"*dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = vload_half2( tid * 2, src );\n" " dst[tid] = src[tid];\n"
"}\n", "}\n",
"__kernel void test_buffer_write_half4(__global half4 *src, __global float4 *dst)\n" "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half4(__global half4 *src, __global half4 "
"*dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = vload_half4( tid * 2, src );\n" " dst[tid] = src[tid];\n"
"}\n", "}\n",
"__kernel void test_buffer_write_half8(__global half8 *src, __global float8 *dst)\n" "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half8(__global half8 *src, __global half8 "
"*dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = vload_half8( tid * 2, src );\n" " dst[tid] = src[tid];\n"
"}\n", "}\n",
"__kernel void test_buffer_write_half16(__global half16 *src, __global float16 *dst)\n" "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half16(__global half16 *src, __global "
"half16 *dst)\n"
"{\n" "{\n"
" int tid = get_global_id(0);\n" " int tid = get_global_id(0);\n"
"\n" "\n"
" dst[tid] = vload_half16( tid * 2, src );\n" " dst[tid] = src[tid];\n"
"}\n" }; "}\n"
};
static const char *half_kernel_name[] = { "test_buffer_write_half", "test_buffer_write_half2", "test_buffer_write_half4", "test_buffer_write_half8", "test_buffer_write_half16" }; static const char *half_kernel_name[] = { "test_buffer_write_half", "test_buffer_write_half2", "test_buffer_write_half4", "test_buffer_write_half8", "test_buffer_write_half16" };
@@ -1398,6 +1409,7 @@ int test_buffer_write_float( cl_device_id deviceID, cl_context context, cl_comma
int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{ {
PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
float *inptr[5]; float *inptr[5];
size_t ptrSizes[5]; size_t ptrSizes[5];
int i, err; int i, err;
@@ -1422,8 +1434,10 @@ int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_comman
inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d ); inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
} }
err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5, (void**)inptr, err = test_buffer_write(deviceID, context, queue, num_elements,
buffer_write_half_kernel_code, half_kernel_name, foo, d ); sizeof(cl_half), (char *)"half", 5, (void **)inptr,
buffer_write_half_kernel_code, half_kernel_name,
foo, d);
for ( i = 0; i < 5; i++ ){ for ( i = 0; i < 5; i++ ){
align_free( (void *)inptr[i] ); align_free( (void *)inptr[i] );