Reimplement buffer tests (#1007)

* Reimplement buffer tests

Reintegrated and fixed test code for buffer tests buffer_read_half and
buffer_write_half tests.

Added mem_alloc_ref_flags test code, as was previously non-existent,
to test CL_MEM_ALLOC_HOST_PTR. This flag was otherwise untested and
as similar tests within the suite are used to test other cl_mem_flags
it has been assumed that this was the purpose of the test.

Fixes #439

Change-Id: I5accf986be7436d09377d0bfd7afd5de2235c329
Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_read_write_flags to a common function

Code under mem_*_flags tests have a lot of duplication, this is
the first step of moving test code to a common function.

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_write_only_flags test code to a common function

Code under mem_*_flags tests have a lot of duplication

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_read_only_flags test code to a common function

Code under mem_*_flags tests have a lot of duplication

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* move mem_copy_host_flags test code to a common function

Code under mem_*_flags tests have a lot of duplication, moved
mem_copy_host_flags code and rearranged function where appropriate

mem_ref_alloc_flags test also uses common function.

Contributes #439

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>

* Remove unused NOT_IMPLEMENTED_TEST macro

This define is not in use anymore, since tests have been
reimplemented in #439. Tests should be working and implemented
or not registered.

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>
This commit is contained in:
ellnor01
2020-11-06 11:33:36 +00:00
committed by GitHub
parent e8c55e59bc
commit 63f01be181
7 changed files with 260 additions and 586 deletions

View File

@@ -829,17 +829,8 @@ test_status callSingleTestFunction(test_definition test,
}
else
{
int ret = test.func(
deviceToUse, context, queue,
numElementsToUse); // test_threaded_function( ptr_basefn_list[i],
// group, context, num_elements);
if (ret == TEST_NOT_IMPLEMENTED)
{
/* Tests can also let us know they're not implemented yet */
log_info("%s test currently not implemented\n", test.name);
status = TEST_SKIP;
}
else if (ret == TEST_SKIPPED_ITSELF)
int ret = test.func(deviceToUse, context, queue, numElementsToUse);
if (ret == TEST_SKIPPED_ITSELF)
{
/* Tests can also let us know they're not supported by the
* implementation */

View File

@@ -64,10 +64,6 @@ Version get_device_cl_version(cl_device_id device);
{ \
test_##fn, #fn, ver \
}
#define NOT_IMPLEMENTED_TEST(fn) \
{ \
NULL, #fn, Version(0, 0) \
}
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

View File

@@ -22,7 +22,6 @@
#include <CL/opencl.h>
#endif
#define TEST_NOT_IMPLEMENTED -99
#define TEST_SKIPPED_ITSELF -100
typedef int (*basefn)(cl_device_id deviceID, cl_context context,

View File

@@ -22,104 +22,104 @@
#include "harness/testHarness.h"
test_definition test_list[] = {
ADD_TEST( buffer_read_async_int ),
ADD_TEST( buffer_read_async_uint ),
ADD_TEST( buffer_read_async_long ),
ADD_TEST( buffer_read_async_ulong ),
ADD_TEST( buffer_read_async_short ),
ADD_TEST( buffer_read_async_ushort ),
ADD_TEST( buffer_read_async_char ),
ADD_TEST( buffer_read_async_uchar ),
ADD_TEST( buffer_read_async_float ),
ADD_TEST( buffer_read_array_barrier_int ),
ADD_TEST( buffer_read_array_barrier_uint ),
ADD_TEST( buffer_read_array_barrier_long ),
ADD_TEST( buffer_read_array_barrier_ulong ),
ADD_TEST( buffer_read_array_barrier_short ),
ADD_TEST( buffer_read_array_barrier_ushort ),
ADD_TEST( buffer_read_array_barrier_char ),
ADD_TEST( buffer_read_array_barrier_uchar ),
ADD_TEST( buffer_read_array_barrier_float ),
ADD_TEST( buffer_read_int ),
ADD_TEST( buffer_read_uint ),
ADD_TEST( buffer_read_long ),
ADD_TEST( buffer_read_ulong ),
ADD_TEST( buffer_read_short ),
ADD_TEST( buffer_read_ushort ),
ADD_TEST( buffer_read_float ),
NOT_IMPLEMENTED_TEST( buffer_read_half ),
ADD_TEST( buffer_read_char ),
ADD_TEST( buffer_read_uchar ),
ADD_TEST( buffer_read_struct ),
ADD_TEST( buffer_read_random_size ),
ADD_TEST( buffer_map_read_int ),
ADD_TEST( buffer_map_read_uint ),
ADD_TEST( buffer_map_read_long ),
ADD_TEST( buffer_map_read_ulong ),
ADD_TEST( buffer_map_read_short ),
ADD_TEST( buffer_map_read_ushort ),
ADD_TEST( buffer_map_read_char ),
ADD_TEST( buffer_map_read_uchar ),
ADD_TEST( buffer_map_read_float ),
ADD_TEST( buffer_map_read_struct ),
ADD_TEST(buffer_read_async_int),
ADD_TEST(buffer_read_async_uint),
ADD_TEST(buffer_read_async_long),
ADD_TEST(buffer_read_async_ulong),
ADD_TEST(buffer_read_async_short),
ADD_TEST(buffer_read_async_ushort),
ADD_TEST(buffer_read_async_char),
ADD_TEST(buffer_read_async_uchar),
ADD_TEST(buffer_read_async_float),
ADD_TEST(buffer_read_array_barrier_int),
ADD_TEST(buffer_read_array_barrier_uint),
ADD_TEST(buffer_read_array_barrier_long),
ADD_TEST(buffer_read_array_barrier_ulong),
ADD_TEST(buffer_read_array_barrier_short),
ADD_TEST(buffer_read_array_barrier_ushort),
ADD_TEST(buffer_read_array_barrier_char),
ADD_TEST(buffer_read_array_barrier_uchar),
ADD_TEST(buffer_read_array_barrier_float),
ADD_TEST(buffer_read_int),
ADD_TEST(buffer_read_uint),
ADD_TEST(buffer_read_long),
ADD_TEST(buffer_read_ulong),
ADD_TEST(buffer_read_short),
ADD_TEST(buffer_read_ushort),
ADD_TEST(buffer_read_float),
ADD_TEST(buffer_read_half),
ADD_TEST(buffer_read_char),
ADD_TEST(buffer_read_uchar),
ADD_TEST(buffer_read_struct),
ADD_TEST(buffer_read_random_size),
ADD_TEST(buffer_map_read_int),
ADD_TEST(buffer_map_read_uint),
ADD_TEST(buffer_map_read_long),
ADD_TEST(buffer_map_read_ulong),
ADD_TEST(buffer_map_read_short),
ADD_TEST(buffer_map_read_ushort),
ADD_TEST(buffer_map_read_char),
ADD_TEST(buffer_map_read_uchar),
ADD_TEST(buffer_map_read_float),
ADD_TEST(buffer_map_read_struct),
ADD_TEST( buffer_map_write_int ),
ADD_TEST( buffer_map_write_uint ),
ADD_TEST( buffer_map_write_long ),
ADD_TEST( buffer_map_write_ulong ),
ADD_TEST( buffer_map_write_short ),
ADD_TEST( buffer_map_write_ushort ),
ADD_TEST( buffer_map_write_char ),
ADD_TEST( buffer_map_write_uchar ),
ADD_TEST( buffer_map_write_float ),
ADD_TEST( buffer_map_write_struct ),
ADD_TEST(buffer_map_write_int),
ADD_TEST(buffer_map_write_uint),
ADD_TEST(buffer_map_write_long),
ADD_TEST(buffer_map_write_ulong),
ADD_TEST(buffer_map_write_short),
ADD_TEST(buffer_map_write_ushort),
ADD_TEST(buffer_map_write_char),
ADD_TEST(buffer_map_write_uchar),
ADD_TEST(buffer_map_write_float),
ADD_TEST(buffer_map_write_struct),
ADD_TEST( buffer_write_int ),
ADD_TEST( buffer_write_uint ),
ADD_TEST( buffer_write_short ),
ADD_TEST( buffer_write_ushort ),
ADD_TEST( buffer_write_char ),
ADD_TEST( buffer_write_uchar ),
ADD_TEST( buffer_write_float ),
NOT_IMPLEMENTED_TEST( buffer_write_half ),
ADD_TEST( buffer_write_long ),
ADD_TEST( buffer_write_ulong ),
ADD_TEST( buffer_write_struct ),
ADD_TEST( buffer_write_async_int ),
ADD_TEST( buffer_write_async_uint ),
ADD_TEST( buffer_write_async_short ),
ADD_TEST( buffer_write_async_ushort ),
ADD_TEST( buffer_write_async_char ),
ADD_TEST( buffer_write_async_uchar ),
ADD_TEST( buffer_write_async_float ),
ADD_TEST( buffer_write_async_long ),
ADD_TEST( buffer_write_async_ulong ),
ADD_TEST( buffer_copy ),
ADD_TEST( buffer_partial_copy ),
ADD_TEST( mem_read_write_flags ),
ADD_TEST( mem_write_only_flags ),
ADD_TEST( mem_read_only_flags ),
ADD_TEST( mem_copy_host_flags ),
NOT_IMPLEMENTED_TEST( mem_alloc_ref_flags ),
ADD_TEST( array_info_size ),
ADD_TEST(buffer_write_int),
ADD_TEST(buffer_write_uint),
ADD_TEST(buffer_write_short),
ADD_TEST(buffer_write_ushort),
ADD_TEST(buffer_write_char),
ADD_TEST(buffer_write_uchar),
ADD_TEST(buffer_write_float),
ADD_TEST(buffer_write_half),
ADD_TEST(buffer_write_long),
ADD_TEST(buffer_write_ulong),
ADD_TEST(buffer_write_struct),
ADD_TEST(buffer_write_async_int),
ADD_TEST(buffer_write_async_uint),
ADD_TEST(buffer_write_async_short),
ADD_TEST(buffer_write_async_ushort),
ADD_TEST(buffer_write_async_char),
ADD_TEST(buffer_write_async_uchar),
ADD_TEST(buffer_write_async_float),
ADD_TEST(buffer_write_async_long),
ADD_TEST(buffer_write_async_ulong),
ADD_TEST(buffer_copy),
ADD_TEST(buffer_partial_copy),
ADD_TEST(mem_read_write_flags),
ADD_TEST(mem_write_only_flags),
ADD_TEST(mem_read_only_flags),
ADD_TEST(mem_copy_host_flags),
ADD_TEST(mem_alloc_ref_flags),
ADD_TEST(array_info_size),
ADD_TEST( sub_buffers_read_write ),
ADD_TEST( sub_buffers_read_write_dual_devices ),
ADD_TEST( sub_buffers_overlapping ),
ADD_TEST(sub_buffers_read_write),
ADD_TEST(sub_buffers_read_write_dual_devices),
ADD_TEST(sub_buffers_overlapping),
ADD_TEST( buffer_fill_int ),
ADD_TEST( buffer_fill_uint ),
ADD_TEST( buffer_fill_short ),
ADD_TEST( buffer_fill_ushort ),
ADD_TEST( buffer_fill_char ),
ADD_TEST( buffer_fill_uchar ),
ADD_TEST( buffer_fill_long ),
ADD_TEST( buffer_fill_ulong ),
ADD_TEST( buffer_fill_float ),
ADD_TEST( buffer_fill_struct ),
ADD_TEST(buffer_fill_int),
ADD_TEST(buffer_fill_uint),
ADD_TEST(buffer_fill_short),
ADD_TEST(buffer_fill_ushort),
ADD_TEST(buffer_fill_char),
ADD_TEST(buffer_fill_uchar),
ADD_TEST(buffer_fill_long),
ADD_TEST(buffer_fill_ulong),
ADD_TEST(buffer_fill_float),
ADD_TEST(buffer_fill_struct),
ADD_TEST( buffer_migrate ),
ADD_TEST( image_migrate ),
ADD_TEST(buffer_migrate),
ADD_TEST(image_migrate),
};
const int test_num = ARRAY_SIZE( test_list );

View File

@@ -39,12 +39,12 @@ const char *mem_read_write_kernel_code =
"}\n";
const char *mem_read_kernel_code =
"__kernel void test_mem_read(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid]+1;\n"
"}\n";
"__kernel void test_mem_read(__global int *dst, __global int *src)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid]+1;\n"
"}\n";
const char *mem_write_kernel_code =
"__kernel void test_mem_write(__global int *dst)\n"
@@ -68,13 +68,14 @@ static int verify_mem( int *outptr, int n )
}
int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
int test_mem_flags(cl_context context, cl_command_queue queue, int num_elements,
cl_mem_flags flags, const char **kernel_program,
const char *kernel_name)
{
cl_mem buffers[1];
clMemWrapper buffers[2];
cl_int *inptr, *outptr;
cl_program program[1];
cl_kernel kernel[1];
clProgramWrapper program;
clKernelWrapper kernel;
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
@@ -83,443 +84,177 @@ int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_com
int i;
size_t min_alignment = get_min_alignment(context);
bool test_read_only = (flags & CL_MEM_READ_ONLY) != 0;
bool test_write_only = (flags & CL_MEM_WRITE_ONLY) != 0;
bool copy_host_ptr = (flags & CL_MEM_COPY_HOST_PTR) != 0;
global_work_size[0] = (cl_uint)num_elements;
inptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
if (!inptr)
{
log_error(" unable to allocate %d bytes of memory\n",
(int)sizeof(cl_int) * num_elements);
return -1;
}
outptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment);
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, NULL, &err);
if (err != CL_SUCCESS) {
print_error( err, "clCreateBuffer failed");
align_free( (void *)outptr );
align_free( (void *)inptr );
if (!outptr)
{
log_error(" unable to allocate %d bytes of memory\n",
(int)sizeof(cl_int) * num_elements);
align_free((void *)inptr);
return -1;
}
for (i=0; i<num_elements; i++)
inptr[i] = i;
for (i = 0; i < num_elements; i++) inptr[i] = i;
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
if (err != CL_SUCCESS) {
print_error( err, "clEnqueueWriteBuffer failed");
clReleaseMemObject( buffers[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
buffers[0] = clCreateBuffer(context, flags, sizeof(cl_int) * num_elements,
copy_host_ptr ? inptr : NULL, &err);
if (err != CL_SUCCESS)
{
print_error(err, "clCreateBuffer failed");
align_free((void *)outptr);
align_free((void *)inptr);
return -1;
}
if (!copy_host_ptr)
{
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
sizeof(cl_int) * num_elements, (void *)inptr,
0, NULL, NULL);
if (err != CL_SUCCESS)
{
print_error(err, "clEnqueueWriteBuffer failed");
align_free((void *)outptr);
align_free((void *)inptr);
return -1;
}
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" );
if (test_read_only)
{
/* The read only buffer for mem_read_only_flags should be created above
with the correct flags as in other tests. However to make later test
code simpler, the additional read_write buffer required is stored as
the first buffer */
buffers[1] = buffers[0];
buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int) * num_elements, NULL, &err);
if (err != CL_SUCCESS)
{
print_error(err, " clCreateBuffer failed \n");
align_free((void *)inptr);
align_free((void *)outptr);
return -1;
}
}
err = create_single_kernel_helper(context, &program, &kernel, 1,
kernel_program, kernel_name);
if (err){
clReleaseMemObject( buffers[0] );
print_error(err, "creating kernel failed");
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
err = get_max_common_work_group_size(context, kernel, global_work_size[0],
&local_work_size[0]);
test_error( err, "Unable to get work group size to use" );
#endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&buffers[0]);
if (test_read_only && (err == CL_SUCCESS))
{
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&buffers[1]);
}
if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArg failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
local_work_size, 0, NULL, NULL);
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, NULL,
0, NULL, NULL);
#endif
if (err != CL_SUCCESS){
log_error("clEnqueueNDRangeKernel failed\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
sizeof(cl_int) * num_elements, (void *)outptr, 0,
NULL, NULL);
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
if (verify_mem(outptr, num_elements)){
log_error("buffer_MEM_READ_WRITE test failed\n");
err = -1;
}
else{
log_info("buffer_MEM_READ_WRITE test passed\n");
err = 0;
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return err;
} // end test_mem_read_write()
int test_mem_write_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
cl_mem buffers[1];
int *inptr, *outptr;
cl_program program[1];
cl_kernel kernel[1];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! inptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
return -1;
}
outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! outptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
align_free( (void *)inptr );
return -1;
}
buffers[0] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
if (err != CL_SUCCESS)
if (!test_write_only)
{
print_error(err, "clCreateBuffer failed\n");
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
for (i=0; i<num_elements; i++)
inptr[i] = i;
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
if (err != CL_SUCCESS){
print_error( err, "clEnqueueWriteBuffer failed" );
clReleaseMemObject( buffers[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_write_kernel_code, "test_mem_write" );
if (err){
clReleaseMemObject( buffers[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
#endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArg failed");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueNDRangeKernel failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
if ( err != CL_SUCCESS ){
print_error( err, "Error reading array" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
if (verify_mem(outptr, num_elements))
{
log_error("test failed\n");
err = -1;
}
else
{
log_info("test passed\n");
err = 0;
}
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)outptr );
align_free( (void *)inptr );
return err;
} // end test_mem_write()
} // end test_mem_flags()
int test_mem_read_write_flags(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
return test_mem_flags(context, queue, num_elements, CL_MEM_READ_WRITE,
&mem_read_write_kernel_code, "test_mem_read_write");
}
int test_mem_write_only_flags(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
return test_mem_flags(context, queue, num_elements, CL_MEM_WRITE_ONLY,
&mem_write_kernel_code, "test_mem_write");
}
int test_mem_read_only_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
cl_mem buffers[2];
int *inptr, *outptr;
cl_program program[1];
cl_kernel kernel[1];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
inptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! inptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
return -1;
}
outptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! outptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
align_free( (void *)inptr );
return -1;
}
buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * num_elements, NULL, &err);
if ( err != CL_SUCCESS ){
print_error(err, " clCreateBuffer failed to create READ_ONLY array\n" );
align_free( (void *)outptr );
align_free( (void *)inptr );
return -1;
}
for (i=0; i<num_elements; i++)
inptr[i] = i;
buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int) * num_elements, NULL, &err);
if ( err != CL_SUCCESS ){
print_error(err, " clCreateBuffer failed to create MEM_ALLOC_GLOBAL_POOL array\n" );
clReleaseMemObject( buffers[0]) ;
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)inptr, 0, NULL, NULL);
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueWriteBuffer() failed");
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_kernel_code, "test_mem_read" );
if ( err ){
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
#endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&buffers[1] );
if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArgs failed" );
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if (err != CL_SUCCESS){
print_error( err, "clEnqueueNDRangeKernel failed" );
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[1], true, 0, sizeof(cl_int)*num_elements, (void *)outptr, 0, NULL, NULL );
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return -1;
}
if (verify_mem(outptr, num_elements)){
log_error( " CL_MEM_READ_ONLY test failed\n" );
err = -1;
}
else{
log_info( " CL_MEM_READ_ONLY test passed\n" );
err = 0;
}
// cleanup
clReleaseMemObject( buffers[1]) ;
clReleaseMemObject( buffers[0]) ;
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)inptr );
align_free( (void *)outptr );
return err;
} // end test_mem_read()
return test_mem_flags(context, queue, num_elements, CL_MEM_READ_ONLY,
&mem_read_kernel_code, "test_mem_read");
}
int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
cl_mem buffers[1];
int *ptr;
cl_program program[1];
cl_kernel kernel[1];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
ptr = (int *)align_malloc( sizeof(cl_int) * num_elements, min_alignment);
if ( ! ptr ){
log_error( " unable to allocate %d bytes of memory\n", (int)sizeof(cl_int) * num_elements );
return -1;
}
for (i=0; i<num_elements; i++)
ptr[i] = i;
buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, (void *)ptr, &err);
if (err != CL_SUCCESS){
print_error(err, "clCreateBuffer failed for CL_MEM_COPY_HOST_PTR\n");
align_free( (void *)ptr );
return -1;
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &mem_read_write_kernel_code, "test_mem_read_write" );
if (err){
clReleaseMemObject( buffers[0] );
align_free( (void *)ptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
#endif
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
if (err != CL_SUCCESS){
log_error("clSetKernelArgs failed\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if (err != CL_SUCCESS){
log_error("clEnqueueNDRangeKernel failed\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, sizeof(cl_int)*num_elements, (void *)ptr, 0, NULL, NULL );
if (err != CL_SUCCESS){
log_error("CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_CONSTANT_POOL failed.\n");
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return -1;
}
if ( verify_mem( ptr, num_elements ) ){
log_error("CL_MEM_COPY_HOST_PTR test failed\n");
err = -1;
}
else{
log_info("CL_MEM_COPY_HOST_PTR test passed\n");
err = 0;
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( (void *)ptr );
return err;
} // end test_mem_copy_host_flags()
return test_mem_flags(context, queue, num_elements,
CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE,
&mem_read_write_kernel_code, "test_mem_read_write");
}
int test_mem_alloc_ref_flags(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
return test_mem_flags(context, queue, num_elements,
CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
&mem_read_write_kernel_code, "test_mem_read_write");
}

View File

@@ -21,6 +21,7 @@
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <CL/cl_half.h>
#include "procs.h"
@@ -325,6 +326,7 @@ static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffe
static const char *buffer_read_half_kernel_code[] = {
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half(__global half *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
@@ -332,6 +334,7 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n"
"}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half2(__global half2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
@@ -339,6 +342,7 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n"
"}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half4(__global half4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
@@ -346,6 +350,7 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n"
"}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half8(__global half8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
@@ -353,12 +358,14 @@ static const char *buffer_read_half_kernel_code[] = {
" dst[tid] = (half)119;\n"
"}\n",
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_read_half16(__global half16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (half)119;\n"
"}\n" };
"}\n"
};
static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
@@ -557,11 +564,11 @@ static int verify_read_float( void *ptr, int n )
static int verify_read_half( void *ptr, int n )
{
int i;
float *outptr = (float *)ptr; // FIXME: should this be cl_half_float?
cl_half *outptr = (cl_half *)ptr;
for ( i = 0; i < n / 2; i++ ){
if ( outptr[i] != TEST_PRIME_HALF )
return -1;
for (i = 0; i < n; i++)
{
if (cl_half_to_float(outptr[i]) != TEST_PRIME_HALF) return -1;
}
return 0;
@@ -1099,8 +1106,10 @@ DECLARE_READ_TEST(float, cl_float)
DECLARE_READ_TEST(char, cl_char)
DECLARE_READ_TEST(uchar, cl_uchar)
int test_buffer_half_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
int test_buffer_read_half(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
}
@@ -1141,76 +1150,6 @@ DECLARE_BARRIER_TEST(char, cl_char)
DECLARE_BARRIER_TEST(uchar, cl_uchar)
DECLARE_BARRIER_TEST(float, cl_float)
/*
int test_buffer_half_read(cl_device_group device, cl_device id, cl_context context, int num_elements)
{
cl_mem buffers[1];
float *outptr;
cl_program program[1];
cl_kernel kernel[1];
void *values[1];
size_t sizes[1] = { sizeof(cl_buffer) };
uint threads[1];
int err;
int i;
size_t ptrSize; // sizeof(half)
ptrSize = sizeof(cl_float)/2;
outptr = (float *)malloc(ptrSize * num_elements);
buffers[0] = clCreateBuffer(device, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSize * num_elements, NULL);
if( !buffers[0] ){
log_error("clCreateBuffer failed\n");
return -1;
}
err = create_program_and_kernel(device, buffer_read_half_kernel_code, "test_buffer_read_half", &program[0], &kernel[0]);
if( err ){
log_error( " Error creating program for half\n" );
clReleaseMemObject(buffers[0]);
free( (void *)outptr );
return -1;
}
values[0] = buffers[0];
err = clSetKernelArgs(context, kernel[0], 1, NULL, &(values[i]), sizes);
if( err != CL_SUCCESS ){
log_error("clSetKernelArgs failed\n");
return -1;
}
global_work_size[0] = (cl_uint)num_elements;
err = clEnqueueNDRangeKernel(queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
if( err != CL_SUCCESS ){
log_error("clEnqueueNDRangeKernel failed\n");
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, ptrSize*num_elements, (void *)outptr, 0, NULL, NULL );
if( err != CL_SUCCESS ){
log_error("clEnqueueReadBuffer failed: %d\n", err);
return -1;
}
if( verify_read_half( outptr, num_elements >> 1 ) ){
log_error( "buffer_READ half test failed\n" );
err = -1;
}
else{
log_info( "buffer_READ half test passed\n" );
err = 0;
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
free( (void *)outptr );
return err;
} // end test_buffer_half_read()
*/
int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem buffers[1];

View File

@@ -315,40 +315,51 @@ static const char *float_kernel_name[] = { "test_buffer_write_float", "test_buff
const char *buffer_write_half_kernel_code[] = {
"__kernel void test_buffer_write_half(__global half *src, __global float *dst)\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half(__global half *src, __global half "
"*dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = vload_half( tid * 2, src );\n"
" dst[tid] = src[tid];\n"
"}\n",
"__kernel void test_buffer_write_half2(__global half2 *src, __global float2 *dst)\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half2(__global half2 *src, __global half2 "
"*dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = vload_half2( tid * 2, src );\n"
" dst[tid] = src[tid];\n"
"}\n",
"__kernel void test_buffer_write_half4(__global half4 *src, __global float4 *dst)\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half4(__global half4 *src, __global half4 "
"*dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = vload_half4( tid * 2, src );\n"
" dst[tid] = src[tid];\n"
"}\n",
"__kernel void test_buffer_write_half8(__global half8 *src, __global float8 *dst)\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half8(__global half8 *src, __global half8 "
"*dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = vload_half8( tid * 2, src );\n"
" dst[tid] = src[tid];\n"
"}\n",
"__kernel void test_buffer_write_half16(__global half16 *src, __global float16 *dst)\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"__kernel void test_buffer_write_half16(__global half16 *src, __global "
"half16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = vload_half16( tid * 2, src );\n"
"}\n" };
" dst[tid] = src[tid];\n"
"}\n"
};
static const char *half_kernel_name[] = { "test_buffer_write_half", "test_buffer_write_half2", "test_buffer_write_half4", "test_buffer_write_half8", "test_buffer_write_half16" };
@@ -1398,6 +1409,7 @@ int test_buffer_write_float( cl_device_id deviceID, cl_context context, cl_comma
int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
float *inptr[5];
size_t ptrSizes[5];
int i, err;
@@ -1422,8 +1434,10 @@ int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_comman
inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
}
err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5, (void**)inptr,
buffer_write_half_kernel_code, half_kernel_name, foo, d );
err = test_buffer_write(deviceID, context, queue, num_elements,
sizeof(cl_half), (char *)"half", 5, (void **)inptr,
buffer_write_half_kernel_code, half_kernel_name,
foo, d);
for ( i = 0; i < 5; i++ ){
align_free( (void *)inptr[i] );