Files
OpenCL-CTS/test_conformance/buffers/test_buffer_read.c
Kevin Petit d8733efc0f Synchronise with Khronos-private Gitlab branch
The maintenance of the conformance tests is moving to Github.

This commit contains all the changes that have been done in
Gitlab since the first public release of the conformance tests.

Signed-off-by: Kevin Petit <kevin.petit@arm.com>
2019-03-05 16:23:49 +00:00

1464 lines
47 KiB
C

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
//#include <stdbool.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
//#define HK_DO_NOT_RUN_SHORT_ASYNC 1
//#define HK_DO_NOT_RUN_USHORT_ASYNC 1
//#define HK_DO_NOT_RUN_CHAR_ASYNC 1
//#define HK_DO_NOT_RUN_UCHAR_ASYNC 1
#define TEST_PRIME_INT ((1<<16)+1)
#define TEST_PRIME_UINT ((1U<<16)+1U)
#define TEST_PRIME_LONG ((1LL<<32)+1LL)
#define TEST_PRIME_ULONG ((1ULL<<32)+1ULL)
#define TEST_PRIME_SHORT ((1S<<8)+1S)
#define TEST_PRIME_FLOAT (float)3.40282346638528860e+38
#define TEST_PRIME_HALF 119.f
#define TEST_BOOL true
#define TEST_PRIME_CHAR 0x77
#ifndef ulong
typedef unsigned long ulong;
#endif
#ifndef uchar
typedef unsigned char uchar;
#endif
#ifndef TestStruct
typedef struct{
int a;
float b;
} TestStruct;
#endif
//--- the code for the kernel executables
static const char *buffer_read_int_kernel_code[] = {
"__kernel void test_buffer_read_int(__global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_buffer_read_int2(__global int2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_buffer_read_int4(__global int4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_buffer_read_int8(__global int8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n",
"__kernel void test_buffer_read_int16(__global int16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1<<16)+1);\n"
"}\n" };
static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" };
static const char *buffer_read_uint_kernel_code[] = {
"__kernel void test_buffer_read_uint(__global uint *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_buffer_read_uint2(__global uint2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_buffer_read_uint4(__global uint4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_buffer_read_uint8(__global uint8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n",
"__kernel void test_buffer_read_uint16(__global uint16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1U<<16)+1U);\n"
"}\n" };
static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" };
static const char *buffer_read_long_kernel_code[] = {
"__kernel void test_buffer_read_long(__global long *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_buffer_read_long2(__global long2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_buffer_read_long4(__global long4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_buffer_read_long8(__global long8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n",
"__kernel void test_buffer_read_long16(__global long16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1L<<32)+1L);\n"
"}\n" };
static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" };
static const char *buffer_read_ulong_kernel_code[] = {
"__kernel void test_buffer_read_ulong(__global ulong *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n",
"__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = ((1UL<<32)+1UL);\n"
"}\n" };
static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" };
static const char *buffer_read_short_kernel_code[] = {
"__kernel void test_buffer_read_short(__global short *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_short2(__global short2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_short4(__global short4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_short8(__global short8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_short16(__global short16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (short)((1<<8)+1);\n"
"}\n" };
static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" };
static const char *buffer_read_ushort_kernel_code[] = {
"__kernel void test_buffer_read_ushort(__global ushort *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n",
"__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (ushort)((1<<8)+1);\n"
"}\n" };
static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" };
static const char *buffer_read_float_kernel_code[] = {
"__kernel void test_buffer_read_float(__global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_buffer_read_float2(__global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_buffer_read_float4(__global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_buffer_read_float8(__global float8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n",
"__kernel void test_buffer_read_float16(__global float16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)3.40282346638528860e+38;\n"
"}\n" };
static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" };
static const char *buffer_read_half_kernel_code[] = {
"__kernel void test_buffer_read_half(__global half *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (half)119;\n"
"}\n",
"__kernel void test_buffer_read_half2(__global half2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (half)119;\n"
"}\n",
"__kernel void test_buffer_read_half4(__global half4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (half)119;\n"
"}\n",
"__kernel void test_buffer_read_half8(__global half8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (half)119;\n"
"}\n",
"__kernel void test_buffer_read_half16(__global half16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (half)119;\n"
"}\n" };
static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
static const char *buffer_read_char_kernel_code[] = {
"__kernel void test_buffer_read_char(__global char *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_buffer_read_char2(__global char2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_buffer_read_char4(__global char4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_buffer_read_char8(__global char8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n",
"__kernel void test_buffer_read_char16(__global char16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (char)'w';\n"
"}\n" };
static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" };
static const char *buffer_read_uchar_kernel_code[] = {
"__kernel void test_buffer_read_uchar(__global uchar *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = 'w';\n"
"}\n",
"__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n",
"__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n",
"__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n",
"__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (uchar)'w';\n"
"}\n" };
static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" };
static const char *buffer_read_struct_kernel_code =
"typedef struct{\n"
"int a;\n"
"float b;\n"
"} TestStruct;\n"
"__kernel void test_buffer_read_struct(__global TestStruct *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid].a = ((1<<16)+1);\n"
" dst[tid].b = (float)3.40282346638528860e+38;\n"
"}\n";
//--- the verify functions
static int verify_read_int(void *ptr, int n)
{
int i;
cl_int *outptr = (cl_int *)ptr;
for (i=0; i<n; i++){
if ( outptr[i] != TEST_PRIME_INT )
return -1;
}
return 0;
}
static int verify_read_uint(void *ptr, int n)
{
int i;
cl_uint *outptr = (cl_uint *)ptr;
for (i=0; i<n; i++){
if ( outptr[i] != TEST_PRIME_UINT )
return -1;
}
return 0;
}
static int verify_read_long(void *ptr, int n)
{
int i;
cl_long *outptr = (cl_long *)ptr;
for (i=0; i<n; i++){
if ( outptr[i] != TEST_PRIME_LONG )
return -1;
}
return 0;
}
static int verify_read_ulong(void *ptr, int n)
{
int i;
cl_ulong *outptr = (cl_ulong *)ptr;
for (i=0; i<n; i++){
if ( outptr[i] != TEST_PRIME_ULONG )
return -1;
}
return 0;
}
static int verify_read_short(void *ptr, int n)
{
int i;
cl_short *outptr = (cl_short *)ptr;
for (i=0; i<n; i++){
if ( outptr[i] != (cl_short)((1<<8)+1) )
return -1;
}
return 0;
}
static int verify_read_ushort(void *ptr, int n)
{
int i;
cl_ushort *outptr = (cl_ushort *)ptr;
for (i=0; i<n; i++){
if ( outptr[i] != (cl_ushort)((1<<8)+1) )
return -1;
}
return 0;
}
static int verify_read_float( void *ptr, int n )
{
int i;
cl_float *outptr = (cl_float *)ptr;
for (i=0; i<n; i++){
if ( outptr[i] != TEST_PRIME_FLOAT )
return -1;
}
return 0;
}
static int verify_read_half( void *ptr, int n )
{
int i;
float *outptr = (float *)ptr; // FIXME: should this be cl_half_float?
for ( i = 0; i < n / 2; i++ ){
if ( outptr[i] != TEST_PRIME_HALF )
return -1;
}
return 0;
}
static int verify_read_char(void *ptr, int n)
{
int i;
cl_char *outptr = (cl_char *)ptr;
for (i=0; i<n; i++){
if ( outptr[i] != TEST_PRIME_CHAR )
return -1;
}
return 0;
}
static int verify_read_uchar(void *ptr, int n)
{
int i;
cl_uchar *outptr = (cl_uchar *)ptr;
for (i=0; i<n; i++){
if ( outptr[i] != TEST_PRIME_CHAR )
return -1;
}
return 0;
}
static int verify_read_struct(TestStruct *outptr, int n)
{
int i;
for (i=0; i<n; i++)
{
if ( ( outptr[i].a != TEST_PRIME_INT ) ||
( outptr[i].b != TEST_PRIME_FLOAT ) )
return -1;
}
return 0;
}
//----- the test functions
int test_buffer_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
{
cl_mem buffers[5];
void *outptr[5];
void *inptr[5];
cl_program program[5];
cl_kernel kernel[5];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i;
size_t ptrSizes[5];
int src_flag_id;
int total_errors = 0;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
ptrSizes[0] = size;
ptrSizes[1] = ptrSizes[0] << 1;
ptrSizes[2] = ptrSizes[1] << 1;
ptrSizes[3] = ptrSizes[2] << 1;
ptrSizes[4] = ptrSizes[3] << 1;
//skip devices that don't support long
if (! gHasLong && strstr(type,"long") )
{
log_info( "Device does not support 64-bit integers. Skipping test.\n" );
return CL_SUCCESS;
}
for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
for ( i = 0; i < loops; i++ ){
outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
if ( ! outptr[i] ){
log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
return -1;
}
inptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
if ( ! inptr[i] ){
log_error( " unable to allocate %d bytes for inptr\n", (int)( ptrSizes[i] * num_elements ) );
return -1;
}
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, inptr[i], &err);
else
buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);
if ( err != CL_SUCCESS ){
print_error(err, " clCreateBuffer failed\n" );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
if ( err ){
log_error("Creating program for %s\n", type);
print_error(err, " Error creating program " );
clReleaseMemObject(buffers[i]);
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArg failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueNDRangeKernel failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[i], CL_TRUE, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
if (fn(outptr[i], num_elements*(1<<i))){
log_error( " %s%d test failed\n", type, 1<<i );
total_errors++;
}
else{
log_info( " %s%d test passed\n", type, 1<<i );
}
err = clEnqueueReadBuffer( queue, buffers[i], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL );
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
if (fn(inptr[i], num_elements*(1<<i))){
log_error( " %s%d test failed in-place readback\n", type, 1<<i );
total_errors++;
}
else{
log_info( " %s%d test passed in-place readback\n", type, 1<<i );
}
// cleanup
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
}
} // mem flag
return total_errors;
} // end test_buffer_read()
int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
{
cl_mem buffers[5];
cl_program program[5];
cl_kernel kernel[5];
cl_event event;
void *outptr[5];
void *inptr[5];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i;
size_t lastIndex;
size_t ptrSizes[5];
int src_flag_id;
int total_errors = 0;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
ptrSizes[0] = size;
ptrSizes[1] = ptrSizes[0] << 1;
ptrSizes[2] = ptrSizes[1] << 1;
ptrSizes[3] = ptrSizes[2] << 1;
ptrSizes[4] = ptrSizes[3] << 1;
//skip devices that don't support long
if (! gHasLong && strstr(type,"long") )
{
log_info( "Device does not support 64-bit integers. Skipping test.\n" );
return CL_SUCCESS;
}
for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
for ( i = 0; i < loops; i++ ){
outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
if ( ! outptr[i] ){
log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
return -1;
}
memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
if ( ! inptr[i] ){
log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
return -1;
}
memset( inptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, inptr[i], &err);
else
buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);
if ( err != CL_SUCCESS ){
print_error(err, " clCreateBuffer failed\n" );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i]);
if ( err ){
log_error( " Error creating program for %s\n", type );
clReleaseMemObject( buffers[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArg failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueNDRangeKernel failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
err = clEnqueueReadBuffer( queue, buffers[i], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &event );
#ifdef CHECK_FOR_NON_WAIT
if ( ((uchar *)outptr[i])[lastIndex] ){
log_error( " clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
log_error( " Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
}
#endif
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
err = clWaitForEvents(1, &event );
if ( err != CL_SUCCESS ){
print_error( err, "clWaitForEvents() failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
if ( fn(outptr[i], num_elements*(1<<i)) ){
log_error( " %s%d test failed\n", type, 1<<i );
total_errors++;
}
else{
log_info( " %s%d test passed\n", type, 1<<i );
}
// cleanup
clReleaseEvent( event );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
}
} // mem flags
return total_errors;
} // end test_buffer_read_array_async()
int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
{
cl_mem buffers[5];
cl_program program[5];
cl_kernel kernel[5];
cl_event event;
void *outptr[5], *inptr[5];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i;
size_t lastIndex;
size_t ptrSizes[5];
int src_flag_id;
int total_errors = 0;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
ptrSizes[0] = size;
ptrSizes[1] = ptrSizes[0] << 1;
ptrSizes[2] = ptrSizes[1] << 1;
ptrSizes[3] = ptrSizes[2] << 1;
ptrSizes[4] = ptrSizes[3] << 1;
//skip devices that don't support long
if (! gHasLong && strstr(type,"long") )
{
log_info( "Device does not support 64-bit integers. Skipping test.\n" );
return CL_SUCCESS;
}
for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
for ( i = 0; i < loops; i++ ){
outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
if ( ! outptr[i] ){
log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
return -1;
}
memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
if ( ! inptr[i] ){
log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
return -1;
}
memset( inptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, inptr[i], &err);
else
buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);
if ( err != CL_SUCCESS ){
print_error(err, " clCreateBuffer failed\n" );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
if ( err ){
log_error( " Error creating program for %s\n", type );
clReleaseMemObject( buffers[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArgs failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueNDRangeKernel failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
err = clEnqueueReadBuffer( queue, buffers[i], false, 0, ptrSizes[i]*num_elements, (void *)(outptr[i]), 0, NULL, &event );
#ifdef CHECK_FOR_NON_WAIT
if ( ((uchar *)outptr[i])[lastIndex] ){
log_error( " clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
log_error( " Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
}
#endif
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
err = clEnqueueBarrierWithWaitList(queue, 0, NULL, NULL);
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueBarrierWithWaitList() failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
return -1;
}
err = clWaitForEvents(1, &event);
if ( err != CL_SUCCESS ){
print_error( err, "clWaitForEvents() failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
return -1;
}
if ( fn(outptr[i], num_elements*(1<<i)) ){
log_error(" %s%d test failed\n", type, 1<<i);
total_errors++;
}
else{
log_info(" %s%d test passed\n", type, 1<<i);
}
// cleanup
clReleaseEvent( event );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
align_free( inptr[i] );
}
} // cl_mem flags
return total_errors;
} // end test_buffer_read_array_barrier()
#define DECLARE_READ_TEST(type, realType) \
int test_buffer_read_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
{ \
return test_buffer_read( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
}
DECLARE_READ_TEST(int, cl_int)
DECLARE_READ_TEST(uint, cl_uint)
DECLARE_READ_TEST(long, cl_long)
DECLARE_READ_TEST(ulong, cl_ulong)
DECLARE_READ_TEST(short, cl_short)
DECLARE_READ_TEST(ushort, cl_ushort)
DECLARE_READ_TEST(float, cl_float)
DECLARE_READ_TEST(char, cl_char)
DECLARE_READ_TEST(uchar, cl_uchar)
int test_buffer_half_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
}
#define DECLARE_ASYNC_TEST(type, realType) \
int test_buffer_read_async_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
{ \
return test_buffer_read_async( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
}
DECLARE_ASYNC_TEST(char, cl_char)
DECLARE_ASYNC_TEST(uchar, cl_uchar)
DECLARE_ASYNC_TEST(short, cl_short)
DECLARE_ASYNC_TEST(ushort, cl_ushort)
DECLARE_ASYNC_TEST(int, cl_int)
DECLARE_ASYNC_TEST(uint, cl_uint)
DECLARE_ASYNC_TEST(long, cl_long)
DECLARE_ASYNC_TEST(ulong, cl_ulong)
DECLARE_ASYNC_TEST(float, cl_float)
#define DECLARE_BARRIER_TEST(type, realType) \
int test_buffer_read_array_barrier_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
{ \
return test_buffer_read_array_barrier( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
}
DECLARE_BARRIER_TEST(int, cl_int)
DECLARE_BARRIER_TEST(uint, cl_uint)
DECLARE_BARRIER_TEST(long, cl_long)
DECLARE_BARRIER_TEST(ulong, cl_ulong)
DECLARE_BARRIER_TEST(short, cl_short)
DECLARE_BARRIER_TEST(ushort, cl_ushort)
DECLARE_BARRIER_TEST(char, cl_char)
DECLARE_BARRIER_TEST(uchar, cl_uchar)
DECLARE_BARRIER_TEST(float, cl_float)
/*
int test_buffer_half_read(cl_device_group device, cl_device id, cl_context context, int num_elements)
{
cl_mem buffers[1];
float *outptr;
cl_program program[1];
cl_kernel kernel[1];
void *values[1];
size_t sizes[1] = { sizeof(cl_buffer) };
uint threads[1];
int err;
int i;
size_t ptrSize; // sizeof(half)
ptrSize = sizeof(cl_float)/2;
outptr = (float *)malloc(ptrSize * num_elements);
buffers[0] = clCreateBuffer(device, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSize * num_elements, NULL);
if( !buffers[0] ){
log_error("clCreateBuffer failed\n");
return -1;
}
err = create_program_and_kernel(device, buffer_read_half_kernel_code, "test_buffer_read_half", &program[0], &kernel[0]);
if( err ){
log_error( " Error creating program for half\n" );
clReleaseMemObject(buffers[0]);
free( (void *)outptr );
return -1;
}
values[0] = buffers[0];
err = clSetKernelArgs(context, kernel[0], 1, NULL, &(values[i]), sizes);
if( err != CL_SUCCESS ){
log_error("clSetKernelArgs failed\n");
return -1;
}
global_work_size[0] = (cl_uint)num_elements;
err = clEnqueueNDRangeKernel(queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
if( err != CL_SUCCESS ){
log_error("clEnqueueNDRangeKernel failed\n");
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, ptrSize*num_elements, (void *)outptr, 0, NULL, NULL );
if( err != CL_SUCCESS ){
log_error("clEnqueueReadBuffer failed: %d\n", err);
return -1;
}
if( verify_read_half( outptr, num_elements >> 1 ) ){
log_error( "buffer_READ half test failed\n" );
err = -1;
}
else{
log_info( "buffer_READ half test passed\n" );
err = 0;
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
free( (void *)outptr );
return err;
} // end test_buffer_half_read()
*/
int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
cl_mem buffers[1];
TestStruct *output_ptr;
cl_program program[1];
cl_kernel kernel[1];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
size_t objSize = sizeof(TestStruct);
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
output_ptr = (TestStruct*)align_malloc(objSize * num_elements, min_alignment);
if ( ! output_ptr ){
log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
return -1;
}
buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), objSize * num_elements, NULL , &err);
if ( err != CL_SUCCESS ){
print_error( err, " clCreateBuffer failed\n" );
align_free( output_ptr );
return -1;
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_struct_kernel_code, "test_buffer_read_struct" );
if ( err ){
clReleaseProgram( program[0] );
align_free( output_ptr );
return -1;
}
err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
if ( err != CL_SUCCESS){
print_error( err, "clSetKernelArg failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( output_ptr );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueNDRangeKernel failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( output_ptr );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
if ( err != CL_SUCCESS){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( output_ptr );
return -1;
}
if (verify_read_struct(output_ptr, num_elements)){
log_error(" struct test failed\n");
err = -1;
}
else{
log_info(" struct test passed\n");
err = 0;
}
// cleanup
clReleaseMemObject( buffers[0] );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
align_free( output_ptr );
return err;
}
static int testRandomReadSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_uint startOfRead, size_t sizeOfRead )
{
cl_mem buffers[3];
int *outptr[3];
cl_program program[3];
cl_kernel kernel[3];
size_t global_work_size[3];
#ifdef USE_LOCAL_WORK_GROUP
size_t local_work_size[3];
#endif
cl_int err;
int i, j;
size_t ptrSizes[3]; // sizeof(int), sizeof(int2), sizeof(int4)
int total_errors = 0;
size_t min_alignment = get_min_alignment(context);
global_work_size[0] = (cl_uint)num_elements;
ptrSizes[0] = sizeof(cl_int);
ptrSizes[1] = ptrSizes[0] << 1;
ptrSizes[2] = ptrSizes[1] << 1;
for ( i = 0; i < 3; i++ ){
outptr[i] = (int *)align_malloc( ptrSizes[i] * num_elements, min_alignment);
if ( ! outptr[i] ){
log_error( " Unable to allocate %d bytes for outptr[%d]\n", (int)(ptrSizes[i] * num_elements), i );
for ( j = 0; j < i; j++ ){
clReleaseMemObject( buffers[j] );
align_free( outptr[j] );
}
return -1;
}
buffers[i] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL, &err);
if ( err != CL_SUCCESS ){
print_error(err, " clCreateBuffer failed\n" );
for ( j = 0; j < i; j++ ){
clReleaseMemObject( buffers[j] );
align_free( outptr[j] );
}
align_free( outptr[i] );
return -1;
}
}
err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_int_kernel_code[0], "test_buffer_read_int" );
if ( err ){
log_error( " Error creating program for int\n" );
for ( i = 0; i < 3; i++ ){
clReleaseMemObject( buffers[i] );
align_free( outptr[i] );
}
return -1;
}
err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &buffer_read_int_kernel_code[1], "test_buffer_read_int2" );
if ( err ){
log_error( " Error creating program for int2\n" );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
for ( i = 0; i < 3; i++ ){
clReleaseMemObject( buffers[i] );
align_free( outptr[i] );
}
return -1;
}
err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &buffer_read_int_kernel_code[2], "test_buffer_read_int4" );
if ( err ){
log_error( " Error creating program for int4\n" );
clReleaseKernel( kernel[0] );
clReleaseProgram( program[0] );
clReleaseKernel( kernel[1] );
clReleaseProgram( program[1] );
for ( i = 0; i < 3; i++ ){
clReleaseMemObject( buffers[i] );
align_free( outptr[i] );
}
return -1;
}
for (i=0; i<3; i++){
err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
if ( err != CL_SUCCESS ){
print_error( err, "clSetKernelArgs failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
return -1;
}
#ifdef USE_LOCAL_WORK_GROUP
err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
test_error( err, "Unable to get work group size to use" );
err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
#else
err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
#endif
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueNDRangeKernel failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
return -1;
}
err = clEnqueueReadBuffer( queue, buffers[i], true, startOfRead*ptrSizes[i], ptrSizes[i]*sizeOfRead, (void *)(outptr[i]), 0, NULL, NULL );
if ( err != CL_SUCCESS ){
print_error( err, "clEnqueueReadBuffer failed" );
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
return -1;
}
if ( verify_read_int( outptr[i], (int)sizeOfRead*(1<<i) ) ){
log_error(" random size from %d, size: %d test failed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
total_errors++;
}
else{
log_info(" random size from %d, size: %d test passed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
}
// cleanup
clReleaseMemObject( buffers[i] );
clReleaseKernel( kernel[i] );
clReleaseProgram( program[i] );
align_free( outptr[i] );
}
return total_errors;
} // end testRandomReadSize()
int test_buffer_read_random_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int err = 0;
int i;
cl_uint start;
size_t size;
MTdata d = init_genrand( gRandomSeed );
// now test for random sizes of array being read
for ( i = 0; i < 8; i++ ){
start = (cl_uint)get_random_float( 0.f, (float)(num_elements - 8), d );
size = (size_t)get_random_float( 8.f, (float)(num_elements - start), d );
if (testRandomReadSize( deviceID, context, queue, num_elements, start, size ))
err++;
}
free_mtdata(d);
return err;
}