// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "harness/compat.h" #include #include #include #include #include #include #include "procs.h" //#define HK_DO_NOT_RUN_SHORT_ASYNC 1 //#define HK_DO_NOT_RUN_USHORT_ASYNC 1 //#define HK_DO_NOT_RUN_CHAR_ASYNC 1 //#define HK_DO_NOT_RUN_UCHAR_ASYNC 1 #define TEST_PRIME_INT ((1<<16)+1) #define TEST_PRIME_UINT ((1U<<16)+1U) #define TEST_PRIME_LONG ((1LL<<32)+1LL) #define TEST_PRIME_ULONG ((1ULL<<32)+1ULL) #define TEST_PRIME_SHORT ((1S<<8)+1S) #define TEST_PRIME_FLOAT (float)3.40282346638528860e+38 #define TEST_PRIME_HALF 119.f #define TEST_BOOL true #define TEST_PRIME_CHAR 0x77 #ifndef ulong typedef unsigned long ulong; #endif #ifndef uchar typedef unsigned char uchar; #endif #ifndef TestStruct typedef struct{ int a; float b; } TestStruct; #endif //--- the code for the kernel executables static const char *buffer_read_int_kernel_code[] = { "__kernel void test_buffer_read_int(__global int *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1<<16)+1);\n" "}\n", "__kernel void test_buffer_read_int2(__global int2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1<<16)+1);\n" "}\n", "__kernel void test_buffer_read_int4(__global int4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1<<16)+1);\n" "}\n", "__kernel void test_buffer_read_int8(__global int8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1<<16)+1);\n" "}\n", "__kernel void test_buffer_read_int16(__global int16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1<<16)+1);\n" "}\n" }; static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" }; static const char *buffer_read_uint_kernel_code[] = { "__kernel void test_buffer_read_uint(__global uint *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1U<<16)+1U);\n" "}\n", "__kernel void test_buffer_read_uint2(__global uint2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1U<<16)+1U);\n" "}\n", "__kernel void test_buffer_read_uint4(__global uint4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1U<<16)+1U);\n" "}\n", "__kernel void test_buffer_read_uint8(__global uint8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1U<<16)+1U);\n" "}\n", "__kernel void test_buffer_read_uint16(__global uint16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1U<<16)+1U);\n" "}\n" }; static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" }; static const char *buffer_read_long_kernel_code[] = { "__kernel void test_buffer_read_long(__global long *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1L<<32)+1L);\n" "}\n", "__kernel void test_buffer_read_long2(__global long2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1L<<32)+1L);\n" "}\n", "__kernel void test_buffer_read_long4(__global long4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1L<<32)+1L);\n" "}\n", "__kernel void test_buffer_read_long8(__global long8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1L<<32)+1L);\n" "}\n", "__kernel void test_buffer_read_long16(__global long16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1L<<32)+1L);\n" "}\n" }; static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" }; static const char *buffer_read_ulong_kernel_code[] = { "__kernel void test_buffer_read_ulong(__global ulong *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1UL<<32)+1UL);\n" "}\n", "__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1UL<<32)+1UL);\n" "}\n", "__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1UL<<32)+1UL);\n" "}\n", "__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1UL<<32)+1UL);\n" "}\n", "__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = ((1UL<<32)+1UL);\n" "}\n" }; static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" }; static const char *buffer_read_short_kernel_code[] = { "__kernel void test_buffer_read_short(__global short *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (short)((1<<8)+1);\n" "}\n", "__kernel void test_buffer_read_short2(__global short2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (short)((1<<8)+1);\n" "}\n", "__kernel void test_buffer_read_short4(__global short4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (short)((1<<8)+1);\n" "}\n", "__kernel void test_buffer_read_short8(__global short8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (short)((1<<8)+1);\n" "}\n", "__kernel void test_buffer_read_short16(__global short16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (short)((1<<8)+1);\n" "}\n" }; static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" }; static const char *buffer_read_ushort_kernel_code[] = { "__kernel void test_buffer_read_ushort(__global ushort *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (ushort)((1<<8)+1);\n" "}\n", "__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (ushort)((1<<8)+1);\n" "}\n", "__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (ushort)((1<<8)+1);\n" "}\n", "__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (ushort)((1<<8)+1);\n" "}\n", "__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (ushort)((1<<8)+1);\n" "}\n" }; static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" }; static const char *buffer_read_float_kernel_code[] = { "__kernel void test_buffer_read_float(__global float *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (float)3.40282346638528860e+38;\n" "}\n", "__kernel void test_buffer_read_float2(__global float2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (float)3.40282346638528860e+38;\n" "}\n", "__kernel void test_buffer_read_float4(__global float4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (float)3.40282346638528860e+38;\n" "}\n", "__kernel void test_buffer_read_float8(__global float8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (float)3.40282346638528860e+38;\n" "}\n", "__kernel void test_buffer_read_float16(__global float16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (float)3.40282346638528860e+38;\n" "}\n" }; static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" }; static const char *buffer_read_half_kernel_code[] = { "__kernel void test_buffer_read_half(__global half *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (half)119;\n" "}\n", "__kernel void test_buffer_read_half2(__global half2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (half)119;\n" "}\n", "__kernel void test_buffer_read_half4(__global half4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (half)119;\n" "}\n", "__kernel void test_buffer_read_half8(__global half8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (half)119;\n" "}\n", "__kernel void test_buffer_read_half16(__global half16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (half)119;\n" "}\n" }; static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" }; static const char *buffer_read_char_kernel_code[] = { "__kernel void test_buffer_read_char(__global char *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (char)'w';\n" "}\n", "__kernel void test_buffer_read_char2(__global char2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (char)'w';\n" "}\n", "__kernel void test_buffer_read_char4(__global char4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (char)'w';\n" "}\n", "__kernel void test_buffer_read_char8(__global char8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (char)'w';\n" "}\n", "__kernel void test_buffer_read_char16(__global char16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (char)'w';\n" "}\n" }; static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" }; static const char *buffer_read_uchar_kernel_code[] = { "__kernel void test_buffer_read_uchar(__global uchar *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = 'w';\n" "}\n", "__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (uchar)'w';\n" "}\n", "__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (uchar)'w';\n" "}\n", "__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (uchar)'w';\n" "}\n", "__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = (uchar)'w';\n" "}\n" }; static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" }; static const char *buffer_read_struct_kernel_code = "typedef struct{\n" "int a;\n" "float b;\n" "} TestStruct;\n" "__kernel void test_buffer_read_struct(__global TestStruct *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid].a = ((1<<16)+1);\n" " dst[tid].b = (float)3.40282346638528860e+38;\n" "}\n"; //--- the verify functions static int verify_read_int(void *ptr, int n) { int i; cl_int *outptr = (cl_int *)ptr; for (i=0; i> 1 ) ){ log_error( "buffer_READ half test failed\n" ); err = -1; } else{ log_info( "buffer_READ half test passed\n" ); err = 0; } // cleanup clReleaseMemObject( buffers[0] ); clReleaseKernel( kernel[0] ); clReleaseProgram( program[0] ); free( (void *)outptr ); return err; } // end test_buffer_half_read() */ int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { cl_mem buffers[1]; TestStruct *output_ptr; cl_program program[1]; cl_kernel kernel[1]; size_t global_work_size[3]; #ifdef USE_LOCAL_WORK_GROUP size_t local_work_size[3]; #endif cl_int err; size_t objSize = sizeof(TestStruct); size_t min_alignment = get_min_alignment(context); global_work_size[0] = (cl_uint)num_elements; output_ptr = (TestStruct*)align_malloc(objSize * num_elements, min_alignment); if ( ! output_ptr ){ log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) ); return -1; } buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), objSize * num_elements, NULL , &err); if ( err != CL_SUCCESS ){ print_error( err, " clCreateBuffer failed\n" ); align_free( output_ptr ); return -1; } err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_struct_kernel_code, "test_buffer_read_struct" ); if ( err ){ clReleaseProgram( program[0] ); align_free( output_ptr ); return -1; } err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] ); if ( err != CL_SUCCESS){ print_error( err, "clSetKernelArg failed" ); clReleaseMemObject( buffers[0] ); clReleaseKernel( kernel[0] ); clReleaseProgram( program[0] ); align_free( output_ptr ); return -1; } #ifdef USE_LOCAL_WORK_GROUP err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] ); test_error( err, "Unable to get work group size to use" ); err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL ); #else err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL ); #endif if ( err != CL_SUCCESS ){ print_error( err, "clEnqueueNDRangeKernel failed" ); clReleaseMemObject( buffers[0] ); clReleaseKernel( kernel[0] ); clReleaseProgram( program[0] ); align_free( output_ptr ); return -1; } err = clEnqueueReadBuffer( queue, buffers[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL ); if ( err != CL_SUCCESS){ print_error( err, "clEnqueueReadBuffer failed" ); clReleaseMemObject( buffers[0] ); clReleaseKernel( kernel[0] ); clReleaseProgram( program[0] ); align_free( output_ptr ); return -1; } if (verify_read_struct(output_ptr, num_elements)){ log_error(" struct test failed\n"); err = -1; } else{ log_info(" struct test passed\n"); err = 0; } // cleanup clReleaseMemObject( buffers[0] ); clReleaseKernel( kernel[0] ); clReleaseProgram( program[0] ); align_free( output_ptr ); return err; } static int testRandomReadSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_uint startOfRead, size_t sizeOfRead ) { cl_mem buffers[3]; int *outptr[3]; cl_program program[3]; cl_kernel kernel[3]; size_t global_work_size[3]; #ifdef USE_LOCAL_WORK_GROUP size_t local_work_size[3]; #endif cl_int err; int i, j; size_t ptrSizes[3]; // sizeof(int), sizeof(int2), sizeof(int4) int total_errors = 0; size_t min_alignment = get_min_alignment(context); global_work_size[0] = (cl_uint)num_elements; ptrSizes[0] = sizeof(cl_int); ptrSizes[1] = ptrSizes[0] << 1; ptrSizes[2] = ptrSizes[1] << 1; for ( i = 0; i < 3; i++ ){ outptr[i] = (int *)align_malloc( ptrSizes[i] * num_elements, min_alignment); if ( ! outptr[i] ){ log_error( " Unable to allocate %d bytes for outptr[%d]\n", (int)(ptrSizes[i] * num_elements), i ); for ( j = 0; j < i; j++ ){ clReleaseMemObject( buffers[j] ); align_free( outptr[j] ); } return -1; } buffers[i] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL, &err); if ( err != CL_SUCCESS ){ print_error(err, " clCreateBuffer failed\n" ); for ( j = 0; j < i; j++ ){ clReleaseMemObject( buffers[j] ); align_free( outptr[j] ); } align_free( outptr[i] ); return -1; } } err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_int_kernel_code[0], "test_buffer_read_int" ); if ( err ){ log_error( " Error creating program for int\n" ); for ( i = 0; i < 3; i++ ){ clReleaseMemObject( buffers[i] ); align_free( outptr[i] ); } return -1; } err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &buffer_read_int_kernel_code[1], "test_buffer_read_int2" ); if ( err ){ log_error( " Error creating program for int2\n" ); clReleaseKernel( kernel[0] ); clReleaseProgram( program[0] ); for ( i = 0; i < 3; i++ ){ clReleaseMemObject( buffers[i] ); align_free( outptr[i] ); } return -1; } err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &buffer_read_int_kernel_code[2], "test_buffer_read_int4" ); if ( err ){ log_error( " Error creating program for int4\n" ); clReleaseKernel( kernel[0] ); clReleaseProgram( program[0] ); clReleaseKernel( kernel[1] ); clReleaseProgram( program[1] ); for ( i = 0; i < 3; i++ ){ clReleaseMemObject( buffers[i] ); align_free( outptr[i] ); } return -1; } for (i=0; i<3; i++){ err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] ); if ( err != CL_SUCCESS ){ print_error( err, "clSetKernelArgs failed" ); clReleaseMemObject( buffers[i] ); clReleaseKernel( kernel[i] ); clReleaseProgram( program[i] ); align_free( outptr[i] ); return -1; } #ifdef USE_LOCAL_WORK_GROUP err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] ); test_error( err, "Unable to get work group size to use" ); err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL ); #else err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL ); #endif if ( err != CL_SUCCESS ){ print_error( err, "clEnqueueNDRangeKernel failed" ); clReleaseMemObject( buffers[i] ); clReleaseKernel( kernel[i] ); clReleaseProgram( program[i] ); align_free( outptr[i] ); return -1; } err = clEnqueueReadBuffer( queue, buffers[i], true, startOfRead*ptrSizes[i], ptrSizes[i]*sizeOfRead, (void *)(outptr[i]), 0, NULL, NULL ); if ( err != CL_SUCCESS ){ print_error( err, "clEnqueueReadBuffer failed" ); clReleaseMemObject( buffers[i] ); clReleaseKernel( kernel[i] ); clReleaseProgram( program[i] ); align_free( outptr[i] ); return -1; } if ( verify_read_int( outptr[i], (int)sizeOfRead*(1<