diff --git a/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt b/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt index 9b17bab2..f6dc173f 100644 --- a/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt @@ -4,10 +4,8 @@ set(${MODULE_NAME}_SOURCES main.c test_readimage.c test_writeimage.c - test_async_copy.cpp test_sizeof.c test_bufferreadwriterect.c - test_async_strided_copy.cpp ) set(${MODULE_NAME}_LIBS harness-compat) diff --git a/test_conformance/compatibility/test_conformance/basic/main.c b/test_conformance/compatibility/test_conformance/basic/main.c index 43847fd7..38646b83 100644 --- a/test_conformance/compatibility/test_conformance/basic/main.c +++ b/test_conformance/compatibility/test_conformance/basic/main.c @@ -36,12 +36,6 @@ test_definition test_list[] = { ADD_TEST( writeimage ), ADD_TEST( bufferreadwriterect ), - - ADD_TEST( async_copy_global_to_local ), - ADD_TEST( async_copy_local_to_global ), - ADD_TEST( async_strided_copy_global_to_local ), - ADD_TEST( async_strided_copy_local_to_global ), - ADD_TEST( prefetch ), }; const int test_num = ARRAY_SIZE( test_list ); diff --git a/test_conformance/compatibility/test_conformance/basic/test_async_copy.cpp b/test_conformance/compatibility/test_conformance/basic/test_async_copy.cpp deleted file mode 100644 index c3ac7bb6..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_async_copy.cpp +++ /dev/null @@ -1,276 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - - -#include "procs.h" -#include "harness/conversions.h" - -static const char *async_global_to_local_kernel = -"%s\n" // optional pragma string -"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n" -"{\n" -" int i;\n" -// Zero the local storage first -" for(i=0; i max_local_workgroup_size[0]) - max_workgroup_size = max_local_workgroup_size[0]; - - size_t numberOfCopiesPerWorkitem = 13; - size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize; - size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem); - - // Calculation can return 0 on embedded devices due to 1KB local mem limit - if(maxLocalWorkgroupSize == 0) - { - maxLocalWorkgroupSize = 1; - } - - size_t localWorkgroupSize = maxLocalWorkgroupSize; - if (maxLocalWorkgroupSize > max_workgroup_size) - localWorkgroupSize = max_workgroup_size; - - size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem; - size_t numberOfLocalWorkgroups = 1111; - size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize; - size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; - - inBuffer = (void*)malloc(globalBufferSize); - outBuffer = (void*)malloc(globalBufferSize); - memset(outBuffer, 0, globalBufferSize); - - cl_int copiesPerWorkItemInt, copiesPerWorkgroup; - copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem; - copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize); - - log_info("Global: %d, local %d, local buffer %db, global buffer %db, each work group will copy %d elements and each work item item will copy %d elements.\n", - (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, copiesPerWorkgroup, copiesPerWorkItemInt); - - threads[0] = globalWorkgroupSize; - localThreads[0] = localWorkgroupSize; - - d = init_genrand( gRandomSeed ); - generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer ); - free_mtdata(d); d = NULL; - - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error ); - test_error( error, "Unable to create input buffer" ); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error ); - test_error( error, "Unable to create output buffer" ); - - error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); - test_error( error, "Unable to set kernel argument" ); - error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); - test_error( error, "Unable to set kernel argument" ); - error = clSetKernelArg( kernel, 2, localBufferSize, NULL ); - test_error( error, "Unable to set kernel argument" ); - error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup ); - test_error( error, "Unable to set kernel argument" ); - error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt ); - test_error( error, "Unable to set kernel argument" ); - - // Enqueue - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Unable to queue kernel" ); - - // Read - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); - - // Verify - if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 ) - { - log_error( "ERROR: Results of copy did not validate!\n" ); - unsigned char * inchar = (unsigned char*)inBuffer; - unsigned char * outchar = (unsigned char*)outBuffer; - int failuresPrinted = 0; - for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) { - int failed = 0; - for (int j=0; j<(int)elementSize; j++) - if (inchar[i+j] != outchar[i+j]) - failed = 1; - char values[4096]; - values[0] = 0; - if (failed) { - sprintf(values + strlen( values), "%d -> [", i); - for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", inchar[i+j]); - sprintf(values + strlen(values), "] != ["); - for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", outchar[i+j]); - sprintf(values + strlen(values), "]"); - log_error("%s\n", values); - failuresPrinted++; - } - if (failuresPrinted > 5) { - log_error("Not printing further failures...\n"); - break; - } - } - return -1; - } - - free(inBuffer); - free(outBuffer); - - return 0; -} - -int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; - unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; - unsigned int size, typeIndex; - - int errors = 0; - - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) - { - if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) - continue; - - if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) - continue; - - for( size = 0; vecSizes[ size ] != 0; size++ ) - { - if (test_copy( deviceID, context, queue, kernelCode, vecType[typeIndex],vecSizes[size] )) { - errors++; - } - } - } - if (errors) - return -1; - return 0; -} - - - - -int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel ); -} - -int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_copy_all_types( deviceID, context, queue, async_local_to_global_kernel ); -} - -int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_copy_all_types( deviceID, context, queue, prefetch_kernel ); -} - diff --git a/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp deleted file mode 100644 index 667c7539..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp +++ /dev/null @@ -1,267 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - - -#include "procs.h" -#include "harness/conversions.h" - -static const char *async_strided_global_to_local_kernel = -"%s\n" // optional pragma string -"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" -"{\n" -" int i;\n" -// Zero the local storage first -" for(i=0; i max_local_workgroup_size[0]) - max_workgroup_size = max_local_workgroup_size[0]; - - cl_ulong max_global_mem_size; - error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(max_global_mem_size), &max_global_mem_size, NULL); - test_error (error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE"); - - cl_bool unified_mem; - error = clGetDeviceInfo(deviceID, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unified_mem), &unified_mem, NULL); - test_error (error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY"); - - int number_of_global_mem_buffers = (unified_mem) ? 4 : 2; - - size_t numberOfCopiesPerWorkitem = 3; - size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize; - size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem); - - size_t localWorkgroupSize = maxLocalWorkgroupSize; - if (maxLocalWorkgroupSize > max_workgroup_size) - localWorkgroupSize = max_workgroup_size; - - size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem; - size_t numberOfLocalWorkgroups = 579;//1111; - - // Reduce the numberOfLocalWorkgroups so that no more than 1/2 of CL_DEVICE_GLOBAL_MEM_SIZE is consumed - // by the allocated buffer. This is done to avoid resource errors resulting from address space fragmentation. - size_t numberOfLocalWorkgroupsLimit = max_global_mem_size / (2 * number_of_global_mem_buffers * localBufferSize * stride); - if (numberOfLocalWorkgroups > numberOfLocalWorkgroupsLimit) numberOfLocalWorkgroups = numberOfLocalWorkgroupsLimit; - - size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride; - size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; - - inBuffer = (void*)malloc(globalBufferSize); - outBuffer = (void*)malloc(globalBufferSize); - memset(outBuffer, 0, globalBufferSize); - - cl_int copiesPerWorkItemInt, copiesPerWorkgroup; - copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem; - copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize); - - log_info("Global: %d, local %d, local buffer %db, global buffer %db, copy stride %d, each work group will copy %d elements and each work item item will copy %d elements.\n", - (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, (int)stride, copiesPerWorkgroup, copiesPerWorkItemInt); - - threads[0] = globalWorkgroupSize; - localThreads[0] = localWorkgroupSize; - - d = init_genrand( gRandomSeed ); - generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer ); - free_mtdata(d); d = NULL; - - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error ); - test_error( error, "Unable to create input buffer" ); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error ); - test_error( error, "Unable to create output buffer" ); - - error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); - test_error( error, "Unable to set kernel argument" ); - error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); - test_error( error, "Unable to set kernel argument" ); - error = clSetKernelArg( kernel, 2, localBufferSize, NULL ); - test_error( error, "Unable to set kernel argument" ); - error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup ); - test_error( error, "Unable to set kernel argument" ); - error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt ); - test_error( error, "Unable to set kernel argument" ); - error = clSetKernelArg( kernel, 5, sizeof(stride), &stride ); - test_error( error, "Unable to set kernel argument" ); - - // Enqueue - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Unable to queue kernel" ); - - // Read - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); - - // Verify - for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride) - { - if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, elementSize) != 0 ) - { - unsigned char * inchar = (unsigned char*)inBuffer + i; - unsigned char * outchar = (unsigned char*)outBuffer + i; - char values[4096]; - values[0] = 0; - - log_error( "ERROR: Results of copy did not validate!\n" ); - sprintf(values + strlen( values), "%d -> [", i); - for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", inchar[j]); - sprintf(values + strlen(values), "] != ["); - for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", outchar[j]); - sprintf(values + strlen(values), "]"); - log_error("%s\n", values); - - return -1; - } - } - - free(inBuffer); - free(outBuffer); - - return 0; -} - -int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) -{ - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; - unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; - unsigned int strideSizes[] = { 1, 3, 4, 5, 0 }; - unsigned int size, typeIndex, stride; - - int errors = 0; - - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) - { - if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) - continue; - - if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) - continue; - - for( size = 0; vecSizes[ size ] != 0; size++ ) - { - for( stride = 0; strideSizes[ stride ] != 0; stride++) - { - if (test_strided_copy( deviceID, context, queue, kernelCode, vecType[typeIndex], vecSizes[size], strideSizes[stride] )) - { - errors++; - } - } - } - } - if (errors) - return -1; - return 0; -} - - - - -int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel ); -} - -int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return test_strided_copy_all_types( deviceID, context, queue, async_strided_local_to_global_kernel ); -} -