diff --git a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt index 8e429fbd..e52d309d 100644 --- a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt +++ b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt @@ -2,16 +2,9 @@ set(MODULE_NAME COMPATIBILITY_API) set(${MODULE_NAME}_SOURCES main.c - test_retain.cpp - test_retain_program.c test_queries.cpp - test_create_kernels.c - test_kernels.c test_api_min_max.c - test_binary.cpp - test_create_context_from_type.cpp test_mem_object_info.cpp - test_null_buffer_arg.c test_kernel_arg_info.c test_queue_properties.cpp ) diff --git a/test_conformance/compatibility/test_conformance/api/main.c b/test_conformance/compatibility/test_conformance/api/main.c index 0e48433d..25a015b4 100644 --- a/test_conformance/compatibility/test_conformance/api/main.c +++ b/test_conformance/compatibility/test_conformance/api/main.c @@ -36,27 +36,9 @@ test_definition test_list[] = { ADD_TEST( get_command_queue_info ), ADD_TEST( get_context_info ), ADD_TEST( get_device_info ), - ADD_TEST( enqueue_task ), - ADD_TEST( binary_get ), - ADD_TEST( binary_create ), ADD_TEST( kernel_required_group_size ), - ADD_TEST( release_kernel_order ), - ADD_TEST( release_during_execute ), - - ADD_TEST( load_single_kernel ), - ADD_TEST( load_two_kernels ), - ADD_TEST( load_two_kernels_in_one ), - ADD_TEST( load_two_kernels_manually ), - ADD_TEST( get_program_info_kernel_names ), ADD_TEST( get_kernel_arg_info ), - ADD_TEST( create_kernels_in_program ), - ADD_TEST( get_kernel_info ), - ADD_TEST( execute_kernel_local_sizes ), - ADD_TEST( set_kernel_arg_by_index ), - ADD_TEST( set_kernel_arg_constant ), - ADD_TEST( set_kernel_arg_struct_array ), - ADD_TEST( kernel_global_constant ), ADD_TEST( min_max_thread_dimensions ), ADD_TEST( min_max_work_items_sizes ), @@ -86,16 +68,6 @@ test_definition test_list[] = { ADD_TEST( min_max_device_version ), ADD_TEST( min_max_language_version ), - ADD_TEST( create_context_from_type ), - - ADD_TEST( repeated_setup_cleanup ), - - ADD_TEST( retain_queue_single ), - ADD_TEST( retain_queue_multiple ), - ADD_TEST( retain_mem_object_single ), - ADD_TEST( retain_mem_object_multiple ), - - ADD_TEST( null_buffer_arg ), ADD_TEST( get_buffer_info ), ADD_TEST( get_image2d_info ), ADD_TEST( get_image3d_info ), diff --git a/test_conformance/compatibility/test_conformance/api/test_binary.cpp b/test_conformance/compatibility/test_conformance/api/test_binary.cpp deleted file mode 100644 index 41431252..00000000 --- a/test_conformance/compatibility/test_conformance/api/test_binary.cpp +++ /dev/null @@ -1,226 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "testBase.h" - -static const char *sample_binary_kernel_source[] = { -"__kernel void sample_test(__global float *src, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = (int)src[tid] + 1;\n" -"\n" -"}\n" }; - - -int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - size_t binarySize; - - - program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error ); - test_error( error, "Unable to create program from source" ); - - // Build so we have a binary to get - error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build test program" ); - - // Get the size of the resulting binary (only one device) - error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); - test_error( error, "Unable to get binary size" ); - - // Sanity check - if( binarySize == 0 ) - { - log_error( "ERROR: Binary size of program is zero\n" ); - return -1; - } - - // Create a buffer and get the actual binary - unsigned char *binary; - binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); - unsigned char *buffers[ 1 ] = { binary }; - - // Do another sanity check here first - size_t size; - error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size ); - test_error( error, "Unable to get expected size of binaries array" ); - if( size != sizeof( buffers ) ) - { - log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size ); - free(binary); - return -1; - } - - error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary" ); - - // No way to verify the binary is correct, so just be good with that - free(binary); - return 0; -} - - -int test_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - /* To test this in a self-contained fashion, we have to create a program with - source, then get the binary, then use that binary to reload the program, and then verify */ - - int error; - clProgramWrapper program, program_from_binary; - size_t binarySize; - - - program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error ); - test_error( error, "Unable to create program from source" ); - - // Build so we have a binary to get - error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build test program" ); - - // Get the size of the resulting binary (only one device) - error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); - test_error( error, "Unable to get binary size" ); - - // Sanity check - if( binarySize == 0 ) - { - log_error( "ERROR: Binary size of program is zero\n" ); - return -1; - } - - // Create a buffer and get the actual binary - unsigned char *binary = (unsigned char*)malloc(binarySize); - const unsigned char *buffers[ 1 ] = { binary }; - - error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary" ); - - cl_int loadErrors[ 1 ]; - program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error ); - test_error( error, "Unable to load valid program binary" ); - test_error( loadErrors[ 0 ], "Unable to load valid device binary into program" ); - - error = clBuildProgram( program_from_binary, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build binary program" ); - - // Get the size of the binary built from the first binary - size_t binary2Size; - error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARY_SIZES, sizeof( binary2Size ), &binary2Size, NULL ); - test_error( error, "Unable to get size for the binary program" ); - - // Now get the binary one more time and verify it loaded the right binary - unsigned char *binary2 = (unsigned char*)malloc(binary2Size); - buffers[ 0 ] = binary2; - error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary second time" ); - - // Try again, this time without passing the status ptr in, to make sure we still - // get a valid binary - clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binary2Size, buffers, NULL, &error ); - test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" ); - - error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build binary program created without binary_status" ); - - // Get the size of the binary created without passing binary_status - size_t binary3Size; - error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARY_SIZES, sizeof( binary3Size ), &binary3Size, NULL ); - test_error( error, "Unable to get size for the binary program created without binary_status" ); - - // Now get the binary one more time - unsigned char *binary3 = (unsigned char*)malloc(binary3Size); - buffers[ 0 ] = binary3; - error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary from the program created without binary_status" ); - - // We no longer need these intermediate binaries - free(binary); - free(binary2); - free(binary3); - - // Now execute them both to see that they both do the same thing. - clMemWrapper in, out, out_binary; - clKernelWrapper kernel, kernel_binary; - cl_int *out_data, *out_data_binary; - cl_float *in_data; - size_t size_to_run = 1000; - - // Allocate some data - in_data = (cl_float*)malloc(sizeof(cl_float)*size_to_run); - out_data = (cl_int*)malloc(sizeof(cl_int)*size_to_run); - out_data_binary = (cl_int*)malloc(sizeof(cl_int)*size_to_run); - memset(out_data, 0, sizeof(cl_int)*size_to_run); - memset(out_data_binary, 0, sizeof(cl_int)*size_to_run); - for (size_t i=0; i -#endif - -#include "harness/testHarness.h" -#include "harness/conversions.h" - -extern cl_uint gRandomSeed; - -int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[2]; - clContextWrapper context_to_test; - clCommandQueueWrapper queue_to_test; - size_t threads[1], localThreads[1]; - cl_float inputData[10]; - cl_int outputData[10]; - int i; - RandomSeed seed( gRandomSeed ); - - const char *sample_single_test_kernel[] = { - "__kernel void sample_test(__global float *src, __global int *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = (int)src[tid];\n" - "\n" - "}\n" }; - - cl_device_type type; - error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL); - test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed\n"); - - cl_platform_id platform; - error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); - test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed\n"); - - cl_context_properties properties[3] = { - (cl_context_properties)CL_CONTEXT_PLATFORM, - (cl_context_properties)platform, - NULL - }; - - context_to_test = clCreateContextFromType(properties, type, notify_callback, NULL, &error); - test_error(error, "clCreateContextFromType failed"); - if (context_to_test == NULL) { - log_error("clCreateContextFromType returned NULL, but error was CL_SUCCESS."); - return -1; - } - - queue_to_test = clCreateCommandQueue(context_to_test, deviceID, NULL, &error); - test_error(error, "clCreateCommandQueue failed"); - if (queue_to_test == NULL) { - log_error("clCreateCommandQueue returned NULL, but error was CL_SUCCESS."); - return -1; - } - - /* Create a kernel to test with */ - if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) - { - return -1; - } - - /* Create some I/O streams */ - streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error); - test_error( error, "Creating test array failed" ); - streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); - test_error( error, "Creating test array failed" ); - - /* Write some test data */ - memset( outputData, 0, sizeof( outputData ) ); - - for (i=0; i<10; i++) - inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed); - - error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL); - test_error( error, "Unable to set testing kernel data" ); - - /* Test setting the arguments by index manually */ - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); - test_error( error, "Unable to set indexed kernel arguments" ); - - - /* Test running the kernel and verifying it */ - threads[0] = (size_t)10; - - error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); - - error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Kernel execution failed" ); - - error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); - test_error( error, "Unable to get result data" ); - - for (i=0; i<10; i++) - { - if (outputData[i] != (int)inputData[i]) - { - log_error( "ERROR: Data did not verify on first pass!\n" ); - return -1; - } - } - - return 0; -} - - diff --git a/test_conformance/compatibility/test_conformance/api/test_create_kernels.c b/test_conformance/compatibility/test_conformance/api/test_create_kernels.c deleted file mode 100644 index f846d2e2..00000000 --- a/test_conformance/compatibility/test_conformance/api/test_create_kernels.c +++ /dev/null @@ -1,643 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "testBase.h" -#include "harness/testHarness.h" - - -const char *sample_single_kernel[] = { - "__kernel void sample_test(__global float *src, __global int *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = (int)src[tid];\n" - "\n" - "}\n" }; - -size_t sample_single_kernel_lengths[1]; - -const char *sample_two_kernels[] = { - "__kernel void sample_test(__global float *src, __global int *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = (int)src[tid];\n" - "\n" - "}\n", - "__kernel void sample_test2(__global int *src, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = (float)src[tid];\n" - "\n" - "}\n" }; - -size_t sample_two_kernel_lengths[2]; - -const char *sample_two_kernels_in_1[] = { - "__kernel void sample_test(__global float *src, __global int *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = (int)src[tid];\n" - "\n" - "}\n" - "__kernel void sample_test2(__global int *src, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = (float)src[tid];\n" - "\n" - "}\n" }; - -size_t sample_two_kernels_in_1_lengths[1]; - - -const char *repeate_test_kernel = -"__kernel void test_kernel(__global int *src, __global int *dst)\n" -"{\n" -" dst[get_global_id(0)] = src[get_global_id(0)]+1;\n" -"}\n"; - - - -int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - cl_program testProgram; - clKernelWrapper kernel; - cl_context testContext; - unsigned int numKernels; - cl_char testName[512]; - cl_uint testArgCount; - size_t realSize; - - - /* Preprocess: calc the length of each source file line */ - sample_single_kernel_lengths[ 0 ] = strlen( sample_single_kernel[ 0 ] ); - - /* Create a program */ - program = clCreateProgramWithSource( context, 1, sample_single_kernel, sample_single_kernel_lengths, &error ); - if( program == NULL || error != CL_SUCCESS ) - { - print_error( error, "Unable to create single kernel program" ); - return -1; - } - - error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build single kernel program" ); - error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels); - test_error( error, "Unable to create single kernel program" ); - - /* Check program and context pointers */ - error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( cl_program ), &testProgram, &realSize ); - test_error( error, "Unable to get kernel's program" ); - if( (cl_program)testProgram != (cl_program)program ) - { - log_error( "ERROR: Returned kernel's program does not match program used to create it! (Got %p, expected %p)\n", (cl_program)testProgram, (cl_program)program ); - return -1; - } - if( realSize != sizeof( cl_program ) ) - { - log_error( "ERROR: Returned size of kernel's program does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_program ), (int)realSize ); - return -1; - } - - error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( cl_context ), &testContext, &realSize ); - test_error( error, "Unable to get kernel's context" ); - if( (cl_context)testContext != (cl_context)context ) - { - log_error( "ERROR: Returned kernel's context does not match program used to create it! (Got %p, expected %p)\n", (cl_context)testContext, (cl_context)context ); - return -1; - } - if( realSize != sizeof( cl_context ) ) - { - log_error( "ERROR: Returned size of kernel's context does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_context ), (int)realSize ); - return -1; - } - - /* Test arg count */ - error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &realSize ); - test_error( error, "Unable to get size of arg count info from kernel" ); - - if( realSize != sizeof( testArgCount ) ) - { - log_error( "ERROR: size of arg count not valid! %d\n", (int)realSize ); - return -1; - } - - error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL ); - test_error( error, "Unable to get arg count from kernel" ); - - if( testArgCount != 2 ) - { - log_error( "ERROR: Kernel arg count does not match!\n" ); - return -1; - } - - - /* Test function name */ - error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, &realSize ); - test_error( error, "Unable to get name from kernel" ); - - if( strcmp( (char *)testName, "sample_test" ) != 0 ) - { - log_error( "ERROR: Kernel names do not match!\n" ); - return -1; - } - if( realSize != strlen( (char *)testName ) + 1 ) - { - log_error( "ERROR: Length of kernel name returned does not validate (expected %d, got %d)\n", (int)strlen( (char *)testName ) + 1, (int)realSize ); - return -1; - } - - /* All done */ - - return 0; -} - -int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - clKernelWrapper kernel[2]; - unsigned int numKernels; - cl_char testName[ 512 ]; - cl_uint testArgCount; - - - /* Preprocess: calc the length of each source file line */ - sample_two_kernel_lengths[ 0 ] = strlen( sample_two_kernels[ 0 ] ); - sample_two_kernel_lengths[ 1 ] = strlen( sample_two_kernels[ 1 ] ); - - /* Now create a test program */ - program = clCreateProgramWithSource( context, 2, sample_two_kernels, sample_two_kernel_lengths, &error ); - if( program == NULL || error != CL_SUCCESS ) - { - print_error( error, "Unable to create dual kernel program!" ); - return -1; - } - error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build dual kernel program" ); - error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels); - test_error( error, "Unable to create dual kernel program" ); - - if( numKernels != 2 ) - { - log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels ); - return -1; - } - - /* Check first kernel */ - error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); - test_error( error, "Unable to get function name from kernel" ); - - int found_kernel1 = 0, found_kernel2 = 0; - - if( strcmp( (char *)testName, "sample_test" ) == 0 ) { - found_kernel1 = 1; - } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { - found_kernel2 = 1; - } else { - log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); - return -1; - } - - error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); - test_error( error, "Unable to get function name from second kernel" ); - - if( strcmp( (char *)testName, "sample_test" ) == 0 ) { - if (found_kernel1) { - log_error("Kernel \"%s\" returned twice.\n", (char *)testName); - return -1; - } - found_kernel1 = 1; - } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { - if (found_kernel2) { - log_error("Kernel \"%s\" returned twice.\n", (char *)testName); - return -1; - } - found_kernel2 = 1; - } else { - log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); - return -1; - } - - if( !found_kernel1 || !found_kernel2 ) - { - log_error( "ERROR: Kernel names do not match.\n" ); - if (!found_kernel1) - log_error("Kernel \"%s\" not returned.\n", "sample_test"); - if (!found_kernel2) - log_error("Kernel \"%s\" not returned.\n", "sample_test"); - return -1; - } - - error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL ); - test_error( error, "Unable to get arg count from kernel" ); - - if( testArgCount != 2 ) - { - log_error( "ERROR: wrong # of args for kernel\n" ); - return -1; - } - - /* All done */ - return 0; -} - -int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - clKernelWrapper kernel[2]; - unsigned int numKernels; - cl_char testName[512]; - cl_uint testArgCount; - - - /* Preprocess: calc the length of each source file line */ - sample_two_kernels_in_1_lengths[ 0 ] = strlen( sample_two_kernels_in_1[ 0 ] ); - - /* Now create a test program */ - program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, sample_two_kernels_in_1_lengths, &error ); - if( program == NULL || error != CL_SUCCESS ) - { - print_error( error, "Unable to create dual kernel program" ); - return -1; - } - error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build dual kernel program" ); - error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels); - test_error( error, "Unable to create dual kernel program" ); - - if( numKernels != 2 ) - { - log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels ); - return -1; - } - - /* Check first kernel */ - error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); - test_error( error, "Unable to get function name from kernel" ); - - int found_kernel1 = 0, found_kernel2 = 0; - - if( strcmp( (char *)testName, "sample_test" ) == 0 ) { - found_kernel1 = 1; - } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { - found_kernel2 = 1; - } else { - log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); - return -1; - } - - error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL ); - test_error( error, "Unable to get arg count from kernel" ); - - if( testArgCount != 2 ) - { - log_error( "ERROR: wrong # of args for kernel\n" ); - return -1; - } - - /* Check second kernel */ - error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); - test_error( error, "Unable to get function name from kernel" ); - - if( strcmp( (char *)testName, "sample_test" ) == 0 ) { - if (found_kernel1) { - log_error("Kernel \"%s\" returned twice.\n", (char *)testName); - return -1; - } - found_kernel1 = 1; - } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { - if (found_kernel2) { - log_error("Kernel \"%s\" returned twice.\n", (char *)testName); - return -1; - } - found_kernel2 = 1; - } else { - log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); - return -1; - } - - if( !found_kernel1 || !found_kernel2 ) - { - log_error( "ERROR: Kernel names do not match.\n" ); - if (!found_kernel1) - log_error("Kernel \"%s\" not returned.\n", "sample_test"); - if (!found_kernel2) - log_error("Kernel \"%s\" not returned.\n", "sample_test"); - return -1; - } - - /* All done */ - return 0; -} - -int test_load_two_kernels_manually( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - clProgramWrapper program; - clKernelWrapper kernel1, kernel2; - int error; - - - /* Now create a test program */ - program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error ); - if( program == NULL || error != CL_SUCCESS ) - { - print_error( error, "Unable to create dual kernel program" ); - return -1; - } - - /* Compile the program */ - error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build kernel program" ); - - /* Try manually creating kernels (backwards just in case) */ - kernel1 = clCreateKernel( program, "sample_test2", &error ); - - if( kernel1 == NULL || error != CL_SUCCESS ) - { - print_error( error, "Could not get kernel 1" ); - return -1; - } - - kernel2 = clCreateKernel( program, "sample_test", &error ); - - if( kernel2 == NULL ) - { - print_error( error, "Could not get kernel 2" ); - return -1; - } - - return 0; -} - -int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - clProgramWrapper program; - clKernelWrapper kernel1, kernel2; - int error; - size_t i; - - /* Now create a test program */ - program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error ); - if( program == NULL || error != CL_SUCCESS ) - { - print_error( error, "Unable to create dual kernel program" ); - return -1; - } - - /* Compile the program */ - error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build kernel program" ); - - /* Lookup the number of kernels in the program. */ - size_t total_kernels = 0; - error = clGetProgramInfo(program, CL_PROGRAM_NUM_KERNELS, sizeof(size_t),&total_kernels,NULL); - test_error( error, "Unable to get program info num kernels"); - - if (total_kernels != 2) - { - print_error( error, "Program did not contain two kernels" ); - return -1; - } - - /* Lookup the kernel names. */ - const char* actual_names[] = { "sample_test;sample_test2", "sample_test2;sample_test"} ; - - size_t kernel_names_len = 0; - error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,0,NULL,&kernel_names_len); - test_error( error, "Unable to get length of kernel names list." ); - - if (kernel_names_len != (strlen(actual_names[0])+1)) - { - print_error( error, "Kernel names length did not match"); - return -1; - } - - const size_t len = (kernel_names_len+1)*sizeof(char); - char* kernel_names = (char*)malloc(len); - error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,len,kernel_names,&kernel_names_len); - test_error( error, "Unable to get kernel names list." ); - - /* Check to see if the kernel name array is null terminated. */ - if (kernel_names[kernel_names_len-1] != '\0') - { - free(kernel_names); - print_error( error, "Kernel name list was not null terminated"); - return -1; - } - - /* Check to see if the correct kernel name string was returned. */ - for( i = 0; i < sizeof( actual_names ) / sizeof( actual_names[0] ); i++ ) - if( 0 == strcmp(actual_names[i],kernel_names) ) - break; - - if (i == sizeof( actual_names ) / sizeof( actual_names[0] ) ) - { - free(kernel_names); - log_error( "Kernel names \"%s\" did not match:\n", kernel_names ); - for( i = 0; i < sizeof( actual_names ) / sizeof( actual_names[0] ); i++ ) - log_error( "\t\t\"%s\"\n", actual_names[0] ); - return -1; - } - free(kernel_names); - - /* Try manually creating kernels (backwards just in case) */ - kernel1 = clCreateKernel( program, "sample_test", &error ); - if( kernel1 == NULL || error != CL_SUCCESS ) - { - print_error( error, "Could not get kernel 1" ); - return -1; - } - - kernel2 = clCreateKernel( program, "sample_test2", &error ); - if( kernel2 == NULL ) - { - print_error( error, "Could not get kernel 2" ); - return -1; - } - - return 0; -} - -static const char *single_task_kernel[] = { - "__kernel void sample_test(__global int *dst, int count)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " for( int i = 0; i < count; i++ )\n" - " dst[i] = tid + i;\n" - "\n" - "}\n" }; - -int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper output; - cl_int count; - - - if( create_single_kernel_helper( context, &program, &kernel, 1, single_task_kernel, "sample_test" ) ) - return -1; - - // Create args - count = 100; - output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error ); - test_error( error, "Unable to create output buffer" ); - - error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output ); - test_error( error, "Unable to set kernel argument" ); - error = clSetKernelArg( kernel, 1, sizeof( cl_int ), &count ); - test_error( error, "Unable to set kernel argument" ); - - // Run task - error = clEnqueueTask( queue, kernel, 0, NULL, NULL ); - test_error( error, "Unable to run task" ); - - // Read results - cl_int *results = (cl_int*)malloc(sizeof(cl_int)*count); - error = clEnqueueReadBuffer( queue, output, CL_TRUE, 0, sizeof( cl_int ) * count, results, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); - - // Validate - for( cl_int i = 0; i < count; i++ ) - { - if( results[ i ] != i ) - { - log_error( "ERROR: Task result value %d did not validate! Expected %d, got %d\n", (int)i, (int)i, (int)results[ i ] ); - free(results); - return -1; - } - } - - /* All done */ - free(results); - return 0; -} - - - -#define TEST_SIZE 1000 -int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - - cl_context local_context; - cl_command_queue local_queue; - cl_program local_program; - cl_kernel local_kernel; - cl_mem local_mem_in, local_mem_out; - cl_event local_event; - size_t global_dim[3]; - int i, j, error; - global_dim[0] = TEST_SIZE; - global_dim[1] = 1; global_dim[2] = 1; - cl_int *inData, *outData; - cl_int status; - - inData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE); - outData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE); - for (i=0; iA[tid] + src->B[tid];\n" -"\n" -"}\n" }; - -const char *sample_struct_array_test_kernel[] = { -"typedef struct {\n" -"int A;\n" -"int B;\n" -"} input_pair_t;\n" -"\n" -"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = src[tid].A + src[tid].B;\n" -"\n" -"}\n" }; - -const char *sample_const_test_kernel[] = { -"__kernel void sample_test(__constant int *src1, __constant int *src2, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = src1[tid] + src2[tid];\n" -"\n" -"}\n" }; - -const char *sample_const_global_test_kernel[] = { -"__constant int addFactor = 1024;\n" -"__kernel void sample_test(__global int *src1, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = src1[tid] + addFactor;\n" -"\n" -"}\n" }; - -const char *sample_two_kernel_program[] = { -"__kernel void sample_test(__global float *src, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = (int)src[tid];\n" -"\n" -"}\n", -"__kernel void sample_test2(__global int *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = (float)src[tid];\n" -"\n" -"}\n" }; - - - - -int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - cl_program program, testProgram; - cl_context testContext; - cl_kernel kernel; - cl_char name[ 512 ]; - cl_uint numArgs, numInstances; - size_t paramSize; - - - /* Create reference */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) - { - return -1; - } - - error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, NULL, 0, ¶mSize ); - test_error( error, "Unable to get kernel function name param size" ); - if( paramSize != strlen( "sample_test" ) + 1 ) - { - log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize ); - return -1; - } - - error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL ); - test_error( error, "Unable to get kernel function name" ); - if( strcmp( (char *)name, "sample_test" ) != 0 ) - { - log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name ); - return -1; - } - - - error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, ¶mSize ); - test_error( error, "Unable to get kernel arg count param size" ); - if( paramSize != sizeof( numArgs ) ) - { - log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize ); - return -1; - } - - error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL ); - test_error( error, "Unable to get kernel arg count" ); - if( numArgs != 2 ) - { - log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, numArgs ); - return -1; - } - - - error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, ¶mSize ); - test_error( error, "Unable to get kernel reference count param size" ); - if( paramSize != sizeof( numInstances ) ) - { - log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numInstances ), (int)paramSize ); - return -1; - } - - error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL ); - test_error( error, "Unable to get kernel reference count" ); - - - error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, NULL, 0, ¶mSize ); - test_error( error, "Unable to get kernel program param size" ); - if( paramSize != sizeof( testProgram ) ) - { - log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize ); - return -1; - } - - error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL ); - test_error( error, "Unable to get kernel program" ); - if( testProgram != program ) - { - log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", program, testProgram ); - return -1; - } - - error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL ); - test_error( error, "Unable to get kernel context" ); - if( testContext != context ) - { - log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", context, testContext ); - return -1; - } - - /* Release memory */ - clReleaseKernel( kernel ); - clReleaseProgram( program ); - return 0; -} - -int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[2]; - size_t threads[1], localThreads[1]; - cl_float inputData[100]; - cl_int outputData[100]; - RandomSeed seed( gRandomSeed ); - int i; - - /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) - { - return -1; - } - - /* Create some I/O streams */ - streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 100, NULL, &error); - test_error( error, "Creating test array failed" ); - streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error); - test_error( error, "Creating test array failed" ); - - /* Write some test data */ - memset( outputData, 0, sizeof( outputData ) ); - - for (i=0; i<100; i++) - inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed); - - error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*100, (void *)inputData, 0, NULL, NULL); - test_error( error, "Unable to set testing kernel data" ); - - /* Set the arguments */ - error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); - test_error( error, "Unable to set kernel arguments" ); - - /* Test running the kernel and verifying it */ - threads[0] = (size_t)100; - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Kernel execution failed" ); - - error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL ); - test_error( error, "Unable to get result data" ); - - for (i=0; i<100; i++) - { - if (outputData[i] != (int)inputData[i]) - { - log_error( "ERROR: Data did not verify on first pass!\n" ); - return -1; - } - } - - /* Try again */ - if( localThreads[0] > 1 ) - localThreads[0] /= 2; - while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] ) - localThreads[0]--; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Kernel execution failed" ); - - error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL ); - test_error( error, "Unable to get result data" ); - - for (i=0; i<100; i++) - { - if (outputData[i] != (int)inputData[i]) - { - log_error( "ERROR: Data did not verify on first pass!\n" ); - return -1; - } - } - - /* And again */ - if( localThreads[0] > 1 ) - localThreads[0] /= 2; - while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] ) - localThreads[0]--; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Kernel execution failed" ); - - error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL ); - test_error( error, "Unable to get result data" ); - - for (i=0; i<100; i++) - { - if (outputData[i] != (int)inputData[i]) - { - log_error( "ERROR: Data did not verify on first pass!\n" ); - return -1; - } - } - - /* One more time */ - localThreads[0] = (unsigned int)1; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Kernel execution failed" ); - - error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL ); - test_error( error, "Unable to get result data" ); - - for (i=0; i<100; i++) - { - if (outputData[i] != (int)inputData[i]) - { - log_error( "ERROR: Data did not verify on first pass!\n" ); - return -1; - } - } - - return 0; -} - -int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[2]; - size_t threads[1], localThreads[1]; - cl_float inputData[10]; - cl_int outputData[10]; - RandomSeed seed( gRandomSeed ); - int i; - - /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) - { - return -1; - } - - /* Create some I/O streams */ - streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error); - test_error( error, "Creating test array failed" ); - streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); - test_error( error, "Creating test array failed" ); - - /* Write some test data */ - memset( outputData, 0, sizeof( outputData ) ); - - for (i=0; i<10; i++) - inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed); - - error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL); - test_error( error, "Unable to set testing kernel data" ); - - /* Test setting the arguments by index manually */ - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); - test_error( error, "Unable to set indexed kernel arguments" ); - - - /* Test running the kernel and verifying it */ - threads[0] = (size_t)10; - - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Kernel execution failed" ); - - error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); - test_error( error, "Unable to get result data" ); - - for (i=0; i<10; i++) - { - if (outputData[i] != (int)inputData[i]) - { - log_error( "ERROR: Data did not verify on first pass!\n" ); - return -1; - } - } - - return 0; -} - -int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - cl_program program; - cl_kernel kernel; - void *args[2]; - cl_mem outStream; - size_t threads[1], localThreads[1]; - cl_int outputData[10]; - int i; - cl_int randomTestDataA[10], randomTestDataB[10]; - MTdata d; - - struct img_pair_t - { - cl_mem streamA; - cl_mem streamB; - } image_pair; - - - /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 ) - { - return -1; - } - - /* Create some I/O streams */ - d = init_genrand( gRandomSeed ); - for( i = 0; i < 10; i++ ) - { - randomTestDataA[i] = (cl_int)genrand_int32(d); - randomTestDataB[i] = (cl_int)genrand_int32(d); - } - free_mtdata(d); d = NULL; - - image_pair.streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error); - test_error( error, "Creating test array failed" ); - image_pair.streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error); - test_error( error, "Creating test array failed" ); - outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); - test_error( error, "Creating test array failed" ); - - /* Set the arguments */ - args[0] = &image_pair; - args[1] = outStream; - - error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &args[1]); - test_error( error, "Unable to set indexed kernel arguments" ); - - /* Test running the kernel and verifying it */ - threads[0] = (size_t)10; - - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Kernel execution failed" ); - - error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); - test_error( error, "Unable to get result data" ); - - for (i=0; i<10; i++) - { - if (outputData[i] != randomTestDataA[i] + randomTestDataB[i]) - { - log_error( "ERROR: Data did not verify!\n" ); - return -1; - } - } - - - clReleaseMemObject( image_pair.streamA ); - clReleaseMemObject( image_pair.streamB ); - clReleaseMemObject( outStream ); - clReleaseKernel( kernel ); - clReleaseProgram( program ); - - return 0; -} - -int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[3]; - size_t threads[1], localThreads[1]; - cl_int outputData[10]; - int i; - cl_int randomTestDataA[10], randomTestDataB[10]; - cl_ulong maxSize; - MTdata d; - - /* Verify our test buffer won't be bigger than allowed */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( error, "Unable to get max constant buffer size" ); - if( maxSize < sizeof( cl_int ) * 10 ) - { - log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)maxSize ); - return -1; - } - - /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" ) != 0 ) - { - return -1; - } - - /* Create some I/O streams */ - d = init_genrand( gRandomSeed ); - for( i = 0; i < 10; i++ ) - { - randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffffff; /* Make sure values are positive, just so we don't have to */ - randomTestDataB[i] = (cl_int)genrand_int32(d) & 0xffffff; /* deal with overflow on the verification */ - } - free_mtdata(d); d = NULL; - - streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error); - test_error( error, "Creating test array failed" ); - streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error); - test_error( error, "Creating test array failed" ); - streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); - test_error( error, "Creating test array failed" ); - - /* Set the arguments */ - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2]); - test_error( error, "Unable to set indexed kernel arguments" ); - - - /* Test running the kernel and verifying it */ - threads[0] = (size_t)10; - - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Kernel execution failed" ); - - error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); - test_error( error, "Unable to get result data" ); - - for (i=0; i<10; i++) - { - if (outputData[i] != randomTestDataA[i] + randomTestDataB[i]) - { - log_error( "ERROR: Data sample %d did not verify! %d does not match %d + %d (%d)\n", i, outputData[i], randomTestDataA[i], randomTestDataB[i], ( randomTestDataA[i] + randomTestDataB[i] ) ); - return -1; - } - } - - return 0; -} - -int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[2]; - size_t threads[1], localThreads[1]; - cl_int outputData[10]; - int i; - MTdata d; - - typedef struct img_pair_type - { - int A; - int B; - } image_pair_t; - - image_pair_t image_pair[ 10 ]; - - - /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 ) - { - return -1; - } - - /* Create some I/O streams */ - d = init_genrand( gRandomSeed ); - for( i = 0; i < 10; i++ ) - { - image_pair[i].A = (cl_int)genrand_int32(d); - image_pair[i].A = (cl_int)genrand_int32(d); - } - free_mtdata(d); d = NULL; - - streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error); - test_error( error, "Creating test array failed" ); - streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); - test_error( error, "Creating test array failed" ); - - /* Set the arguments */ - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); - test_error( error, "Unable to set indexed kernel arguments" ); - - /* Test running the kernel and verifying it */ - threads[0] = (size_t)10; - - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Kernel execution failed" ); - - error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); - test_error( error, "Unable to get result data" ); - - for (i=0; i<10; i++) - { - if (outputData[i] != image_pair[i].A + image_pair[i].B) - { - log_error( "ERROR: Data did not verify!\n" ); - return -1; - } - } - - return 0; -} - -int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - cl_program program; - cl_kernel kernel[3]; - unsigned int kernelCount; - - /* Create a test program */ - program = clCreateProgramWithSource( context, 2, sample_two_kernel_program, NULL, &error); - if( program == NULL || error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create test program!\n" ); - return -1; - } - - /* Build */ - error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build test program" ); - - /* Try getting the kernel count */ - error = clCreateKernelsInProgram( program, 0, NULL, &kernelCount ); - test_error( error, "Unable to get kernel count for built program" ); - if( kernelCount != 2 ) - { - log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! (got %d, expected 2)\n", kernelCount ); - return -1; - } - - /* Try actually getting the kernels */ - error = clCreateKernelsInProgram( program, 2, kernel, NULL ); - test_error( error, "Unable to get kernels for built program" ); - clReleaseKernel( kernel[0] ); - clReleaseKernel( kernel[1] ); - - clReleaseProgram( program ); - return 0; -} - -int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[2]; - size_t threads[1], localThreads[1]; - cl_int outputData[10]; - int i; - cl_int randomTestDataA[10]; - MTdata d; - - - /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" ) != 0 ) - { - return -1; - } - - /* Create some I/O streams */ - d = init_genrand( gRandomSeed ); - for( i = 0; i < 10; i++ ) - { - randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffff; /* Make sure values are positive and small, just so we don't have to */ - } - free_mtdata(d); d = NULL; - - streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error); - test_error( error, "Creating test array failed" ); - streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); - test_error( error, "Creating test array failed" ); - - /* Set the arguments */ - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); - test_error( error, "Unable to set indexed kernel arguments" ); - - - /* Test running the kernel and verifying it */ - threads[0] = (size_t)10; - - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Kernel execution failed" ); - - error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); - test_error( error, "Unable to get result data" ); - - for (i=0; i<10; i++) - { - if (outputData[i] != randomTestDataA[i] + 1024) - { - log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", i, outputData[i], randomTestDataA[i], ( randomTestDataA[i] + 1024 ) ); - return -1; - } - } - - return 0; -} - - - diff --git a/test_conformance/compatibility/test_conformance/api/test_null_buffer_arg.c b/test_conformance/compatibility/test_conformance/api/test_null_buffer_arg.c deleted file mode 100644 index 0d792136..00000000 --- a/test_conformance/compatibility/test_conformance/api/test_null_buffer_arg.c +++ /dev/null @@ -1,162 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include -#if defined(__APPLE__) -#include -#include -#else -#include -#include -#endif -#include "procs.h" - - -enum { SUCCESS, FAILURE }; -typedef enum { NON_NULL_PATH, ADDROF_NULL_PATH, NULL_PATH } test_type; - -#define NITEMS 4096 - -/* places the casted long value of the src ptr into each element of the output - * array, to allow testing that the kernel actually _gets_ the NULL value */ -const char *kernel_string = -"kernel void test_kernel(global float *src, global long *dst)\n" -"{\n" -" uint tid = get_global_id(0);\n" -" dst[tid] = (long)src;\n" -"}\n"; - -/* - * The guts of the test: - * call setKernelArgs with a regular buffer, &NULL, or NULL depending on - * the value of 'test_type' - */ -static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel, - cl_mem test_buf, cl_mem result_buf, test_type type) -{ - unsigned int test_success = 0; - - unsigned int i; - cl_int status; - char *typestr; - - if (type == NON_NULL_PATH) { - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); - typestr = "non-NULL"; - } else if (type == ADDROF_NULL_PATH) { - test_buf = NULL; - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); - typestr = "&NULL"; - } else if (type == NULL_PATH) { - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL); - typestr = "NULL"; - } - - log_info("Testing setKernelArgs with %s buffer.\n", typestr); - - if (status != CL_SUCCESS) { - log_error("clSetKernelArg failed with status: %d\n", status); - return FAILURE; // no point in continuing *this* test - } - - size_t global = NITEMS; - status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, - NULL, 0, NULL, NULL); - test_error(status, "NDRangeKernel failed."); - - cl_long* host_result = (cl_long*)malloc(NITEMS*sizeof(cl_long)); - status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0, - sizeof(cl_long)*NITEMS, host_result, 0, NULL, NULL); - test_error(status, "ReadBuffer failed."); - - // in the non-null case, we expect NONZERO values: - if (type == NON_NULL_PATH) { - for (i=0; i -#endif // !_WIN32 - -// Note: According to spec, the various functions to get instance counts should return an error when passed in an object -// that has already been released. However, the spec is out of date. If it gets re-updated to allow such action, re-enable -// this define. -//#define VERIFY_AFTER_RELEASE 1 - -#define GET_QUEUE_INSTANCE_COUNT(p) numInstances = ( (err = clGetCommandQueueInfo(p, CL_QUEUE_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 ) -#define GET_MEM_INSTANCE_COUNT(p) numInstances = ( (err = clGetMemObjectInfo(p, CL_MEM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 ) - -#define VERIFY_INSTANCE_COUNT(c,rightValue) if( c != rightValue ) { \ -log_error( "ERROR: Instance count for test object is not valid! (should be %d, really is %d)\n", rightValue, c ); \ -return -1; } - -int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements) -{ - cl_command_queue queue; - cl_uint numInstances; - int err; - - - /* Create a test queue */ - queue = clCreateCommandQueue( context, deviceID, 0, &err ); - test_error( err, "Unable to create command queue to test with" ); - - /* Test the instance count */ - GET_QUEUE_INSTANCE_COUNT( queue ); - test_error( err, "Unable to get queue instance count" ); - VERIFY_INSTANCE_COUNT( numInstances, 1 ); - - /* Now release the program */ - clReleaseCommandQueue( queue ); -#ifdef VERIFY_AFTER_RELEASE - /* We're not allowed to get the instance count after the object has been completely released. But that's - exactly how we can tell the release worked--by making sure getting the instance count fails! */ - GET_QUEUE_INSTANCE_COUNT( queue ); - if( err != CL_INVALID_COMMAND_QUEUE ) - { - print_error( err, "Command queue was not properly released" ); - return -1; - } -#endif - - return 0; -} - -int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements) -{ - cl_command_queue queue; - unsigned int numInstances, i; - int err; - - - /* Create a test program */ - queue = clCreateCommandQueue( context, deviceID, 0, &err ); - test_error( err, "Unable to create command queue to test with" ); - - /* Increment 9 times, which should bring the count to 10 */ - for( i = 0; i < 9; i++ ) - { - clRetainCommandQueue( queue ); - } - - /* Test the instance count */ - GET_QUEUE_INSTANCE_COUNT( queue ); - test_error( err, "Unable to get queue instance count" ); - VERIFY_INSTANCE_COUNT( numInstances, 10 ); - - /* Now release 5 times, which should take us to 5 */ - for( i = 0; i < 5; i++ ) - { - clReleaseCommandQueue( queue ); - } - - GET_QUEUE_INSTANCE_COUNT( queue ); - test_error( err, "Unable to get queue instance count" ); - VERIFY_INSTANCE_COUNT( numInstances, 5 ); - - /* Retain again three times, which should take us to 8 */ - for( i = 0; i < 3; i++ ) - { - clRetainCommandQueue( queue ); - } - - GET_QUEUE_INSTANCE_COUNT( queue ); - test_error( err, "Unable to get queue instance count" ); - VERIFY_INSTANCE_COUNT( numInstances, 8 ); - - /* Release 7 times, which should take it to 1 */ - for( i = 0; i < 7; i++ ) - { - clReleaseCommandQueue( queue ); - } - - GET_QUEUE_INSTANCE_COUNT( queue ); - test_error( err, "Unable to get queue instance count" ); - VERIFY_INSTANCE_COUNT( numInstances, 1 ); - - /* And one last one */ - clReleaseCommandQueue( queue ); - -#ifdef VERIFY_AFTER_RELEASE - /* We're not allowed to get the instance count after the object has been completely released. But that's - exactly how we can tell the release worked--by making sure getting the instance count fails! */ - GET_QUEUE_INSTANCE_COUNT( queue ); - if( err != CL_INVALID_COMMAND_QUEUE ) - { - print_error( err, "Command queue was not properly released" ); - return -1; - } -#endif - - return 0; -} - -int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_mem object; - cl_uint numInstances; - int err; - - - /* Create a test object */ - object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err ); - test_error( err, "Unable to create buffer to test with" ); - - /* Test the instance count */ - GET_MEM_INSTANCE_COUNT( object ); - test_error( err, "Unable to get mem object count" ); - VERIFY_INSTANCE_COUNT( numInstances, 1 ); - - /* Now release the program */ - clReleaseMemObject( object ); -#ifdef VERIFY_AFTER_RELEASE - /* We're not allowed to get the instance count after the object has been completely released. But that's - exactly how we can tell the release worked--by making sure getting the instance count fails! */ - GET_MEM_INSTANCE_COUNT( object ); - if( err != CL_INVALID_MEM_OBJECT ) - { - print_error( err, "Mem object was not properly released" ); - return -1; - } -#endif - - return 0; -} - -int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_mem object; - unsigned int numInstances, i; - int err; - - - /* Create a test object */ - object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err ); - test_error( err, "Unable to create buffer to test with" ); - - /* Increment 9 times, which should bring the count to 10 */ - for( i = 0; i < 9; i++ ) - { - clRetainMemObject( object ); - } - - /* Test the instance count */ - GET_MEM_INSTANCE_COUNT( object ); - test_error( err, "Unable to get mem object count" ); - VERIFY_INSTANCE_COUNT( numInstances, 10 ); - - /* Now release 5 times, which should take us to 5 */ - for( i = 0; i < 5; i++ ) - { - clReleaseMemObject( object ); - } - - GET_MEM_INSTANCE_COUNT( object ); - test_error( err, "Unable to get mem object count" ); - VERIFY_INSTANCE_COUNT( numInstances, 5 ); - - /* Retain again three times, which should take us to 8 */ - for( i = 0; i < 3; i++ ) - { - clRetainMemObject( object ); - } - - GET_MEM_INSTANCE_COUNT( object ); - test_error( err, "Unable to get mem object count" ); - VERIFY_INSTANCE_COUNT( numInstances, 8 ); - - /* Release 7 times, which should take it to 1 */ - for( i = 0; i < 7; i++ ) - { - clReleaseMemObject( object ); - } - - GET_MEM_INSTANCE_COUNT( object ); - test_error( err, "Unable to get mem object count" ); - VERIFY_INSTANCE_COUNT( numInstances, 1 ); - - /* And one last one */ - clReleaseMemObject( object ); - -#ifdef VERIFY_AFTER_RELEASE - /* We're not allowed to get the instance count after the object has been completely released. But that's - exactly how we can tell the release worked--by making sure getting the instance count fails! */ - GET_MEM_INSTANCE_COUNT( object ); - if( err != CL_INVALID_MEM_OBJECT ) - { - print_error( err, "Mem object was not properly released" ); - return -1; - } -#endif - - return 0; -} - diff --git a/test_conformance/compatibility/test_conformance/api/test_retain_program.c b/test_conformance/compatibility/test_conformance/api/test_retain_program.c deleted file mode 100644 index 2683decb..00000000 --- a/test_conformance/compatibility/test_conformance/api/test_retain_program.c +++ /dev/null @@ -1,109 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "testBase.h" - -#if !defined(_WIN32) -#include -#endif - -#include "harness/compat.h" - -int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_program program; - cl_kernel kernel; - int error; - const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" }; - - /* Create a test program */ - program = clCreateProgramWithSource( context, 1, testProgram, NULL, &error); - test_error( error, "Unable to create program to test with" ); - - /* Compile the program */ - error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build sample program to test with" ); - - /* And create a kernel from it */ - kernel = clCreateKernel( program, "sample_test", &error ); - test_error( error, "Unable to create kernel" ); - - /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine */ - clReleaseProgram( program ); - clReleaseKernel( kernel ); - - /* If we got here fine, we succeeded. If not, well, we won't be able to return an error :) */ - return 0; -} - -const char *sample_delay_kernel[] = { -"__kernel void sample_test(__global float *src, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -" for( int i = 0; i < 1000000; i++ ); \n" -" dst[tid] = (int)src[tid];\n" -"\n" -"}\n" }; - -int test_release_during_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - cl_program program; - cl_kernel kernel; - cl_mem streams[2]; - size_t threads[1] = { 10 }, localThreadSize; - - - /* We now need an event to test. So we'll execute a kernel to get one */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) ) - { - return -1; - } - - streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error); - test_error( error, "Creating test array failed" ); - streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); - test_error( error, "Creating test array failed" ); - - /* Set the arguments */ - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]); - test_error( error, "Unable to set indexed kernel arguments" ); - - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize ); - test_error( error, "Unable to calc local thread size" ); - - - /* Execute the kernel */ - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &localThreadSize, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); - - /* The kernel should still be executing, but we should still be able to release it. It's not terribly - useful, but we should be able to do it, if the internal refcounting is indeed correct. */ - - clReleaseMemObject( streams[ 1 ] ); - clReleaseMemObject( streams[ 0 ] ); - clReleaseKernel( kernel ); - clReleaseProgram( program ); - - /* Now make sure we're really finished before we go on. */ - error = clFinish(queue); - test_error( error, "Unable to finish context."); - - return 0; -} - - diff --git a/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt b/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt index 7d8eb76e..143ba737 100644 --- a/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt @@ -2,47 +2,20 @@ set(MODULE_NAME COMPATIBILITY_BASIC) set(${MODULE_NAME}_SOURCES main.c - test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c - test_intmath_int.c test_intmath_int2.c test_intmath_int4.c - test_intmath_long.c test_intmath_long2.c test_intmath_long4.c - test_hiloeo.c test_local.c test_pointercast.c - test_if.c test_loop.c test_readimage.c test_readimage_int16.c test_readimage_fp32.c test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c - test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c + test_writeimage.c test_multireadimageonefmt.c test_multireadimagemultifmt.c test_imagedim.c - test_vloadstore.c - test_int2float.c test_float2int.c - test_createkernelsinprogram.c - test_hostptr.c - test_explicit_s2v.cpp - test_constant.c test_image_multipass.c - test_imagereadwrite.c test_imagereadwrite3d.c test_image_param.c test_imagenpot.c test_image_r8.c - test_barrier.c - test_basic_parameter_types.c - test_arrayreadwrite.c test_imagearraycopy3d.c - test_imagecopy.c - test_imagerandomcopy.c - test_arrayimagecopy.c - test_arrayimagecopy3d.c - test_imagecopy3d.c - test_enqueue_map.cpp - test_work_item_functions.cpp - test_astype.cpp test_async_copy.cpp test_sizeof.c - test_vec_type_hint.c - test_constant_source.cpp test_bufferreadwriterect.c test_async_strided_copy.cpp - test_kernel_memory_alignment.cpp - test_local_kernel_scope.cpp ) set(${MODULE_NAME}_LIBS harness-compat) diff --git a/test_conformance/compatibility/test_conformance/basic/main.c b/test_conformance/compatibility/test_conformance/basic/main.c index ed8fa4e9..3678992e 100644 --- a/test_conformance/compatibility/test_conformance/basic/main.c +++ b/test_conformance/compatibility/test_conformance/basic/main.c @@ -31,99 +31,33 @@ cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; bool gTestRounding = false; test_definition test_list[] = { - ADD_TEST( hostptr ), - ADD_TEST( fpmath_float ), - ADD_TEST( fpmath_float2 ), - ADD_TEST( fpmath_float4 ), - ADD_TEST( intmath_int ), - ADD_TEST( intmath_int2 ), - ADD_TEST( intmath_int4 ), - ADD_TEST( intmath_long ), - ADD_TEST( intmath_long2 ), - ADD_TEST( intmath_long4 ), - ADD_TEST( hiloeo ), - ADD_TEST( if ), ADD_TEST( sizeof ), - ADD_TEST( loop ), - ADD_TEST( pointer_cast ), - ADD_TEST( local_arg_def ), - ADD_TEST( local_kernel_def ), - ADD_TEST( local_kernel_scope ), - ADD_TEST( constant ), - ADD_TEST( constant_source ), ADD_TEST( readimage ), ADD_TEST( readimage_int16 ), ADD_TEST( readimage_fp32 ), ADD_TEST( writeimage ), - ADD_TEST( writeimage_int16 ), - ADD_TEST( writeimage_fp32 ), ADD_TEST( mri_one ), ADD_TEST( mri_multiple ), ADD_TEST( image_r8 ), - ADD_TEST( barrier ), - ADD_TEST( int2float ), - ADD_TEST( float2int ), - ADD_TEST( imagereadwrite ), - ADD_TEST( imagereadwrite3d ), ADD_TEST( readimage3d ), ADD_TEST( readimage3d_int16 ), ADD_TEST( readimage3d_fp32 ), ADD_TEST( bufferreadwriterect ), - ADD_TEST( arrayreadwrite ), ADD_TEST( imagearraycopy3d ), - ADD_TEST( imagecopy ), - ADD_TEST( imagecopy3d ), - ADD_TEST( imagerandomcopy ), - ADD_TEST( arrayimagecopy ), - ADD_TEST( arrayimagecopy3d ), ADD_TEST( imagenpot ), - ADD_TEST( vload_global ), - ADD_TEST( vload_local ), - ADD_TEST( vload_constant ), - ADD_TEST( vload_private ), - ADD_TEST( vstore_global ), - ADD_TEST( vstore_local ), - ADD_TEST( vstore_private ), - - ADD_TEST( createkernelsinprogram ), ADD_TEST( imagedim_pow2 ), ADD_TEST( imagedim_non_pow2 ), ADD_TEST( image_param ), ADD_TEST( image_multipass_integer_coord ), ADD_TEST( image_multipass_float_coord ), - ADD_TEST( explicit_s2v_bool ), - ADD_TEST( explicit_s2v_char ), - ADD_TEST( explicit_s2v_uchar ), - ADD_TEST( explicit_s2v_short ), - ADD_TEST( explicit_s2v_ushort ), - ADD_TEST( explicit_s2v_int ), - ADD_TEST( explicit_s2v_uint ), - ADD_TEST( explicit_s2v_long ), - ADD_TEST( explicit_s2v_ulong ), - ADD_TEST( explicit_s2v_float ), - ADD_TEST( explicit_s2v_double ), - - ADD_TEST( enqueue_map_buffer ), - ADD_TEST( enqueue_map_image ), - - ADD_TEST( work_item_functions ), - - ADD_TEST( astype ), ADD_TEST( async_copy_global_to_local ), ADD_TEST( async_copy_local_to_global ), ADD_TEST( async_strided_copy_global_to_local ), ADD_TEST( async_strided_copy_local_to_global ), ADD_TEST( prefetch ), - - ADD_TEST( parameter_types ), - ADD_TEST( vec_type_hint ), - ADD_TEST( kernel_memory_alignment_local ), - ADD_TEST( kernel_memory_alignment_global ), - ADD_TEST( kernel_memory_alignment_constant ), - ADD_TEST( kernel_memory_alignment_private ), }; const int test_num = ARRAY_SIZE( test_list ); diff --git a/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy.c b/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy.c deleted file mode 100644 index f7af8d6b..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy.c +++ /dev/null @@ -1,143 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -int test_arrayimagecopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format) -{ - cl_uchar *bufptr, *imgptr; - clMemWrapper buffer, image; - int img_width = 512; - int img_height = 512; - size_t elem_size; - size_t buffer_size; - int i; - cl_int err; - MTdata d; - cl_event copyevent; - - log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type)); - - image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err); - test_error(err, "create_image_2d failed"); - - err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL); - test_error(err, "clGetImageInfo failed"); - - buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height; - - buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err); - test_error(err, "clCreateBuffer failed"); - - bufptr = (cl_uchar*)malloc(buffer_size); - - d = init_genrand( gRandomSeed ); - bufptr = (cl_uchar*)malloc(buffer_size); - for (i=0; i<(int)buffer_size; i++) { - bufptr[i] = (cl_uchar)genrand_int32(d); - } - free_mtdata(d); d = NULL; - - size_t origin[3]={0,0,0}, region[3]={img_width,img_height,1}; - err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL); - test_error(err, "clEnqueueWriteBuffer failed"); - - err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, ©event ); - test_error(err, "clEnqueueCopyImageToBuffer failed"); - - imgptr = (cl_uchar*)malloc(buffer_size); - - err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, ©event, NULL ); - test_error(err, "clEnqueueReadBuffer failed"); - - if (memcmp(bufptr, imgptr, buffer_size) != 0) { - log_error( "ERROR: Results did not validate!\n" ); - unsigned char * inchar = (unsigned char*)bufptr; - unsigned char * outchar = (unsigned char*)imgptr; - int failuresPrinted = 0; - int i; - for (i=0; i< (int)buffer_size; i+=(int)elem_size) { - int failed = 0; - int j; - for (j=0; j<(int)elem_size; j++) - if (inchar[i+j] != outchar[i+j]) - failed = 1; - char values[4096]; - values[0] = 0; - if (failed) { - sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i); - int j; - for (j=0; j<(int)elem_size; j++) - sprintf(values + strlen( values), "0x%02x ", inchar[i+j]); - sprintf(values + strlen(values), "] != expected ["); - for (j=0; j<(int)elem_size; j++) - sprintf(values + strlen( values), "0x%02x ", outchar[i+j]); - sprintf(values + strlen(values), "]"); - log_error("%s\n", values); - failuresPrinted++; - } - if (failuresPrinted > 5) { - log_error("Not printing further failures...\n"); - break; - } - } - err = -1; - } - - free(bufptr); - free(imgptr); - - if (err) - log_error("ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n", - (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type); - - return err; -} - -int test_arrayimagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_int err; - cl_image_format *formats; - cl_uint num_formats; - cl_uint i; - - PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) - - err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats); - test_error(err, "clGetSupportedImageFormats failed"); - - formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format)); - - err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL); - test_error(err, "clGetSupportedImageFormats failed"); - - for (i = 0; i < num_formats; i++) { - err |= test_arrayimagecopy_single_format(device, context, queue, &formats[i]); - } - - if (err) - log_error("ARRAY to IMAGE copy test failed\n"); - else - log_info("ARRAY to IMAGE copy test passed\n"); - - return err; -} diff --git a/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy3d.c b/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy3d.c deleted file mode 100644 index c1430814..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy3d.c +++ /dev/null @@ -1,144 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -int test_arrayimagecopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format) -{ - cl_uchar *bufptr, *imgptr; - clMemWrapper buffer, image; - int img_width = 128; - int img_height = 128; - int img_depth = 32; - size_t elem_size; - size_t buffer_size; - int i; - cl_int err; - MTdata d; - cl_event copyevent; - - log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type)); - - image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err); - test_error(err, "create_image_3d failed"); - - err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL); - test_error(err, "clGetImageInfo failed"); - - buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth; - - buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err); - test_error(err, "clCreateBuffer failed"); - - bufptr = (cl_uchar*)malloc(buffer_size); - - d = init_genrand( gRandomSeed ); - bufptr = (cl_uchar*)malloc(buffer_size); - for (i=0; i<(int)buffer_size; i++) { - bufptr[i] = (cl_uchar)genrand_int32(d); - } - free_mtdata(d); d = NULL; - - size_t origin[3]={0,0,0}, region[3]={img_width,img_height,img_depth}; - err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL); - test_error(err, "clEnqueueWriteBuffer failed"); - - err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, ©event ); - test_error(err, "clEnqueueCopyImageToBuffer failed"); - - imgptr = (cl_uchar*)malloc(buffer_size); - - err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, ©event, NULL ); - test_error(err, "clEnqueueReadBuffer failed"); - - if (memcmp(bufptr, imgptr, buffer_size) != 0) { - log_error( "ERROR: Results did not validate!\n" ); - unsigned char * inchar = (unsigned char*)bufptr; - unsigned char * outchar = (unsigned char*)imgptr; - int failuresPrinted = 0; - int i; - for (i=0; i< (int)buffer_size; i+=(int)elem_size) { - int failed = 0; - int j; - for (j=0; j<(int)elem_size; j++) - if (inchar[i+j] != outchar[i+j]) - failed = 1; - char values[4096]; - values[0] = 0; - if (failed) { - sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i); - int j; - for (j=0; j<(int)elem_size; j++) - sprintf(values + strlen( values), "0x%02x ", inchar[i+j]); - sprintf(values + strlen(values), "] != expected ["); - for (j=0; j<(int)elem_size; j++) - sprintf(values + strlen( values), "0x%02x ", outchar[i+j]); - sprintf(values + strlen(values), "]"); - log_error("%s\n", values); - failuresPrinted++; - } - if (failuresPrinted > 5) { - log_error("Not printing further failures...\n"); - break; - } - } - err = -1; - } - - free(bufptr); - free(imgptr); - - if (err) - log_error("ARRAY to IMAGE3D copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n", - (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type); - - return err; -} - -int test_arrayimagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_int err; - cl_image_format *formats; - cl_uint num_formats; - cl_uint i; - - PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) - - err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats); - test_error(err, "clGetSupportedImageFormats failed"); - - formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format)); - - err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL); - test_error(err, "clGetSupportedImageFormats failed"); - - for (i = 0; i < num_formats; i++) { - err |= test_arrayimagecopy3d_single_format(device, context, queue, &formats[i]); - } - - if (err) - log_error("ARRAY to IMAGE3D copy test failed\n"); - else - log_info("ARRAY to IMAGE3D copy test passed\n"); - - return err; -} diff --git a/test_conformance/compatibility/test_conformance/basic/test_arrayreadwrite.c b/test_conformance/compatibility/test_conformance/basic/test_arrayreadwrite.c deleted file mode 100644 index a3c5fc06..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_arrayreadwrite.c +++ /dev/null @@ -1,94 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" - - -int -test_arrayreadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_uint *inptr, *outptr; - cl_mem streams[1]; - int num_tries = 400; - num_elements = 1024 * 1024 * 4; - int i, j, err; - MTdata d; - - inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint)); - outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint)); - - // randomize data - d = init_genrand( gRandomSeed ); - for (i=0; i 0 && offset < num_elements) - break; - } while (1); - cb = (int)(genrand_int32(d) & 0x7FFFFFFF); - if (cb > (num_elements - offset)) - cb = num_elements - offset; - - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), sizeof(cl_uint)*cb,&inptr[offset], 0, NULL, NULL); - test_error(err, "clEnqueueWriteBuffer failed"); - - err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL ); - test_error(err, "clEnqueueReadBuffer failed"); - - for (j=offset; j -#include -#include -#include -#include -#include - - -#include "procs.h" -#include "harness/conversions.h" -#include "harness/typeWrappers.h" - - -static const char *astype_kernel_pattern = -"%s\n" -"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( src[ tid ] );\n" -" dst[ tid ] = tmp;\n" -"}\n"; - -static const char *astype_kernel_pattern_V3srcV3dst = -"%s\n" -"__kernel void test_fn( __global %s *src, __global %s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( vload3(tid,src) );\n" -" vstore3(tmp,tid,dst);\n" -"}\n"; -// in the printf, remove the third and fifth argument, each of which -// should be a "3", when copying from the printf for astype_kernel_pattern - -static const char *astype_kernel_pattern_V3dst = -"%s\n" -"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s3 tmp = as_%s3( src[ tid ] );\n" -" vstore3(tmp,tid,dst);\n" -"}\n"; -// in the printf, remove the fifth argument, which -// should be a "3", when copying from the printf for astype_kernel_pattern - - -static const char *astype_kernel_pattern_V3src = -"%s\n" -"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( vload3(tid,src) );\n" -" dst[ tid ] = tmp;\n" -"}\n"; -// in the printf, remove the third argument, which -// should be a "3", when copying from the printf for astype_kernel_pattern - - -int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType, - unsigned int vecSize, unsigned int outVecSize, - int numElements ) -{ - int error; - - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[ 2 ]; - - char programSrc[ 10240 ]; - size_t threads[ 1 ], localThreads[ 1 ]; - size_t typeSize = get_explicit_type_size( inVecType ); - size_t outTypeSize = get_explicit_type_size(outVecType); - char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; - MTdata d; - - - - // Create program - if(outVecSize == 3 && vecSize == 3) { - // astype_kernel_pattern_V3srcV3dst - sprintf( programSrc, astype_kernel_pattern_V3srcV3dst, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), // sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] ); - } else if(outVecSize == 3) { - // astype_kernel_pattern_V3dst - sprintf( programSrc, astype_kernel_pattern_V3dst, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), - get_explicit_type_name( outVecType ), - get_explicit_type_name( outVecType )); - - } else if(vecSize == 3) { - // astype_kernel_pattern_V3src - sprintf( programSrc, astype_kernel_pattern_V3src, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ),// sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); - } else { - sprintf( programSrc, astype_kernel_pattern, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); - } - - const char *ptr = programSrc; - error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); - test_error( error, "Unable to create testing kernel" ); - - - // Create some input values - size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize; - char *inBuffer = (char*)malloc( inBufferSize ); - size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize; - char *outBuffer = (char*)malloc( outBufferSize ); - - d = init_genrand( gRandomSeed ); - generate_random_data( inVecType, numElements * vecSize, - d, inBuffer ); - free_mtdata(d); d = NULL; - - // Create I/O streams and set arguments - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error ); - test_error( error, "Unable to create I/O stream" ); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error ); - test_error( error, "Unable to create I/O stream" ); - - error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); - test_error( error, "Unable to set kernel argument" ); - error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); - test_error( error, "Unable to set kernel argument" ); - - - // Run the kernel - threads[ 0 ] = numElements; - error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] ); - test_error( error, "Unable to get group size to run with" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Unable to run kernel" ); - - - // Get the results and compare - // The beauty is that astype is supposed to return the bit pattern as a different type, which means - // the output should have the exact same bit pattern as the input. No interpretation necessary! - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); - - char *expected = inBuffer; - char *actual = outBuffer; - size_t compSize = typeSize*vecSize; - if(outTypeSize*outVecSize < compSize) { - compSize = outTypeSize*outVecSize; - } - - if(outVecSize == 4 && vecSize == 3) - { - // as_type4(vec3) should compile but produce undefined results?? - free(inBuffer); - free(outBuffer); - return 0; - } - - if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize) - { - // as_typen(vecm) should compile and run but produce - // implementation-defined results for m != n - // and n*sizeof(type) = sizeof(vecm) - free(inBuffer); - free(outBuffer); - return 0; - } - - for( int i = 0; i < numElements; i++ ) - { - if( memcmp( expected, actual, compSize ) != 0 ) - { - char expectedString[ 1024 ], actualString[ 1024 ]; - log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n", - (int)i, (int)numElements, get_explicit_type_name( outVecType ), vecSize, get_explicit_type_name( inVecType ), vecSize, - GetDataVectorString( expected, typeSize, vecSize, expectedString ), - GetDataVectorString( actual, typeSize, vecSize, actualString ) ); - log_error("Src is :\n%s\n----\n%d threads %d localthreads\n", - programSrc, (int)threads[0],(int) localThreads[0]); - free(inBuffer); - free(outBuffer); - return 1; - } - expected += typeSize * vecSize; - actual += outTypeSize * outVecSize; - } - - free(inBuffer); - free(outBuffer); - return 0; -} - -int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) -{ - // Note: although casting to different vector element sizes that match the same size (i.e. short2 -> char4) is - // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way - // for us to verify what is "valid". So the only thing we can test are types that match in size independent - // of the element count (char -> uchar, etc) - ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx; - size_t inTypeSize, outTypeSize; - int error = 0; - - for( inTypeIdx = 0; vecTypes[ inTypeIdx ] != kNumExplicitTypes; inTypeIdx++ ) - { - inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]); - - if( vecTypes[ inTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) - continue; - - if (( vecTypes[ inTypeIdx ] == kLong || vecTypes[ inTypeIdx ] == kULong ) && !gHasLong ) - continue; - - for( outTypeIdx = 0; vecTypes[ outTypeIdx ] != kNumExplicitTypes; outTypeIdx++ ) - { - outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]); - if( vecTypes[ outTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) { - continue; - } - - if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong ) - continue; - - // change this check - if( inTypeIdx == outTypeIdx ) { - continue; - } - - log_info( " (%s->%s)\n", get_explicit_type_name( vecTypes[ inTypeIdx ] ), get_explicit_type_name( vecTypes[ outTypeIdx ] ) ); - fflush( stdout ); - - for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ ) - { - - for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++) - { - if(vecSizes[sizeIdx]*inTypeSize != - vecSizes[outSizeIdx]*outTypeSize ) - { - continue; - } - error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems ); - - - } - - } - if(get_explicit_type_size(vecTypes[inTypeIdx]) == - get_explicit_type_size(vecTypes[outTypeIdx])) { - // as_type3(vec4) allowed, as_type4(vec3) not allowed - error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 3, 4, n_elems ); - error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 4, 3, n_elems ); - } - - } - } - return error; -} - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_barrier.c b/test_conformance/compatibility/test_conformance/basic/test_barrier.c deleted file mode 100644 index e4887099..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_barrier.c +++ /dev/null @@ -1,158 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" - -const char *barrier_kernel_code = -"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" -"{\n" -" int tid = get_local_id(0);\n" -" int lsize = get_local_size(0);\n" -" int i;\n" -"\n" -" tmp_sum[tid] = 0;\n" -" for (i=tid; i1; i = hadd(i,1))\n" -" {\n" -" barrier(CLK_GLOBAL_MEM_FENCE);\n" -" if (tid + i < lsize)\n" -" tmp_sum[tid] += tmp_sum[tid + i];\n" -" lsize = i; \n" -" }\n" -"\n" -" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" -" if (tid == 0)\n" -" *sum = tmp_sum[0];\n" -"}\n"; - - -static int -verify_sum(int *inptr, int *tmpptr, int *outptr, int n) -{ - int r = 0; - int i; - - for (i=0; i max_local_workgroup_size[0]) - max_threadgroup_size = max_local_workgroup_size[0]; - - // work group size must divide evenly into the global size - while( num_elements % max_threadgroup_size ) - max_threadgroup_size--; - - input_ptr = (int*)malloc(sizeof(int) * num_elements); - output_ptr = (int*)malloc(sizeof(int)); - - streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err); - test_error(err, "clCreateBuffer failed."); - streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err); - test_error(err, "clCreateBuffer failed."); - streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err); - test_error(err, "clCreateBuffer failed."); - - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - -const char *kernel_code = -"__kernel void test_kernel(\n" -"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n" -"__global float%s *result)\n" -"{\n" -" result[0] = %s(c);\n" -" result[1] = %s(uc);\n" -" result[2] = %s(s);\n" -" result[3] = %s(us);\n" -" result[4] = %s(i);\n" -" result[5] = %s(ui);\n" -" result[6] = f;\n" -"}\n"; - -const char *kernel_code_long = -"__kernel void test_kernel_long(\n" -"long%s l, ulong%s ul,\n" -"__global float%s *result)\n" -"{\n" -" result[0] = %s(l);\n" -" result[1] = %s(ul);\n" -"}\n"; - -int test_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - clMemWrapper results; - int error; - size_t global[3] = {1, 1, 1}; - float results_back[2*16]; - int count, index; - const char* types[] = { "long", "ulong" }; - char kernel_string[8192]; - int sizes[] = {1, 2, 4, 8, 16}; - const char* size_strings[] = {"", "2", "4", "8", "16"}; - float expected; - int total_errors = 0; - int size_to_test; - char *ptr; - char convert_string[1024]; - size_t max_parameter_size; - - // We don't really care about the contents since we're just testing that the types work. - cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; - cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - - // Calculate how large our paramter size is to the kernel - size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong); - - // Init our strings. - kernel_string[0] = '\0'; - convert_string[0] = '\0'; - - // Get the maximum parameter size allowed - error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL ); - test_error( error, "Unable to get max parameter size from device" ); - - // Create the results buffer - results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error); - test_error(error, "clCreateBuffer failed"); - - // Go over all the vector sizes - for (size_to_test = 0; size_to_test < 5; size_to_test++) { - clProgramWrapper program; - clKernelWrapper kernel; - - size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem); - if (total_parameter_size > max_parameter_size) { - log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n", - (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size); - continue; - } - - log_info("Testing vector size %d\n", sizes[size_to_test]); - - // If size is > 1, then we need a explicit convert call. - if (sizes[size_to_test] > 1) { - sprintf(convert_string, "convert_float%s", size_strings[size_to_test]); - } else { - sprintf(convert_string, " "); - } - - // Build the kernel - sprintf(kernel_string, kernel_code_long, - size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], - convert_string, convert_string - ); - - ptr = kernel_string; - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel_long"); - test_error(error, "create single kernel failed"); - - // Set the arguments - for (count = 0; count < 2; count++) { - switch (count) { - case 0: error = clSetKernelArg(kernel, count, sizeof(cl_long)*sizes[size_to_test], &l); break; - case 1: error = clSetKernelArg(kernel, count, sizeof(cl_ulong)*sizes[size_to_test], &ul); break; - default: log_error("Test error"); break; - } - if (error) - log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]); - test_error(error, "clSetKernelArgs failed"); - } - error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results); - test_error(error, "clSetKernelArgs failed"); - - // Execute - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); - - // Read back the results - error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, results_back, 0, NULL, NULL); - test_error(error, "clEnqueueReadBuffer failed"); - - // Verify the results - for (count = 0; count < 2; count++) { - for (index=0; index < sizes[size_to_test]; index++) { - switch (count) { - case 0: expected = (float)l[index]; break; - case 1: expected = (float)ul[index]; break; - default: log_error("Test error"); break; - } - - if (results_back[count*sizes[size_to_test]+index] != expected) { - total_errors++; - log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test], - index, results_back[count*sizes[size_to_test]+index], expected); - } - } - } - } - - return total_errors; -} - -int test_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - clMemWrapper results; - int error; - size_t global[3] = {1, 1, 1}; - float results_back[7*16]; - int count, index; - const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"}; - char kernel_string[8192]; - int sizes[] = {1, 2, 4, 8, 16}; - const char* size_strings[] = {"", "2", "4", "8", "16"}; - float expected; - int total_errors = 0; - int size_to_test; - char *ptr; - char convert_string[1024]; - size_t max_parameter_size; - - // We don't really care about the contents since we're just testing that the types work. - cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; - cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; - cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - cl_int i[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; - cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; - - // Calculate how large our paramter size is to the kernel - size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) + - sizeof(cl_short) +sizeof(cl_ushort) + - sizeof(cl_int) +sizeof(cl_uint) + - sizeof(cl_float); - - // Init our strings. - kernel_string[0] = '\0'; - convert_string[0] = '\0'; - - // Get the maximum parameter size allowed - error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL ); - test_error( error, "Unable to get max parameter size from device" ); - - // Create the results buffer - results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error); - test_error(error, "clCreateBuffer failed"); - - // Go over all the vector sizes - for (size_to_test = 0; size_to_test < 5; size_to_test++) { - clProgramWrapper program; - clKernelWrapper kernel; - - size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem); - if (total_parameter_size > max_parameter_size) { - log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n", - (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size); - continue; - } - - log_info("Testing vector size %d\n", sizes[size_to_test]); - - // If size is > 1, then we need a explicit convert call. - if (sizes[size_to_test] > 1) { - sprintf(convert_string, "convert_float%s", size_strings[size_to_test]); - } else { - sprintf(convert_string, " "); - } - - // Build the kernel - sprintf(kernel_string, kernel_code, - size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], - size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], - size_strings[size_to_test], size_strings[size_to_test], - convert_string, convert_string, convert_string, - convert_string, convert_string, convert_string - ); - - ptr = kernel_string; - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel"); - test_error(error, "create single kernel failed"); - - // Set the arguments - for (count = 0; count < 7; count++) { - switch (count) { - case 0: error = clSetKernelArg(kernel, count, sizeof(cl_char)*sizes[size_to_test], &c); break; - case 1: error = clSetKernelArg(kernel, count, sizeof(cl_uchar)*sizes[size_to_test], &uc); break; - case 2: error = clSetKernelArg(kernel, count, sizeof(cl_short)*sizes[size_to_test], &s); break; - case 3: error = clSetKernelArg(kernel, count, sizeof(cl_ushort)*sizes[size_to_test], &us); break; - case 4: error = clSetKernelArg(kernel, count, sizeof(cl_int)*sizes[size_to_test], &i); break; - case 5: error = clSetKernelArg(kernel, count, sizeof(cl_uint)*sizes[size_to_test], &ui); break; - case 6: error = clSetKernelArg(kernel, count, sizeof(cl_float)*sizes[size_to_test], &f); break; - default: log_error("Test error"); break; - } - if (error) - log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]); - test_error(error, "clSetKernelArgs failed"); - } - error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results); - test_error(error, "clSetKernelArgs failed"); - - // Execute - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); - - // Read back the results - error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL); - test_error(error, "clEnqueueReadBuffer failed"); - - // Verify the results - for (count = 0; count < 7; count++) { - for (index=0; index < sizes[size_to_test]; index++) { - switch (count) { - case 0: expected = (float)c[index]; break; - case 1: expected = (float)uc[index]; break; - case 2: expected = (float)s[index]; break; - case 3: expected = (float)us[index]; break; - case 4: expected = (float)i[index]; break; - case 5: expected = (float)ui[index]; break; - case 6: expected = (float)f[index]; break; - default: log_error("Test error"); break; - } - - if (results_back[count*sizes[size_to_test]+index] != expected) { - total_errors++; - log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test], - index, results_back[count*sizes[size_to_test]+index], expected); - } - } - } - } - - if (gHasLong) { - log_info("Testing long types...\n"); - total_errors += test_parameter_types_long( device, context, queue, num_elements ); - } - else { - log_info("Longs unsupported, skipping."); - } - - return total_errors; -} - - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_constant.c b/test_conformance/compatibility/test_conformance/basic/test_constant.c deleted file mode 100644 index 4e82c01f..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_constant.c +++ /dev/null @@ -1,275 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" - -const char *constant_kernel_code = -"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" float ftmp = tmpF[tid]; \n" -" float Itmp = tmpI[tid]; \n" -" out[tid] = ftmp * Itmp; \n" -"}\n"; - -const char *loop_constant_kernel_code = -"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n" -"{\n" -" int tid = get_global_id(0);\n" -" float sum = 0;\n" -" for (int i = 0; i < num; i++) {\n" -" float pos = i_pos[i*3];\n" -" sum += pos;\n" -" }\n" -" out[tid] = sum;\n" -"}\n"; - - -static int -verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n) -{ - int i; - - for (i=0; i < n; i++) - { - float f = tmpF[i] * tmpI[i]; - if( out[i] != f ) - { - log_error("CONSTANT test failed\n"); - return -1; - } - } - - log_info("CONSTANT test passed\n"); - return 0; -} - - -static int -verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n) -{ - int i; - cl_int j; - for (i=0; i < n; i++) - { - float sum = 0; - for (j=0; j < l; ++j) - sum += tmp[j*3]; - - if( out[i] != sum ) - { - log_error("loop CONSTANT test failed\n"); - return -1; - } - } - - log_info("loop CONSTANT test passed\n"); - return 0; -} - -int -test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_mem streams[3]; - cl_int *tmpI; - cl_float *tmpF, *out; - cl_program program; - cl_kernel kernel; - size_t global_threads[3]; - int err; - unsigned int i; - cl_ulong maxSize, maxGlobalSize, maxAllocSize; - size_t num_floats, num_ints, constant_values; - MTdata d; - RoundingMode oldRoundMode; - int isRTZ = 0; - - /* Verify our test buffer won't be bigger than allowed */ - err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( err, "Unable to get max constant buffer size" ); - - log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize); - - // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE - err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0); - test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); - - if (maxSize > maxGlobalSize / 4) - maxSize = maxGlobalSize / 4; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0); - test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE "); - - if (maxSize > maxAllocSize) - maxSize = maxAllocSize; - - maxSize/=4; - num_ints = (size_t)maxSize/sizeof(cl_int); - num_floats = (size_t)maxSize/sizeof(cl_float); - if (num_ints >= num_floats) { - constant_values = num_floats; - } else { - constant_values = num_ints; - } - - log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n", - constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float)); - - tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values); - tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values); - out = (cl_float*)malloc(sizeof(cl_float) * constant_values); - streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * constant_values, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -const char *constant_source_kernel_code[] = { -"__constant int outVal = 42;\n" -"__constant int outIndex = 7;\n" -"__constant int outValues[ 16 ] = { 17, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };\n" -"\n" -"__kernel void constant_kernel( __global int *out )\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" if( tid == 0 )\n" -" {\n" -" out[ 0 ] = outVal;\n" -" out[ 1 ] = outValues[ outIndex ];\n" -" }\n" -" else\n" -" {\n" -" out[ tid + 1 ] = outValues[ tid ];\n" -" }\n" -"}\n" }; - -int test_constant_source(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - clProgramWrapper program; - clKernelWrapper kernel; - - clMemWrapper outStream; - cl_int outValues[ 17 ]; - cl_int expectedValues[ 17 ] = { 42, 1985, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 }; - - cl_int error; - - - // Create a kernel to test with - error = create_single_kernel_helper( context, &program, &kernel, 1, constant_source_kernel_code, "constant_kernel" ); - test_error( error, "Unable to create testing kernel" ); - - // Create our output buffer - outStream = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof( outValues ), NULL, &error ); - test_error( error, "Unable to create output buffer" ); - - // Set the argument - error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream ); - test_error( error, "Unable to set kernel argument" ); - - // Run test kernel - size_t threads[ 1 ] = { 16 }; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); - test_error( error, "Unable to enqueue kernel" ); - - // Read results - error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof( outValues ), outValues, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); - - // Verify results - for( int i = 0; i < 17; i++ ) - { - if( expectedValues[ i ] != outValues[ i ] ) - { - if( i == 0 ) - log_error( "ERROR: Output value %d from constant source global did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] ); - else if( i == 1 ) - log_error( "ERROR: Output value %d from constant-indexed constant array did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] ); - else - log_error( "ERROR: Output value %d from variable-indexed constant array did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] ); - return -1; - } - } - - return 0; -} - - - - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_createkernelsinprogram.c b/test_conformance/compatibility/test_conformance/basic/test_createkernelsinprogram.c deleted file mode 100644 index 86eaf092..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_createkernelsinprogram.c +++ /dev/null @@ -1,121 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" - -const char *sample_single_kernel = { -"__kernel void sample_test(__global float *src, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = (int)src[tid];\n" -"\n" -"}\n"}; - -const char *sample_double_kernel = { -"__kernel void sample_test(__global float *src, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = (int)src[tid];\n" -"\n" -"}\n" -"__kernel void sample_test2(__global float *src, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = (int)src[tid];\n" -"\n" -"}\n"}; - - -int -test_createkernelsinprogram(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_program program; - cl_kernel kernel[2]; - unsigned int num_kernels; - size_t lengths[2]; - int err; - - lengths[0] = strlen(sample_single_kernel); - program = clCreateProgramWithSource(context, 1, &sample_single_kernel, lengths, NULL); - if (!program) - { - log_error("clCreateProgramWithSource failed\n"); - return -1; - } - - err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clBuildProgramExecutable failed\n"); - return -1; - } - - err = clCreateKernelsInProgram(program, 1, kernel, &num_kernels); - if ( (err != CL_SUCCESS) || (num_kernels != 1) ) - { - log_error("clCreateKernelsInProgram test failed for a single kernel\n"); - return -1; - } - - clReleaseKernel(kernel[0]); - clReleaseProgram(program); - - lengths[0] = strlen(sample_double_kernel); - program = clCreateProgramWithSource(context, 1, &sample_double_kernel, lengths, NULL); - if (!program) - { - log_error("clCreateProgramWithSource failed\n"); - return -1; - } - - err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clBuildProgramExecutable failed\n"); - return -1; - } - - err = clCreateKernelsInProgram(program, 2, kernel, &num_kernels); - if ( (err != CL_SUCCESS) || (num_kernels != 2) ) - { - log_error("clCreateKernelsInProgram test failed for two kernels\n"); - return -1; - } - - log_info("clCreateKernelsInProgram test passed\n"); - - clReleaseKernel(kernel[0]); - clReleaseKernel(kernel[1]); - clReleaseProgram(program); - - - return err; -} - - - - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/compatibility/test_conformance/basic/test_enqueue_map.cpp deleted file mode 100644 index ab0c2eef..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_enqueue_map.cpp +++ /dev/null @@ -1,253 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" -#include "harness/conversions.h" -#include "harness/typeWrappers.h" - -const cl_mem_flags flag_set[] = { - CL_MEM_ALLOC_HOST_PTR, - CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, - CL_MEM_USE_HOST_PTR, - CL_MEM_COPY_HOST_PTR, - 0 -}; -const char* flag_set_names[] = { - "CL_MEM_ALLOC_HOST_PTR", - "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR", - "CL_MEM_USE_HOST_PTR", - "CL_MEM_COPY_HOST_PTR", - "0" -}; - -int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - const size_t bufferSize = 256*256; - int src_flag_id; - MTdata d = init_genrand( gRandomSeed ); - cl_char *initialData = (cl_char*)malloc(bufferSize); - cl_char *finalData = (cl_char*)malloc(bufferSize); - - for (src_flag_id=0; src_flag_id < 5; src_flag_id++) - { - clMemWrapper memObject; - log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); - - generate_random_data( kChar, (unsigned int)bufferSize, d, initialData ); - - if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) - memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), initialData, &error); - else - memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), NULL, &error); - test_error( error, "Unable to create testing buffer" ); - - if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) - { - error = clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize * sizeof( cl_char ), initialData, 0, NULL, NULL); - test_error( error, "clEnqueueWriteBuffer failed"); - } - - for( int i = 0; i < 128; i++ ) - { - - size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d ); - size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d ); - - cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - offset, length, 0, NULL, NULL, &error ); - if( error != CL_SUCCESS ) - { - print_error( error, "clEnqueueMapBuffer call failed" ); - log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length ); - free( initialData ); - free( finalData ); - free_mtdata(d); - return -1; - } - - // Write into the region - for( size_t j = 0; j < length; j++ ) - { - cl_char spin = (cl_char)genrand_int32( d ); - - // Test read AND write in one swipe - cl_char value = mappedRegion[ j ]; - value = spin - value; - mappedRegion[ j ] = value; - - // Also update the initial data array - value = initialData[ offset + j ]; - value = spin - value; - initialData[ offset + j ] = value; - } - - // Unmap - error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); - test_error( error, "Unable to unmap buffer" ); - } - - // Final validation: read actual values of buffer and compare against our reference - error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, sizeof( cl_char ) * bufferSize, finalData, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); - - for( size_t q = 0; q < bufferSize; q++ ) - { - if( initialData[ q ] != finalData[ q ] ) - { - log_error( "ERROR: Sample %d did not validate! Got %d, expected %d\n", (int)q, (int)finalData[ q ], (int)initialData[ q ] ); - free( initialData ); - free( finalData ); - free_mtdata(d); - return -1; - } - } - } // cl_mem flags - - free( initialData ); - free( finalData ); - free_mtdata(d); - - return 0; -} - -int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 }; - const size_t imageSize = 256; - int src_flag_id; - cl_uint *initialData; - cl_uint *finalData; - MTdata d; - - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) - - initialData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint)); - finalData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint)); - - if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) ) - { - log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" ); - free(initialData); - free(finalData); - return -1; - } - - d = init_genrand( gRandomSeed ); - for (src_flag_id=0; src_flag_id < 5; src_flag_id++) { - clMemWrapper memObject; - log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); - - generate_random_data( kUInt, (unsigned int)( imageSize * imageSize ), d, initialData ); - - if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) - memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format, - imageSize, imageSize, 0, initialData, &error ); - else - memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format, - imageSize, imageSize, 0, NULL, &error ); - test_error( error, "Unable to create testing buffer" ); - - if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) { - size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1}; - error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, NULL, NULL, initialData, 0, NULL, NULL); - test_error( error, "Unable to write to testing buffer" ); - } - - for( int i = 0; i < 128; i++ ) - { - - size_t offset[3], region[3]; - size_t rowPitch; - - offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); - region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d ); - offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); - region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d ); - offset[ 2 ] = 0; - region[ 2 ] = 1; - cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - offset, region, &rowPitch, NULL, 0, NULL, NULL, &error ); - if( error != CL_SUCCESS ) - { - print_error( error, "clEnqueueMapImage call failed" ); - log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] ); - free(initialData); - free(finalData); - free_mtdata(d); - return -1; - } - - // Write into the region - cl_uint *mappedPtr = mappedRegion; - for( size_t y = 0; y < region[ 1 ]; y++ ) - { - for( size_t x = 0; x < region[ 0 ] * 4; x++ ) - { - cl_int spin = (cl_int)random_in_range( 16, 1024, d ); - - cl_int value; - // Test read AND write in one swipe - value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ]; - value = spin - value; - mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value; - - // Also update the initial data array - value = initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ]; - value = spin - value; - initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ] = value; - } - } - - // Unmap - error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); - test_error( error, "Unable to unmap buffer" ); - } - - // Final validation: read actual values of buffer and compare against our reference - size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 }; - error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); - - for( size_t q = 0; q < imageSize * imageSize * 4; q++ ) - { - if( initialData[ q ] != finalData[ q ] ) - { - log_error( "ERROR: Sample %d (coord %d,%d) did not validate! Got %d, expected %d\n", (int)q, (int)( ( q / 4 ) % imageSize ), (int)( ( q / 4 ) / imageSize ), - (int)finalData[ q ], (int)initialData[ q ] ); - free(initialData); - free(finalData); - free_mtdata(d); - return -1; - } - } - } // cl_mem_flags - - free(initialData); - free(finalData); - free_mtdata(d); - return 0; -} - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_explicit_s2v.cpp b/test_conformance/compatibility/test_conformance/basic/test_explicit_s2v.cpp deleted file mode 100644 index 4079d714..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_explicit_s2v.cpp +++ /dev/null @@ -1,384 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" -#include "harness/conversions.h" -#include "harness/typeWrappers.h" - -#define DECLARE_S2V_IDENT_KERNEL(srctype,dsttype,size) \ -"__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n" \ -"{\n" \ -" int tid = get_global_id(0);\n" \ -" " srctype " src = sourceValues[tid];\n" \ -"\n" \ -" destValues[tid] = (" dsttype #size ")src;\n" \ -"\n" \ -"}\n" - -#define DECLARE_S2V_IDENT_KERNELS(srctype,dsttype) \ -{ \ -DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,2), \ -DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,4), \ -DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,8), \ -DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,16) \ -} - -#define DECLARE_EMPTY { NULL, NULL, NULL, NULL, NULL } - -/* Note: the next four arrays all must match in order and size to the ExplicitTypes enum in conversions.h!!! */ - -#define DECLARE_S2V_IDENT_KERNELS_SET(srctype) \ -{ \ -DECLARE_S2V_IDENT_KERNELS(#srctype,bool), \ - DECLARE_S2V_IDENT_KERNELS(#srctype,char), \ - DECLARE_S2V_IDENT_KERNELS(#srctype,uchar), \ - DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char), \ -DECLARE_S2V_IDENT_KERNELS(#srctype,short), \ -DECLARE_S2V_IDENT_KERNELS(#srctype,ushort), \ -DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short), \ -DECLARE_S2V_IDENT_KERNELS(#srctype,int), \ -DECLARE_S2V_IDENT_KERNELS(#srctype,uint), \ -DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int), \ -DECLARE_S2V_IDENT_KERNELS(#srctype,long), \ -DECLARE_S2V_IDENT_KERNELS(#srctype,ulong), \ -DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long), \ -DECLARE_S2V_IDENT_KERNELS(#srctype,float), \ -DECLARE_EMPTY \ -} - -#define DECLARE_EMPTY_SET \ -{ \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY, \ -DECLARE_EMPTY \ -} - - -/* The overall array */ -const char * kernel_explicit_s2v_set[kNumExplicitTypes][kNumExplicitTypes][5] = { - DECLARE_S2V_IDENT_KERNELS_SET(bool), - DECLARE_S2V_IDENT_KERNELS_SET(char), - DECLARE_S2V_IDENT_KERNELS_SET(uchar), - DECLARE_S2V_IDENT_KERNELS_SET(unsigned char), - DECLARE_S2V_IDENT_KERNELS_SET(short), - DECLARE_S2V_IDENT_KERNELS_SET(ushort), - DECLARE_S2V_IDENT_KERNELS_SET(unsigned short), - DECLARE_S2V_IDENT_KERNELS_SET(int), - DECLARE_S2V_IDENT_KERNELS_SET(uint), - DECLARE_S2V_IDENT_KERNELS_SET(unsigned int), - DECLARE_S2V_IDENT_KERNELS_SET(long), - DECLARE_S2V_IDENT_KERNELS_SET(ulong), - DECLARE_S2V_IDENT_KERNELS_SET(unsigned long), - DECLARE_S2V_IDENT_KERNELS_SET(float), - DECLARE_EMPTY_SET -}; - -int test_explicit_s2v_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programSrc, - ExplicitType srcType, unsigned int count, ExplicitType destType, unsigned int vecSize, void *inputData ) -{ - clProgramWrapper program; - clKernelWrapper kernel; - int error; - clMemWrapper streams[2]; - void *outData; - unsigned char convertedData[ 8 ]; /* Max type size is 8 bytes */ - size_t threadSize[3], groupSize[3]; - unsigned int i, s; - unsigned char *inPtr, *outPtr; - size_t paramSize, destTypeSize; - - const char* finalProgramSrc[2] = { - "", // optional pragma - programSrc - }; - - if (srcType == kDouble || destType == kDouble) { - finalProgramSrc[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - } - - - if( programSrc == NULL ) - return 0; - - paramSize = get_explicit_type_size( srcType ); - destTypeSize = get_explicit_type_size( destType ); - - size_t destStride = destTypeSize * vecSize; - - outData = malloc( destStride * count ); - - if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) ) - { - log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] ); - return -1; - } - - streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error); - test_error( error, "clCreateBuffer failed"); - streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), destStride * count, NULL, &error); - test_error( error, "clCreateBuffer failed"); - - /* Set the arguments */ - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] ); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] ); - test_error( error, "Unable to set indexed kernel arguments" ); - - /* Run the kernel */ - threadSize[0] = count; - - error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] ); - test_error( error, "Unable to get work group size to use" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); - - /* Now verify the results. Each value should have been duplicated four times, and we should be able to just - do a memcpy instead of relying on the actual type of data */ - error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, destStride * count, outData, 0, NULL, NULL ); - test_error( error, "Unable to read output values!" ); - - inPtr = (unsigned char *)inputData; - outPtr = (unsigned char *)outData; - - for( i = 0; i < count; i++ ) - { - /* Convert the input data element to our output data type to compare against */ - convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType ); - - /* Now compare every element of the vector */ - for( s = 0; s < vecSize; s++ ) - { - if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 ) - { - unsigned int *p = (unsigned int *)outPtr; - log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize ); - log_error( " Input: 0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) ); - log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); - return -1; - } - } - inPtr += paramSize; - outPtr += destStride; - } - - free( outData ); - - return 0; -} - -int test_explicit_s2v_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, ExplicitType srcType, - unsigned int count, void *inputData ) -{ - unsigned int sizes[] = { 2, 4, 8, 16, 0 }; - int i, dstType, failed = 0; - - - for( dstType = kBool; dstType < kNumExplicitTypes; dstType++ ) - { - if( dstType == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) - continue; - - if (( dstType == kLong || dstType == kULong ) && !gHasLong ) - continue; - - for( i = 0; sizes[i] != 0; i++ ) - { - if( dstType != srcType ) - continue; - if( strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL || - strchr( get_explicit_type_name( (ExplicitType)dstType ), ' ' ) != NULL ) - continue; - - if( test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dstType ][ i ], - srcType, count, (ExplicitType)dstType, sizes[ i ], inputData ) != 0 ) - { - log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n", - get_explicit_type_name(srcType), get_explicit_type_name((ExplicitType)dstType), sizes[i], get_explicit_type_name((ExplicitType)dstType) ); - failed = -1; - break; - } - } - } - - return failed; -} - -int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" ); - return 0; -#if 0 - bool data[128]; - - generate_random_data( kBool, 128, data ); - - return test_explicit_s2v_function_set( deviceID, context, queue, kBool, 128, data ); -#endif -} - -int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - char data[128]; - RandomSeed seed(gRandomSeed); - - generate_random_data( kChar, 128, seed, data ); - - return test_explicit_s2v_function_set( deviceID, context, queue, kChar, 128, data ); -} - -int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - unsigned char data[128]; - RandomSeed seed(gRandomSeed); - - generate_random_data( kUChar, 128, seed, data ); - - if( test_explicit_s2v_function_set( deviceID, context, queue, kUChar, 128, data ) != 0 ) - return -1; - if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedChar, 128, data ) != 0 ) - return -1; - return 0; -} - -int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - short data[128]; - RandomSeed seed(gRandomSeed); - - generate_random_data( kShort, 128, seed, data ); - - if( test_explicit_s2v_function_set( deviceID, context, queue, kShort, 128, data ) != 0 ) - return -1; - return 0; -} - -int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - unsigned short data[128]; - RandomSeed seed(gRandomSeed); - - generate_random_data( kUShort, 128, seed, data ); - - if( test_explicit_s2v_function_set( deviceID, context, queue, kUShort, 128, data ) != 0 ) - return -1; - if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedShort, 128, data ) != 0 ) - return -1; - return 0; -} - -int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int data[128]; - RandomSeed seed(gRandomSeed); - - generate_random_data( kInt, 128, seed, data ); - - if( test_explicit_s2v_function_set( deviceID, context, queue, kInt, 128, data ) != 0 ) - return -1; - return 0; -} - -int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - unsigned int data[128]; - RandomSeed seed(gRandomSeed); - - generate_random_data( kUInt, 128, seed, data ); - - if( test_explicit_s2v_function_set( deviceID, context, queue, kUInt, 128, data ) != 0 ) - return -1; - if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedInt, 128, data ) != 0 ) - return -1; - return 0; -} - -int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_long data[128]; - RandomSeed seed(gRandomSeed); - - generate_random_data( kLong, 128, seed, data ); - - if( test_explicit_s2v_function_set( deviceID, context, queue, kLong, 128, data ) != 0 ) - return -1; - return 0; -} - -int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_ulong data[128]; - RandomSeed seed(gRandomSeed); - - generate_random_data( kULong, 128, seed, data ); - - if( test_explicit_s2v_function_set( deviceID, context, queue, kULong, 128, data ) != 0 ) - return -1; - if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedLong, 128, data ) != 0 ) - return -1; - return 0; -} - -int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - float data[128]; - RandomSeed seed(gRandomSeed); - - generate_random_data( kFloat, 128, seed, data ); - - if( test_explicit_s2v_function_set( deviceID, context, queue, kFloat, 128, data ) != 0 ) - return -1; - return 0; -} - - -int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - double data[128]; - RandomSeed seed(gRandomSeed); - - if( !is_extension_available( deviceID, "cl_khr_fp64" ) ) { - log_info("Extension cl_khr_fp64 not supported. Skipping test.\n"); - return 0; - } - - generate_random_data( kDouble, 128, seed, data ); - - if( test_explicit_s2v_function_set( deviceID, context, queue, kDouble, 128, data ) != 0 ) - return -1; - return 0; -} - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_float2int.c b/test_conformance/compatibility/test_conformance/basic/test_float2int.c deleted file mode 100644 index 2a78f3a0..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_float2int.c +++ /dev/null @@ -1,160 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" - -const char *float2int_kernel_code = -"__kernel void test_float2int(__global float *src, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = (int)src[tid];\n" -"\n" -"}\n"; - - -int -verify_float2int(cl_float *inptr, cl_int *outptr, int n) -{ - int i; - - for (i=0; i -#include -#include -#include -#include "harness/rounding_mode.h" - -#include "procs.h" - -const char *fpadd_kernel_code = -"__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] + srcB[tid];\n" -"}\n"; - -const char *fpsub_kernel_code = -"__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] - srcB[tid];\n" -"}\n"; - -const char *fpmul_kernel_code = -"__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid];\n" -"}\n"; - - -static const float MAX_ERR = 1e-5f; - -int -verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n) -{ - float r; - int i; - - for (i=0; i -#include -#include -#include -#include "harness/rounding_mode.h" - - -#include "procs.h" - -const char *fpadd2_kernel_code = -"__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] + srcB[tid];\n" -"}\n"; - -const char *fpsub2_kernel_code = -"__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] - srcB[tid];\n" -"}\n"; - -const char *fpmul2_kernel_code = -"__kernel void test_fpmul2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid];\n" -"}\n"; - - -int -verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n) -{ - float r; - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" -#include "harness/rounding_mode.h" - -const char *fpadd4_kernel_code = -"__kernel void test_fpadd4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] + srcB[tid];\n" -"}\n"; - -const char *fpsub4_kernel_code = -"__kernel void test_fpsub4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] - srcB[tid];\n" -"}\n"; - -const char *fpmul4_kernel_code = -"__kernel void test_fpmul4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid];\n" -"}\n"; - - -int -verify_fpadd4(float *inptrA, float *inptrB, float *outptr, int n) -{ - float r; - int i; - - for (i=0; i -#include -#include -#include -#include - - -#include "procs.h" - -int hi_offset( int index, int vectorSize) { return index + vectorSize / 2; } -int lo_offset( int index, int vectorSize) { return index; } -int even_offset( int index, int vectorSize ) { return index * 2; } -int odd_offset( int index, int vectorSize ) { return index * 2 + 1; } - -typedef int (*OffsetFunc)( int index, int vectorSize ); -static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset }; -typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName ); -static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" }; -static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" }; - -static const unsigned int vector_sizes[] = { 1, 2, 3, 4, 8, 16}; -static const unsigned int vector_aligns[] = { 1, 2, 4, 4, 8, 16}; -static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4}; -// if input is size vector_sizes[i], output is size -// vector_sizes[out_vector_idx[i]] -// input type name is strcat(gentype, vector_size_names[i]); -// and output type name is -// strcat(gentype, vector_size_names[out_vector_idx[i]]); -static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4, - -1,-1,-1,-1,-1,-1,-1,5}; -static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"}; - -static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 }; -static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse ); - -int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_int *input_ptr, *output_ptr, *p; - int err; - cl_uint i; - int hasDouble = is_extension_available( device, "cl_khr_fp64" ); - cl_uint vectorSize, operatorToUse; - cl_uint type; - MTdata d; - - int expressionMode; - int numExpressionModes = 2; - - size_t length = sizeof(cl_int) * 4 * n_elems; - - input_ptr = (cl_int*)malloc(length); - output_ptr = (cl_int*)malloc(length); - - p = input_ptr; - d = init_genrand( gRandomSeed ); - for (i=0; i<4 * (cl_uint) n_elems; i++) - p[i] = genrand_int32(d); - free_mtdata(d); d = NULL; - - for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ ) - { - // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes - size_t elementCount = length / kSizes[type]; - cl_mem streams[2]; - - // skip double if unavailable - if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" ))) - continue; - - if( !gHasLong && - ( 0 == strcmp( test_str_names[type], "long" )) && - ( 0 == strcmp( test_str_names[type], "ulong" ))) - continue; - - log_info( "%s", test_str_names[type] ); - fflush( stdout ); - - // Set up data streams for the type - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } - - for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ ) - { - log_info( " %s", operatorToUse_names[ operatorToUse ] ); - fflush( stdout ); - for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) { - for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) { - - cl_program program = NULL; - cl_kernel kernel = NULL; - cl_uint outVectorSize = out_vector_idx[vectorSize]; - char expression[1024]; - - const char *source[] = { - "", // optional pragma string - "__kernel void test_", operatorToUse_names[ operatorToUse ], "_", test_str_names[type], vector_size_names[vectorSize], - "(__global ", test_str_names[type], vector_size_names[vectorSize], - " *srcA, __global ", test_str_names[type], vector_size_names[outVectorSize], - " *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " ", test_str_names[type], - vector_size_names[out_vector_idx[vectorSize]], - " tmp = ", expression, ".", operatorToUse_names[ operatorToUse ], ";\n" - " dst[tid] = tmp;\n" - "}\n" - }; - - if(expressionMode == 0) { - sprintf(expression, "srcA[tid]"); - } else if(expressionMode == 1) { - switch(vector_sizes[vectorSize]) { - case 16: - sprintf(expression, - "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))", - test_str_names[type] - ); - break; - case 8: - sprintf(expression, - "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))", - test_str_names[type] - ); - break; - case 4: - sprintf(expression, - "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))", - test_str_names[type] - ); - break; - case 3: - sprintf(expression, - "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))", - test_str_names[type] - ); - break; - case 2: - sprintf(expression, - "((%s2)(srcA[tid].s0, srcA[tid].s1))", - test_str_names[type] - ); - break; - default : - sprintf(expression, "srcA[tid]"); - log_info("Default\n"); - } - } else { - sprintf(expression, "srcA[tid]"); - } - - if (0 == strcmp( test_str_names[type], "double" )) - source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - - char kernelName[128]; - snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] ); - err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName ); - if (err) - return -1; - - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } - - //Wipe the output buffer clean - uint32_t pattern = 0xdeadbeef; - memset_pattern4( output_ptr, &pattern, length ); - err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } - - size_t size = elementCount / (vector_aligns[vectorSize]); - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; - } - - err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } - - char *inP = (char *)input_ptr; - char *outP = (char *)output_ptr; - outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) - - ( vector_sizes[ out_vector_idx[vectorSize] ] ) ); - // was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) ); - for( size_t e = 0; e < size; e++ ) - { - if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) { - - log_info("e is %d\n", (int)e); - fflush(stdout); - // break; - return -1; - } - inP += kSizes[type] * ( vector_aligns[vectorSize] ); - outP += kSizes[type] * ( vector_aligns[outVectorSize] ); - } - - clReleaseKernel( kernel ); - clReleaseProgram( program ); - log_info( "." ); - fflush( stdout ); - } - } - } - - clReleaseMemObject( streams[0] ); - clReleaseMemObject( streams[1] ); - log_info( "done\n" ); - } - - log_info("HiLoEO test passed\n"); - - free(input_ptr); - free(output_ptr); - - return err; -} - -static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse ) -{ - cl_ulong array[8]; - void *p = array; - size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]]; - size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]]; - // was 1 << (vectorSize-1); - OffsetFunc f = offsetFuncs[ operatorToUse ]; - size_t elementSize = kSizes[type]; - - if(vector_size_names[vectorSize][0] == '3') { - if(operatorToUse_names[operatorToUse][0] == 'h' || - operatorToUse_names[operatorToUse][0] == 'o') // hi or odd - { - cmpVectorSize = 1; // special case for vec3 ignored values - } - } - - switch( elementSize ) - { - case 1: - { - char *i = (char*)in; - char *o = (char*)out; - size_t j; - cl_uint k; - OffsetFunc f = offsetFuncs[ operatorToUse ]; - - for( k = 0; k < elementCount; k++ ) - { - char *o2 = (char*)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - if( memcmp( o, o2, elementSize * cmpVectorSize ) ) - { - log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", %d", i[j] ); - log_info( " } --> { %d", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", %d", o[j] ); - log_info( " }\n" ); - return -1; - } - i += 2 * halfVectorSize; - o += halfVectorSize; - } - } - break; - - case 2: - { - short *i = (short*)in; - short *o = (short*)out; - size_t j; - cl_uint k; - - for( k = 0; k < elementCount; k++ ) - { - short *o2 = (short*)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - if( memcmp( o, o2, elementSize * cmpVectorSize ) ) - { - log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", %d", i[j] ); - log_info( " } --> { %d", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", %d", o[j] ); - log_info( " }\n" ); - return -1; - } - i += 2 * halfVectorSize; - o += halfVectorSize; - } - } - break; - - case 4: - { - int *i = (int*)in; - int *o = (int*)out; - size_t j; - cl_uint k; - - for( k = 0; k < elementCount; k++ ) - { - int *o2 = (int *)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - for( j = 0; j < cmpVectorSize; j++ ) - { - /* Allow float nans to be binary different */ - if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j]))) - { - log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", 0x%8.8x", i[j] ); - log_info( " } --> { 0x%8.8x", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", 0x%8.8x", o[j] ); - log_info( " }\n" ); - return -1; - } - } - i += 2 * halfVectorSize; - o += halfVectorSize; - } - } - break; - - case 8: - { - cl_ulong *i = (cl_ulong*)in; - cl_ulong *o = (cl_ulong*)out; - size_t j; - cl_uint k; - - for( k = 0; k < elementCount; k++ ) - { - cl_ulong *o2 = (cl_ulong*)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - if( memcmp( o, o2, elementSize * cmpVectorSize ) ) - { - log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", 0x%16.16llx", i[j] ); - log_info( " } --> { 0x%16.16llx", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", 0x%16.16llx", o[j] ); - log_info( " }\n" ); - return -1; - } - i += 2 * halfVectorSize; - o += halfVectorSize; - } - } - break; - - default: - log_info( "Internal error. Unknown data type\n" ); - return -2; - } - - return 0; -} - - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_hostptr.c b/test_conformance/compatibility/test_conformance/basic/test_hostptr.c deleted file mode 100644 index 92c2318e..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_hostptr.c +++ /dev/null @@ -1,276 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" - -const char *hostptr_kernel_code = -"__kernel void test_hostptr(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] + srcB[tid];\n" -"}\n"; - -static const float MAX_ERR = 1e-5f; - -static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n) -{ - cl_float r; - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -const char *conditional_kernel_code = -"__kernel void test_if(__global int *src, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" if (src[tid] == 0)\n" -" dst[tid] = 0x12345678;\n" -" else if (src[tid] == 1)\n" -" dst[tid] = 0x23456781;\n" -" else if (src[tid] == 2)\n" -" dst[tid] = 0x34567812;\n" -" else if (src[tid] == 3)\n" -" dst[tid] = 0x45678123;\n" -" else if (src[tid] == 4)\n" -" dst[tid] = 0x56781234;\n" -" else if (src[tid] == 5)\n" -" dst[tid] = 0x67812345;\n" -" else if (src[tid] == 6)\n" -" dst[tid] = 0x78123456;\n" -" else if (src[tid] == 7)\n" -" dst[tid] = 0x81234567;\n" -" else\n" -" dst[tid] = 0x7FFFFFFF;\n" -"\n" -"}\n"; - -const int results[] = { - 0x12345678, - 0x23456781, - 0x34567812, - 0x45678123, - 0x56781234, - 0x67812345, - 0x78123456, - 0x81234567, -}; - -int -verify_if(int *inptr, int *outptr, int n) -{ - int r, i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -static unsigned char * -generate_rgba8_image(int w, int h, MTdata d) -{ - unsigned char *ptr = (unsigned char*)malloc(w * h * 4); - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -static unsigned char * -generate_uint8_image(unsigned num_elements, MTdata d) -{ - unsigned char *ptr = (unsigned char*)malloc(num_elements); - unsigned i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -static unsigned char * -generate_rgba8_image(int w, int h, MTdata d) -{ - unsigned char *ptr = (unsigned char*)malloc(w * h * 4); - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -static unsigned char * -generate_rgba8_image(int w, int h, MTdata d) -{ - unsigned char *ptr = (unsigned char*)malloc(w * h * 4); - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -static unsigned char * -generate_rgba8_image(int w, int h, int d, MTdata mtData) -{ - unsigned char *ptr = (unsigned char*)malloc(w * h * d *4); - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -const char *int2float_kernel_code = -"__kernel void test_int2float(__global int *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = (float)src[tid];\n" -"\n" -"}\n"; - - -int -verify_int2float(cl_int *inptr, cl_float *outptr, int n) -{ - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -const char *int_add_kernel_code = -"__kernel void test_int_add(__global int *srcA, __global int *srcB, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] + srcB[tid];\n" -"}\n"; - -const char *int_sub_kernel_code = -"__kernel void test_int_sub(__global int *srcA, __global int *srcB, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] - srcB[tid];\n" -"}\n"; - -const char *int_mul_kernel_code = -"__kernel void test_int_mul(__global int *srcA, __global int *srcB, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid];\n" -"}\n"; - -const char *int_mad_kernel_code = -"__kernel void test_int_mad(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" -"}\n"; - -static const float MAX_ERR = 1e-5f; - -int -verify_int_add(int *inptrA, int *inptrB, int *outptr, int n) -{ - int r; - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -const char *int_add2_kernel_code = -"__kernel void test_int_add2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] + srcB[tid];\n" -"}\n"; - -const char *int_sub2_kernel_code = -"__kernel void test_int_sub2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] - srcB[tid];\n" -"}\n"; - -const char *int_mul2_kernel_code = -"__kernel void test_int_mul2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid];\n" -"}\n"; - -const char *int_mad2_kernel_code = -"__kernel void test_int_mad2(__global int2 *srcA, __global int2 *srcB, __global int2 *srcC, __global int2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" -"}\n"; - -int -verify_int_add2(int *inptrA, int *inptrB, int *outptr, int n) -{ - int r; - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -const char *int_add4_kernel_code = -"__kernel void test_int_add4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] + srcB[tid];\n" -"}\n"; - -const char *int_sub4_kernel_code = -"__kernel void test_int_sub4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] - srcB[tid];\n" -"}\n"; - -const char *int_mul4_kernel_code = -"__kernel void test_int_mul4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid];\n" -"}\n"; - -const char *int_mad4_kernel_code = -"__kernel void test_int_mad4(__global int4 *srcA, __global int4 *srcB, __global int4 *srcC, __global int4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" -"}\n"; - -int -verify_int_add4(int *inptrA, int *inptrB, int *outptr, int n) -{ - int r; - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -const char *long_add_kernel_code = -"__kernel void test_long_add(__global long *srcA, __global long *srcB, __global long *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] + srcB[tid];\n" -"}\n"; - -const char *long_sub_kernel_code = -"__kernel void test_long_sub(__global long *srcA, __global long *srcB, __global long *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] - srcB[tid];\n" -"}\n"; - -const char *long_mul_kernel_code = -"__kernel void test_long_mul(__global long *srcA, __global long *srcB, __global long *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid];\n" -"}\n"; - -const char *long_mad_kernel_code = -"__kernel void test_long_mad(__global long *srcA, __global long *srcB, __global long *srcC, __global long *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" -"}\n"; - -static const float MAX_ERR = 1e-5f; - -int -verify_long_add(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n) -{ - cl_long r; - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -const char *long_add2_kernel_code = -"__kernel void test_long_add2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] + srcB[tid];\n" -"}\n"; - -const char *long_sub2_kernel_code = -"__kernel void test_long_sub2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] - srcB[tid];\n" -"}\n"; - -const char *long_mul2_kernel_code = -"__kernel void test_long_mul2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid];\n" -"}\n"; - -const char *long_mad2_kernel_code = -"__kernel void test_long_mad2(__global long2 *srcA, __global long2 *srcB, __global long2 *srcC, __global long2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" -"}\n"; - -int -verify_long_add2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n) -{ - cl_long r; - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -const char *long_add4_kernel_code = -"__kernel void test_long_add4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] + srcB[tid];\n" -"}\n"; - -const char *long_sub4_kernel_code = -"__kernel void test_long_sub4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] - srcB[tid];\n" -"}\n"; - -const char *long_mul4_kernel_code = -"__kernel void test_long_mul4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid];\n" -"}\n"; - -const char *long_mad4_kernel_code = -"__kernel void test_long_mad4(__global long4 *srcA, __global long4 *srcB, __global long4 *srcC, __global long4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" -"}\n"; - -int -verify_long_add4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n) -{ - cl_long r; - int i; - - for (i=0; i -#endif - -#include "procs.h" -#include "harness/conversions.h" -#include "harness/typeWrappers.h" -#include "harness/errorHelpers.h" - -// For global, local, and constant -const char *parameter_kernel_long = -"%s\n" // optional pragma -"kernel void test(global ulong *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n" -"{\n" -" results[0] = (ulong)&mem0[0];\n" -" results[1] = (ulong)&mem2[0];\n" -" results[2] = (ulong)&mem3[0];\n" -" results[3] = (ulong)&mem4[0];\n" -" results[4] = (ulong)&mem8[0];\n" -" results[5] = (ulong)&mem16[0];\n" -"}\n"; - -// For private and local -const char *local_kernel_long = -"%s\n" // optional pragma -"kernel void test(global ulong *results)\n" -"{\n" -" %s %s mem0[3];\n" -" %s %s2 mem2[3];\n" -" %s %s3 mem3[3];\n" -" %s %s4 mem4[3];\n" -" %s %s8 mem8[3];\n" -" %s %s16 mem16[3];\n" -" results[0] = (ulong)&mem0[0];\n" -" results[1] = (ulong)&mem2[0];\n" -" results[2] = (ulong)&mem3[0];\n" -" results[3] = (ulong)&mem4[0];\n" -" results[4] = (ulong)&mem8[0];\n" -" results[5] = (ulong)&mem16[0];\n" -"}\n"; - -// For constant -const char *constant_kernel_long = -"%s\n" // optional pragma -" constant %s mem0[3] = {0};\n" -" constant %s2 mem2[3] = {(%s2)(0)};\n" -" constant %s3 mem3[3] = {(%s3)(0)};\n" -" constant %s4 mem4[3] = {(%s4)(0)};\n" -" constant %s8 mem8[3] = {(%s8)(0)};\n" -" constant %s16 mem16[3] = {(%s16)(0)};\n" -"\n" -"kernel void test(global ulong *results)\n" -"{\n" -" results[0] = (ulong)&mem0;\n" -" results[1] = (ulong)&mem2;\n" -" results[2] = (ulong)&mem3;\n" -" results[3] = (ulong)&mem4;\n" -" results[4] = (ulong)&mem8;\n" -" results[5] = (ulong)&mem16;\n" -"}\n"; - - -// For global, local, and constant -const char *parameter_kernel_no_long = -"%s\n" // optional pragma -"kernel void test(global uint *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n" -"{\n" -" results[0] = (uint)&mem0[0];\n" -" results[1] = (uint)&mem2[0];\n" -" results[2] = (uint)&mem3[0];\n" -" results[3] = (uint)&mem4[0];\n" -" results[4] = (uint)&mem8[0];\n" -" results[5] = (uint)&mem16[0];\n" -"}\n"; - -// For private and local -const char *local_kernel_no_long = -"%s\n" // optional pragma -"kernel void test(global uint *results)\n" -"{\n" -" %s %s mem0[3];\n" -" %s %s2 mem2[3];\n" -" %s %s3 mem3[3];\n" -" %s %s4 mem4[3];\n" -" %s %s8 mem8[3];\n" -" %s %s16 mem16[3];\n" -" results[0] = (uint)&mem0[0];\n" -" results[1] = (uint)&mem2[0];\n" -" results[2] = (uint)&mem3[0];\n" -" results[3] = (uint)&mem4[0];\n" -" results[4] = (uint)&mem8[0];\n" -" results[5] = (uint)&mem16[0];\n" -"}\n"; - -// For constant -const char *constant_kernel_no_long = -"%s\n" // optional pragma -" constant %s mem0[3] = {0};\n" -" constant %s2 mem2[3] = {(%s2)(0)};\n" -" constant %s3 mem3[3] = {(%s3)(0)};\n" -" constant %s4 mem4[3] = {(%s4)(0)};\n" -" constant %s8 mem8[3] = {(%s8)(0)};\n" -" constant %s16 mem16[3] = {(%s16)(0)};\n" -"\n" -"kernel void test(global uint *results)\n" -"{\n" -" results[0] = (uint)&mem0;\n" -" results[1] = (uint)&mem2;\n" -" results[2] = (uint)&mem3;\n" -" results[3] = (uint)&mem4;\n" -" results[4] = (uint)&mem8;\n" -" results[5] = (uint)&mem16;\n" -"}\n"; - -enum AddressSpaces -{ - kGlobal = 0, - kLocal, - kConstant, - kPrivate -}; - -typedef enum AddressSpaces AddressSpaces; - -#define DEBUG 0 - -const char * get_explicit_address_name( AddressSpaces address ) -{ - /* Quick method to avoid branching: make sure the following array matches the Enum order */ - static const char *sExplicitAddressNames[] = { "global", "local", "constant", "private"}; - - return sExplicitAddressNames[ address ]; -} - - -int test_kernel_memory_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, AddressSpaces address ) -{ - const char *constant_kernel; - const char *parameter_kernel; - const char *local_kernel; - - if ( gHasLong ) - { - constant_kernel = constant_kernel_long; - parameter_kernel = parameter_kernel_long; - local_kernel = local_kernel_long; - } - else - { - constant_kernel = constant_kernel_no_long; - parameter_kernel = parameter_kernel_no_long; - local_kernel = local_kernel_no_long; - } - - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; - char *kernel_code = (char*)malloc(4096); - cl_kernel kernel; - cl_program program; - int error; - int total_errors = 0; - cl_mem results; - cl_ulong *results_data; - cl_mem mem0, mem2, mem3, mem4, mem8, mem16; - - results_data = (cl_ulong*)malloc(sizeof(cl_ulong)*6); - results = clCreateBuffer(context, 0, sizeof(cl_ulong)*6, NULL, &error); - test_error(error, "clCreateBuffer failed"); - - mem0 = clCreateBuffer(context, 0, sizeof(cl_long), NULL, &error); - test_error(error, "clCreateBuffer failed"); - mem2 = clCreateBuffer(context, 0, sizeof(cl_long)*2, NULL, &error); - test_error(error, "clCreateBuffer failed"); - mem3 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error); - test_error(error, "clCreateBuffer failed"); - mem4 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error); - test_error(error, "clCreateBuffer failed"); - mem8 = clCreateBuffer(context, 0, sizeof(cl_long)*8, NULL, &error); - test_error(error, "clCreateBuffer failed"); - mem16 = clCreateBuffer(context, 0, sizeof(cl_long)*16, NULL, &error); - test_error(error, "clCreateBuffer failed"); - - - // For each type - - // Calculate alignment mask for each size - - // For global, local, constant, private - - // If global, local or constant -- do parameter_kernel - // If private or local -- do local_kernel - // If constant -- do constant kernel - - int numConstantArgs; - clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(numConstantArgs), &numConstantArgs, NULL); - - int typeIndex; - for (typeIndex = 0; typeIndex < 10; typeIndex++) { - // Skip double tests if we don't support doubles - if (vecType[typeIndex] == kDouble && !is_extension_available(device, "cl_khr_fp64")) { - log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); - continue; - } - - if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) - continue; - - log_info("Testing %s...\n", get_explicit_type_name(vecType[typeIndex])); - - // Determine the expected alignment masks. - // E.g., if it is supposed to be 4 byte aligned, we should get 4-1=3 = ... 000011 - // We can then and the returned address with that and we should have 0. - cl_ulong alignments[6]; - alignments[0] = get_explicit_type_size(vecType[typeIndex])-1; - alignments[1] = (get_explicit_type_size(vecType[typeIndex])<<1)-1; - alignments[2] = (get_explicit_type_size(vecType[typeIndex])<<2)-1; - alignments[3] = (get_explicit_type_size(vecType[typeIndex])<<2)-1; - alignments[4] = (get_explicit_type_size(vecType[typeIndex])<<3)-1; - alignments[5] = (get_explicit_type_size(vecType[typeIndex])<<4)-1; - - // Parameter kernel - if (address == kGlobal || address == kLocal || address == kConstant) { - log_info("\tTesting parameter kernel...\n"); - - if ( (gIsEmbedded) && (address == kConstant) && (numConstantArgs < 6)) { - sprintf(kernel_code, parameter_kernel, - vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]) - ); - } - else { - sprintf(kernel_code, parameter_kernel, - vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]) - ); - } - //printf("Kernel is: \n%s\n", kernel_code); - - // Create the kernel - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test"); - test_error(error, "create_single_kernel_helper failed"); - - // Initialize the results - memset(results_data, 0, sizeof(cl_long)*5); - error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*6, results_data, 0, NULL, NULL); - test_error(error, "clEnqueueWriteBuffer failed"); - - // Set the arguments - error = clSetKernelArg(kernel, 0, sizeof(results), &results); - test_error(error, "clSetKernelArg failed"); - if (address != kLocal) { - error = clSetKernelArg(kernel, 1, sizeof(mem0), &mem0); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 2, sizeof(mem2), &mem2); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 3, sizeof(mem3), &mem3); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 4, sizeof(mem4), &mem4); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 5, sizeof(mem8), &mem8); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 6, sizeof(mem16), &mem16); - test_error(error, "clSetKernelArg failed"); - } else { - error = clSetKernelArg(kernel, 1, get_explicit_type_size(vecType[typeIndex]), NULL); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 2, get_explicit_type_size(vecType[typeIndex])*2, NULL); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 3, get_explicit_type_size(vecType[typeIndex])*4, NULL); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 4, get_explicit_type_size(vecType[typeIndex])*4, NULL); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 5, get_explicit_type_size(vecType[typeIndex])*8, NULL); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 6, get_explicit_type_size(vecType[typeIndex])*16, NULL); - test_error(error, "clSetKernelArg failed"); - } - - // Enqueue the kernel - size_t global_size = 1; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); - - // Read back the results - error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*6, results_data, 0, NULL, NULL); - test_error(error, "clEnqueueReadBuffer failed"); - - // Verify the results - if (gHasLong) { - for (int i = 0; i < 6; i++) { - if ((results_data[i] & alignments[i]) != 0) { - total_errors++; - log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]); - } else { - if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]); - } - } - } - // Verify the results on devices that do not support longs - else { - cl_uint *results_data_no_long = (cl_uint *)results_data; - - for (int i = 0; i < 6; i++) { - if ((results_data_no_long[i] & alignments[i]) != 0) { - total_errors++; - log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]); - } else { - if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]); - } - } - } - - clReleaseKernel(kernel); - clReleaseProgram(program); - } - - - - - // Local kernel - if (address == kLocal || address == kPrivate) { - log_info("\tTesting local kernel...\n"); - sprintf(kernel_code, local_kernel, - vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), - get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]) - ); - //printf("Kernel is: \n%s\n", kernel_code); - - // Create the kernel - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test"); - test_error(error, "create_single_kernel_helper failed"); - - // Initialize the results - memset(results_data, 0, sizeof(cl_long)*5); - error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL); - test_error(error, "clEnqueueWriteBuffer failed"); - - // Set the arguments - error = clSetKernelArg(kernel, 0, sizeof(results), &results); - test_error(error, "clSetKernelArg failed"); - - // Enqueue the kernel - size_t global_size = 1; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); - - // Read back the results - error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL); - test_error(error, "clEnqueueReadBuffer failed"); - - // Verify the results - if (gHasLong) { - for (int i = 0; i < 5; i++) { - if ((results_data[i] & alignments[i]) != 0) { - total_errors++; - log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]); - } else { - if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]); - } - } - } - // Verify the results on devices that do not support longs - else { - cl_uint *results_data_no_long = (cl_uint *)results_data; - - for (int i = 0; i < 5; i++) { - if ((results_data_no_long[i] & alignments[i]) != 0) { - total_errors++; - log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]); - } else { - if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]); - } - } - } - clReleaseKernel(kernel); - clReleaseProgram(program); - } - - - - // Constant kernel - if (address == kConstant) { - log_info("\tTesting constant kernel...\n"); - sprintf(kernel_code, constant_kernel, - vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name(vecType[typeIndex]), - get_explicit_type_name(vecType[typeIndex]), - get_explicit_type_name(vecType[typeIndex]), - get_explicit_type_name(vecType[typeIndex]), - get_explicit_type_name(vecType[typeIndex]), - get_explicit_type_name(vecType[typeIndex]), - get_explicit_type_name(vecType[typeIndex]), - get_explicit_type_name(vecType[typeIndex]), - get_explicit_type_name(vecType[typeIndex]), - get_explicit_type_name(vecType[typeIndex]), - get_explicit_type_name(vecType[typeIndex]), - get_explicit_type_name(vecType[typeIndex]) - ); - //printf("Kernel is: \n%s\n", kernel_code); - - // Create the kernel - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test"); - test_error(error, "create_single_kernel_helper failed"); - - // Initialize the results - memset(results_data, 0, sizeof(cl_long)*5); - error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL); - test_error(error, "clEnqueueWriteBuffer failed"); - - // Set the arguments - error = clSetKernelArg(kernel, 0, sizeof(results), &results); - test_error(error, "clSetKernelArg failed"); - - // Enqueue the kernel - size_t global_size = 1; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); - - // Read back the results - error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL); - test_error(error, "clEnqueueReadBuffer failed"); - - // Verify the results - if (gHasLong) { - for (int i = 0; i < 5; i++) { - if ((results_data[i] & alignments[i]) != 0) { - total_errors++; - log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]); - } else { - if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]); - } - } - } - // Verify the results on devices that do not support longs - else { - cl_uint *results_data_no_long = (cl_uint *)results_data; - - for (int i = 0; i < 5; i++) { - if ((results_data_no_long[i] & alignments[i]) != 0) { - total_errors++; - log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]); - } else { - if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]); - } - } - } - clReleaseKernel(kernel); - clReleaseProgram(program); - } - - - } - - clReleaseMemObject(results); - clReleaseMemObject(mem0); - clReleaseMemObject(mem2); - clReleaseMemObject(mem3); - clReleaseMemObject(mem4); - clReleaseMemObject(mem8); - clReleaseMemObject(mem16); - free( kernel_code ); - free( results_data ); - - if (total_errors != 0) - return -1; - return 0; - -} - - -int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) -{ - return test_kernel_memory_alignment( device, context, queue, n_elems, kLocal ); -} - -int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) -{ - return test_kernel_memory_alignment( device, context, queue, n_elems, kGlobal ); -} - -int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) -{ - // There is a class of approved OpenCL 1.0 conformant devices out there that in some circumstances - // are unable to meaningfully take (or more precisely use) the address of constant data by virtue - // of limitations in their ISA design. This feature was not tested in 1.0, so they were declared - // conformant by Khronos. The failure is however caught here. - // - // Unfortunately, determining whether or not these devices are 1.0 conformant is not the jurisdiction - // of the 1.1 tests -- We can't fail them from 1.1 conformance here because they are not 1.1 - // devices. They are merely 1.0 conformant devices that interop with 1.1 devices in a 1.1 platform. - // To add new binding tests now to conformant 1.0 devices would violate the workingroup requirement - // of no new tests for 1.0 devices. So certain allowances have to be made in intractable cases - // such as this one. - // - // There is some precedent. Similar allowances are made for other 1.0 hardware features such as - // local memory size. The minimum required local memory size grew from 16 kB to 32 kB in OpenCL 1.1. - - // Detect 1.0 devices - // Get CL_DEVICE_VERSION size - size_t string_size = 0; - int err; - if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, 0, NULL, &string_size ) ) ) - { - log_error( "FAILURE: Unable to get size of CL_DEVICE_VERSION string!" ); - return -1; - } - - //Allocate storage to hold the version string - char *version_string = (char*) malloc(string_size); - if( NULL == version_string ) - { - log_error( "FAILURE: Unable to allocate memory to hold CL_DEVICE_VERSION string!" ); - return -1; - } - - // Get CL_DEVICE_VERSION string - if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, string_size, version_string, NULL ) ) ) - { - log_error( "FAILURE: Unable to read CL_DEVICE_VERSION string!" ); - return -1; - } - - // easy out for 1.0 devices - const char *string_1_0 = "OpenCL 1.0 "; - if( 0 == strncmp( version_string, string_1_0, strlen(string_1_0)) ) - { - log_info( "WARNING: Allowing device to escape testing of difficult constant memory alignment case.\n\tDevice is not a OpenCL 1.1 device. CL_DEVICE_VERSION: \"%s\"\n", version_string ); - free(version_string); - return 0; - } - log_info( "Device version string: \"%s\"\n", version_string ); - free(version_string); - - // Everyone else is to be ground mercilessly under the wheels of progress - return test_kernel_memory_alignment( device, context, queue, n_elems, kConstant ); -} - -int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) -{ - return test_kernel_memory_alignment( device, context, queue, n_elems, kPrivate ); -} - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_local.c b/test_conformance/compatibility/test_conformance/basic/test_local.c deleted file mode 100644 index 21184c6f..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_local.c +++ /dev/null @@ -1,372 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" - -const char *barrier_with_localmem_kernel_code[] = { -"__kernel void compute_sum_with_localmem(__global int *a, int n, __local int *tmp_sum, __global int *sum)\n" -"{\n" -" int tid = get_local_id(0);\n" -" int lsize = get_local_size(0);\n" -" int i;\n" -"\n" -" tmp_sum[tid] = 0;\n" -" for (i=tid; i max_local_workgroup_size[0]) - kwgsize = max_local_workgroup_size[0]; - - // err = clSetKernelArgs(context, kernel, 4, NULL, values, sizes); - err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements); - err |= clSetKernelArg(kernel, 2, wgsize * sizeof(cl_int), NULL); - err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } - - global_threads[0] = wgsize; - local_threads[0] = wgsize; - - // Adjust the local thread size to fit and be a nice multiple. - if (kwgsize < wgsize) { - log_info("Adjusting wgsize down from %lu to %lu.\n", wgsize, kwgsize); - local_threads[0] = kwgsize; - } - while (global_threads[0] % local_threads[0] != 0) - local_threads[0]--; - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; - } - - err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } - - err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements); - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - free(input_ptr); - free(tmp_ptr); - free(output_ptr); - - return err; -} - -int test_local_kernel_def(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_mem streams[2]; - cl_program program; - cl_kernel kernel; - - cl_int *input_ptr, *output_ptr, *tmp_ptr; - size_t global_threads[1], local_threads[1]; - size_t wgsize, kwgsize; - int err, i; - char *program_source = (char*)malloc(sizeof(char)*2048); - MTdata d = init_genrand( gRandomSeed ); - size_t max_local_workgroup_size[3]; - memset(program_source, 0, 2048); - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof wgsize, &wgsize, NULL); - if (err) { - log_error("clGetDeviceInfo failed, %d\n\n", err); - return -1; - } - wgsize/=2; - if (wgsize < 1) - wgsize = 1; - - size_t in_length = sizeof(cl_int) * num_elements; - size_t out_length = sizeof(cl_int) * wgsize; - - input_ptr = (cl_int *)malloc(in_length); - output_ptr = (cl_int *)malloc(out_length); - tmp_ptr = (cl_int *)malloc(out_length); - - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, in_length, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, out_length, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - for (i=0; i (localMemSize / (sizeof(cl_int)*sizeof(cl_int))) ) - { - wgsize = localMemSize / (sizeof(cl_int)*sizeof(cl_int)); - } - - sprintf(program_source, barrier_with_localmem_kernel_code[1], (int)(wgsize * sizeof(cl_int))); - - err = create_single_kernel_helper(context, &program, &kernel, 1, (const char**)&program_source, "compute_sum_with_localmem" ); - free(program_source); - if (err) - return -1; - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof kwgsize, &kwgsize, NULL); - test_error(err, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE"); - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); - test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); - - // Pick the minimum of the device and the kernel - if (kwgsize > max_local_workgroup_size[0]) - kwgsize = max_local_workgroup_size[0]; - - // err = clSetKernelArgs(context, kernel, 4, NULL, values, sizes); - err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements); - err |= clSetKernelArg(kernel, 2, sizeof streams[1], &streams[1]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } - - global_threads[0] = wgsize; - local_threads[0] = wgsize; - - // Adjust the local thread size to fit and be a nice multiple. - if (kwgsize < wgsize) { - log_info("Adjusting wgsize down from %lu to %lu.\n", wgsize, kwgsize); - local_threads[0] = kwgsize; - } - while (global_threads[0] % local_threads[0] != 0) - local_threads[0]--; - - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; - } - - err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } - - err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements); - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - free(input_ptr); - free(tmp_ptr); - free(output_ptr); - - return err; -} - - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_local_kernel_scope.cpp b/test_conformance/compatibility/test_conformance/basic/test_local_kernel_scope.cpp deleted file mode 100644 index 51c5835f..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_local_kernel_scope.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" - -#define MAX_LOCAL_STORAGE_SIZE 256 -#define MAX_LOCAL_STORAGE_SIZE_STRING "256" - -const char *kernelSource[] = { - "__kernel void test( __global unsigned int * input, __global unsigned int *outMaxes )\n" - "{\n" - " __local unsigned int localStorage[ " MAX_LOCAL_STORAGE_SIZE_STRING " ];\n" - " unsigned int theValue = input[ get_global_id( 0 ) ];\n" - "\n" - " // If we just write linearly, there's no verification that the items in a group share local data\n" - " // So we write reverse-linearly, which requires items to read the local data written by at least one\n" - " // different item\n" - " localStorage[ get_local_size( 0 ) - get_local_id( 0 ) - 1 ] = theValue;\n" - "\n" - " // The barrier ensures that all local items have written to the local storage\n" - " barrier( CLK_LOCAL_MEM_FENCE );\n" - "\n" - " // Now we loop back through the local storage and look for the max value. We only do this if\n" - " // we're the first item in a group\n" - " unsigned int max = 0;\n" - " if( get_local_id( 0 ) == 0 )\n" - " {\n" - " for( size_t i = 0; i < get_local_size( 0 ); i++ )\n" - " {\n" - " if( localStorage[ i ] > max )\n" - " max = localStorage[ i ];\n" - " }\n" - " outMaxes[ get_group_id( 0 ) ] = max;\n" - " }\n" - "}\n" -}; - -int test_local_kernel_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[ 2 ]; - MTdata randSeed = init_genrand( gRandomSeed ); - - // Create a test kernel - error = create_single_kernel_helper( context, &program, &kernel, 1, kernelSource, "test" ); - test_error( error, "Unable to create test kernel" ); - - - // Determine an appropriate test size - size_t workGroupSize; - error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workGroupSize ), &workGroupSize, NULL ); - test_error( error, "Unable to obtain kernel work group size" ); - - // Make sure the work group size doesn't overrun our local storage size in the kernel - while( workGroupSize > MAX_LOCAL_STORAGE_SIZE ) - workGroupSize >>= 1; - - size_t testSize = workGroupSize; - while( testSize < 1024 ) - testSize += workGroupSize; - size_t numGroups = testSize / workGroupSize; - log_info( "\tTesting with %ld groups, %ld elements per group...\n", numGroups, workGroupSize ); - - // Create two buffers for operation - cl_uint *inputData = (cl_uint*)malloc( testSize * sizeof(cl_uint) ); - generate_random_data( kUInt, testSize, randSeed, inputData ); - free_mtdata( randSeed ); - streams[ 0 ] = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, testSize * sizeof(cl_uint), inputData, &error ); - test_error( error, "Unable to create input buffer" ); - - cl_uint *outputData = (cl_uint*)malloc( numGroups *sizeof(cl_uint) ); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_WRITE_ONLY, numGroups * sizeof(cl_uint), NULL, &error ); - test_error( error, "Unable to create output buffer" ); - - - // Set up the kernel args and run - error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); - test_error( error, "Unable to set kernel arg" ); - error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); - test_error( error, "Unable to set kernel arg" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &testSize, &workGroupSize, 0, NULL, NULL ); - test_error( error, "Unable to enqueue kernel" ); - - - // Read results and verify - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, numGroups * sizeof(cl_uint), outputData, 0, NULL, NULL ); - test_error( error, "Unable to read output data" ); - - // MingW compiler seems to have a bug that otimizes the code below incorrectly. - // adding the volatile keyword to size_t decleration to avoid aggressive optimization by the compiler. - for( volatile size_t i = 0; i < numGroups; i++ ) - { - // Determine the max in our case - cl_uint localMax = 0; - for( volatile size_t j = 0; j < workGroupSize; j++ ) - { - if( inputData[ i * workGroupSize + j ] > localMax ) - localMax = inputData[ i * workGroupSize + j ]; - } - - if( outputData[ i ] != localMax ) - { - log_error( "ERROR: Local max validation failed! (expected %u, got %u for i=%lu)\n", localMax, outputData[ i ] , i ); - free(inputData); - free(outputData); - return -1; - } - } - - free(inputData); - free(outputData); - return 0; -} - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_loop.c b/test_conformance/compatibility/test_conformance/basic/test_loop.c deleted file mode 100644 index b20aa38c..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_loop.c +++ /dev/null @@ -1,184 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" - -const char *loop_kernel_code = -"__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -" int n = get_global_size(0);\n" -" int i, j;\n" -"\n" -" dst[tid] = 0;\n" -" for (i=0,j=loopindx[tid]; i= n)\n" -" j = 0;\n" -" dst[tid] += src[j];\n" -" }\n" -"\n" -"}\n"; - - -int -verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n) -{ - int r, i, j, k; - - for (i=0; i= n) - k = 0; - r += inptr[k]; - } - - if (r != outptr[i]) - { - log_error("LOOP test failed: %d found, expected %d\n", outptr[i], r); - return -1; - } - } - - log_info("LOOP test passed\n"); - return 0; -} - -int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_mem streams[4]; - cl_int *input_ptr, *loop_indx, *loop_cnt, *output_ptr; - cl_program program; - cl_kernel kernel; - size_t threads[1]; - int err, i; - - size_t length = sizeof(cl_int) * num_elements; - input_ptr = (cl_int*)malloc(length); - loop_indx = (cl_int*)malloc(length); - loop_cnt = (cl_int*)malloc(length); - output_ptr = (cl_int*)malloc(length); - - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[3]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - MTdata d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -static const char *pointer_cast_kernel_code = -"__kernel void test_pointer_cast(__global unsigned char *src, __global unsigned int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -" __global unsigned int *p = (__global unsigned int *)src;\n" -"\n" -" dst[tid] = p[tid];\n" -"\n" -"}\n"; - - -int -verify_pointer_cast(unsigned char *inptr, unsigned int *outptr, int n) -{ - unsigned int *p = (unsigned int *)inptr; - int i; - cl_uint r; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" -#include "harness/conversions.h" -#include "harness/typeWrappers.h" - - -static const char *sample_kernel = { - "%s\n" // optional pragma string - "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = src[tid];\n" - "\n" - "}\n" -}; - -int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - int vec_type_index, vec_size_index; - - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; - const char *size_names[] = {"", "2", "4", "8", "16"}; - char *program_source; - - program_source = (char*)malloc(sizeof(char)*4096); - - for (vec_type_index=0; vec_type_index<10; vec_type_index++) { - if (vecType[vec_type_index] == kDouble) { - if (!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); - continue; - } - log_info("Testing doubles.\n"); - } - - for (vec_size_index=0; vec_size_index<5; vec_size_index++) { - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper in, out; - size_t global[] = {1,1,1}; - - log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); - - program_source[0] = '\0'; - sprintf(program_source, sample_kernel, - (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); - - error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&program_source, "sample_test" ); - if( error != 0 ) - return error; - - in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error); - test_error(error, "clCreateBuffer failed"); - out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error); - test_error(error, "clCreateBuffer failed"); - - error = clSetKernelArg(kernel, 0, sizeof(in), &in); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 1, sizeof(out), &out); - test_error(error, "clSetKernelArg failed"); - - error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); - - error = clFinish(queue); - test_error(error, "clFinish failed"); - } - } - - free(program_source); - - return 0; -} diff --git a/test_conformance/compatibility/test_conformance/basic/test_vloadstore.c b/test_conformance/compatibility/test_conformance/basic/test_vloadstore.c deleted file mode 100644 index 4c9c0196..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_vloadstore.c +++ /dev/null @@ -1,985 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include -#include - - -#include "procs.h" -#include "harness/conversions.h" -#include "harness/typeWrappers.h" -#include "harness/errorHelpers.h" - -// Outputs debug information for stores -#define DEBUG 0 -// Forces stores/loads to be done with offsets = tid -#define LINEAR_OFFSETS 0 -#define NUM_LOADS 512 - -static const char *doubleExtensionPragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - -#pragma mark -------------------- vload harness -------------------------- - -typedef void (*create_vload_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ); - -int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize, - create_vload_program_fn createFn, size_t bufferSize, MTdata d ) -{ - int error; - - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[ 4 ]; - const size_t numLoads = (DEBUG) ? 16 : NUM_LOADS; - - if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128; - - size_t threads[ 1 ], localThreads[ 1 ]; - clProtectedArray inBuffer( bufferSize ); - char programSrc[ 10240 ]; - cl_uint offsets[ numLoads ], alignmentOffsets[ numLoads ]; - size_t numElements, typeSize, i; - unsigned int outVectorSize; - - - typeSize = get_explicit_type_size( type ); - numElements = bufferSize / ( typeSize * vecSize ); - bufferSize = numElements * typeSize * vecSize; // To account for rounding - - if (DEBUG) log_info("Testing: numLoads: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numLoads, (int)typeSize, vecSize, (int)numElements, (int)bufferSize); - - // Create some random input data and random offsets to load from - generate_random_data( type, numElements * vecSize, d, (void *)inBuffer ); - for( i = 0; i < numLoads; i++ ) - { - offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 1, d ); - if( offsets[ i ] < numElements - 2 ) - alignmentOffsets[ i ] = (cl_uint)random_in_range( 0, (int)vecSize - 1, d ); - else - alignmentOffsets[ i ] = 0; - if (LINEAR_OFFSETS) offsets[i] = (cl_uint)i; - } - if (LINEAR_OFFSETS) log_info("Offsets set to thread IDs to simplify output.\n"); - - // 32-bit fixup - outVectorSize = vecSize; - - // Declare output buffers now -#if !(defined(_WIN32) && defined(_MSC_VER)) - char outBuffer[ numLoads * typeSize * outVectorSize ]; - char referenceBuffer[ numLoads * typeSize * vecSize ]; -#else - char* outBuffer = (char*)_malloca(numLoads * typeSize * outVectorSize * sizeof(cl_char)); - char* referenceBuffer = (char*)_malloca(numLoads * typeSize * vecSize * sizeof(cl_char)); -#endif - - // Create the program - - - createFn( programSrc, numElements, type, vecSize, outVectorSize); - - // Create our kernel - const char *ptr = programSrc; - - error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); - test_error( error, "Unable to create testing kernel" ); - if (DEBUG) log_info("Kernel: \n%s\n", programSrc); - - // Get the number of args to differentiate the kernels with local storage. (They have 5) - cl_uint numArgs; - error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL); - test_error( error, "clGetKernelInfo failed"); - - // Set up parameters - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, bufferSize, (void *)inBuffer, &error ); - test_error( error, "Unable to create kernel stream" ); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(offsets[0]), offsets, &error ); - test_error( error, "Unable to create kernel stream" ); - streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(alignmentOffsets[0]), alignmentOffsets, &error ); - test_error( error, "Unable to create kernel stream" ); - streams[ 3 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*typeSize*outVectorSize, (void *)outBuffer, &error ); - test_error( error, "Unable to create kernel stream" ); - - // Set parameters and run - if (numArgs == 5) { - // We need to set the size of the local storage - error = clSetKernelArg(kernel, 0, bufferSize, NULL); - test_error( error, "clSetKernelArg for buffer failed"); - for( i = 0; i < 4; i++ ) - { - error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] ); - test_error( error, "Unable to set kernel argument" ); - } - } else { - // No local storage - for( i = 0; i < 4; i++ ) - { - error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] ); - test_error( error, "Unable to set kernel argument" ); - } - } - - threads[ 0 ] = numLoads; - error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] ); - test_error( error, "Unable to get local thread size" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Unable to exec kernel" ); - - // Get the results - error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, numLoads * typeSize * outVectorSize * sizeof(cl_char), (void *)outBuffer, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); - - - // Create the reference results - memset( referenceBuffer, 0, numLoads * typeSize * vecSize * sizeof(cl_char)); - for( i = 0; i < numLoads; i++ ) - { - memcpy( referenceBuffer + i * typeSize * vecSize, ( (char *)(void *)inBuffer ) + ( ( offsets[ i ] * vecSize ) + alignmentOffsets[ i ] ) * typeSize, - typeSize * vecSize ); - } - - // Validate the results now - char *expected = referenceBuffer; - char *actual = outBuffer; - char *in = (char *)(void *)inBuffer; - - if (DEBUG) { - log_info("Memory contents:\n"); - for (i=0; i 10240 ) - localSize = 10240; - if (localSize > 4096) - localSize -= 2048; - else - localSize /= 2; - - return test_vloadset( device, context, queue, create_local_load_code, (size_t)localSize ); -} - - -void create_constant_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) -{ - const char *pattern = - "%s%s" - "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s3 tmp = vload3( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid+1 ] = tmp.s1;\n" - " results[ 3*tid+2 ] = tmp.s2;\n" - "}\n"; - - const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, typeName, typeName, - typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize, - (int)inVectorSize, typeName ); - } -} - -int test_vload_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) -{ - // Determine the max size of a local buffer that we can test against - cl_ulong maxSize; - int error = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, NULL ); - test_error( error, "Unable to get max size of constant memory buffer" ); - if( maxSize > 10240 ) - maxSize = 10240; - if (maxSize > 4096) - maxSize -= 2048; - else - maxSize /= 2; - - return test_vloadset( device, context, queue, create_constant_load_code, (size_t)maxSize ); -} - - -void create_private_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) -{ - const char *pattern = - "%s%s" - // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means - // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test - "#define PRIV_TYPE %s%d\n" - "#define PRIV_SIZE %d\n" - "__kernel void test_fn( __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" - " int tid = get_global_id( 0 );\n" - "\n" - " for( int i = 0; i < %d; i++ )\n" - " sPrivateStorage[ i ] = src[ i ];\n" - // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for - // anybody else to sync up - "\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means - // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test - "#define PRIV_TYPE %s\n" - "#define PRIV_SIZE %d\n" - "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" - " int tid = get_global_id( 0 );\n" - "\n" - " for( int i = 0; i < PRIV_SIZE; i++ )\n" - " {\n" - " sPrivateStorage[ i ] = src[ i ];\n" - " }\n" - // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for - // anybody else to sync up - "\n" - " %s3 tmp = vload3( offsets[ tid ], ( sPrivateStorage ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid+1 ] = tmp.s1;\n" - " results[ 3*tid+2 ] = tmp.s2;\n" - "}\n"; - - const char *typeName = get_explicit_type_name(type); - if(inVectorSize ==3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, 3*((int)inBufferSize), - typeName, typeName, - typeName ); - // log_info("Src is \"\n%s\n\"\n", destBuffer); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, (int)inVectorSize, (int)inBufferSize, - typeName, (int)inVectorSize, typeName, (int)outVectorSize, - (int)inBufferSize, - typeName, (int)inVectorSize, (int)inVectorSize, typeName ); - } -} - -int test_vload_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) -{ - // We have no idea how much actual private storage is available, so just pick a reasonable value, - // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes - return test_vloadset( device, context, queue, create_private_load_code, 256 ); -} - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#pragma mark -------------------- vstore harness -------------------------- - -typedef void (*create_vstore_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ); - -int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize, - create_vstore_program_fn createFn, size_t bufferSize, MTdata d ) -{ - int error; - - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[ 3 ]; - - size_t threads[ 1 ], localThreads[ 1 ]; - - size_t numElements, typeSize, numStores = (DEBUG) ? 16 : NUM_LOADS; - - if (DEBUG) - bufferSize = (bufferSize < 128) ? bufferSize : 128; - - typeSize = get_explicit_type_size( type ); - numElements = bufferSize / ( typeSize * vecSize ); - bufferSize = numElements * typeSize * vecSize; // To account for rounding - if( numStores > numElements * 2 / 3 ) - { - // Note: unlike load, we have to restrict the # of stores here, since all offsets must be unique for our test - // (Plus, we leave some room for extra values to make sure didn't get written) - numStores = numElements * 2 / 3; - if( numStores < 1 ) - numStores = 1; - } - if (DEBUG) - log_info("Testing: numStores: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numStores, (int)typeSize, vecSize, (int)numElements, (int)bufferSize); -#if !(defined(_WIN32) && defined(_MSC_VER)) - cl_uint offsets[ numStores ]; -#else - cl_uint* offsets = (cl_uint*)_malloca(numStores * sizeof(cl_uint)); -#endif - char programSrc[ 10240 ]; - size_t i; - -#if !(defined(_WIN32) && defined(_MSC_VER)) - char inBuffer[ numStores * typeSize * vecSize ]; -#else - char* inBuffer = (char*)_malloca( numStores * typeSize * vecSize * sizeof(cl_char)); -#endif - clProtectedArray outBuffer( numElements * typeSize * vecSize ); -#if !(defined(_WIN32) && defined(_MSC_VER)) - char referenceBuffer[ numElements * typeSize * vecSize ]; -#else - char* referenceBuffer = (char*)_malloca(numElements * typeSize * vecSize * sizeof(cl_char)); -#endif - - // Create some random input data and random offsets to load from - generate_random_data( type, numStores * vecSize, d, (void *)inBuffer ); - - // Note: make sure no two offsets are the same, otherwise the output would depend on - // the order that threads ran in, and that would be next to impossible to verify -#if !(defined(_WIN32) && defined(_MSC_VER)) - char flags[ numElements ]; -#else - char* flags = (char*)_malloca( numElements * sizeof(char)); -#endif - - memset( flags, 0, numElements * sizeof(char) ); - for( i = 0; i < numStores; i++ ) - { - do - { - offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 2, d ); // Note: keep it one vec below the end for offset testing - } while( flags[ offsets[ i ] ] != 0 ); - flags[ offsets[ i ] ] = -1; - if (LINEAR_OFFSETS) - offsets[i] = (int)i; - } - if (LINEAR_OFFSETS) - log_info("Offsets set to thread IDs to simplify output.\n"); - - createFn( programSrc, numElements, type, vecSize ); - - // Create our kernel - const char *ptr = programSrc; - error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); - test_error( error, "Unable to create testing kernel" ); - if (DEBUG) log_info("Kernel: \n%s\n", programSrc); - - // Get the number of args to differentiate the kernels with local storage. (They have 5) - cl_uint numArgs; - error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL); - test_error( error, "clGetKernelInfo failed"); - - // Set up parameters - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * typeSize * vecSize * sizeof(cl_char), (void *)inBuffer, &error ); - test_error( error, "Unable to create kernel stream" ); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * sizeof(cl_uint), offsets, &error ); - test_error( error, "Unable to create kernel stream" ); - streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numElements * typeSize * vecSize, (void *)outBuffer, &error ); - test_error( error, "Unable to create kernel stream" ); - - // Set parameters and run - if (numArgs == 5) - { - // We need to set the size of the local storage - error = clSetKernelArg(kernel, 0, bufferSize, NULL); - test_error( error, "clSetKernelArg for buffer failed"); - for( i = 0; i < 3; i++ ) - { - error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] ); - test_error( error, "Unable to set kernel argument" ); - } - } - else - { - // No local storage - for( i = 0; i < 3; i++ ) - { - error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] ); - if (error) - log_info("%s\n", programSrc); - test_error( error, "Unable to set kernel argument" ); - } - } - - threads[ 0 ] = numStores; - error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] ); - test_error( error, "Unable to get local thread size" ); - - // Run in a loop, changing the address offset from 0 to ( vecSize - 1 ) each time, since - // otherwise stores might overlap each other, and it'd be a nightmare to test! - for( cl_uint addressOffset = 0; addressOffset < vecSize; addressOffset++ ) - { - if (DEBUG) - log_info("\tstore addressOffset is %d, executing with threads %d\n", addressOffset, (int)threads[0]); - - // Clear the results first - memset( outBuffer, 0, numElements * typeSize * vecSize ); - error = clEnqueueWriteBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL ); - test_error( error, "Unable to erase result stream" ); - - // Set up the new offset and run - if (numArgs == 5) - error = clSetKernelArg( kernel, 3+1, sizeof( cl_uint ), &addressOffset ); - else - error = clSetKernelArg( kernel, 3, sizeof( cl_uint ), &addressOffset ); - test_error( error, "Unable to set address offset argument" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Unable to exec kernel" ); - - // Get the results - error = clEnqueueReadBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); - - - // Create the reference results - memset( referenceBuffer, 0, numElements * typeSize * vecSize * sizeof(cl_char) ); - for( i = 0; i < numStores; i++ ) - { - memcpy( referenceBuffer + ( ( offsets[ i ] * vecSize ) + addressOffset ) * typeSize, inBuffer + i * typeSize * vecSize, typeSize * vecSize ); - } - - // Validate the results now - char *expected = referenceBuffer; - char *actual = (char *)(void *)outBuffer; - - if (DEBUG) - { - log_info("Memory contents:\n"); - for (i=0; i>2) ], offsets[ tid ], destBuffer + alignmentOffset );\n" - " } else {\n" - " vstore3( vload3(tid, (__global %s *)srcValues), offsets[ tid ], destBuffer + alignmentOffset );\n" - " }\n" - "}\n"; - - const char *typeName = get_explicit_type_name(type); - - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - typeName, typeName, typeName); - - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - typeName, (int)inVectorSize, typeName, (int)inVectorSize ); - } - // if(inVectorSize == 3 || inVectorSize == 4) { - // log_info("\n----\n%s\n----\n", destBuffer); - // } -} - -int test_vstore_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) -{ - return test_vstoreset( device, context, queue, create_global_store_code, 10240 ); -} - - -void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ) -{ - const char *pattern = - "%s" - "\n" - "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sSharedStorage[ offsets[tid] ] = (%s%d)(%s)0;\n" - " sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ];\n" - " barrier( CLK_LOCAL_MEM_FENCE );\n" - "\n" - " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local %s *)sSharedStorage ) + alignmentOffset );\n" - "\n" - // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output - // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy - " barrier( CLK_LOCAL_MEM_FENCE );\n" - "\n" - // Note: we only copy the relevant portion of our local storage over to the dest buffer, because - // otherwise, local threads would be overwriting results from other local threads - " int i;\n" - " __local %s *sp = (__local %s*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n" - " for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) \n" - " dp[i] = sp[i];\n" - "}\n"; - - const char *patternV3 = - "%s" - "\n" - "__kernel void test_fn(__local %s *sSharedStorage, __global %s *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sSharedStorage[ 3*offsets[tid] ] = (%s)0;\n" - " sSharedStorage[ 3*offsets[tid] +1 ] = \n" - " sSharedStorage[ 3*offsets[tid] ];\n" - " sSharedStorage[ 3*offsets[tid] +2 ] = \n" - " sSharedStorage[ 3*offsets[tid]];\n" - " sSharedStorage[ 3*offsets[tid] +3 ] = \n" - " sSharedStorage[ 3*offsets[tid]];\n" - " sSharedStorage[ 3*offsets[tid] +4 ] = \n" - " sSharedStorage[ 3*offsets[tid] ];\n" - " sSharedStorage[ 3*offsets[tid] +5 ] = \n" - " sSharedStorage[ 3*offsets[tid]];\n" - " barrier( CLK_LOCAL_MEM_FENCE );\n" - "\n" - " vstore3( vload3(tid,srcValues), offsets[ tid ], sSharedStorage + alignmentOffset );\n" - "\n" - // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output - // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy - " barrier( CLK_LOCAL_MEM_FENCE );\n" - "\n" - // Note: we only copy the relevant portion of our local storage over to the dest buffer, because - // otherwise, local threads would be overwriting results from other local threads - " int i;\n" - " __local %s *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n" - " for( i = 0; i < 3; i++ ) \n" - " dp[i] = sp[i];\n" - "}\n"; - - const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - typeName, - typeName, - typeName, typeName, - typeName, typeName, typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - typeName, (int)inVectorSize, - typeName, (int)inVectorSize, typeName, (int)inVectorSize, - typeName, (int)inVectorSize, typeName, - (int)inVectorSize, typeName, typeName, - typeName, typeName, typeName ); - } - // log_info(destBuffer); -} - -int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) -{ - // Determine the max size of a local buffer that we can test against - cl_ulong localSize; - int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL ); - test_error( error, "Unable to get max size of local memory buffer" ); - if( localSize > 10240 ) - localSize = 10240; - if (localSize > 4096) - localSize -= 2048; - else - localSize /= 2; - return test_vstoreset( device, context, queue, create_local_store_code, (size_t)localSize ); -} - - -void create_private_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ) -{ - const char *pattern = - "%s" - // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means - // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test - "\n" - "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n" - "{\n" - " __private %s%d sPrivateStorage[ %d ];\n" - " int tid = get_global_id( 0 );\n" - // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sPrivateStorage[tid] = (%s%d)(%s)0;\n" - "\n" - " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n" - "\n" - // Note: we only copy the relevant portion of our local storage over to the dest buffer, because - // otherwise, local threads would be overwriting results from other local threads - " uint i;\n" - " __private %s *sp = (__private %s*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n" - " for( i = 0; i < sizeof( sPrivateStorage[0]) / sizeof( *sp ); i++ ) \n" - " dp[i] = sp[i];\n" - "}\n"; - - - const char *patternV3 = - "%s" - // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means - // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test - "\n" - "__kernel void test_fn( __global %s *srcValues, __global uint *offsets, __global %s3 *destBuffer, uint alignmentOffset )\n" - "{\n" - " __private %s3 sPrivateStorage[ %d ];\n" // keep this %d - " int tid = get_global_id( 0 );\n" - // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sPrivateStorage[tid] = (%s3)(%s)0;\n" - "\n" - - " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n" - "\n" - // Note: we only copy the relevant portion of our local storage over to the dest buffer, because - // otherwise, local threads would be overwriting results from other local threads - " uint i;\n" - " __private %s *sp = ((__private %s*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n" - " __global %s *dp = ((__global %s*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n" - " for( i = 0; i < 3; i++ ) \n" - " dp[i] = sp[i];\n" - "}\n"; - - const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - typeName, typeName, - typeName, (int)inBufferSize, - typeName, typeName, - typeName, typeName, typeName, typeName, typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - typeName, (int)inVectorSize, typeName, (int)inVectorSize, - typeName, (int)inVectorSize, (int)inBufferSize, - typeName, (int)inVectorSize, typeName, - (int)inVectorSize, typeName, typeName, typeName, typeName, typeName ); - } -} - -int test_vstore_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) -{ - // We have no idea how much actual private storage is available, so just pick a reasonable value, - // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes - return test_vstoreset( device, context, queue, create_private_store_code, 256 ); -} - - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_work_item_functions.cpp b/test_conformance/compatibility/test_conformance/basic/test_work_item_functions.cpp deleted file mode 100644 index 55843b5a..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_work_item_functions.cpp +++ /dev/null @@ -1,177 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" -#include "harness/conversions.h" -#include "harness/typeWrappers.h" - -typedef struct work_item_data -{ - cl_uint workDim; - cl_uint globalSize[ 3 ]; - cl_uint globalID[ 3 ]; - cl_uint localSize[ 3 ]; - cl_uint localID[ 3 ]; - cl_uint numGroups[ 3 ]; - cl_uint groupID[ 3 ]; -}; - -static const char *workItemKernelCode = -"typedef struct {\n" -" uint workDim;\n" -" uint globalSize[ 3 ];\n" -" uint globalID[ 3 ];\n" -" uint localSize[ 3 ];\n" -" uint localID[ 3 ];\n" -" uint numGroups[ 3 ];\n" -" uint groupID[ 3 ];\n" -" } work_item_data;\n" -"\n" -"__kernel void sample_kernel( __global work_item_data *outData )\n" -"{\n" -" int id = get_global_id(0);\n" -" outData[ id ].workDim = (uint)get_work_dim();\n" -" for( uint i = 0; i < get_work_dim(); i++ )\n" -" {\n" -" outData[ id ].globalSize[ i ] = (uint)get_global_size( i );\n" -" outData[ id ].globalID[ i ] = (uint)get_global_id( i );\n" -" outData[ id ].localSize[ i ] = (uint)get_local_size( i );\n" -" outData[ id ].localID[ i ] = (uint)get_local_id( i );\n" -" outData[ id ].numGroups[ i ] = (uint)get_num_groups( i );\n" -" outData[ id ].groupID[ i ] = (uint)get_group_id( i );\n" -" }\n" -"}"; - -#define NUM_TESTS 1 - -int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - int error; - - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper outData; - work_item_data testData[ 10240 ]; - size_t threads[3], localThreads[3]; - MTdata d; - - - error = create_single_kernel_helper( context, &program, &kernel, 1, &workItemKernelCode, "sample_kernel" ); - test_error( error, "Unable to create testing kernel" ); - - outData = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( testData ), NULL, &error ); - test_error( error, "Unable to create output buffer" ); - - error = clSetKernelArg( kernel, 0, sizeof( outData ), &outData ); - test_error( error, "Unable to set kernel arg" ); - - d = init_genrand( gRandomSeed ); - for( size_t dim = 1; dim <= 3; dim++ ) - { - for( int i = 0; i < NUM_TESTS; i++ ) - { - size_t numItems = 1; - for( size_t j = 0; j < dim; j++ ) - { - // All of our thread sizes should be within the max local sizes, since they're all <= 20 - threads[ j ] = (size_t)random_in_range( 1, 20, d ); - localThreads[ j ] = threads[ j ] / (size_t)random_in_range( 1, (int)threads[ j ], d ); - while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) ) - localThreads[ j ]--; - - numItems *= threads[ j ]; - - // Hack for now: localThreads > 1 are iffy - localThreads[ j ] = 1; - } - error = clEnqueueNDRangeKernel( queue, kernel, (cl_uint)dim, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Unable to run kernel" ); - - error = clEnqueueReadBuffer( queue, outData, CL_TRUE, 0, sizeof( testData ), testData, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); - - // Validate - for( size_t q = 0; q < threads[0]; q++ ) - { - // We can't really validate the actual value of each one, but we can validate that they're within a sane range - if( testData[ q ].workDim != (cl_uint)dim ) - { - log_error( "ERROR: get_work_dim() did not return proper value for %d dimensions (expected %d, got %d)\n", (int)dim, (int)dim, (int)testData[ q ].workDim ); - free_mtdata(d); - return -1; - } - for( size_t j = 0; j < dim; j++ ) - { - if( testData[ q ].globalSize[ j ] != (cl_uint)threads[ j ] ) - { - log_error( "ERROR: get_global_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n", - (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalSize[ j ] ); - free_mtdata(d); - return -1; - } - if( testData[ q ].globalID[ j ] < 0 || testData[ q ].globalID[ j ] >= (cl_uint)threads[ j ] ) - { - log_error( "ERROR: get_global_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n", - (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalID[ j ] ); - free_mtdata(d); - return -1; - } - if( testData[ q ].localSize[ j ] != (cl_uint)localThreads[ j ] ) - { - log_error( "ERROR: get_local_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n", - (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localSize[ j ] ); - free_mtdata(d); - return -1; - } - if( testData[ q ].localID[ j ] < 0 && testData[ q ].localID[ j ] >= (cl_uint)localThreads[ j ] ) - { - log_error( "ERROR: get_local_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n", - (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localID[ j ] ); - free_mtdata(d); - return -1; - } - size_t groupCount = ( threads[ j ] + localThreads[ j ] - 1 ) / localThreads[ j ]; - if( testData[ q ].numGroups[ j ] != (cl_uint)groupCount ) - { - log_error( "ERROR: get_num_groups(%d) did not return proper value for %d dimensions (expected %d with global dim %d and local dim %d, got %d)\n", - (int)j, (int)dim, (int)groupCount, (int)threads[ j ], (int)localThreads[ j ], (int)testData[ q ].numGroups[ j ] ); - free_mtdata(d); - return -1; - } - if( testData[ q ].groupID[ j ] < 0 || testData[ q ].groupID[ j ] >= (cl_uint)groupCount ) - { - log_error( "ERROR: get_group_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n", - (int)j, (int)dim, (int)groupCount, (int)testData[ q ].groupID[ j ] ); - free_mtdata(d); - return -1; - } - } - } - } - } - - free_mtdata(d); - return 0; -} - - diff --git a/test_conformance/compatibility/test_conformance/basic/test_writeimage_fp32.c b/test_conformance/compatibility/test_conformance/basic/test_writeimage_fp32.c deleted file mode 100644 index 22bac050..00000000 --- a/test_conformance/compatibility/test_conformance/basic/test_writeimage_fp32.c +++ /dev/null @@ -1,188 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - - -#include "procs.h" - - -static const char *rgbaFFFF_write_kernel_code = -"__kernel void test_rgbaFFFF_write(__global float *src, write_only image2d_t dstimg)\n" -"{\n" -" int tid_x = get_global_id(0);\n" -" int tid_y = get_global_id(1);\n" -" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" -" float4 color;\n" -"\n" -" indx *= 4;\n" -" color = (float4)(src[indx+0], src[indx+1], src[indx+2], src[indx+3]);\n" -" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" -"\n" -"}\n"; - - -static float * -generate_float_image(int w, int h, MTdata d) -{ - float *ptr = (float*)malloc(w * h * 4 * sizeof(float)); - int i; - - for (i=0; i -#include -#include -#include - - -#include "procs.h" - -static const char *rgba16_write_kernel_code = -"__kernel void test_rgba16_write(__global unsigned short *src, write_only image2d_t dstimg)\n" -"{\n" -" int tid_x = get_global_id(0);\n" -" int tid_y = get_global_id(1);\n" -" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" -" float4 color;\n" -"\n" -" indx *= 4;\n" -" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n" -" color /= 65535.0f;\n" -" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" -"\n" -"}\n"; - - -static unsigned short * -generate_16bit_image(int w, int h, MTdata d) -{ - cl_ushort *ptr = (cl_ushort*)malloc(w * h * 4 * sizeof(cl_ushort)); - int i; - - for (i=0; i MAX_ERR) - { - log_error("%s failed\n", string); - return -1; - } - } - - log_info("%s passed\n", string); - return 0; -} - -int test_writeimage_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_mem streams[3]; - cl_program program; - cl_kernel kernel[2]; - cl_image_format img_format; - cl_ushort *input_ptr, *output_ptr; - size_t threads[2]; - int img_width = 512; - int img_height = 512; - int i, err, any_err = 0; - size_t origin[3] = {0, 0, 0}; - size_t region[3] = {img_width, img_height, 1}; - size_t length = img_width * img_height * 4 * sizeof(cl_ushort); - - PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) - - MTdata d = init_genrand( gRandomSeed ); - input_ptr = generate_16bit_image(img_width, img_height, d); - free_mtdata(d); d = NULL; - - output_ptr = (cl_ushort*)malloc(length); - - img_format.image_channel_order = CL_RGBA; - img_format.image_channel_data_type = CL_UNORM_INT16; - streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL); - if (!streams[0]) - { - log_error("create_image_2d failed\n"); - return -1; - } - - img_format.image_channel_order = CL_RGBA; - img_format.image_channel_data_type = CL_UNORM_INT16; - streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL); - if (!streams[1]) - { - log_error("create_image_2d failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateArray failed\n"); - return -1; - } - - err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } - - err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgba16_write_kernel_code, "test_rgba16_write" ); - if (err) - return -1; - kernel[1] = clCreateKernel(program, "test_rgba16_write", NULL); - if (!kernel[1]) - { - log_error("clCreateKernel failed\n"); - return -1; - } - - err = clSetKernelArg(kernel[0], 0, sizeof streams[2], &streams[2]); - err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } - - err = clSetKernelArg(kernel[1], 0, sizeof streams[2], &streams[2]); - err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } - - threads[0] = (unsigned int)img_width; - threads[1] = (unsigned int)img_height; - - for (i=0; i<2; i++) - { - err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clExecuteKernel failed\n"); - return -1; - } - - err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clReadImage failed\n"); - return -1; - } - - err = verify_16bit_image((i == 0) ? "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_READ_WRITE" : - "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_WRITE_ONLY", - input_ptr, output_ptr, img_width, img_height); - any_err |= err; - } - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseKernel(kernel[0]); - clReleaseKernel(kernel[1]); - clReleaseProgram(program); - free(input_ptr); - free(output_ptr); - - return any_err; -} - -