diff --git a/CMakeLists.txt b/CMakeLists.txt index a1f40261..6b8ee156 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ set(CLConform_VERSION "${CLConform_VERSION_MAJOR}.${CLConform_VERSION_MINOR}") set(CLConform_VERSION_FULL "${CLConform_VERSION}.${CLConform_VERSION_MICRO}${CLConform_VERSION_EXTRA}") -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.1) add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_1_APIS=1) add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_0_APIS=1) diff --git a/build_android.py b/build_android.py old mode 100644 new mode 100755 diff --git a/build_lnx.sh b/build_lnx.sh old mode 100644 new mode 100755 diff --git a/clean_tests.py b/clean_tests.py old mode 100644 new mode 100755 diff --git a/readme-spir-v-binaries.txt b/readme-spir-v-binaries.txt index 91f29aba..1b6df9bc 100644 --- a/readme-spir-v-binaries.txt +++ b/readme-spir-v-binaries.txt @@ -1,17 +1,14 @@ -To run the 2.2 conformance tests test suite for the C++ features you need +To run the 2.2 conformance tests test suite for the C++ features you need need SPIR-V binaries. If you are using a conformance package then the binaries are included in the -package. If you are using conformance tests from git repositories then the -binaries need to be picked up using LFS: +package. If you are using conformance tests from gitlab repositories then the +binaries need to be picked up from Khronos SVN URL mentioned below: -1. Setup LFS by following instructions at https://git-lfs.github.com/ +https://cvs.khronos.org/svn/repos/OpenCL/trunk/Khronos/spirv/spirv10_2015.11.25.zip -2. The SPIR-V binaries can then be picked up from test_conformance/clcpp/spirv*.7z -Alternatively you can check out and build all of the below repositories -manually or use https://github.com/KhronosGroup/OpenCL-CTS-Framework which will -do it for you. +Alternatively you can check out and build all of the below repositories. 1. SPIRV-LLVM LLVM with support for SPIR-V (required by clang compiler) @@ -42,4 +39,4 @@ Branch: opencl22 5. OpenCL ICD (with 2.2 support) OpenCL ICD Repository: https://gitlab.khronos.org/opencl/icd -Branch: dev_cl22 +Branch: dev_cl22 \ No newline at end of file diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index 7606d1a3..a0cce7d4 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -479,16 +479,6 @@ struct AddressingTable static AddressingTable sAddressingTable; -bool alpha_is_x(cl_image_format *format){ - switch (format->image_channel_order) { - case CL_RGBx: - case CL_sRGBx: - return true; - default: - return false; - } -} - bool is_sRGBA_order(cl_channel_order image_channel_order){ switch (image_channel_order) { case CL_sRGB: @@ -508,19 +498,21 @@ int has_alpha(cl_image_format *format) { case CL_R: return 0; case CL_A: - case CL_Rx: return 1; + case CL_Rx: + return 0; case CL_RG: return 0; case CL_RA: - case CL_RGx: return 1; + case CL_RGx: + return 0; case CL_RGB: case CL_sRGB: return 0; case CL_RGBx: case CL_sRGBx: - return 1; + return 0; case CL_RGBA: return 1; case CL_BGRA: @@ -719,13 +711,6 @@ void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes, } } -int issubnormal(float a) -{ - union { cl_int i; cl_float f; } u; - u.f = a; - return (u.i & 0x7f800000U) == 0; -} - float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) { if (sampler->filter_mode == CL_FILTER_NEAREST) return 0.0f; @@ -790,6 +775,7 @@ float get_max_relative_error( cl_image_format *format, image_sampler_data *sampl { if( sampler->filter_mode != CL_FILTER_NEAREST ) { + extern cl_device_type gDeviceType; // The maximum if( gDeviceType == CL_DEVICE_TYPE_GPU ) maxError += MAKE_HEX_FLOAT(0x1.0p-4f, 0x1L, -4); // Some GPUs ain't so accurate @@ -1419,7 +1405,7 @@ void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, else { outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0; if (!has_alpha(imageInfo->format)) - outData[3] = alpha_is_x(imageInfo->format) ? 0 : 1; + outData[3] = 1; } return; } diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h index 83ff1377..ba80ea77 100644 --- a/test_common/harness/imageHelpers.h +++ b/test_common/harness/imageHelpers.h @@ -136,8 +136,6 @@ extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *d int has_alpha(cl_image_format *format); -extern bool alpha_is_x(cl_image_format *format); - extern bool is_sRGBA_order(cl_channel_order image_channel_order); inline float calculate_array_index( float coord, float extent ); @@ -594,7 +592,6 @@ extern char *create_random_image_data( ExplicitType dataType, image_descriptor * extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine ); extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler); extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter ); -extern int issubnormal(float); #define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) ) diff --git a/test_common/harness/kernelHelpers.c b/test_common/harness/kernelHelpers.c index 591ab038..59143a3b 100644 --- a/test_common/harness/kernelHelpers.c +++ b/test_common/harness/kernelHelpers.c @@ -1070,14 +1070,14 @@ size_t get_pixel_bytes( const cl_image_format *fmt ) return 0; } -int verifyImageSupport( cl_device_id device ) +test_status verifyImageSupport( cl_device_id device ) { if( checkForImageSupport( device ) ) { log_error( "ERROR: Device does not supported images as required by this test!\n" ); - return CL_IMAGE_FORMAT_NOT_SUPPORTED; + return TEST_FAIL; } - return 0; + return TEST_PASS; } int checkForImageSupport( cl_device_id device ) diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h index b39113ec..5f4729c4 100644 --- a/test_common/harness/kernelHelpers.h +++ b/test_common/harness/kernelHelpers.h @@ -20,6 +20,7 @@ #include "../config.hpp" #include "compat.h" +#include "testHarness.h" #include #include @@ -128,8 +129,8 @@ extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl /* Helper to get pixel size for a pixel format */ size_t get_pixel_bytes( const cl_image_format *fmt ); -/* Verify the given device supports images. 0 means you're good to go, otherwise an error */ -extern int verifyImageSupport( cl_device_id device ); +/* Verify the given device supports images. */ +extern test_status verifyImageSupport( cl_device_id device ); /* Checks that the given device supports images. Same as verify, but doesn't print an error */ extern int checkForImageSupport( cl_device_id device ); diff --git a/test_common/harness/testHarness.c b/test_common/harness/testHarness.c index f513f787..5bc494a3 100644 --- a/test_common/harness/testHarness.c +++ b/test_common/harness/testHarness.c @@ -143,7 +143,7 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, log_info( "\tid\t\tIndicates device at index should be used (default 0).\n" ); log_info( "\t\tcpu|gpu|accelerator| (default CL_DEVICE_TYPE_DEFAULT)\n" ); - for( i = 0; i < num_fns - 1; i++ ) + for( i = 0; i < num_fns; i++ ) { log_info( "\t\t%s\n", fnNames[ i ] ); } @@ -439,10 +439,18 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, /* If we have a device checking function, run it */ - if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS ) + if( ( deviceCheckFn != NULL ) ) { - test_finish(); - return -1; + test_status status = deviceCheckFn( device ); + switch (status) + { + case TEST_PASS: + break; + case TEST_FAIL: + return 1; + case TEST_SKIP: + return 0; + } } if (num_elements <= 0) diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h index 2816b2a0..fc019aa3 100644 --- a/test_common/harness/testHarness.h +++ b/test_common/harness/testHarness.h @@ -25,6 +25,13 @@ extern "C" { #endif +typedef enum test_status +{ + TEST_PASS = 0, + TEST_FAIL = 1, + TEST_SKIP = 2, +} test_status; + extern cl_uint gReSeed; extern cl_uint gRandomSeed; @@ -34,8 +41,8 @@ extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns, basefn fnList[], const char *fnNames[], int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps ); -// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits. -typedef int (*DeviceCheckFn)( cl_device_id device ); +// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than TEST_PASS, the harness exits. +typedef test_status (*DeviceCheckFn)( cl_device_id device ); // Same as runTestHarness, but also supplies a function that checks the created device for required functionality. extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, diff --git a/test_conformance/SVM/CMakeLists.txt b/test_conformance/SVM/CMakeLists.txt index c7d0219e..ad74a829 100644 --- a/test_conformance/SVM/CMakeLists.txt +++ b/test_conformance/SVM/CMakeLists.txt @@ -1,3 +1,4 @@ +set(CMAKE_CXX_STANDARD 11) set(MODULE_NAME SVM) set(${MODULE_NAME}_SOURCES diff --git a/test_conformance/SVM/test_enqueue_api.cpp b/test_conformance/SVM/test_enqueue_api.cpp index 6a04e955..f92872d6 100644 --- a/test_conformance/SVM/test_enqueue_api.cpp +++ b/test_conformance/SVM/test_enqueue_api.cpp @@ -17,10 +17,15 @@ #include "../../test_common/harness/mt19937.h" #include +#include + +#if !defined(_WIN32) +#include +#endif typedef struct { - cl_uint status; + std::atomic status; cl_uint num_svm_pointers; std::vector svm_pointers; } CallbackData; @@ -62,7 +67,7 @@ void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_point clSVMFree(context, svm_pointers[i]); } - data->status = 1; + data->status.store(1, std::memory_order_release); } int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) @@ -231,7 +236,9 @@ int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue test_error(error, "clFinish failed"); //wait for the callback - while(data.status == 0) { } + while(data.status.load(std::memory_order_acquire) == 0) { + usleep(1); + } //check if number of SVM pointers returned in the callback matches with expected if (data.num_svm_pointers != buffers.size()) diff --git a/test_conformance/api/test_api_min_max.c b/test_conformance/api/test_api_min_max.c index 57b0ab64..fa3bdb18 100644 --- a/test_conformance/api/test_api_min_max.c +++ b/test_conformance/api/test_api_min_max.c @@ -1307,7 +1307,6 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, int error; clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper streams[3]; size_t threads[1], localThreads[1]; cl_int *constantData, *resultData; cl_ulong maxSize, stepSize, currentSize; @@ -1349,6 +1348,7 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, for(i=0; i<(int)(numberOfInts); i++) constantData[i] = (int)genrand_int32(d); + clMemWrapper streams[3]; streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, constantData, &error); test_error( error, "Creating test array failed" ); streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeToAllocate, NULL, &error); diff --git a/test_conformance/api/test_zero_sized_enqueue.cpp b/test_conformance/api/test_zero_sized_enqueue.cpp index 87aa0d21..dbe2af2e 100644 --- a/test_conformance/api/test_zero_sized_enqueue.cpp +++ b/test_conformance/api/test_zero_sized_enqueue.cpp @@ -190,7 +190,7 @@ int test_zero_sized_enqueue(cl_device_id deviceID, cl_context context, cl_comman cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL); test_error( error, "clGetDeviceInfo failed."); - if (props | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) + if (props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { // test out of order queue cl_queue_properties queue_prop_def[] = diff --git a/test_conformance/basic/run_array b/test_conformance/basic/run_array old mode 100644 new mode 100755 diff --git a/test_conformance/basic/run_array_image_copy b/test_conformance/basic/run_array_image_copy old mode 100644 new mode 100755 diff --git a/test_conformance/basic/run_image b/test_conformance/basic/run_image old mode 100644 new mode 100755 diff --git a/test_conformance/basic/run_multi_read_image b/test_conformance/basic/run_multi_read_image old mode 100644 new mode 100755 diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp index ec65101d..ca657787 100644 --- a/test_conformance/basic/test_async_strided_copy.cpp +++ b/test_conformance/basic/test_async_strided_copy.cpp @@ -209,14 +209,13 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu log_error( "ERROR: Results of copy did not validate!\n" ); sprintf(values + strlen( values), "%d -> [", i); for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]); + sprintf(values + strlen( values), "%2x ", inchar[j]); sprintf(values + strlen(values), "] != ["); for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]); + sprintf(values + strlen( values), "%2x ", outchar[j]); sprintf(values + strlen(values), "]"); log_error("%s\n", values); - - return -1; + return -1; } } diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index 7445444e..9eb211c2 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -864,12 +864,14 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co // We need to create 5 random values of the given type, // and read 4 of them back. - cl_uchar CL_ALIGNED(ALIGNMENT) write_data[NUM_TESTED_VALUES * sizeof(cl_ulong16)]; - cl_uchar CL_ALIGNED(ALIGNMENT) read_data[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; + const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16); + const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16); + cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT); + cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT); - clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(write_data), write_data, &status ) ); + clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) ); test_error_ret(status,"Failed to allocate write buffer",status); - clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(read_data), read_data, &status ) ); + clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) ); test_error_ret(status,"Failed to allocate read buffer",status); status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); @@ -884,7 +886,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co // Generate new random data to push through. // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. - cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, sizeof(write_data), 0, 0, 0, 0); + cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0); if ( ti.is_bool() ) { // For boolean, random data cast to bool isn't very random. @@ -896,7 +898,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co } bool_iter++; } else { - l_set_randomly( write_data, sizeof(write_data), rand_state ); + l_set_randomly( write_data, write_data_size, rand_state ); } status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status); @@ -905,7 +907,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status); // Determine the expected values. - cl_uchar expected[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; + cl_uchar expected[read_data_size]; memset( expected, -1, sizeof(expected) ); l_copy( expected, 0, write_data, 0, ti ); l_copy( expected, 1, write_data, 1, ti ); @@ -922,8 +924,8 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i]; } - cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); - memset( read_data, -1, sizeof(read_data) ); + cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); + memset(read_data, -1, read_data_size); clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); // Now run the kernel @@ -932,7 +934,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); status = clFinish(queue); test_error_ret(status,"finish",status); - read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); + read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); if ( ti.is_bool() ) { // Collapse down to one bit. @@ -951,7 +953,8 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co } if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } - + align_free(write_data); + align_free(read_data); return err; } @@ -1008,12 +1011,14 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, // We need to create 5 random values of the given type, // and read 4 of them back. - cl_uchar CL_ALIGNED(ALIGNMENT) write_data[NUM_TESTED_VALUES * sizeof(cl_ulong16)]; - cl_uchar CL_ALIGNED(ALIGNMENT) read_data[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; + const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16); + const size_t read_data_size = (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16); - clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(write_data), write_data, &status ) ); + cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT); + cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT); + clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) ); test_error_ret(status,"Failed to allocate write buffer",status); - clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(read_data), read_data, &status ) ); + clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) ); test_error_ret(status,"Failed to allocate read buffer",status); status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); @@ -1033,7 +1038,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, // Generate new random data to push through. // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. - cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, sizeof(write_data), 0, 0, 0, 0); + cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0); if ( ti.is_bool() ) { // For boolean, random data cast to bool isn't very random. @@ -1045,7 +1050,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, } bool_iter++; } else { - l_set_randomly( write_data, sizeof(write_data), rand_state ); + l_set_randomly( write_data, write_data_size, rand_state ); } status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status); @@ -1061,7 +1066,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status); // Determine the expected values. - cl_uchar expected[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; + cl_uchar expected[read_data_size]; memset( expected, -1, sizeof(expected) ); if ( iteration ) { l_copy( expected, 0, write_data, 0, ti ); @@ -1092,8 +1097,8 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0); - cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); - memset( read_data, -1, sizeof(read_data) ); + cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); + memset( read_data, -1, read_data_size ); clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); // Now run the kernel @@ -1107,7 +1112,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); status = clFinish(queue); test_error_ret(status,"finish",status); - read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); + read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); if ( ti.is_bool() ) { // Collapse down to one bit. @@ -1129,6 +1134,8 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, } if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } + align_free(write_data); + align_free(read_data); return err; } @@ -1340,6 +1347,13 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS); return status; } + + status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0); + if(check_error(status, "Failed to compile program for user type test (%s)", IGetErrorString(status))) + { + print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS); + return status; + } } @@ -1360,12 +1374,12 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu test_error_ret(status,"Failed to create reader kernel for user type test",status); // Set up data. - cl_uchar CL_ALIGNED(ALIGNMENT) uchar_data; - cl_uint CL_ALIGNED(ALIGNMENT) uint_data; + cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT); + cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT); - clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(uchar_data), &uchar_data, &status ) ); + clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status ) ); test_error_ret(status,"Failed to allocate uchar buffer",status); - clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(uint_data), &uint_data, &status ) ); + clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uint), uint_data, &status ) ); test_error_ret(status,"Failed to allocate uint buffer",status); status = clSetKernelArg(reader,0,sizeof(cl_mem),&uchar_mem); test_error_ret(status,"set arg",status); @@ -1375,18 +1389,18 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu cl_uint expected_uint = 42; for ( unsigned iter = 0; iter < 5 ; iter++ ) { // Must go around at least twice // Read back data - uchar_data = -1; - uint_data = -1; + *uchar_data = -1; + *uint_data = -1; const size_t one = 1; status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); status = clFinish(queue); test_error_ret(status,"finish",status); - cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(uint_data), 0, 0, 0, 0); - cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(uchar_data), 0, 0, 0, 0); + cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, 0, 0, 0); + cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0, 0, 0); - if ( expected_uchar != uchar_data || expected_uint != uint_data ) { + if ( expected_uchar != *uchar_data || expected_uint != *uint_data ) { log_error("FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n", - iter, (int)uchar_data, uint_data, (int)expected_uchar, expected_uint ); + iter, (int)*uchar_data, *uint_data, (int)expected_uchar, expected_uint ); err |= 1; } @@ -1398,16 +1412,17 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu expected_uint++; // Write the new values into persistent store. - uchar_data = expected_uchar; - uint_data = expected_uint; - status = clSetKernelArg(writer,0,sizeof(uchar_data),&uchar_data); test_error_ret(status,"set arg",status); - status = clSetKernelArg(writer,1,sizeof(uint_data),&uint_data); test_error_ret(status,"set arg",status); + *uchar_data = expected_uchar; + *uint_data = expected_uint; + status = clSetKernelArg(writer,0,sizeof(cl_uchar),uchar_data); test_error_ret(status,"set arg",status); + status = clSetKernelArg(writer,1,sizeof(cl_uint),uint_data); test_error_ret(status,"set arg",status); status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status); status = clFinish(queue); test_error_ret(status,"finish",status); } if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } - + align_free(uchar_data); + align_free(uint_data); return err; } diff --git a/test_conformance/basic/test_sizeof.c b/test_conformance/basic/test_sizeof.c index 166cf206..bdbc40fe 100644 --- a/test_conformance/basic/test_sizeof.c +++ b/test_conformance/basic/test_sizeof.c @@ -50,7 +50,7 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; } - cl_int err = create_single_kernel_helper( context, &p, &k, 4, sizeof_kernel_code, "test_sizeof" ); + cl_int err = create_single_kernel_helper_with_build_options(context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", "-cl-std=CL2.0"); if( err ) return err; diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 542e893f..bafb3a02 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1566,6 +1566,7 @@ public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryScopeStr; using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::UseSVM; using CBasicTestMemOrderScope::LocalMemory; CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { @@ -1606,7 +1607,7 @@ public: program += " atomic_work_item_fence(" + std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + "memory_order_acquire," + - std::string(LocalMemory() ? "memory_scope_work_group" : "memory_scope_device") + + std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) + ");\n"; program += @@ -1632,7 +1633,7 @@ public: program += " atomic_work_item_fence(" + std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + "memory_order_release," + - std::string(LocalMemory() ? "memory_scope_work_group" : "memory_scope_device") + + std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) + ");\n"; program += diff --git a/test_conformance/compatibility/test_common/harness/compat.h b/test_conformance/compatibility/test_common/harness/compat.h index 22119c46..d177f87a 100644 --- a/test_conformance/compatibility/test_common/harness/compat.h +++ b/test_conformance/compatibility/test_common/harness/compat.h @@ -13,32 +13,63 @@ // See the License for the specific language governing permissions and // limitations under the License. // +/* + Header compat.h should be used instead of stdlib.h, stdbool.h, stdint.h, float.h, fenv.h, + math.h. It provides workarounds if these headers are not available or not complete. + + Important: It should be included before math.h, directly or indirectly, because Intel mathimf.h + is not compatible with Microsoft math.h. Including math.h before mathimf.h causes compile-time + error. +*/ #ifndef _COMPAT_H_ #define _COMPAT_H_ #if defined(_WIN32) && defined (_MSC_VER) - #include -#include -#include -#include -#include -#include - -#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z)) -#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z) -#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z)) - -#define isfinite(x) _finite(x) - -#if !defined(__cplusplus) -typedef char bool; -#define inline - -#else -extern "C" { #endif +#ifdef __cplusplus + #define EXTERN_C extern "C" +#else + #define EXTERN_C +#endif + + +// +// stdlib.h +// + +#include // On Windows, _MAX_PATH defined there. + +// llabs appeared in MS C v16 (VS 10/2010). +#if defined( _MSC_VER ) && _MSC_VER <= 1500 + EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; } +#endif + + +// +// stdbool.h +// + +// stdbool.h appeared in MS C v18 (VS 12/2013). +#if defined( _MSC_VER ) && MSC_VER <= 1700 +#if !defined(__cplusplus) +typedef char bool; + #define true 1 + #define false 0 + #endif +#else + #include +#endif + + + +// +// stdint.h +// + +// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12. +#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 ) typedef unsigned char uint8_t; typedef char int8_t; typedef unsigned short uint16_t; @@ -47,25 +78,83 @@ typedef unsigned int uint32_t; typedef int int32_t; typedef unsigned long long uint64_t; typedef long long int64_t; - -#define MAXPATHLEN MAX_PATH - -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; +#else +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif + #include +#endif -#define INFINITY (FLT_MAX + FLT_MAX) -//#define NAN (INFINITY | 1) -//const static int PINFBITPATT_SP32 = INFINITY; + +// +// float.h +// + +#include + + + +// +// fenv.h +// + +// fenv.h appeared in MS C v18 (VS 12/2013). +#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER ) + // reimplement fenv.h because windows doesn't have it + #define FE_INEXACT 0x0020 + #define FE_UNDERFLOW 0x0010 + #define FE_OVERFLOW 0x0008 + #define FE_DIVBYZERO 0x0004 + #define FE_INVALID 0x0001 + #define FE_ALL_EXCEPT 0x003D + int fetestexcept(int excepts); + int feclearexcept(int excepts); +#else + #include +#endif + + +// +// math.h +// + +#if defined( __INTEL_COMPILER ) + #include +#else + #include +#endif + +#if defined( _MSC_VER ) + + #ifdef __cplusplus + extern "C" { + #endif #ifndef M_PI #define M_PI 3.14159265358979323846264338327950288 #endif + #if ! defined( __INTEL_COMPILER ) + #ifndef NAN + #define NAN (INFINITY - INFINITY) + #endif + #ifndef HUGE_VALF + #define HUGE_VALF (float)HUGE_VAL + #endif + #ifndef INFINITY + #define INFINITY (FLT_MAX + FLT_MAX) + #endif + #ifndef isfinite + #define isfinite(x) _finite(x) + #endif + #ifndef isnan #define isnan( x ) ((x) != (x)) + #endif + #ifndef isinf #define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY) + #endif double rint( double x); float rintf( float x); @@ -99,30 +188,6 @@ long double remquol( long double x, long double y, int *quo); long double scalblnl(long double x, long n); -inline long long -llabs(long long __x) { return __x >= 0 ? __x : -__x; } - - -// end of math functions - -uint64_t ReadTime( void ); -double SubtractTime( uint64_t endTime, uint64_t startTime ); - -#define sleep(X) Sleep(1000*X) -// snprintf added in _MSC_VER == 1900 (Visual Studio 2015) -#if _MSC_VER < 1900 - #define snprintf sprintf_s -#endif -//#define hypotl _hypot - -float make_nan(); -float nanf( const char* str); -double nan( const char* str); -long double nanl( const char* str); - -//#if defined USE_BOOST -//#include -//double hypot(double x, double y); float hypotf(float x, float y); long double hypotl(long double x, long double y) ; double lgamma(double x); @@ -147,70 +212,179 @@ double round(double x); float roundf(float x); long double roundl(long double x); -// Added in _MSC_VER == 1800 (Visual Studio 2013) -#if _MSC_VER < 1800 - int signbit(double x); -#endif -int signbitf(float x); + int cf_signbit(double x); + int cf_signbitf(float x); -//bool signbitl(long double x) { return boost::math::tr1::signbit(x); } -//#endif // USE_BOOST + static int signbit(double x) { return cf_signbit(x); } + static int signbitf(float x) { return cf_signbitf(x); } long int lrint (double flt); long int lrintf (float flt); - float int2float (int32_t ix); int32_t float2int (float fx); + #endif + + #if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 + // These functions appeared in Intel C v13. + float nanf( const char* str); + double nan( const char* str); + long double nanl( const char* str); + #endif + + #ifdef __cplusplus + } + #endif + +#endif + +#if defined( __ANDROID__ ) + #define log2(X) (log(X)/log(2)) +#endif + + + +// +// stdio.h +// + +#if defined( _MSC_VER ) + #define snprintf sprintf_s +#endif + + + +// +// unistd.h +// + +#if defined( _MSC_VER ) + EXTERN_C unsigned int sleep( unsigned int sec ); + EXTERN_C int usleep( int usec ); +#endif + + + +// +// syscall.h +// + +#if defined( __ANDROID__ ) + // Android bionic's isn't providing SYS_sysctl wrappers. + #define SYS__sysctl __NR__sysctl +#endif + + + +// Some tests use _malloca which defined in malloc.h. +#if !defined (__APPLE__) +#include +#endif + + +// +// ??? +// + +#if defined( _MSC_VER ) + + #define MAXPATHLEN _MAX_PATH + + EXTERN_C uint64_t ReadTime( void ); + EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime ); + /** Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined. */ -int __builtin_clz(unsigned int pattern); - - -static const double zero= 0.00000000000000000000e+00; -#define NAN (INFINITY - INFINITY) -#define HUGE_VALF (float)HUGE_VAL - -int usleep(int usec); - -// reimplement fenv.h because windows doesn't have it -#define FE_INEXACT 0x0020 -#define FE_UNDERFLOW 0x0010 -#define FE_OVERFLOW 0x0008 -#define FE_DIVBYZERO 0x0004 -#define FE_INVALID 0x0001 -#define FE_ALL_EXCEPT 0x003D - -int fetestexcept(int excepts); -int feclearexcept(int excepts); - -#ifdef __cplusplus -} -#endif - -#else // !((defined(_WIN32) && defined(_MSC_VER) -#if defined(__MINGW32__) -#include -#define sleep(X) Sleep(1000*X) + EXTERN_C int __builtin_clz(unsigned int pattern); #endif -#if defined(__linux__) || defined(__MINGW32__) || defined(__APPLE__) -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS + +#ifndef MIN + #define MIN(x,y) (((x)<(y))?(x):(y)) #endif -#include -#include -#include -#include +#ifndef MAX + #define MAX(x,y) (((x)>(y))?(x):(y)) #endif + + +/* + ------------------------------------------------------------------------------------------------ + WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG. + + This is a typical usage of the macros: + + double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2); + + (taken from math_brute_force/reference_math.c). There are two problems: + + 1. There is an error here. On Windows in will produce incorrect result + `0x1.5555555555555p+50'. To have a correct result it should be written as + `MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the + third argument is not obvious -- sometimes it should be the same as exponent of the + first argument, but sometimes not. + + 2. Information is duplicated. It is easy to make a mistake. + + Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file). + ------------------------------------------------------------------------------------------------ +*/ +#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER ) + + #define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z)) + #define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z) + #define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z)) + +#else + +// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead. #define MAKE_HEX_FLOAT(x,y,z) x #define MAKE_HEX_DOUBLE(x,y,z) x #define MAKE_HEX_LONG(x,y,z) x -#endif // !((defined(_WIN32) && defined(_MSC_VER) +#endif +/* + ------------------------------------------------------------------------------------------------ + HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long + double respectively. Arguments: + + sm -- sign of number, + int -- integer part of mantissa (without `0x' prefix), + fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes), + se -- sign of exponent, + exp -- absolute value of (binary) exponent. + + Example: + + double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2 + + Note: + + We have to pass signs as separate arguments because gcc pass negative integer values + (e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result + `0x1.0p- 2' (note a space between minus and two) which is not a correct floating point + literal. + ------------------------------------------------------------------------------------------------ +*/ +#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER ) + // If compiler does not support hex floating point literals: + #define HEX_FLT( sm, int, fract, se, exp ) sm ldexpf( (float)( 0x ## int ## fract ## UL ), se exp + ilogbf( (float) 0x ## int ) - ilogbf( ( float )( 0x ## int ## fract ## UL ) ) ) + #define HEX_DBL( sm, int, fract, se, exp ) sm ldexp( (double)( 0x ## int ## fract ## ULL ), se exp + ilogb( (double) 0x ## int ) - ilogb( ( double )( 0x ## int ## fract ## ULL ) ) ) + #define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) ) +#else + // If compiler supports hex floating point literals: just concatenate all the parts into a literal. + #define HEX_FLT( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F + #define HEX_DBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp + #define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L +#endif + +#if defined(__MINGW32__) + #include + #define sleep(sec) Sleep((sec) * 1000) +#endif + #endif // _COMPAT_H_ diff --git a/test_conformance/compatibility/test_common/harness/kernelHelpers.c b/test_conformance/compatibility/test_common/harness/kernelHelpers.c index 3680c06e..42f41aa4 100644 --- a/test_conformance/compatibility/test_common/harness/kernelHelpers.c +++ b/test_conformance/compatibility/test_common/harness/kernelHelpers.c @@ -475,14 +475,14 @@ size_t get_pixel_bytes( const cl_image_format *fmt ) return 0; } -int verifyImageSupport( cl_device_id device ) +test_status verifyImageSupport( cl_device_id device ) { if( checkForImageSupport( device ) ) { log_error( "ERROR: Device does not supported images as required by this test!\n" ); - return CL_IMAGE_FORMAT_NOT_SUPPORTED; + return TEST_FAIL; } - return 0; + return TEST_PASS; } int checkForImageSupport( cl_device_id device ) diff --git a/test_conformance/compatibility/test_common/harness/kernelHelpers.h b/test_conformance/compatibility/test_common/harness/kernelHelpers.h index 59c01b5f..09515e28 100644 --- a/test_conformance/compatibility/test_common/harness/kernelHelpers.h +++ b/test_conformance/compatibility/test_common/harness/kernelHelpers.h @@ -17,6 +17,7 @@ #define _kernelHelpers_h #include "compat.h" +#include "testHarness.h" #include #include @@ -84,8 +85,8 @@ extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl /* Helper to get pixel size for a pixel format */ size_t get_pixel_bytes( const cl_image_format *fmt ); -/* Verify the given device supports images. 0 means you're good to go, otherwise an error */ -extern int verifyImageSupport( cl_device_id device ); +/* Verify the given device supports images. */ +extern test_status verifyImageSupport( cl_device_id device ); /* Checks that the given device supports images. Same as verify, but doesn't print an error */ extern int checkForImageSupport( cl_device_id device ); diff --git a/test_conformance/compatibility/test_common/harness/msvc9.c b/test_conformance/compatibility/test_common/harness/msvc9.c index 811c6654..093bb978 100644 --- a/test_conformance/compatibility/test_common/harness/msvc9.c +++ b/test_conformance/compatibility/test_common/harness/msvc9.c @@ -13,15 +13,18 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#if defined(_WIN32) && defined (_MSC_VER) - #include "compat.h" -#include -#include -#include -#include +#if defined ( _MSC_VER ) +#include +#include + +#include + +#include + +#if ! defined( __INTEL_COMPILER ) /////////////////////////////////////////////////////////////////// // @@ -387,86 +390,6 @@ long double log2l(long double x) return 1.44269504088896340735992468100189214L * log(x); } -/////////////////////////////////////////////////////////////////// -// -// misc functions -// -/////////////////////////////////////////////////////////////////// - -/* -// This function is commented out because the Windows implementation should never call munmap. -// If it is calling it, we have a bug. Please file a bugzilla. -int munmap(void *addr, size_t len) -{ -// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html - - return (int)VirtualAlloc( (LPVOID)addr, len, - MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS ); -} -*/ - -uint64_t ReadTime( void ) -{ - LARGE_INTEGER current; - QueryPerformanceCounter(¤t); - return (uint64_t)current.QuadPart; -} - -double SubtractTime( uint64_t endTime, uint64_t startTime ) -{ - static double PerformanceFrequency = 0.0; - - if (PerformanceFrequency == 0.0) { - LARGE_INTEGER frequency; - QueryPerformanceFrequency(&frequency); - PerformanceFrequency = (double) frequency.QuadPart; - } - - return (double)(endTime - startTime) / PerformanceFrequency * 1e9; -} - -float make_nan() -{ -/* This is the IEEE 754 single-precision format: - unsigned int mantissa: 22; - unsigned int quiet_nan: 1; - unsigned int exponent: 8; - unsigned int negative: 1; -*/ - //const static unsigned - static const int32_t _nan = 0x7fc00000; - return *(const float*)(&_nan); -} - -float nanf( const char* str) -{ - cl_uint u = atoi( str ); - u |= 0x7fc00000U; - return *( float*)(&u); -} - - -double nan( const char* str) -{ - cl_ulong u = atoi( str ); - u |= 0x7ff8000000000000ULL; - return *( double*)(&u); -} - -// double check this implementatation -long double nanl( const char* str) -{ - union - { - long double f; - struct { cl_ulong m; cl_ushort sexp; }u; - }u; - u.u.sexp = 0x7fff; - u.u.m = 0x8000000000000000ULL | atoi( str ); - - return u.f; -} - double trunc(double x) { double absx = fabs(x); @@ -589,31 +512,6 @@ long double roundl(long double x) return x; } -// Added in _MSC_VER == 1800 (Visual Studio 2013) -#if _MSC_VER < 1800 -int signbit(double x) -{ - union - { - double f; - cl_ulong u; - }u; - u.f = x; - return u.u >> 63; -} -#endif - -int signbitf(float x) -{ - union - { - float f; - cl_uint u; - }u; - u.f = x; - return u.u >> 31; -} - float cbrtf( float x ) { float z = pow( fabs((double) x), 1.0 / 3.0 ); @@ -625,6 +523,175 @@ double cbrt( double x ) return copysign( pow( fabs( x ), 1.0 / 3.0 ), x ); } +long int lrint (double x) +{ + double absx = fabs(x); + + if( x >= (double) LONG_MAX ) + return LONG_MAX; + + if( absx < 4503599627370496.0 /* 0x1.0p52 */ ) + { + double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x ); + double rounded = x + magic; + rounded -= magic; + return (long int) rounded; + } + + return (long int) x; +} + +long int lrintf (float x) +{ + float absx = fabsf(x); + + if( x >= (float) LONG_MAX ) + return LONG_MAX; + + if( absx < 8388608.0f /* 0x1.0p23f */ ) + { + float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x ); + float rounded = x + magic; + rounded -= magic; + return (long int) rounded; + } + + return (long int) x; +} + + +/////////////////////////////////////////////////////////////////// +// +// fenv functions +// +/////////////////////////////////////////////////////////////////// + +int fetestexcept(int excepts) +{ + unsigned int status = _statusfp(); + return excepts & ( + ((status & _SW_INEXACT) ? FE_INEXACT : 0) | + ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) | + ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) | + ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) | + ((status & _SW_INVALID) ? FE_INVALID : 0) + ); +} + +int feclearexcept(int excepts) +{ + _clearfp(); + return 0; +} + +#endif // __INTEL_COMPILER + +#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 + +float make_nan() +{ +/* This is the IEEE 754 single-precision format: + unsigned int mantissa: 22; + unsigned int quiet_nan: 1; + unsigned int exponent: 8; + unsigned int negative: 1; +*/ + //const static unsigned + static const int32_t _nan = 0x7fc00000; + return *(const float*)(&_nan); +} + +float nanf( const char* str) +{ + cl_uint u = atoi( str ); + u |= 0x7fc00000U; + return *( float*)(&u); +} + + +double nan( const char* str) +{ + cl_ulong u = atoi( str ); + u |= 0x7ff8000000000000ULL; + return *( double*)(&u); +} + +// double check this implementatation +long double nanl( const char* str) +{ + union + { + long double f; + struct { cl_ulong m; cl_ushort sexp; }u; + }u; + u.u.sexp = 0x7fff; + u.u.m = 0x8000000000000000ULL | atoi( str ); + + return u.f; +} + +#endif + +/////////////////////////////////////////////////////////////////// +// +// misc functions +// +/////////////////////////////////////////////////////////////////// + +/* +// This function is commented out because the Windows implementation should never call munmap. +// If it is calling it, we have a bug. Please file a bugzilla. +int munmap(void *addr, size_t len) +{ +// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html + + return (int)VirtualAlloc( (LPVOID)addr, len, + MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS ); +} +*/ + +uint64_t ReadTime( void ) +{ + LARGE_INTEGER current; + QueryPerformanceCounter(¤t); + return (uint64_t)current.QuadPart; +} + +double SubtractTime( uint64_t endTime, uint64_t startTime ) +{ + static double PerformanceFrequency = 0.0; + + if (PerformanceFrequency == 0.0) { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + PerformanceFrequency = (double) frequency.QuadPart; + } + + return (double)(endTime - startTime) / PerformanceFrequency * 1e9; +} + +int cf_signbit(double x) +{ + union + { + double f; + cl_ulong u; + }u; + u.f = x; + return u.u >> 63; +} + +int cf_signbitf(float x) +{ + union + { + float f; + cl_uint u; + }u; + u.f = x; + return u.u >> 31; +} + float int2float (int32_t ix) { union { @@ -645,7 +712,7 @@ int32_t float2int (float fx) return u.i; } -#if defined(_MSC_VER) && !defined(_WIN64) +#if !defined(_WIN64) /** Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined. @@ -685,45 +752,10 @@ int __builtin_clz(unsigned int pattern) return count; } -#endif //defined(_MSC_VER) && !defined(_WIN64) +#endif // !defined(_WIN64) #include #include -long int lrint (double x) -{ - double absx = fabs(x); - - if( x >= (double) LONG_MAX ) - return LONG_MAX; - - if( absx < 4503599627370496.0 /* 0x1.0p52 */ ) - { - double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x ); - double rounded = x + magic; - rounded -= magic; - return (long int) rounded; - } - - return (long int) x; -} - -long int lrintf (float x) -{ - float absx = fabsf(x); - - if( x >= (float) LONG_MAX ) - return LONG_MAX; - - if( absx < 8388608.0f /* 0x1.0p23f */ ) - { - float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x ); - float rounded = x + magic; - rounded -= magic; - return (long int) rounded; - } - - return (long int) x; -} int usleep(int usec) { @@ -731,24 +763,10 @@ int usleep(int usec) return 0; } -#if _MSC_VER < 1900 -int fetestexcept(int excepts) +unsigned int sleep( unsigned int sec ) { - unsigned int status = _statusfp(); - return excepts & ( - ((status & _SW_INEXACT) ? FE_INEXACT : 0) | - ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) | - ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) | - ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) | - ((status & _SW_INVALID) ? FE_INVALID : 0) - ); -} - -int feclearexcept(int excepts) -{ - _clearfp(); + Sleep( sec * 1000 ); return 0; } -#endif -#endif //defined(_WIN32) +#endif // defined( _MSC_VER ) diff --git a/test_conformance/compatibility/test_common/harness/testHarness.c b/test_conformance/compatibility/test_common/harness/testHarness.c index 1994399d..af05c8d5 100644 --- a/test_conformance/compatibility/test_common/harness/testHarness.c +++ b/test_conformance/compatibility/test_common/harness/testHarness.c @@ -439,10 +439,18 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, /* If we have a device checking function, run it */ - if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS ) + if( ( deviceCheckFn != NULL ) ) { - test_finish(); - return -1; + test_status status = deviceCheckFn( device ); + switch (status) + { + case TEST_PASS: + break; + case TEST_FAIL: + return 1; + case TEST_SKIP: + return 0; + } } if (num_elements <= 0) diff --git a/test_conformance/compatibility/test_common/harness/testHarness.h b/test_conformance/compatibility/test_common/harness/testHarness.h index c2620647..7c1d9cb2 100644 --- a/test_conformance/compatibility/test_common/harness/testHarness.h +++ b/test_conformance/compatibility/test_common/harness/testHarness.h @@ -23,6 +23,13 @@ extern "C" { #endif +typedef enum test_status +{ + TEST_PASS = 0, + TEST_FAIL = 1, + TEST_SKIP = 2, +} test_status; + extern cl_uint gReSeed; extern cl_uint gRandomSeed; @@ -32,8 +39,8 @@ extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns, basefn fnList[], const char *fnNames[], int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps ); -// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits. -typedef int (*DeviceCheckFn)( cl_device_id device ); +// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than TEST_PASS, the harness exits. +typedef test_status (*DeviceCheckFn)( cl_device_id device ); // Same as runTestHarness, but also supplies a function that checks the created device for required functionality. extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, diff --git a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt index 50dd64ef..4af76989 100644 --- a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt +++ b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt @@ -20,6 +20,7 @@ set(${MODULE_NAME}_SOURCES test_mem_object_info.cpp test_null_buffer_arg.c test_kernel_arg_info.c + test_queue_properties.cpp ../../test_common/harness/errorHelpers.c ../../test_common/harness/threadTesting.c ../../test_common/harness/testHarness.c diff --git a/test_conformance/compatibility/test_conformance/api/main.c b/test_conformance/compatibility/test_conformance/api/main.c index 1870bf8f..a8e9e993 100644 --- a/test_conformance/compatibility/test_conformance/api/main.c +++ b/test_conformance/compatibility/test_conformance/api/main.c @@ -113,6 +113,7 @@ basefn basefn_list[] = { test_get_image1d_info, test_get_image1d_array_info, test_get_image2d_array_info, + test_queue_properties, }; @@ -200,6 +201,7 @@ const char *basefn_names[] = { "get_image1d_info", "get_image1d_array_info", "get_image2d_array_info", + "queue_properties", }; ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); diff --git a/test_conformance/compatibility/test_conformance/api/procs.h b/test_conformance/compatibility/test_conformance/api/procs.h index ff796a38..e0a85356 100644 --- a/test_conformance/compatibility/test_conformance/api/procs.h +++ b/test_conformance/compatibility/test_conformance/api/procs.h @@ -105,4 +105,5 @@ extern int test_get_image1d_info( cl_device_id deviceID, cl_context context extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_queue_properties( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); diff --git a/test_conformance/compatibility/test_conformance/api/test_queue_properties.cpp b/test_conformance/compatibility/test_conformance/api/test_queue_properties.cpp new file mode 100644 index 00000000..3b368077 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_queue_properties.cpp @@ -0,0 +1,174 @@ +// +// Copyright (c) 2018 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + + +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include +#include +#include + +using namespace std; +/* +The test against cl_khr_create_command_queue extension. It validates if devices with Opencl 1.X can use clCreateCommandQueueWithPropertiesKHR function. +Based on device capabilities test will create queue with NULL properties, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property and +CL_QUEUE_PROFILING_ENABLE property. Finally simple kernel will be executed on such queue. +*/ + +const char *queue_test_kernel[] = { +"__kernel void vec_cpy(__global int *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src[tid];\n" +"\n" +"}\n" }; + +int enqueue_kernel(cl_context context, const cl_queue_properties_khr *queue_prop_def, cl_device_id deviceID, clKernelWrapper& kernel, size_t num_elements) +{ + clMemWrapper streams[2]; + int error; + std::vector buf(num_elements); + clCreateCommandQueueWithPropertiesKHR_fn clCreateCommandQueueWithPropertiesKHR = NULL; + cl_platform_id platform; + clEventWrapper event; + + error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); + + clCreateCommandQueueWithPropertiesKHR = (clCreateCommandQueueWithPropertiesKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clCreateCommandQueueWithPropertiesKHR"); + if (clCreateCommandQueueWithPropertiesKHR == NULL) + { + log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed\n"); + return -1; + } + + clCommandQueueWrapper queue = clCreateCommandQueueWithPropertiesKHR(context, deviceID, queue_prop_def, &error); + test_error(error, "clCreateCommandQueueWithPropertiesKHR failed"); + + for (int i = 0; i < num_elements; ++i) + { + buf[i] = i; + } + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, num_elements * sizeof(int), buf.data(), &error); + test_error( error, "clCreateBuffer failed." ); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, num_elements * sizeof(int), NULL, &error); + test_error( error, "clCreateBuffer failed." ); + + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error( error, "clSetKernelArg failed." ); + + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error( error, "clSetKernelArg failed." ); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &num_elements, NULL, 0, NULL, &event); + test_error( error, "clEnqueueNDRangeKernel failed." ); + + error = clWaitForEvents(1, &event); + test_error(error, "clWaitForEvents failed."); + + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, num_elements, buf.data(), 0, NULL, NULL); + test_error( error, "clEnqueueReadBuffer failed." ); + + for (int i = 0; i < num_elements; ++i) + { + if (buf[i] != i) + { + log_error("ERROR: Incorrect vector copy result."); + return -1; + } + } + + return 0; +} + +int test_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + if (num_elements <= 0) + { + num_elements = 128; + } + int error = 0; + + clProgramWrapper program; + clKernelWrapper kernel; + size_t strSize; + std::string strExt(0, '\0'); + cl_queue_properties_khr device_props = NULL; + cl_queue_properties_khr queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, 0 }; + + // Query extension + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &strSize); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed"); + strExt.resize(strSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, strExt.size(), &strExt[0], NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed"); + log_info("CL_DEVICE_EXTENSIONS:\n%s\n\n", strExt.c_str()); + + if (strExt.find("cl_khr_create_command_queue") == string::npos) + { + log_info("extension cl_khr_create_command_queue is not supported.\n"); + return 0; + } + + error = create_single_kernel_helper(context, &program, &kernel, 1, queue_test_kernel, "vec_cpy"); + test_error(error, "create_single_kernel_helper failed"); + + log_info("Queue property NULL. Testing ... \n"); + error = enqueue_kernel(context, NULL,deviceID, kernel, (size_t)num_elements); + test_error(error, "enqueue_kernel failed"); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed"); + + if (device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) + { + log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE supported. Testing ... \n"); + queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements); + test_error(error, "enqueue_kernel failed"); + } else + { + log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not supported \n"); + } + + if (device_props & CL_QUEUE_PROFILING_ENABLE) + { + log_info("Queue property CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n"); + queue_prop_def[1] = CL_QUEUE_PROFILING_ENABLE; + error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements); + test_error(error, "enqueue_kernel failed"); + } else + { + log_info("Queue property CL_QUEUE_PROFILING_ENABLE not supported \n"); + } + + if (device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE && device_props & CL_QUEUE_PROFILING_ENABLE) + { + log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE & CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n"); + queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_PROFILING_ENABLE; + error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements); + test_error(error, "enqueue_kernel failed"); + } + else + { + log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE or CL_QUEUE_PROFILING_ENABLE not supported \n"); + } + + return 0; +} \ No newline at end of file diff --git a/test_conformance/compatibility/test_conformance/basic/run_array b/test_conformance/compatibility/test_conformance/basic/run_array old mode 100644 new mode 100755 diff --git a/test_conformance/compatibility/test_conformance/basic/run_array_image_copy b/test_conformance/compatibility/test_conformance/basic/run_array_image_copy old mode 100644 new mode 100755 diff --git a/test_conformance/compatibility/test_conformance/basic/run_image b/test_conformance/compatibility/test_conformance/basic/run_image old mode 100644 new mode 100755 diff --git a/test_conformance/compatibility/test_conformance/basic/run_multi_read_image b/test_conformance/compatibility/test_conformance/basic/run_multi_read_image old mode 100644 new mode 100755 diff --git a/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp index 6db06398..dca52b5e 100644 --- a/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp +++ b/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp @@ -202,10 +202,10 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu log_error( "ERROR: Results of copy did not validate!\n" ); sprintf(values + strlen( values), "%d -> [", i); for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]); + sprintf(values + strlen( values), "%2x ", inchar[j]); sprintf(values + strlen(values), "] != ["); for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]); + sprintf(values + strlen( values), "%2x ", outchar[j]); sprintf(values + strlen(values), "]"); log_error("%s\n", values); diff --git a/test_conformance/compatibility/test_conformance/basic/test_sizeof.c b/test_conformance/compatibility/test_conformance/basic/test_sizeof.c index bece1b94..9c56bb58 100644 --- a/test_conformance/compatibility/test_conformance/basic/test_sizeof.c +++ b/test_conformance/compatibility/test_conformance/basic/test_sizeof.c @@ -133,7 +133,7 @@ const size_table vector_table[] = const char *ptr_table[] = { - "void*", + "global void*", "size_t", "sizeof(int)", // check return type of sizeof "ptrdiff_t" diff --git a/test_conformance/compatibility/test_conformance/images/image_helpers.h b/test_conformance/compatibility/test_conformance/images/image_helpers.h index d2132c7e..cd0eff6c 100644 --- a/test_conformance/compatibility/test_conformance/images/image_helpers.h +++ b/test_conformance/compatibility/test_conformance/images/image_helpers.h @@ -481,7 +481,6 @@ extern char *create_random_image_data( ExplicitType dataType, image_descriptor * extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine ); extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler); extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter ); -extern int issubnormal(float); #define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) ) diff --git a/test_conformance/compiler/test_async_build.c b/test_conformance/compiler/test_async_build.c index e32c64ab..b4c08b5e 100644 --- a/test_conformance/compiler/test_async_build.c +++ b/test_conformance/compiler/test_async_build.c @@ -86,9 +86,8 @@ int test_async_build_pieces(cl_device_id deviceID, cl_context context, cl_comman return -1; } - clReleaseProgram( program ); + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); return 0; } - - diff --git a/test_conformance/compiler/test_build_helpers.c b/test_conformance/compiler/test_build_helpers.c index 0fcb7103..35875e04 100644 --- a/test_conformance/compiler/test_build_helpers.c +++ b/test_conformance/compiler/test_build_helpers.c @@ -172,7 +172,8 @@ int test_load_two_kernel_source(cl_device_id deviceID, cl_context context, cl_co /* Should probably check binary here to verify the same results... */ /* All done! */ - clReleaseProgram( program ); + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); return 0; } @@ -198,7 +199,8 @@ int test_load_null_terminated_source(cl_device_id deviceID, cl_context context, /* Should probably check binary here to verify the same results... */ /* All done! */ - clReleaseProgram( program ); + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); return 0; } @@ -224,7 +226,8 @@ int test_load_null_terminated_multi_line_source(cl_device_id deviceID, cl_contex /* Should probably check binary here to verify the same results... */ /* All done! */ - clReleaseProgram( program ); + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); return 0; } @@ -262,7 +265,8 @@ int test_load_discreet_length_source(cl_device_id deviceID, cl_context context, /* Should probably check binary here to verify the same results... */ /* All done! */ - clReleaseProgram( program ); + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); return 0; } @@ -297,7 +301,8 @@ int test_load_null_terminated_partial_multi_line_source(cl_device_id deviceID, c /* Should probably check binary here to verify the same results... */ /* All done! */ - clReleaseProgram( program ); + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); return 0; } @@ -393,7 +398,9 @@ int test_get_program_info(cl_device_id deviceID, cl_context context, cl_command_ return -1; } - clReleaseProgram( program ); + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); return 0; } @@ -445,8 +452,10 @@ int test_get_program_source(cl_device_id deviceID, cl_context context, cl_comman return -1; } - /* if we got here, everything passed */ - clReleaseProgram( program ); + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + return 0; } @@ -493,26 +502,25 @@ int test_get_program_build_info(cl_device_id deviceID, cl_context context, cl_co error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); test_error( error, "Unable to get program build log length" ); - log_info("Build log is %ld long.\n", length); + log_info("Build log is %ld long.\n", length); - buffer = (char*)malloc(length); + buffer = (char*)malloc(length); /* Try normal source */ error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, length, buffer, NULL ); test_error( error, "Unable to get program build log" ); - if( buffer[length-1] != '\0' ) - { + if( buffer[length-1] != '\0' ) + { log_error( "clGetProgramBuildInfo overwrote allocated space for build log! '%c'\n", buffer[length-1] ); return -1; - } + } /* Try both at once */ error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, length, buffer, &newLength ); test_error( error, "Unable to get program build log" ); - free(buffer); - + free(buffer); /***** Build options *****/ error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &length ); @@ -530,8 +538,10 @@ int test_get_program_build_info(cl_device_id deviceID, cl_context context, cl_co free(buffer); - /* Try with a valid option */ - clReleaseProgram( program ); + /* Try with a valid option */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + program = clCreateProgramWithSource( context, 1, sample_kernel_code_single_line, NULL, &error ); if( program == NULL ) { @@ -546,10 +556,10 @@ int test_get_program_build_info(cl_device_id deviceID, cl_context context, cl_co return -1; } - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, NULL, NULL, &length ); + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, NULL, NULL, &length ); test_error( error, "Unable to get program build options" ); - buffer = (char*)malloc(length); + buffer = (char*)malloc(length); error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, length, buffer, NULL ); test_error( error, "Unable to get program build options" ); @@ -559,13 +569,11 @@ int test_get_program_build_info(cl_device_id deviceID, cl_context context, cl_co return -1; } - free(buffer); + /* All done */ + free( buffer ); + + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); - /* if we got here, everything passed */ - clReleaseProgram( program ); return 0; } - - - - diff --git a/test_conformance/compiler/test_compile.c b/test_conformance/compiler/test_compile.c index 926d69bd..dc0d0972 100644 --- a/test_conformance/compiler/test_compile.c +++ b/test_conformance/compiler/test_compile.c @@ -558,6 +558,9 @@ int test_large_multiple_embedded_headers(cl_context context, cl_device_id device free( simple_kernels ); free( headers ); + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + error = clReleaseProgram( my_newly_minted_library ); test_error( error, "Unable to release program object" ); @@ -728,6 +731,9 @@ int test_large_multiple_libraries(cl_context context, cl_device_id deviceID, cl_ } free( simple_kernels ); + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + error = clReleaseProgram( my_newly_linked_program ); test_error( error, "Unable to release program object" ); @@ -895,6 +901,9 @@ int test_large_multiple_files_multiple_libraries(cl_context context, cl_device_i } free( simple_kernels ); + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + error = clReleaseProgram( my_newly_linked_program ); test_error( error, "Unable to release program object" ); @@ -1032,6 +1041,9 @@ int test_large_multiple_files(cl_context context, cl_device_id deviceID, cl_comm } free( lines ); + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + error = clReleaseProgram( my_newly_linked_program ); test_error( error, "Unable to release program object" ); @@ -1311,6 +1323,9 @@ int test_simple_embedded_header_compile(cl_device_id deviceID, cl_context contex error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); + error = clReleaseProgram( header ); + test_error( error, "Unable to release program object" ); + return 0; } @@ -1517,6 +1532,9 @@ int test_simple_embedded_header_link(cl_device_id deviceID, cl_context context, error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); + error = clReleaseProgram( header ); + test_error( error, "Unable to release program object" ); + error = clReleaseProgram( simple_program ); test_error( error, "Unable to release program object" ); @@ -1571,7 +1589,7 @@ int test_simple_link_with_callback(cl_device_id deviceID, cl_context context, cl simple_user_data simple_link_user_data = {when_i_pondered_weak_and_weary, link_program_completion_event}; - clLinkProgram(context, 1, &deviceID, NULL, 1, &program, simple_link_callback, (void*)&simple_link_user_data, &error); + cl_program my_linked_library = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, simple_link_callback, (void*)&simple_link_user_data, &error); test_error( error, "Unable to link a simple program" ); error = clWaitForEvents(1, &link_program_completion_event); @@ -1584,6 +1602,9 @@ int test_simple_link_with_callback(cl_device_id deviceID, cl_context context, cl error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); + error = clReleaseProgram( my_linked_library ); + test_error( error, "Unable to release program object" ); + return 0; } @@ -1738,6 +1759,9 @@ int test_execute_after_simple_compile_and_link(cl_device_id deviceID, cl_context return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); @@ -1774,6 +1798,9 @@ int test_execute_after_simple_compile_and_link_no_device_info(cl_device_id devic return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); @@ -1810,6 +1837,9 @@ int test_execute_after_simple_compile_and_link_with_defines(cl_device_id deviceI return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); @@ -1887,6 +1917,9 @@ int test_execute_after_serialize_reload_object(cl_device_id deviceID, cl_context return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); @@ -1991,6 +2024,12 @@ int test_execute_after_serialize_reload_library(cl_device_id deviceID, cl_contex return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + + error = clReleaseKernel( another_kernel ); + test_error( error, "Unable to release another kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); @@ -2065,7 +2104,8 @@ int test_execute_after_simple_compile_and_link_with_callbacks(cl_device_id devic error = clWaitForEvents(1, &compile_program_completion_event); test_error( error, "clWaitForEvents failed when waiting on compile_program_completion_event"); - clReleaseEvent(compile_program_completion_event); + error = clReleaseEvent(compile_program_completion_event); + test_error( error, "Unable to release event object" ); link_program_completion_event = clCreateUserEvent(context, &error); test_error( error, "Unable to create a user event"); @@ -2077,7 +2117,8 @@ int test_execute_after_simple_compile_and_link_with_callbacks(cl_device_id devic error = clWaitForEvents(1, &link_program_completion_event); test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event"); - clReleaseEvent(link_program_completion_event); + error = clReleaseEvent(link_program_completion_event); + test_error( error, "Unable to release event object" ); cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); test_error( error, "Unable to create a simple kernel" ); @@ -2087,6 +2128,9 @@ int test_execute_after_simple_compile_and_link_with_callbacks(cl_device_id devic return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); @@ -2264,6 +2308,12 @@ int test_execute_after_simple_library_with_link(cl_device_id deviceID, cl_contex return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + + error = clReleaseKernel( another_kernel ); + test_error( error, "Unable to release another kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); @@ -2368,6 +2418,12 @@ int test_execute_after_two_file_link(cl_device_id deviceID, cl_context context, return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + + error = clReleaseKernel( another_kernel ); + test_error( error, "Unable to release another kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); @@ -2433,9 +2489,18 @@ int test_execute_after_embedded_header_link(cl_device_id deviceID, cl_context co return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + + error = clReleaseKernel( another_kernel ); + test_error( error, "Unable to release another kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); + error = clReleaseProgram( header ); + test_error( error, "Unable to release program object" ); + error = clReleaseProgram( simple_program ); test_error( error, "Unable to release program object" ); @@ -2563,6 +2628,12 @@ int test_execute_after_included_header_link(cl_device_id deviceID, cl_context co return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + + error = clReleaseKernel( another_kernel ); + test_error( error, "Unable to release another kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); @@ -2803,6 +2874,12 @@ int test_program_binary_type(cl_device_id deviceID, cl_context context, cl_comma return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + + error = clReleaseKernel( another_kernel ); + test_error( error, "Unable to release another kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); @@ -3107,6 +3184,9 @@ int test_large_compile_and_link_status_options_log(cl_context context, cl_device return error; /* All done! */ + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + error = clReleaseProgram( program ); test_error( error, "Unable to release program object" ); diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index a262d019..81e05232 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -55,6 +55,8 @@ const char *known_extensions[] = { "cl_khr_egl_event", "cl_khr_throttle_hints", "cl_khr_priority_hints", + "cl_khr_create_command_queue", + "cl_khr_il_program", }; size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*); @@ -413,6 +415,10 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context free(extensions_supported[i]); } free(extensions); + if( defines ) { + error = clReleaseMemObject( defines ); + test_error( error, "Unable to release memory object" ); + } if (total_errors) return -1; diff --git a/test_conformance/compiler/test_image_macro.c b/test_conformance/compiler/test_image_macro.c index 329a017c..f85f092f 100644 --- a/test_conformance/compiler/test_image_macro.c +++ b/test_conformance/compiler/test_image_macro.c @@ -89,7 +89,13 @@ int test_image_macro(cl_device_id deviceID, cl_context context, cl_command_queue log_info("CL_DEVICE_IMAGE_SUPPORT not set, __IMAGE_SUPPORT__ macro not set \n"); } - clReleaseProgram( program ); + status = clReleaseProgram( program ); + if( status ) + { + log_error ("Unable to release program object, [%d] \n", status ); + return status; + } + return status; } diff --git a/test_conformance/computeinfo/main.c b/test_conformance/computeinfo/main.c index 8126c95a..71bb1978 100644 --- a/test_conformance/computeinfo/main.c +++ b/test_conformance/computeinfo/main.c @@ -227,6 +227,10 @@ config_info config_infos[] = CONFIG_INFO( 2, 0, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT , cl_uint), CONFIG_INFO( 2, 0, CL_DEVICE_SVM_CAPABILITIES, cl_device_svm_capabilities), + + CONFIG_INFO( 2, 1, CL_DEVICE_IL_VERSION, string), + CONFIG_INFO( 2, 1, CL_DEVICE_MAX_NUM_SUB_GROUPS, cl_uint), + CONFIG_INFO( 2, 1, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, cl_uint), }; #define ENTRY(T) { T, #T } diff --git a/test_conformance/contractions/contractions.c b/test_conformance/contractions/contractions.c index 164fde6c..64502edb 100644 --- a/test_conformance/contractions/contractions.c +++ b/test_conformance/contractions/contractions.c @@ -401,6 +401,8 @@ static void PrintArch( void ) vlog( "\tARCH:\tx86_64\n" ); #elif defined( __arm__ ) vlog( "\tARCH:\tarm\n" ); +#elif defined( __aarch64__ ) + vlog( "\tARCH:\taarch64\n" ); #else vlog( "\tARCH:\tunknown\n" ); #endif diff --git a/test_conformance/conversions/basic_test_conversions.c b/test_conformance/conversions/basic_test_conversions.c index 73f38633..bfd31b27 100644 --- a/test_conformance/conversions/basic_test_conversions.c +++ b/test_conformance/conversions/basic_test_conversions.c @@ -751,16 +751,11 @@ static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) (( static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; } static void ulong2float( void *out, void *in) { -#if defined(_MSC_VER) +#if defined(_MSC_VER) && defined(_M_X64) cl_ulong l = ((cl_ulong*) in)[0]; float result; - cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l; -#if defined(_M_X64) _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl)); -#else - result = sl; -#endif ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result)); #else cl_ulong l = ((cl_ulong*) in)[0]; diff --git a/test_conformance/conversions/test_conversions.c b/test_conformance/conversions/test_conversions.c index fc6f6315..62c709e0 100644 --- a/test_conformance/conversions/test_conversions.c +++ b/test_conformance/conversions/test_conversions.c @@ -98,6 +98,7 @@ cl_mem gOutBuffers[ kCallStyleCount ]; size_t gComputeDevices = 0; uint32_t gDeviceFrequency = 0; int gWimpyMode = 0; +int gWimpyReductionFactor = 128; int gSkipTesting = 0; int gForceFTZ = 0; int gMultithread = 1; @@ -418,6 +419,27 @@ static int ParseArgs( int argc, const char **argv ) case 'w': gWimpyMode ^= 1; break; + case '[': + // wimpy reduction factor can be set with the option -[2^n] + // Default factor is 128, and n practically can be from 1 to 12 + { + const char *arg_temp = strchr(&arg[1], ']'); + if (arg_temp != 0) + { + int new_factor = atoi(&arg[1]); + arg = arg_temp; // Advance until ']' + if (new_factor && !(new_factor & (new_factor - 1))) + { + vlog(" WimpyReduction factor changed from %d to %d \n", gWimpyReductionFactor, new_factor); + gWimpyReductionFactor = new_factor; + } + else + { + vlog(" Error in WimpyReduction factor, must be power of 2 \n"); + } + } + } + break; case 'z': gForceFTZ ^= 1; break; @@ -520,6 +542,7 @@ static int ParseArgs( int argc, const char **argv ) vlog( "*** WARNING: Testing in Wimpy mode! ***\n" ); vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" ); + vlog("*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor); } return 0; @@ -546,6 +569,7 @@ static void PrintUsage( void ) vlog( "\t\t-l\tToggle link check mode. When on, testing is skipped, and we just check to see that the kernels build. (Off by default.)\n" ); vlog( "\t\t-m\tToggle Multithreading. (On by default.)\n" ); vlog( "\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very small subset of the tests for each fn. NOT A VALID TEST! (Off by default.)\n" ); + vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor); vlog( "\t\t-z\tToggle flush to zero mode (Default: per device)\n" ); vlog( "\t\t-#\tTest just vector size given by #, where # is an element of the set {1,2,3,4,8,16}\n" ); vlog( "\n" ); @@ -1245,15 +1269,12 @@ static int DoTest( Type outType, Type inType, SaturationMode sat, RoundingMode r if ( !gWimpyMode && gIsEmbedded ) step = blockCount * EMBEDDED_REDUCTION_FACTOR; + if ( gWimpyMode ) + step = (size_t)blockCount * (size_t)gWimpyReductionFactor; vlog( "Testing... " ); fflush(stdout); for( i = 0; i < (uint64_t)lastCase; i += step ) { - if (gWimpyMode) { - uint64_t blockIndex = (i / blockCount) & 0xFF; - if (blockIndex != 0 && blockIndex != 0xFF) - continue; - } if( 0 == ( i & ((lastCase >> 3) -1))) { vlog("."); diff --git a/test_conformance/d3d10/harness.cpp b/test_conformance/d3d10/harness.cpp index e93cee94..016d9e72 100644 --- a/test_conformance/d3d10/harness.cpp +++ b/test_conformance/d3d10/harness.cpp @@ -200,7 +200,7 @@ cl_int HarnessD3D10_CreateDevice(IDXGIAdapter* pAdapter, ID3D10Device **ppDevice pAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, - D3D10_CREATE_DEVICE_DEBUG, + 0, D3D10_SDK_VERSION, &sd, &HarnessD3D10_pSwapChain, diff --git a/test_conformance/device_partition/Jamfile b/test_conformance/device_partition/Jamfile index 0e1d4d6a..3bf4b29f 100644 --- a/test_conformance/device_partition/Jamfile +++ b/test_conformance/device_partition/Jamfile @@ -1,32 +1,32 @@ -project - : requirements - gcc:-xc++ - msvc:"/TP" - ; - -exe test_device_partition - : main.c - test_device_partition.cpp - ../../test_common/harness/errorHelpers.c - ../../test_common/harness/threadTesting.c - ../../test_common/harness/testHarness.c - ../../test_common/harness/kernelHelpers.c - ../../test_common/harness/genericThread.cpp - ../../test_common/harness/mt19937.c - ../../test_common/harness/conversions.c - ../../test_common/harness/typeWrappers.cpp - : windows:../../test_common/harness/msvc9.c - ; - -install dist - : test_device_partition - : debug:$(DIST)/debug/tests/conformance/1.2/x86/device_partition - release:$(DIST)/release/tests/conformance/1.2/x86/device_partition - ; - -install dist - : test_device_partition - : debug:$(DIST)/debug/tests/conformance/1.2/x86_64/device_partition - release:$(DIST)/release/tests/conformance/1.2/x86_64/device_partition - 64 - ; +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_device_partition + : main.c + test_device_partition.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/genericThread.cpp + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/typeWrappers.cpp + : windows:../../test_common/harness/msvc9.c + ; + +install dist + : test_device_partition + : debug:$(DIST)/debug/tests/conformance/1.2/x86/device_partition + release:$(DIST)/release/tests/conformance/1.2/x86/device_partition + ; + +install dist + : test_device_partition + : debug:$(DIST)/debug/tests/conformance/1.2/x86_64/device_partition + release:$(DIST)/release/tests/conformance/1.2/x86_64/device_partition + 64 + ; diff --git a/test_conformance/device_timer/main.c b/test_conformance/device_timer/main.c index c3cb2611..7ddeadad 100644 --- a/test_conformance/device_timer/main.c +++ b/test_conformance/device_timer/main.c @@ -32,8 +32,7 @@ basefn basefn_list[] = { const char *basefn_names[] = { "test_timer_resolution_queries", - "test_device_and_host_timers", - "all" + "test_device_and_host_timers" }; size_t num_fns = sizeof(basefn_names)/sizeof(basefn_names[0]); diff --git a/test_conformance/geometrics/test_geometrics_double.cpp b/test_conformance/geometrics/test_geometrics_double.cpp index 2d258d5d..34bd1933 100644 --- a/test_conformance/geometrics/test_geometrics_double.cpp +++ b/test_conformance/geometrics/test_geometrics_double.cpp @@ -203,7 +203,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command return -1; /* Generate some streams. Note: deliberately do some random data in w to verify that it gets ignored */ - for( i = 0; i < TEST_SIZE * vecsize; i++ ) + for( i = 0; i < size * vecsize; i++ ) { inDataA[ i ] = get_random_double( -512.f, 512.f, d ); inDataB[ i ] = get_random_double( -512.f, 512.f, d ); @@ -237,7 +237,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command } /* Run the kernel */ - threads[0] = TEST_SIZE; + threads[0] = size; error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); test_error( error, "Unable to get work group size to use" ); diff --git a/test_conformance/gl/test_image_methods.cpp b/test_conformance/gl/test_image_methods.cpp index fa0c00a6..0d0e5c7e 100644 --- a/test_conformance/gl/test_image_methods.cpp +++ b/test_conformance/gl/test_image_methods.cpp @@ -34,7 +34,8 @@ typedef struct image_kernel_data cl_int numSamples; }; -static const char *methodTestKernelPattern = +static const char *methodTestKernelPattern = +"%s" "typedef struct {\n" " int width;\n" " int height;\n" @@ -75,6 +76,8 @@ static const char *channelOrderConstLine = " outData->expectedChannelOrder = CLK_%s;\n"; static const char *numSamplesKernelLine = " outData->numSamples = get_image_num_samples( input );\n"; +static const char *enableMSAAKernelLine = +"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"; static int verify(cl_int input, cl_int kernelOutput, const char * description) { @@ -185,6 +188,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma bool doImageChannelOrder = false; bool doImageDim = false; bool doNumSamples = false; + bool doMSAA = false; switch(target) { case GL_TEXTURE_2D: imageType = "image2d_depth_t"; @@ -206,6 +210,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma break; case GL_TEXTURE_2D_MULTISAMPLE: doNumSamples = true; + doMSAA = true; if(format.formattype == GL_DEPTH_COMPONENT) { doImageWidth = true; imageType = "image2d_msaa_depth_t"; @@ -214,6 +219,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma } break; case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + doMSAA = true; if(format.formattype == GL_DEPTH_COMPONENT) { doImageWidth = true; imageType = "image2d_msaa_array_depth_t"; @@ -244,9 +250,11 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma } } - // Create a program to run against - sprintf( programSrc, methodTestKernelPattern, - imageType, + // Create a program to run against + sprintf(programSrc, + methodTestKernelPattern, + ( doMSAA ) ? enableMSAAKernelLine : "", + imageType, ( doArraySize ) ? arraySizeKernelLine : "", ( doImageWidth ) ? imageWidthKernelLine : "", ( doImageHeight ) ? imageHeightKernelLine : "", diff --git a/test_conformance/gl/test_images_read_common.cpp b/test_conformance/gl/test_images_read_common.cpp index 068b9a29..1d0529d5 100644 --- a/test_conformance/gl/test_images_read_common.cpp +++ b/test_conformance/gl/test_images_read_common.cpp @@ -107,6 +107,7 @@ static const char *kernelpattern_image_read_2darray_depth = "}\n"; static const char *kernelpattern_image_multisample_read_2d = +"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n" "__kernel void sample_test( read_only image2d_msaa_t source, sampler_t sampler, __global %s4 *results )\n" "{\n" " int tidX = get_global_id(0);\n" @@ -121,6 +122,7 @@ static const char *kernelpattern_image_multisample_read_2d = "}\n"; static const char *kernelpattern_image_multisample_read_2d_depth = + "#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n" "__kernel void sample_test( read_only image2d_msaa_depth_t source, sampler_t sampler, __global %s *results )\n" "{\n" " int tidX = get_global_id(0);\n" @@ -135,6 +137,7 @@ static const char *kernelpattern_image_multisample_read_2d_depth = "}\n"; static const char *kernelpattern_image_multisample_read_2darray = +"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n" "__kernel void sample_test( read_only image2d_array_msaa_t source, sampler_t sampler, __global %s4 *results )\n" "{\n" " int tidX = get_global_id(0);\n" @@ -151,6 +154,7 @@ static const char *kernelpattern_image_multisample_read_2darray = "}\n"; static const char *kernelpattern_image_multisample_read_2darray_depth = + "#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n" "__kernel void sample_test( read_only image2d_array_msaa_depth_t source, sampler_t sampler, __global %s *results )\n" "{\n" " int tidX = get_global_id(0);\n" diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp index 7b91f585..1345f46f 100644 --- a/test_conformance/gl/test_images_write_common.cpp +++ b/test_conformance/gl/test_images_write_common.cpp @@ -333,8 +333,8 @@ int test_cl_image_write( cl_context context, cl_command_queue queue, get_explicit_type_name( *outType ), suffix, convert); programPtr = kernelSource; - if( create_single_kernel_helper( context, &program, &kernel, 1, - (const char **)&programPtr, "sample_test" ) ) + if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, + (const char **)&programPtr, "sample_test", "-cl-std=CL2.0" ) ) { return -1; } diff --git a/test_conformance/gles/main.cpp b/test_conformance/gles/main.cpp index e7aeb938..92d71238 100644 --- a/test_conformance/gles/main.cpp +++ b/test_conformance/gles/main.cpp @@ -386,17 +386,17 @@ int main(int argc, const char *argv[]) // Intentional falling through cleanup: - // Cleanup EGL - glEnv->terminate_egl_display(); - // Always make sure that OpenCL context is released properly when the test exit if(sCurrentContext) { clReleaseContext( sCurrentContext ); sCurrentContext = NULL; } + + // Cleanup EGL + glEnv->terminate_egl_display(); + delete glEnv; - return error; -} \ No newline at end of file +} diff --git a/test_conformance/half/Test_roundTrip.c b/test_conformance/half/Test_roundTrip.c index 18eb5f9f..0d257b60 100644 --- a/test_conformance/half/Test_roundTrip.c +++ b/test_conformance/half/Test_roundTrip.c @@ -162,7 +162,7 @@ int Test_roundTrip( void ) // Figure out how many elements are in a work block size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); - size_t blockCount = (size_t)getBufferSize(gDevice) / elementSize; //elementSize is a power of two + size_t blockCount = (size_t)gBufferSize / elementSize; //elementSize is a power of two uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half size_t stride = blockCount; diff --git a/test_conformance/half/Test_vLoadHalf.c b/test_conformance/half/Test_vLoadHalf.c index 3ca8a203..d067fcdc 100644 --- a/test_conformance/half/Test_vLoadHalf.c +++ b/test_conformance/half/Test_vLoadHalf.c @@ -453,7 +453,7 @@ int Test_vLoadHalf_private( bool aligned ) // Figure out how many elements are in a work block size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); - size_t blockCount = getBufferSize(gDevice) / elementSize; // elementSize is power of 2 + size_t blockCount = gBufferSize / elementSize; // elementSize is power of 2 uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half // we handle 64-bit types a bit differently. @@ -503,7 +503,7 @@ int Test_vLoadHalf_private( bool aligned ) continue; } */ - memset_pattern4( gOut_single, &pattern, getBufferSize(gDevice)); + memset_pattern4( gOut_single, &pattern, gBufferSize); if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_single, CL_TRUE, 0, count * sizeof( float ), gOut_single, 0, NULL, NULL)) ) { vlog_error( "Failure in clWriteArray\n" ); diff --git a/test_conformance/half/Test_vStoreHalf.c b/test_conformance/half/Test_vStoreHalf.c index a3117bcb..4b9193d4 100644 --- a/test_conformance/half/Test_vStoreHalf.c +++ b/test_conformance/half/Test_vStoreHalf.c @@ -1044,7 +1044,7 @@ int Test_vStoreHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const c size_t stride = blockCount; if (gWimpyMode) - stride = 0x10000000U; + stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor; // we handle 64-bit types a bit differently. if( lastCase == 0 ) @@ -1654,7 +1654,7 @@ int Test_vStoreaHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const size_t stride = blockCount; if (gWimpyMode) - stride = 0x10000000U; + stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor; // we handle 64-bit types a bit differently. if( lastCase == 0 ) diff --git a/test_conformance/half/cl_utils.c b/test_conformance/half/cl_utils.c index 939cddf3..74408678 100644 --- a/test_conformance/half/cl_utils.c +++ b/test_conformance/half/cl_utils.c @@ -61,9 +61,11 @@ size_t gWorkGroupSize = 0; int gTestCount = 0; int gFailCount = 0; bool gWimpyMode = false; +int gWimpyReductionFactor = 512; int gTestDouble = 0; uint32_t gDeviceIndex = 0; int gIsEmbedded = 0; +size_t gBufferSize = 0; #if defined( __APPLE__ ) int gReportTimes = 1; @@ -184,17 +186,19 @@ int InitCL( void ) #if defined( __APPLE__ ) // FIXME: use clProtectedArray #endif + gBufferSize = getBufferSize(gDevice); + //Allocate buffers - gIn_half = malloc( getBufferSize(gDevice)/2 ); + gIn_half = malloc( gBufferSize/2 ); gOut_half = malloc( BUFFER_SIZE/2 ); gOut_half_reference = malloc( BUFFER_SIZE/2 ); gOut_half_reference_double = malloc( BUFFER_SIZE/2 ); gIn_single = malloc( BUFFER_SIZE ); - gOut_single = malloc( getBufferSize(gDevice) ); - gOut_single_reference = malloc( getBufferSize(gDevice) ); + gOut_single = malloc( gBufferSize ); + gOut_single_reference = malloc( gBufferSize ); gIn_double = malloc( 2*BUFFER_SIZE ); - // gOut_double = malloc( (2*getBufferSize(gDevice)) ); - // gOut_double_reference = malloc( (2*getBufferSize(gDevice)) ); + // gOut_double = malloc( (2*gBufferSize) ); + // gOut_double_reference = malloc( (2*gBufferSize) ); if ( NULL == gIn_half || NULL == gOut_half || @@ -207,7 +211,7 @@ int InitCL( void ) ) return -3; - gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, getBufferSize(gDevice) / 2, NULL, &error); + gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, gBufferSize / 2, NULL, &error); if( gInBuffer_half == NULL ) { vlog_error( "clCreateArray failed for input (%d)\n", error ); @@ -235,7 +239,7 @@ int InitCL( void ) return -5; } - gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, getBufferSize(gDevice), NULL, &error ); + gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, gBufferSize, NULL, &error ); if( gOutBuffer_single == NULL ) { vlog_error( "clCreateArray failed for output (%d)\n", error ); @@ -243,7 +247,7 @@ int InitCL( void ) } #if 0 - gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*getBufferSize(gDevice)), NULL, &error ); + gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*gBufferSize), NULL, &error ); if( gOutBuffer_double == NULL ) { vlog_error( "clCreateArray failed for output (%d)\n", error ); @@ -317,6 +321,15 @@ void ReleaseCL(void) // clReleaseMemObject(gOutBuffer_double); clReleaseCommandQueue(gQueue); clReleaseContext(gContext); + + free(gIn_half); + free(gOut_half); + free(gOut_half_reference); + free(gOut_half_reference_double); + free(gIn_single); + free(gOut_single); + free(gOut_single_reference); + free(gIn_double); } cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned) { @@ -453,19 +466,30 @@ size_t getBufferSize(cl_device_id device_id) if(s_initialized == 0 || s_device_id != device_id) { - cl_ulong result; + cl_ulong result, maxGlobalSize; cl_int err = clGetDeviceInfo (device_id, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(result), (void *)&result, NULL); if(err) { - vlog_error("clGetDeviceInfo() failed\n"); + vlog_error("clGetDeviceInfo(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) failed\n"); s_result = 64*1024; goto exit; } - result = result / 2; log_info("Const buffer size is %llx (%llu)\n", result, result); + err = clGetDeviceInfo (device_id, + CL_DEVICE_GLOBAL_MEM_SIZE, + sizeof(maxGlobalSize), (void *)&maxGlobalSize, + NULL); + if(err) + { + vlog_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed\n"); + goto exit; + } + result = result / 2; + if(maxGlobalSize < result * 10) + result = result / 10; s_initialized = 1; s_device_id = device_id; s_result = result; diff --git a/test_conformance/half/cl_utils.h b/test_conformance/half/cl_utils.h index 9a720efd..cba58072 100644 --- a/test_conformance/half/cl_utils.h +++ b/test_conformance/half/cl_utils.h @@ -73,11 +73,13 @@ extern int gFailCount; extern int gTestDouble; extern int gReportTimes; extern int gIsEmbedded; +extern size_t gBufferSize; // gWimpyMode indicates if we run the test in wimpy mode where we limit the // size of 32 bit ranges to a much smaller set. This is meant to be used // as a smoke test extern bool gWimpyMode; +extern int gWimpyReductionFactor; uint64_t ReadTime( void ); double SubtractTime( uint64_t endTime, uint64_t startTime ); diff --git a/test_conformance/half/main.c b/test_conformance/half/main.c index fe3b6f17..718153b1 100644 --- a/test_conformance/half/main.c +++ b/test_conformance/half/main.c @@ -236,7 +236,27 @@ static int ParseArgs( int argc, const char **argv ) case 'w': // Wimpy mode gWimpyMode = true; break; - + case '[': + // wimpy reduction factor can be set with the option -[2^n] + // Default factor is 512, and n practically can be from 1 to 12 + { + const char *arg_temp = strchr(&arg[1], ']'); + if (arg_temp != 0) + { + int new_factor = atoi(&arg[1]); + arg = arg_temp; // Advance until ']' + if (new_factor && !(new_factor & (new_factor - 1))) + { + vlog(" WimpyReduction factor changed from %d to %d \n", gWimpyReductionFactor, new_factor); + gWimpyReductionFactor = new_factor; + } + else + { + vlog(" Error in WimpyReduction factor, must be power of 2 \n"); + } + } + } + break; default: vlog_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); PrintUsage(); @@ -278,6 +298,7 @@ static int ParseArgs( int argc, const char **argv ) vlog( "*** WARNING: Testing in Wimpy mode! ***\n" ); vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" ); + vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor); } return 0; } @@ -288,6 +309,7 @@ static void PrintUsage( void ) vlog( "\t\t-d\tToggle double precision testing (default: on if double supported)\n" ); vlog( "\t\t-t\tToggle reporting performance data.\n" ); vlog( "\t\t-w\tRun in wimpy mode\n" ); + vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor); vlog( "\t\t-h\tHelp\n" ); vlog( "\n" ); } @@ -307,6 +329,8 @@ static void PrintArch( void ) vlog( "ARCH:\tx86_64\n" ); #elif defined( __arm__ ) vlog( "ARCH:\tarm\n" ); +#elif defined( __aarch64__ ) + vlog( "\tARCH:\taarch64\n" ); #else #error unknown arch #endif diff --git a/test_conformance/headers/CMakeLists.txt b/test_conformance/headers/CMakeLists.txt index eff8e286..4e7a1d86 100644 --- a/test_conformance/headers/CMakeLists.txt +++ b/test_conformance/headers/CMakeLists.txt @@ -19,6 +19,8 @@ set(CL_H_SOURCES test_cl.h.c ) +set_source_files_properties(${CL_H_SOURCES} PROPERTIES LANGUAGE CXX) + set(CL_H_OUT ${CONFORMANCE_PREFIX}cl_h${CONFORMANCE_SUFFIX}) add_executable( @@ -36,6 +38,8 @@ set(CL_PLATFORM_H_SOURCES test_cl_platform.h.c ) +set_source_files_properties(${CL_PLATFORM_H_SOURCES} PROPERTIES LANGUAGE CXX) + set(CL_PLATFORM_H_OUT ${CONFORMANCE_PREFIX}cl_platform_h${CONFORMANCE_SUFFIX}) add_executable( @@ -53,6 +57,8 @@ set(CL_GL_H_SOURCES test_cl_gl.h.c ) +set_source_files_properties(${CL_GL_H_SOURCES} PROPERTIES LANGUAGE CXX) + set(CL_GL_H_OUT ${CONFORMANCE_PREFIX}cl_gl_h${CONFORMANCE_SUFFIX}) add_executable( @@ -70,6 +76,8 @@ set(OPENCL_H_SOURCES test_opencl.h.c ) +set_source_files_properties(${OPENCL_H_SOURCES} PROPERTIES LANGUAGE CXX) + set(OPENCL_H_OUT ${CONFORMANCE_PREFIX}opencl_h${CONFORMANCE_SUFFIX}) add_executable( @@ -83,5 +91,4 @@ TARGET_LINK_LIBRARIES(${OPENCL_H_OUT} ${CLConform_LIBRARIES}) ######################################################################################## - # end of file # diff --git a/test_conformance/math_brute_force/binary.c b/test_conformance/math_brute_force/binary.c index 5800aaf5..042076ec 100644 --- a/test_conformance/math_brute_force/binary.c +++ b/test_conformance/math_brute_force/binary.c @@ -233,6 +233,7 @@ typedef struct TestInfo cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] ThreadInfo *tinfo; // An array of thread specific information for each worker thread cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps @@ -268,6 +269,16 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter) test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -341,7 +352,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter) // Run the kernels if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) @@ -991,6 +1002,16 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; @@ -1063,7 +1084,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) diff --git a/test_conformance/math_brute_force/binaryOperator.c b/test_conformance/math_brute_force/binaryOperator.c index 718afd3d..0368aafc 100644 --- a/test_conformance/math_brute_force/binaryOperator.c +++ b/test_conformance/math_brute_force/binaryOperator.c @@ -207,6 +207,7 @@ typedef struct TestInfo cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] ThreadInfo *tinfo; // An array of thread specific information for each worker thread cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps @@ -260,6 +261,16 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d) } test_info.step = test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -329,7 +340,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) @@ -501,63 +512,51 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount; int indx = (totalSpecialValueCount - 1) / buffer_elements; - if( job_id <= (cl_uint)indx ) - { // test edge cases - float *fp = (float *)p; - float *fp2 = (float *)p2; + + if( job_id <= (cl_uint)indx ) { + // Insert special values uint32_t x, y; - x = (job_id * buffer_elements) % specialValuesFloatCount; - y = (job_id * buffer_elements) / specialValuesFloatCount; + x = (job_id * buffer_elements) % specialValuesFloatCount; + y = (job_id * buffer_elements) / specialValuesFloatCount; - for( ; j < buffer_elements; j++ ) - { - fp[j] = specialValuesFloat[x]; - fp2[j] = specialValuesFloat[y]; - if( ++x >= specialValuesFloatCount ) - { + for( ; j < buffer_elements; j++ ) { + p[j] = ((cl_uint *)specialValuesFloat)[x]; + p2[j] = ((cl_uint *)specialValuesFloat)[y]; + ++x; + if (x >= specialValuesFloatCount) { x = 0; y++; - if( y >= specialValuesFloatCount ) + if (y >= specialValuesFloatCount) break; } - if(gTestFastRelaxed && strcmp(name,"divide") == 0 ) - { - float fpj = *(float*)&fp[j]; - float fpj2 = *(float*)&fp2[j]; - if(fabs(fpj) > 0x5E800000 ) //[2^-62,2^62] - { - fp[j] = NAN; - } - if( fabs(fpj2) > 0x5E800000 ) //[2^-62,2^62] - { - fp2[j] = NAN; - } + if (gTestFastRelaxed && strcmp(name,"divide") == 0) { + cl_uint pj = p[j] & 0x7fffffff; + cl_uint p2j = p2[j] & 0x7fffffff; + // Replace values outside [2^-62, 2^62] with QNaN + if (pj < 0x20800000 || pj > 0x5e800000) + p[j] = 0x7fc00000; + if (p2j < 0x20800000 || p2j > 0x5e800000) + p2[j] = 0x7fc00000; + } } } - } - //Init any remaining values. + // Init any remaining values. for( ; j < buffer_elements; j++ ) { p[j] = genrand_int32(d); p2[j] = genrand_int32(d); - if(gTestFastRelaxed) - { - if( strcmp(name,"divide")==0){ - float pj = *(float*)&p[j]; - float pj2 = *(float*)&p2[j]; - if(fabs(pj) > 0x5E800000 ) //[2^-62,2^62] - { - p[j] = NAN; - } - if( fabs(pj2) > 0x5E800000 ) //[2^-62,2^62] - { - p2[j] = NAN; - } - } - } + if (gTestFastRelaxed && strcmp(name,"divide") == 0) { + cl_uint pj = p[j] & 0x7fffffff; + cl_uint p2j = p2[j] & 0x7fffffff; + // Replace values outside [2^-62, 2^62] with QNaN + if (pj < 0x20800000 || pj > 0x5e800000) + p[j] = 0x7fc00000; + if (p2j < 0x20800000 || p2j > 0x5e800000) + p2[j] = 0x7fc00000; + } } if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) @@ -950,6 +949,16 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d) } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; @@ -1020,7 +1029,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) diff --git a/test_conformance/math_brute_force/binary_i.c b/test_conformance/math_brute_force/binary_i.c index b72d117f..5cea1bc6 100644 --- a/test_conformance/math_brute_force/binary_i.c +++ b/test_conformance/math_brute_force/binary_i.c @@ -230,6 +230,7 @@ typedef struct TestInfo cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] ThreadInfo *tinfo; // An array of thread specific information for each worker thread cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps @@ -262,6 +263,16 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d) test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -330,7 +341,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d) } // Run the kernels - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors @@ -758,6 +769,16 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d) test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; @@ -831,7 +852,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d) // Run the kernels if( !gSkipCorrectnessTesting ) - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors diff --git a/test_conformance/math_brute_force/macro_binary.c b/test_conformance/math_brute_force/macro_binary.c index e489c598..b65948ee 100644 --- a/test_conformance/math_brute_force/macro_binary.c +++ b/test_conformance/math_brute_force/macro_binary.c @@ -222,6 +222,7 @@ typedef struct TestInfo cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] ThreadInfo *tinfo; // An array of thread specific information for each worker thread cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode @@ -249,6 +250,16 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d) test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -319,7 +330,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d) // Run the kernels if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); if( error ) goto exit; @@ -749,6 +760,16 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d) } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ftz = f->ftz || gForceFTZ; @@ -820,7 +841,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); if( error ) goto exit; diff --git a/test_conformance/math_brute_force/macro_unary.c b/test_conformance/math_brute_force/macro_unary.c index 74e1dece..95af882a 100644 --- a/test_conformance/math_brute_force/macro_unary.c +++ b/test_conformance/math_brute_force/macro_unary.c @@ -193,6 +193,7 @@ typedef struct TestInfo cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] ThreadInfo *tinfo; // An array of thread specific information for each worker thread cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode @@ -220,6 +221,16 @@ int TestMacro_Int_Float(const Func *f, MTdata d) test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); // cl_kernels aren't thread safe, so we make one for each vector size for every thread @@ -279,7 +290,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); if( error ) goto exit; @@ -602,6 +613,16 @@ int TestMacro_Int_Double(const Func *f, MTdata d) } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ftz = f->ftz || gForceFTZ; @@ -664,7 +685,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); if( error ) goto exit; diff --git a/test_conformance/math_brute_force/main.c b/test_conformance/math_brute_force/main.c index 147e3af2..1cb87b4f 100644 --- a/test_conformance/math_brute_force/main.c +++ b/test_conformance/math_brute_force/main.c @@ -699,6 +699,8 @@ static void PrintArch( void ) vlog( "\tARCH:\tx86_64\n" ); #elif defined( __arm__ ) vlog( "\tARCH:\tarm\n" ); + #elif defined( __aarch64__ ) + vlog( "\tARCH:\taarch64\n" ); #else vlog( "\tARCH:\tunknown\n" ); #endif diff --git a/test_conformance/math_brute_force/reference_math.c b/test_conformance/math_brute_force/reference_math.c index 791375bc..6e8f79aa 100644 --- a/test_conformance/math_brute_force/reference_math.c +++ b/test_conformance/math_brute_force/reference_math.c @@ -1790,7 +1790,7 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000 // *signgamp = 1; ix = hx&0x7fffffff; if(ix>=0x7ff00000) return x*x; - if((ix|lx)==0) return (double)(one)/(double)(zero); + if((ix|lx)==0) return INFINITY; if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */ if(hx<0) { // *signgamp = -1; @@ -1799,9 +1799,9 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000 } if(hx<0) { if(ix>=0x43300000) /* |x|>=2**52, must be -integer */ - return (double)(one)/(double)(zero); + return INFINITY; t = reference_sinpi(x); - if(t==zero) (double)(one)/(double)(zero); /* -integer */ + if(t==zero) return INFINITY; /* -integer */ nadj = reference_log(pi/reference_fabs(t*x)); // if(tfloat_embedded_ulps : f->float_ulps; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -309,7 +320,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d) if( !gSkipCorrectnessTesting || skipTestingRelaxed) { - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) @@ -997,7 +1008,16 @@ int TestFunc_Double_Double(const Func *f, MTdata d) test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; } - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } test_info.f = f; test_info.ulps = f->double_ulps; @@ -1062,7 +1082,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp index 381d29bc..58478a1e 100644 --- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp @@ -605,6 +605,28 @@ void TestNonUniformWorkGroup::showTestInfo () { } } +size_t TestNonUniformWorkGroup::adjustLocalArraySize (size_t localArraySize) { + // In case if localArraySize is too big, sometimes we can not run kernel because of lack + // of resources due to kernel itself requires some local memory to run + int err; + + cl_ulong kernelLocalMemSize = 0; + err = clGetKernelWorkGroupInfo(_testKernel, _device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernelLocalMemSize), &kernelLocalMemSize, NULL); + test_error(err, "clGetKernelWorkGroupInfo failed"); + + cl_ulong deviceLocalMemSize = 0; + err = clGetDeviceInfo(_device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(deviceLocalMemSize), &deviceLocalMemSize, NULL); + test_error(err, "clGetDeviceInfo failed"); + + if (kernelLocalMemSize + localArraySize > deviceLocalMemSize) { + size_t adjustedLocalArraySize = deviceLocalMemSize - kernelLocalMemSize; + log_info("localArraySize was adjusted from %lu to %lu\n", localArraySize, adjustedLocalArraySize); + localArraySize = adjustedLocalArraySize; + } + + return localArraySize; +} + int TestNonUniformWorkGroup::runKernel () { int err; @@ -622,7 +644,8 @@ int TestNonUniformWorkGroup::runKernel () { test_error(err, "clSetKernelArg failed"); //creating local buffer - err = clSetKernelArg(_testKernel, 1, localArraySize*sizeof(unsigned int), NULL); + localArraySize = adjustLocalArraySize(localArraySize*sizeof(unsigned int)); + err = clSetKernelArg(_testKernel, 1, localArraySize, NULL); test_error(err, "clSetKernelArg failed"); clMemWrapper testGlobalArray = clCreateBuffer(_context, CL_MEM_READ_WRITE, _numOfGlobalWorkItems*sizeof(cl_uint), NULL, &err); diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h index e7f261b5..a077296d 100644 --- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h @@ -116,6 +116,7 @@ private: void verifyData (DataContainerAttrib * reference, DataContainerAttrib * results, short regionNumber); void calculateExpectedValues (); void showTestInfo (); + size_t adjustLocalArraySize(size_t localArraySize); }; // Class responsible for running subtest scenarios in test function diff --git a/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv b/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv index ee7ab99e..2415d745 100644 --- a/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv +++ b/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv @@ -93,6 +93,12 @@ Pipes,pipes/test_pipes SVM,SVM/test_SVM Workgroups,workgroups/test_workgroups +##################################### +# OpenCL 2.1 tests +##################################### +Device timer,device_timer/test_device_timer +SPIRV new,spirv_new/test_spirv_new -ILPath spirv_bin + ######################################### # Extensions ######################################### diff --git a/test_conformance/printf/test_printf.c b/test_conformance/printf/test_printf.c index fcb6d69e..15e6d41d 100644 --- a/test_conformance/printf/test_printf.c +++ b/test_conformance/printf/test_printf.c @@ -17,6 +17,7 @@ #include #include +#include #if ! defined( _WIN32) #if ! defined( __ANDROID__ ) @@ -304,9 +305,6 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont //----------------------------------------- static bool isLongSupported(cl_device_id device_id) { - //profile type && device extention for long support checking - char *profileType = NULL,*devExt = NULL; - size_t tempSize = 0; cl_int status; bool extSupport = true; @@ -325,7 +323,7 @@ static bool isLongSupported(cl_device_id device_id) return false; } - profileType = new char[tempSize]; + std::unique_ptr profileType(new char[tempSize]); if(profileType == NULL) { log_error("Failed to allocate memory(profileType)"); @@ -336,11 +334,11 @@ static bool isLongSupported(cl_device_id device_id) device_id, CL_DEVICE_PROFILE, sizeof(char) * tempSize, - profileType, + profileType.get(), NULL); - if(!strcmp("EMBEDDED_PROFILE",profileType)) + if(!strcmp("EMBEDDED_PROFILE",profileType.get())) { // Device extention status = clGetDeviceInfo( @@ -356,7 +354,7 @@ static bool isLongSupported(cl_device_id device_id) return false; } - devExt = new char[tempSize]; + std::unique_ptr devExt(new char[tempSize]); if(devExt == NULL) { log_error("Failed to allocate memory(devExt)"); @@ -367,13 +365,10 @@ static bool isLongSupported(cl_device_id device_id) device_id, CL_DEVICE_EXTENSIONS, sizeof(char) * tempSize, - devExt, + devExt.get(), NULL); - extSupport = (strstr(devExt,"cles_khr_int64") != NULL); - - delete devExt; - delete profileType; + extSupport = (strstr(devExt.get(),"cles_khr_int64") != NULL); } return extSupport; } @@ -460,10 +455,12 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int } } + int fd = acquireOutputStream(); globalWorkSize[0] = 1; cl_event ndrEvt; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL,&ndrEvt); if (err != CL_SUCCESS) { + releaseOutputStream(fd); log_error("\n clEnqueueNDRangeKernel failed errcode:%d\n", err); ++s_test_fail; goto exit; @@ -473,6 +470,7 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int err = clFlush(queue); if(err != CL_SUCCESS) { + releaseOutputStream(fd); log_error("clFlush failed\n"); goto exit; } @@ -480,6 +478,8 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int //is immidatly printed err = waitForEvent(&ndrEvt); + releaseOutputStream(fd); + if(err != CL_SUCCESS) { log_error("waitforEvent failed\n"); @@ -553,6 +553,8 @@ static void printArch( void ) log_info( "ARCH:\tx86_64\n" ); #elif defined( __arm__ ) log_info( "ARCH:\tarm\n" ); +#elif defined( __aarch64__ ) + log_info( "ARCH:\taarch64\n" ); #else #error unknown arch #endif @@ -669,7 +671,6 @@ int main(int argc, const char* argv[]) { int err; - int fd = acquireOutputStream(); // Get platform err = clGetPlatformIDs(1, &platform_id, NULL); @@ -695,8 +696,6 @@ int main(int argc, const char* argv[]) { if((err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, config_size, &device_frequency, NULL ))) device_frequency = 1; - releaseOutputStream(fd); - log_info( "\nCompute Device info:\n" ); log_info( "\tProcessing with %d devices\n", compute_devices ); log_info( "\tDevice Frequency: %d MHz\n", device_frequency ); @@ -716,8 +715,6 @@ int main(int argc, const char* argv[]) { log_info( "Test binary built %s %s\n", __DATE__, __TIME__ ); - fd = acquireOutputStream(); - cl_context context = clCreateContext(NULL, 1, &device_id, notify_callback, NULL, NULL); checkNull(context, "clCreateContext"); @@ -727,17 +724,13 @@ int main(int argc, const char* argv[]) { // Forall types for (int testId = 0; testId < TYPE_COUNT; ++testId) { if (test_filter_num && (testId != test_filter_num)) { - releaseOutputStream(fd); log_info("\n*** Skipping printf for %s ***\n",strType[testId]); - fd = acquireOutputStream(); } else { - releaseOutputStream(fd); log_info("\n*** Testing printf for %s ***\n",strType[testId]); - fd = acquireOutputStream(); + //For all formats for(unsigned int testNum = 0;testNum < allTestCase[testId]->_testNum;++testNum){ - releaseOutputStream(fd); if(allTestCase[testId]->_type == TYPE_VECTOR) log_info("%d)testing printf(\"%sv%s%s\",%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].vectorFormatFlag,allTestCase[testId]->_genParameters[testNum].vectorSize, allTestCase[testId]->_genParameters[testNum].vectorFormatSpecifier,allTestCase[testId]->_genParameters[testNum].dataRepresentation); @@ -752,7 +745,6 @@ int main(int argc, const char* argv[]) { } else log_info("%d)testing printf(\"%s\",%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].genericFormat,allTestCase[testId]->_genParameters[testNum].dataRepresentation); - fd = acquireOutputStream(); // Long support for varible type if(allTestCase[testId]->_type == TYPE_VECTOR && !strcmp(allTestCase[testId]->_genParameters[testNum].dataType,"long") && !isLongSupported(device_id)) @@ -766,15 +758,11 @@ int main(int argc, const char* argv[]) { // Perform the test if (doTest(queue, context,testId,testNum,device_id,isLongSupport) != 0) { - releaseOutputStream(fd); log_error("*** FAILED ***\n\n"); - fd = acquireOutputStream(); } else { - releaseOutputStream(fd); log_info("Passed\n"); - fd = acquireOutputStream(); } } } @@ -790,8 +778,6 @@ int main(int argc, const char* argv[]) { if(clReleaseContext(context)!= CL_SUCCESS) log_error("clReleaseContext\n"); - releaseOutputStream(fd); - if (s_test_fail == 0) { if (s_test_cnt > 1) diff --git a/test_conformance/printf/util_printf.c b/test_conformance/printf/util_printf.c index 5bd9be1a..842a0f74 100644 --- a/test_conformance/printf/util_printf.c +++ b/test_conformance/printf/util_printf.c @@ -212,8 +212,6 @@ struct printDataGenParameters printFloatGenParameters[] = { {"%+#21.15E","789456123.0"}, -#if ! defined( __ANDROID__ ) - //Double argument representing floating-point,in [-]xh.hhhhpAd style {"%.6a","0.1"}, @@ -222,8 +220,6 @@ struct printDataGenParameters printFloatGenParameters[] = { {"%10.2a","9990.235"}, -#endif - //Infinity (1.0/0.0) {"%f","1.0f/0.0f"}, @@ -275,14 +271,10 @@ const char* correctBufferFloat[] = { "+7.894561230000000E+8", -#if ! defined( __ANDROID__ ) - "0x1.99999ap-4", "0x1.38p+13", -#endif - "inf", "-nan", @@ -621,11 +613,6 @@ struct printDataGenParameters printStringGenParameters[] = { //%% specification {"%s","\"%%\""}, - - //null string - - {"%s","(void*)0"} - }; //--------------------------------------------------------- @@ -641,9 +628,6 @@ const char * correctBufferString[] = { "f", "%%", - - "(null)" - }; //--------------------------------------------------------- @@ -887,7 +871,7 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId if(!strcmp(pTestCase->_correctBuffer[testId],"inf")) return strcmp(analysisBuffer,"inf")&&strcmp(analysisBuffer,"infinity")&&strcmp(analysisBuffer,"1.#INF00")&&strcmp(analysisBuffer,"Inf"); if(!strcmp(pTestCase->_correctBuffer[testId],"nan") || !strcmp(pTestCase->_correctBuffer[testId],"-nan")) { - return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)"); + return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)")&&strcmp(analysisBuffer,"-nan(ind)"); } return strcmp(analysisBuffer,pTestCase->_correctBuffer[testId]); } diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py old mode 100644 new mode 100755 diff --git a/test_conformance/select/test_select.c b/test_conformance/select/test_select.c index 5eec6a3d..4b9f5f8a 100644 --- a/test_conformance/select/test_select.c +++ b/test_conformance/select/test_select.c @@ -72,6 +72,7 @@ static int doTest(cl_command_queue queue, cl_context context, // range. Otherwise, we test a subset of the range // [-min_short, min_short] static bool s_wimpy_mode = false; +static int s_wimpy_reduction_factor = 256; // Tests are broken into the major test which is based on the // src and cmp type and their corresponding vector types and @@ -304,7 +305,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; size_t block_elements = BUFFER_SIZE / type_size[stype]; - size_t step = s_wimpy_mode ? 256 : 1; + size_t step = s_wimpy_mode ? s_wimpy_reduction_factor : 1; cl_ulong cmp_stride = block_elements * step; // It is more efficient to create the tests all at once since we @@ -471,6 +472,7 @@ static void printUsage( void ) log_info("test_select: [-cghw] [test_name|start_test_num] \n"); log_info(" default is to run the full test on the default device\n"); log_info(" -w run in wimpy mode (smoke test)\n"); + log_info(" -[2^n] Set wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", s_wimpy_reduction_factor); log_info(" test_name will run only one test of that name\n"); log_info(" start_test_num will start running from that num\n"); } @@ -491,6 +493,8 @@ static void printArch( void ) log_info( "ARCH:\tx86_64\n" ); #elif defined( __arm__ ) log_info( "ARCH:\tarm\n" ); +#elif defined( __aarch64__ ) + log_info( "ARCH:\taarch64\n" ); #else #error unknown arch #endif @@ -582,6 +586,27 @@ int main(int argc, const char* argv[]) { case 'w': // Wimpy mode s_wimpy_mode = true; break; + case '[': + // wimpy reduction factor can be set with the option -[2^n] + // Default factor is 256, and n practically can be from 1 to 12 + { + const char *arg_temp = strchr(&arg[1], ']'); + if (arg_temp != 0) + { + int new_factor = atoi(&arg[1]); + arg = arg_temp; // Advance until ']' + if (new_factor && !(new_factor & (new_factor - 1))) + { + vlog(" WimpyReduction factor changed from %d to %d \n", s_wimpy_reduction_factor, new_factor); + s_wimpy_reduction_factor = new_factor; + } + else + { + vlog(" Error in WimpyReduction factor must be power of 2 \n"); + } + } + } + break; default: log_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); printUsage(); @@ -659,6 +684,7 @@ int main(int argc, const char* argv[]) { log_info("*** WARNING: Testing in Wimpy mode! ***\n"); log_info("*** Wimpy mode is not sufficient to verify correctness. ***\n"); log_info("*** It gives warm fuzzy feelings and then nevers calls. ***\n\n"); + log_info("*** Wimpy Reduction Factor: %-27u ***\n\n", s_wimpy_reduction_factor); } cl_context context = clCreateContext(NULL, 1, &device_id, notify_callback, NULL, NULL); diff --git a/test_conformance/spir/basic.zip b/test_conformance/spir/basic.zip index d60636fb..14c7a3f4 100644 Binary files a/test_conformance/spir/basic.zip and b/test_conformance/spir/basic.zip differ diff --git a/test_conformance/spir/half.zip b/test_conformance/spir/half.zip index f0d3e8ff..0b1deca1 100644 Binary files a/test_conformance/spir/half.zip and b/test_conformance/spir/half.zip differ diff --git a/test_conformance/spir/images_kernel_read_write.zip b/test_conformance/spir/images_kernel_read_write.zip index 65fa8728..dc701137 100644 Binary files a/test_conformance/spir/images_kernel_read_write.zip and b/test_conformance/spir/images_kernel_read_write.zip differ diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp index b0fba6fc..95fcd8bf 100644 --- a/test_conformance/spir/main.cpp +++ b/test_conformance/spir/main.cpp @@ -840,7 +840,6 @@ bool test_basic (cl_device_id device, cl_uint size_t_width, const char *folder) "test_sizeof.sizeof_uintptr_t", "test_sizeof.sizeof_image2d_t", "test_sizeof.sizeof_image3d_t", - "test_sizeof.sizeof_sampler_t", "test_sizeof.sizeof_double", "test_sizeof.sizeof_double2", "test_sizeof.sizeof_double4", diff --git a/test_conformance/spir/profiling.zip b/test_conformance/spir/profiling.zip index 33ac3396..c792bbec 100644 Binary files a/test_conformance/spir/profiling.zip and b/test_conformance/spir/profiling.zip differ diff --git a/test_conformance/spir/typeinfo.h b/test_conformance/spir/typeinfo.h index ea1f6b66..a9dc7124 100644 --- a/test_conformance/spir/typeinfo.h +++ b/test_conformance/spir/typeinfo.h @@ -93,6 +93,42 @@ TYPE_HNDL("image3d_int", false, 0, 1, 0x0, TYPE_HNDL("image3d_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage3d) TYPE_HNDL("image_kernel_data", false, 0, 1, 0x0, 0xffffffff, KernelStructTypeArgGenerator) //image_kernel_data defines as 5 X int TYPE_HNDL("image_kernel_data*", true, 0, 1, 0x0, 0xffffffff, KernelStructTypeArgGenerator) //image_kernel_data defines as 5 X int +TYPE_HNDL("read_only_image1d_array_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dArray) +TYPE_HNDL("read_only_image1d_array_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dArray) +TYPE_HNDL("read_only_image1d_array_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dArray) +TYPE_HNDL("read_only_image1d_buffer_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dBuffer) +TYPE_HNDL("read_only_image1d_buffer_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dBuffer) +TYPE_HNDL("read_only_image1d_buffer_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dBuffer) +TYPE_HNDL("read_only_image1d_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1d) +TYPE_HNDL("read_only_image1d_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1d) +TYPE_HNDL("read_only_image1d_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1d) +TYPE_HNDL("read_only_image2d_array_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2dArray) +TYPE_HNDL("read_only_image2d_array_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2dArray) +TYPE_HNDL("read_only_image2d_array_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2dArray) +TYPE_HNDL("read_only_image2d_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2d) +TYPE_HNDL("read_only_image2d_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2d) +TYPE_HNDL("read_only_image2d_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2d) +TYPE_HNDL("read_only_image3d_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage3d) +TYPE_HNDL("read_only_image3d_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage3d) +TYPE_HNDL("read_only_image3d_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage3d) +TYPE_HNDL("write_only_image1d_array_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dArray) +TYPE_HNDL("write_only_image1d_array_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dArray) +TYPE_HNDL("write_only_image1d_array_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dArray) +TYPE_HNDL("write_only_image1d_buffer_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dBuffer) +TYPE_HNDL("write_only_image1d_buffer_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dBuffer) +TYPE_HNDL("write_only_image1d_buffer_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dBuffer) +TYPE_HNDL("write_only_image1d_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1d) +TYPE_HNDL("write_only_image1d_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1d) +TYPE_HNDL("write_only_image1d_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1d) +TYPE_HNDL("write_only_image2d_array_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2dArray) +TYPE_HNDL("write_only_image2d_array_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2dArray) +TYPE_HNDL("write_only_image2d_array_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2dArray) +TYPE_HNDL("write_only_image2d_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2d) +TYPE_HNDL("write_only_image2d_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2d) +TYPE_HNDL("write_only_image2d_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2d) +TYPE_HNDL("write_only_image3d_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage3d) +TYPE_HNDL("write_only_image3d_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage3d) +TYPE_HNDL("write_only_image3d_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage3d) TYPE_HNDL("int", false, 0, 1, 0x0, 0x7fffffff, KernelArgGeneratorT) TYPE_HNDL("int*", true, 0, 16, 0x0, 0x7fffffff, KernelArgGeneratorT) TYPE_HNDL("int16", false, 0, 16, 0x0, 0x7fffffff, KernelArgGeneratorT) diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp index 5a289126..efb7fe8e 100644 --- a/test_conformance/subgroups/main.cpp +++ b/test_conformance/subgroups/main.cpp @@ -40,15 +40,15 @@ ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_lis static const int num_fns = sizeof(basefn_names) / sizeof(char *); -static int +static test_status checkSubGroupsExtension(cl_device_id device) { if (!is_extension_available(device, "cl_khr_subgroups")) { - log_info("Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); - return CL_INVALID_DEVICE; + log_error("'cl_khr_subgroups' is a required extension, failing.\n"); + return TEST_FAIL; } - return CL_SUCCESS; + return TEST_PASS; } int