diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b975bb5..207b8789 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ set(CLConform_VERSION "${CLConform_VERSION_MAJOR}.${CLConform_VERSION_MINOR}") set(CLConform_VERSION_FULL "${CLConform_VERSION}.${CLConform_VERSION_MICRO}${CLConform_VERSION_EXTRA}") -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.1) add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_0_APIS=1) add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_1_APIS=1) @@ -77,6 +77,11 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__") endif() +# Clang gives C++11 narrowing warnings so surpress these for now +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++11-narrowing") +endif() + list(APPEND CLConform_LIBRARIES ${OPENCL_LIBRARIES}) if(ANDROID) list(APPEND CLConform_LIBRARIES m) diff --git a/build_android.py b/build_android.py old mode 100644 new mode 100755 index 2bd7f5c8..e1b8c1e9 --- a/build_android.py +++ b/build_android.py @@ -108,7 +108,9 @@ def install_android_cmake(): print "input: " if get_input(): print("installing android-cmake") - subprocess.call(['git', 'clone', 'https://github.com/taka-no-me/android-cmake']) + #subprocess.call(['git', 'clone', 'https://github.com/taka-no-me/android-cmake']) + # Use a newer fork of android-cmake which has been updated to support Clang. GCC is deprecated in newer NDKs and C11 atomics conformance doesn't build with NDK > 10. + subprocess.call(['git', 'clone', 'https://github.com/daewoong-jang/android-cmake']) args.android_cmake = os.path.join(args.src_dir,"android-cmake") else: exit() diff --git a/build_lnx.sh b/build_lnx.sh old mode 100644 new mode 100755 diff --git a/clean_tests.py b/clean_tests.py old mode 100644 new mode 100755 diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h index b0d3cf75..23445c8e 100644 --- a/test_common/harness/compat.h +++ b/test_common/harness/compat.h @@ -207,7 +207,10 @@ long double roundl(long double x); int cf_signbit(double x); int cf_signbitf(float x); +// Added in _MSC_VER == 1800 (Visual Studio 2013) +#if _MSC_VER < 1800 static int signbit(double x) { return cf_signbit(x); } +#endif static int signbitf(float x) { return cf_signbitf(x); } long int lrint (double flt); @@ -241,8 +244,11 @@ int32_t float2int (float fx); // stdio.h // -#if defined( _MSC_VER ) - #define snprintf sprintf_s +#if defined(_MSC_VER) + // snprintf added in _MSC_VER == 1900 (Visual Studio 2015) + #if _MSC_VER < 1900 + #define snprintf sprintf_s + #endif #endif diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index 7fe7945c..cdb7028c 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -479,16 +479,6 @@ struct AddressingTable static AddressingTable sAddressingTable; -bool alpha_is_x(cl_image_format *format){ - switch (format->image_channel_order) { - case CL_RGBx: - case CL_sRGBx: - return true; - default: - return false; - } -} - bool is_sRGBA_order(cl_channel_order image_channel_order){ switch (image_channel_order) { case CL_sRGB: @@ -508,19 +498,21 @@ int has_alpha(cl_image_format *format) { case CL_R: return 0; case CL_A: - case CL_Rx: return 1; + case CL_Rx: + return 0; case CL_RG: return 0; case CL_RA: - case CL_RGx: return 1; + case CL_RGx: + return 0; case CL_RGB: case CL_sRGB: return 0; case CL_RGBx: case CL_sRGBx: - return 1; + return 0; case CL_RGBA: return 1; case CL_BGRA: @@ -719,13 +711,6 @@ void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes, } } -int issubnormal(float a) -{ - union { cl_int i; cl_float f; } u; - u.f = a; - return (u.i & 0x7f800000U) == 0; -} - float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) { if (sampler->filter_mode == CL_FILTER_NEAREST) return 0.0f; @@ -1412,16 +1397,9 @@ void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, || ( depth_lod != 0 && z >= (int)depth_lod ) || ( imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize ) ) { - // Border color - if (imageInfo->format->image_channel_order == CL_DEPTH) - { - outData[ 0 ] = 0; - } - else { outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0; if (!has_alpha(imageInfo->format)) - outData[3] = alpha_is_x(imageInfo->format) ? 0 : 1; - } + outData[3] = 1; return; } diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h index 83ff1377..ba80ea77 100644 --- a/test_common/harness/imageHelpers.h +++ b/test_common/harness/imageHelpers.h @@ -136,8 +136,6 @@ extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *d int has_alpha(cl_image_format *format); -extern bool alpha_is_x(cl_image_format *format); - extern bool is_sRGBA_order(cl_channel_order image_channel_order); inline float calculate_array_index( float coord, float extent ); @@ -594,7 +592,6 @@ extern char *create_random_image_data( ExplicitType dataType, image_descriptor * extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine ); extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler); extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter ); -extern int issubnormal(float); #define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) ) diff --git a/test_common/harness/kernelHelpers.c b/test_common/harness/kernelHelpers.c index 77c40571..ce7fd7fe 100644 --- a/test_common/harness/kernelHelpers.c +++ b/test_common/harness/kernelHelpers.c @@ -645,14 +645,14 @@ size_t get_pixel_bytes( const cl_image_format *fmt ) return 0; } -int verifyImageSupport( cl_device_id device ) +test_status verifyImageSupport( cl_device_id device ) { if( checkForImageSupport( device ) ) { log_error( "ERROR: Device does not supported images as required by this test!\n" ); - return CL_IMAGE_FORMAT_NOT_SUPPORTED; + return TEST_FAIL; } - return 0; + return TEST_PASS; } int checkForImageSupport( cl_device_id device ) diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h index 692f17b3..9feaca87 100644 --- a/test_common/harness/kernelHelpers.h +++ b/test_common/harness/kernelHelpers.h @@ -17,6 +17,7 @@ #define _kernelHelpers_h #include "compat.h" +#include "testHarness.h" #include #include @@ -90,8 +91,8 @@ extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl /* Helper to get pixel size for a pixel format */ size_t get_pixel_bytes( const cl_image_format *fmt ); -/* Verify the given device supports images. 0 means you're good to go, otherwise an error */ -extern int verifyImageSupport( cl_device_id device ); +/* Verify the given device supports images. */ +extern test_status verifyImageSupport( cl_device_id device ); /* Checks that the given device supports images. Same as verify, but doesn't print an error */ extern int checkForImageSupport( cl_device_id device ); diff --git a/test_common/harness/msvc9.c b/test_common/harness/msvc9.c index 093bb978..3b168453 100644 --- a/test_common/harness/msvc9.c +++ b/test_common/harness/msvc9.c @@ -566,6 +566,7 @@ long int lrintf (float x) // /////////////////////////////////////////////////////////////////// +#if _MSC_VER < 1900 int fetestexcept(int excepts) { unsigned int status = _statusfp(); @@ -583,6 +584,7 @@ int feclearexcept(int excepts) _clearfp(); return 0; } +#endif #endif // __INTEL_COMPILER diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp new file mode 100644 index 00000000..13e5c05d --- /dev/null +++ b/test_common/harness/parseParameters.cpp @@ -0,0 +1,42 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "parseParameters.h" +#include "errorHelpers.h" +#include + +bool is_power_of_two(int number) +{ + return number && !(number & (number - 1)); +} + +extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor) +{ + const char *arg_temp = strchr(&arg[1], ']'); + if (arg_temp != 0) + { + int new_factor = atoi(&arg[1]); + arg = arg_temp; // Advance until ']' + if (is_power_of_two(new_factor)) + { + log_info("\n Wimpy reduction factor changed from %d to %d \n", wimpyReductionFactor, new_factor); + wimpyReductionFactor = new_factor; + } + else + { + log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be power of 2. The default value will be used.\n", new_factor); + } + } +} diff --git a/test_common/harness/parseParameters.h b/test_common/harness/parseParameters.h new file mode 100644 index 00000000..212a9764 --- /dev/null +++ b/test_common/harness/parseParameters.h @@ -0,0 +1,24 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _parseParameters_h +#define _parseParameters_h + +#include "compat.h" +#include + +extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor); + +#endif // _parseParameters_h diff --git a/test_common/harness/testHarness.c b/test_common/harness/testHarness.c index c1b8bfa0..30358df9 100644 --- a/test_common/harness/testHarness.c +++ b/test_common/harness/testHarness.c @@ -135,7 +135,7 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, log_info( "\tid\t\tIndicates device at index should be used (default 0).\n" ); log_info( "\t\tcpu|gpu|accelerator| (default CL_DEVICE_TYPE_DEFAULT)\n" ); - for( i = 0; i < num_fns - 1; i++ ) + for( i = 0; i < num_fns; i++ ) { log_info( "\t\t%s\n", fnNames[ i ] ); } @@ -431,10 +431,18 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, /* If we have a device checking function, run it */ - if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS ) + if( ( deviceCheckFn != NULL ) ) { - test_finish(); - return -1; + test_status status = deviceCheckFn( device ); + switch (status) + { + case TEST_PASS: + break; + case TEST_FAIL: + return 1; + case TEST_SKIP: + return 0; + } } if (num_elements <= 0) diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h index c2620647..7c1d9cb2 100644 --- a/test_common/harness/testHarness.h +++ b/test_common/harness/testHarness.h @@ -23,6 +23,13 @@ extern "C" { #endif +typedef enum test_status +{ + TEST_PASS = 0, + TEST_FAIL = 1, + TEST_SKIP = 2, +} test_status; + extern cl_uint gReSeed; extern cl_uint gRandomSeed; @@ -32,8 +39,8 @@ extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns, basefn fnList[], const char *fnNames[], int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps ); -// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits. -typedef int (*DeviceCheckFn)( cl_device_id device ); +// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than TEST_PASS, the harness exits. +typedef test_status (*DeviceCheckFn)( cl_device_id device ); // Same as runTestHarness, but also supplies a function that checks the created device for required functionality. extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, diff --git a/test_conformance/SVM/CMakeLists.txt b/test_conformance/SVM/CMakeLists.txt index 62a3b62c..694bcc2a 100644 --- a/test_conformance/SVM/CMakeLists.txt +++ b/test_conformance/SVM/CMakeLists.txt @@ -1,3 +1,4 @@ +set(CMAKE_CXX_STANDARD 11) set(MODULE_NAME SVM) set(${MODULE_NAME}_SOURCES diff --git a/test_conformance/SVM/main.cpp b/test_conformance/SVM/main.cpp index 209b1ed2..8ab0c392 100644 --- a/test_conformance/SVM/main.cpp +++ b/test_conformance/SVM/main.cpp @@ -226,7 +226,7 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt } error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL); - test_error(error,"clGetDeviceInfo failed for CL_DEVICE_MEM_SHARING"); + test_error(error,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS))) { log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value"); diff --git a/test_conformance/SVM/test_allocate_shared_buffer.cpp b/test_conformance/SVM/test_allocate_shared_buffer.cpp index 7d555c8d..14262cb7 100644 --- a/test_conformance/SVM/test_allocate_shared_buffer.cpp +++ b/test_conformance/SVM/test_allocate_shared_buffer.cpp @@ -97,6 +97,10 @@ int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_c log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc"); return -1; } + err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, NULL, NULL); + test_error(err, "clEnqueueUnmapMemObject failed"); + err = clFinish(queues[0]); + test_error(err, "clFinish failed"); } } diff --git a/test_conformance/SVM/test_enqueue_api.cpp b/test_conformance/SVM/test_enqueue_api.cpp index 6a04e955..f92872d6 100644 --- a/test_conformance/SVM/test_enqueue_api.cpp +++ b/test_conformance/SVM/test_enqueue_api.cpp @@ -17,10 +17,15 @@ #include "../../test_common/harness/mt19937.h" #include +#include + +#if !defined(_WIN32) +#include +#endif typedef struct { - cl_uint status; + std::atomic status; cl_uint num_svm_pointers; std::vector svm_pointers; } CallbackData; @@ -62,7 +67,7 @@ void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_point clSVMFree(context, svm_pointers[i]); } - data->status = 1; + data->status.store(1, std::memory_order_release); } int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) @@ -231,7 +236,9 @@ int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue test_error(error, "clFinish failed"); //wait for the callback - while(data.status == 0) { } + while(data.status.load(std::memory_order_acquire) == 0) { + usleep(1); + } //check if number of SVM pointers returned in the callback matches with expected if (data.num_svm_pointers != buffers.size()) diff --git a/test_conformance/api/test_api_min_max.c b/test_conformance/api/test_api_min_max.c index 36046751..ab3f10ca 100644 --- a/test_conformance/api/test_api_min_max.c +++ b/test_conformance/api/test_api_min_max.c @@ -1303,7 +1303,6 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, int error; clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper streams[3]; size_t threads[1], localThreads[1]; cl_int *constantData, *resultData; cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize; @@ -1324,12 +1323,12 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, log_info("Reported max constant buffer size of %lld bytes.\n", maxSize); - // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE + // Limit test buffer size to 1/8 of CL_DEVICE_GLOBAL_MEM_SIZE error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0); test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); - if (maxSize > maxGlobalSize / 4) - maxSize = maxGlobalSize / 4; + if (maxSize > maxGlobalSize / 8) + maxSize = maxGlobalSize / 8; error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0); test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE "); @@ -1358,6 +1357,7 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, for(i=0; i<(int)(numberOfInts); i++) constantData[i] = (int)genrand_int32(d); + clMemWrapper streams[3]; streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, constantData, &error); test_error( error, "Creating test array failed" ); streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeToAllocate, NULL, &error); @@ -1427,7 +1427,7 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, if (allocPassed) { if (currentSize < maxSize/PASSING_FRACTION) { - log_error("Failed to allocate at least 1/4 of the reported constant size.\n"); + log_error("Failed to allocate at least 1/8 of the reported constant size.\n"); return -1; } else if (currentSize != maxSize) { log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize); diff --git a/test_conformance/basic/run_array b/test_conformance/basic/run_array old mode 100644 new mode 100755 diff --git a/test_conformance/basic/run_array_image_copy b/test_conformance/basic/run_array_image_copy old mode 100644 new mode 100755 diff --git a/test_conformance/basic/run_image b/test_conformance/basic/run_image old mode 100644 new mode 100755 diff --git a/test_conformance/basic/run_multi_read_image b/test_conformance/basic/run_multi_read_image old mode 100644 new mode 100755 diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp index ec65101d..ca657787 100644 --- a/test_conformance/basic/test_async_strided_copy.cpp +++ b/test_conformance/basic/test_async_strided_copy.cpp @@ -209,14 +209,13 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu log_error( "ERROR: Results of copy did not validate!\n" ); sprintf(values + strlen( values), "%d -> [", i); for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]); + sprintf(values + strlen( values), "%2x ", inchar[j]); sprintf(values + strlen(values), "] != ["); for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]); + sprintf(values + strlen( values), "%2x ", outchar[j]); sprintf(values + strlen(values), "]"); log_error("%s\n", values); - - return -1; + return -1; } } diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index 9a3eb99c..243d2361 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -872,12 +872,14 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co // We need to create 5 random values of the given type, // and read 4 of them back. - cl_uchar CL_ALIGNED(ALIGNMENT) write_data[NUM_TESTED_VALUES * sizeof(cl_ulong16)]; - cl_uchar CL_ALIGNED(ALIGNMENT) read_data[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; + const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16); + const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16); + cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT); + cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT); - clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(write_data), write_data, &status ) ); + clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) ); test_error_ret(status,"Failed to allocate write buffer",status); - clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(read_data), read_data, &status ) ); + clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) ); test_error_ret(status,"Failed to allocate read buffer",status); status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); @@ -892,7 +894,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co // Generate new random data to push through. // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. - cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, sizeof(write_data), 0, 0, 0, 0); + cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0); if ( ti.is_bool() ) { // For boolean, random data cast to bool isn't very random. @@ -904,7 +906,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co } bool_iter++; } else { - l_set_randomly( write_data, sizeof(write_data), rand_state ); + l_set_randomly( write_data, write_data_size, rand_state ); } status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status); @@ -913,7 +915,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status); // Determine the expected values. - cl_uchar expected[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; + cl_uchar expected[read_data_size]; memset( expected, -1, sizeof(expected) ); l_copy( expected, 0, write_data, 0, ti ); l_copy( expected, 1, write_data, 1, ti ); @@ -930,8 +932,8 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i]; } - cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); - memset( read_data, -1, sizeof(read_data) ); + cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); + memset(read_data, -1, read_data_size); clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); // Now run the kernel @@ -940,7 +942,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); status = clFinish(queue); test_error_ret(status,"finish",status); - read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); + read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); if ( ti.is_bool() ) { // Collapse down to one bit. @@ -959,7 +961,8 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co } if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } - + align_free(write_data); + align_free(read_data); return err; } @@ -1018,12 +1021,14 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, // We need to create 5 random values of the given type, // and read 4 of them back. - cl_uchar CL_ALIGNED(ALIGNMENT) write_data[NUM_TESTED_VALUES * sizeof(cl_ulong16)]; - cl_uchar CL_ALIGNED(ALIGNMENT) read_data[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; + const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16); + const size_t read_data_size = (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16); - clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(write_data), write_data, &status ) ); + cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT); + cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT); + clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) ); test_error_ret(status,"Failed to allocate write buffer",status); - clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(read_data), read_data, &status ) ); + clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) ); test_error_ret(status,"Failed to allocate read buffer",status); status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); @@ -1043,7 +1048,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, // Generate new random data to push through. // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. - cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, sizeof(write_data), 0, 0, 0, 0); + cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0); if ( ti.is_bool() ) { // For boolean, random data cast to bool isn't very random. @@ -1055,7 +1060,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, } bool_iter++; } else { - l_set_randomly( write_data, sizeof(write_data), rand_state ); + l_set_randomly( write_data, write_data_size, rand_state ); } status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status); @@ -1071,7 +1076,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status); // Determine the expected values. - cl_uchar expected[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; + cl_uchar expected[read_data_size]; memset( expected, -1, sizeof(expected) ); if ( iteration ) { l_copy( expected, 0, write_data, 0, ti ); @@ -1102,8 +1107,8 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0); - cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); - memset( read_data, -1, sizeof(read_data) ); + cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); + memset( read_data, -1, read_data_size ); clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); // Now run the kernel @@ -1117,7 +1122,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); status = clFinish(queue); test_error_ret(status,"finish",status); - read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); + read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0); if ( ti.is_bool() ) { // Collapse down to one bit. @@ -1139,6 +1144,8 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context, } if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } + align_free(write_data); + align_free(read_data); return err; } @@ -1352,6 +1359,13 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS); return status; } + + status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0); + if(check_error(status, "Failed to compile program for user type test (%s)", IGetErrorString(status))) + { + print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS); + return status; + } } @@ -1372,12 +1386,12 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu test_error_ret(status,"Failed to create reader kernel for user type test",status); // Set up data. - cl_uchar CL_ALIGNED(ALIGNMENT) uchar_data; - cl_uint CL_ALIGNED(ALIGNMENT) uint_data; + cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT); + cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT); - clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(uchar_data), &uchar_data, &status ) ); + clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status ) ); test_error_ret(status,"Failed to allocate uchar buffer",status); - clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(uint_data), &uint_data, &status ) ); + clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uint), uint_data, &status ) ); test_error_ret(status,"Failed to allocate uint buffer",status); status = clSetKernelArg(reader,0,sizeof(cl_mem),&uchar_mem); test_error_ret(status,"set arg",status); @@ -1387,18 +1401,18 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu cl_uint expected_uint = 42; for ( unsigned iter = 0; iter < 5 ; iter++ ) { // Must go around at least twice // Read back data - uchar_data = -1; - uint_data = -1; + *uchar_data = -1; + *uint_data = -1; const size_t one = 1; status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); status = clFinish(queue); test_error_ret(status,"finish",status); - cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(uint_data), 0, 0, 0, 0); - cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(uchar_data), 0, 0, 0, 0); + cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, 0, 0, 0); + cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0, 0, 0); - if ( expected_uchar != uchar_data || expected_uint != uint_data ) { + if ( expected_uchar != *uchar_data || expected_uint != *uint_data ) { log_error("FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n", - iter, (int)uchar_data, uint_data, (int)expected_uchar, expected_uint ); + iter, (int)*uchar_data, *uint_data, (int)expected_uchar, expected_uint ); err |= 1; } @@ -1410,16 +1424,17 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu expected_uint++; // Write the new values into persistent store. - uchar_data = expected_uchar; - uint_data = expected_uint; - status = clSetKernelArg(writer,0,sizeof(uchar_data),&uchar_data); test_error_ret(status,"set arg",status); - status = clSetKernelArg(writer,1,sizeof(uint_data),&uint_data); test_error_ret(status,"set arg",status); + *uchar_data = expected_uchar; + *uint_data = expected_uint; + status = clSetKernelArg(writer,0,sizeof(cl_uchar),uchar_data); test_error_ret(status,"set arg",status); + status = clSetKernelArg(writer,1,sizeof(cl_uint),uint_data); test_error_ret(status,"set arg",status); status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status); status = clFinish(queue); test_error_ret(status,"finish",status); } if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } - + align_free(uchar_data); + align_free(uint_data); return err; } diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 72c88b19..5ca7f6ca 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1566,6 +1566,7 @@ public: using CBasicTestMemOrderScope::MemoryOrder; using CBasicTestMemOrderScope::MemoryScopeStr; using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::UseSVM; using CBasicTestMemOrderScope::LocalMemory; CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) { @@ -1606,7 +1607,7 @@ public: program += " atomic_work_item_fence(" + std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + "memory_order_acquire," + - std::string(LocalMemory() ? "memory_scope_work_group" : "memory_scope_device") + + std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) + ");\n"; program += @@ -1632,7 +1633,7 @@ public: program += " atomic_work_item_fence(" + std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + "memory_order_release," + - std::string(LocalMemory() ? "memory_scope_work_group" : "memory_scope_device") + + std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) + ");\n"; program += diff --git a/test_conformance/compatibility/test_common/harness/compat.h b/test_conformance/compatibility/test_common/harness/compat.h index f69c8886..d177f87a 100644 --- a/test_conformance/compatibility/test_common/harness/compat.h +++ b/test_conformance/compatibility/test_common/harness/compat.h @@ -13,32 +13,63 @@ // See the License for the specific language governing permissions and // limitations under the License. // +/* + Header compat.h should be used instead of stdlib.h, stdbool.h, stdint.h, float.h, fenv.h, + math.h. It provides workarounds if these headers are not available or not complete. + + Important: It should be included before math.h, directly or indirectly, because Intel mathimf.h + is not compatible with Microsoft math.h. Including math.h before mathimf.h causes compile-time + error. +*/ #ifndef _COMPAT_H_ #define _COMPAT_H_ #if defined(_WIN32) && defined (_MSC_VER) - #include -#include -#include -#include -#include -#include - -#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z)) -#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z) -#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z)) - -#define isfinite(x) _finite(x) - -#if !defined(__cplusplus) -typedef char bool; -#define inline - -#else -extern "C" { #endif +#ifdef __cplusplus + #define EXTERN_C extern "C" +#else + #define EXTERN_C +#endif + + +// +// stdlib.h +// + +#include // On Windows, _MAX_PATH defined there. + +// llabs appeared in MS C v16 (VS 10/2010). +#if defined( _MSC_VER ) && _MSC_VER <= 1500 + EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; } +#endif + + +// +// stdbool.h +// + +// stdbool.h appeared in MS C v18 (VS 12/2013). +#if defined( _MSC_VER ) && MSC_VER <= 1700 +#if !defined(__cplusplus) +typedef char bool; + #define true 1 + #define false 0 + #endif +#else + #include +#endif + + + +// +// stdint.h +// + +// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12. +#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 ) typedef unsigned char uint8_t; typedef char int8_t; typedef unsigned short uint16_t; @@ -47,25 +78,83 @@ typedef unsigned int uint32_t; typedef int int32_t; typedef unsigned long long uint64_t; typedef long long int64_t; - -#define MAXPATHLEN MAX_PATH - -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; +#else +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif + #include +#endif -#define INFINITY (FLT_MAX + FLT_MAX) -//#define NAN (INFINITY | 1) -//const static int PINFBITPATT_SP32 = INFINITY; + +// +// float.h +// + +#include + + + +// +// fenv.h +// + +// fenv.h appeared in MS C v18 (VS 12/2013). +#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER ) + // reimplement fenv.h because windows doesn't have it + #define FE_INEXACT 0x0020 + #define FE_UNDERFLOW 0x0010 + #define FE_OVERFLOW 0x0008 + #define FE_DIVBYZERO 0x0004 + #define FE_INVALID 0x0001 + #define FE_ALL_EXCEPT 0x003D + int fetestexcept(int excepts); + int feclearexcept(int excepts); +#else + #include +#endif + + +// +// math.h +// + +#if defined( __INTEL_COMPILER ) + #include +#else + #include +#endif + +#if defined( _MSC_VER ) + + #ifdef __cplusplus + extern "C" { + #endif #ifndef M_PI #define M_PI 3.14159265358979323846264338327950288 #endif + #if ! defined( __INTEL_COMPILER ) + #ifndef NAN + #define NAN (INFINITY - INFINITY) + #endif + #ifndef HUGE_VALF + #define HUGE_VALF (float)HUGE_VAL + #endif + #ifndef INFINITY + #define INFINITY (FLT_MAX + FLT_MAX) + #endif + #ifndef isfinite + #define isfinite(x) _finite(x) + #endif + #ifndef isnan #define isnan( x ) ((x) != (x)) + #endif + #ifndef isinf #define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY) + #endif double rint( double x); float rintf( float x); @@ -99,27 +188,6 @@ long double remquol( long double x, long double y, int *quo); long double scalblnl(long double x, long n); -inline long long -llabs(long long __x) { return __x >= 0 ? __x : -__x; } - - -// end of math functions - -uint64_t ReadTime( void ); -double SubtractTime( uint64_t endTime, uint64_t startTime ); - -#define sleep(X) Sleep(1000*X) -#define snprintf sprintf_s -//#define hypotl _hypot - -float make_nan(); -float nanf( const char* str); -double nan( const char* str); -long double nanl( const char* str); - -//#if defined USE_BOOST -//#include -//double hypot(double x, double y); float hypotf(float x, float y); long double hypotl(long double x, long double y) ; double lgamma(double x); @@ -144,67 +212,179 @@ double round(double x); float roundf(float x); long double roundl(long double x); -int signbit(double x); -int signbitf(float x); + int cf_signbit(double x); + int cf_signbitf(float x); -//bool signbitl(long double x) { return boost::math::tr1::signbit(x); } -//#endif // USE_BOOST + static int signbit(double x) { return cf_signbit(x); } + static int signbitf(float x) { return cf_signbitf(x); } long int lrint (double flt); long int lrintf (float flt); - float int2float (int32_t ix); int32_t float2int (float fx); + #endif + + #if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 + // These functions appeared in Intel C v13. + float nanf( const char* str); + double nan( const char* str); + long double nanl( const char* str); + #endif + + #ifdef __cplusplus + } + #endif + +#endif + +#if defined( __ANDROID__ ) + #define log2(X) (log(X)/log(2)) +#endif + + + +// +// stdio.h +// + +#if defined( _MSC_VER ) + #define snprintf sprintf_s +#endif + + + +// +// unistd.h +// + +#if defined( _MSC_VER ) + EXTERN_C unsigned int sleep( unsigned int sec ); + EXTERN_C int usleep( int usec ); +#endif + + + +// +// syscall.h +// + +#if defined( __ANDROID__ ) + // Android bionic's isn't providing SYS_sysctl wrappers. + #define SYS__sysctl __NR__sysctl +#endif + + + +// Some tests use _malloca which defined in malloc.h. +#if !defined (__APPLE__) +#include +#endif + + +// +// ??? +// + +#if defined( _MSC_VER ) + + #define MAXPATHLEN _MAX_PATH + + EXTERN_C uint64_t ReadTime( void ); + EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime ); + /** Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined. */ -int __builtin_clz(unsigned int pattern); - - -static const double zero= 0.00000000000000000000e+00; -#define NAN (INFINITY - INFINITY) -#define HUGE_VALF (float)HUGE_VAL - -int usleep(int usec); - -// reimplement fenv.h because windows doesn't have it -#define FE_INEXACT 0x0020 -#define FE_UNDERFLOW 0x0010 -#define FE_OVERFLOW 0x0008 -#define FE_DIVBYZERO 0x0004 -#define FE_INVALID 0x0001 -#define FE_ALL_EXCEPT 0x003D - -int fetestexcept(int excepts); -int feclearexcept(int excepts); - -#ifdef __cplusplus -} -#endif - -#else // !((defined(_WIN32) && defined(_MSC_VER) -#if defined(__MINGW32__) -#include -#define sleep(X) Sleep(1000*X) + EXTERN_C int __builtin_clz(unsigned int pattern); #endif -#if defined(__linux__) || defined(__MINGW32__) || defined(__APPLE__) -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS + +#ifndef MIN + #define MIN(x,y) (((x)<(y))?(x):(y)) #endif -#include -#include -#include -#include +#ifndef MAX + #define MAX(x,y) (((x)>(y))?(x):(y)) #endif + + +/* + ------------------------------------------------------------------------------------------------ + WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG. + + This is a typical usage of the macros: + + double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2); + + (taken from math_brute_force/reference_math.c). There are two problems: + + 1. There is an error here. On Windows in will produce incorrect result + `0x1.5555555555555p+50'. To have a correct result it should be written as + `MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the + third argument is not obvious -- sometimes it should be the same as exponent of the + first argument, but sometimes not. + + 2. Information is duplicated. It is easy to make a mistake. + + Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file). + ------------------------------------------------------------------------------------------------ +*/ +#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER ) + + #define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z)) + #define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z) + #define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z)) + +#else + +// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead. #define MAKE_HEX_FLOAT(x,y,z) x #define MAKE_HEX_DOUBLE(x,y,z) x #define MAKE_HEX_LONG(x,y,z) x -#endif // !((defined(_WIN32) && defined(_MSC_VER) +#endif +/* + ------------------------------------------------------------------------------------------------ + HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long + double respectively. Arguments: + + sm -- sign of number, + int -- integer part of mantissa (without `0x' prefix), + fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes), + se -- sign of exponent, + exp -- absolute value of (binary) exponent. + + Example: + + double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2 + + Note: + + We have to pass signs as separate arguments because gcc pass negative integer values + (e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result + `0x1.0p- 2' (note a space between minus and two) which is not a correct floating point + literal. + ------------------------------------------------------------------------------------------------ +*/ +#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER ) + // If compiler does not support hex floating point literals: + #define HEX_FLT( sm, int, fract, se, exp ) sm ldexpf( (float)( 0x ## int ## fract ## UL ), se exp + ilogbf( (float) 0x ## int ) - ilogbf( ( float )( 0x ## int ## fract ## UL ) ) ) + #define HEX_DBL( sm, int, fract, se, exp ) sm ldexp( (double)( 0x ## int ## fract ## ULL ), se exp + ilogb( (double) 0x ## int ) - ilogb( ( double )( 0x ## int ## fract ## ULL ) ) ) + #define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) ) +#else + // If compiler supports hex floating point literals: just concatenate all the parts into a literal. + #define HEX_FLT( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F + #define HEX_DBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp + #define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L +#endif + +#if defined(__MINGW32__) + #include + #define sleep(sec) Sleep((sec) * 1000) +#endif + #endif // _COMPAT_H_ diff --git a/test_conformance/compatibility/test_common/harness/kernelHelpers.c b/test_conformance/compatibility/test_common/harness/kernelHelpers.c index 3680c06e..42f41aa4 100644 --- a/test_conformance/compatibility/test_common/harness/kernelHelpers.c +++ b/test_conformance/compatibility/test_common/harness/kernelHelpers.c @@ -475,14 +475,14 @@ size_t get_pixel_bytes( const cl_image_format *fmt ) return 0; } -int verifyImageSupport( cl_device_id device ) +test_status verifyImageSupport( cl_device_id device ) { if( checkForImageSupport( device ) ) { log_error( "ERROR: Device does not supported images as required by this test!\n" ); - return CL_IMAGE_FORMAT_NOT_SUPPORTED; + return TEST_FAIL; } - return 0; + return TEST_PASS; } int checkForImageSupport( cl_device_id device ) diff --git a/test_conformance/compatibility/test_common/harness/kernelHelpers.h b/test_conformance/compatibility/test_common/harness/kernelHelpers.h index 59c01b5f..09515e28 100644 --- a/test_conformance/compatibility/test_common/harness/kernelHelpers.h +++ b/test_conformance/compatibility/test_common/harness/kernelHelpers.h @@ -17,6 +17,7 @@ #define _kernelHelpers_h #include "compat.h" +#include "testHarness.h" #include #include @@ -84,8 +85,8 @@ extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl /* Helper to get pixel size for a pixel format */ size_t get_pixel_bytes( const cl_image_format *fmt ); -/* Verify the given device supports images. 0 means you're good to go, otherwise an error */ -extern int verifyImageSupport( cl_device_id device ); +/* Verify the given device supports images. */ +extern test_status verifyImageSupport( cl_device_id device ); /* Checks that the given device supports images. Same as verify, but doesn't print an error */ extern int checkForImageSupport( cl_device_id device ); diff --git a/test_conformance/compatibility/test_common/harness/msvc9.c b/test_conformance/compatibility/test_common/harness/msvc9.c index 7d559bce..093bb978 100644 --- a/test_conformance/compatibility/test_common/harness/msvc9.c +++ b/test_conformance/compatibility/test_common/harness/msvc9.c @@ -13,15 +13,18 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#if defined(_WIN32) && defined (_MSC_VER) - #include "compat.h" -#include -#include -#include -#include +#if defined ( _MSC_VER ) +#include +#include + +#include + +#include + +#if ! defined( __INTEL_COMPILER ) /////////////////////////////////////////////////////////////////// // @@ -387,86 +390,6 @@ long double log2l(long double x) return 1.44269504088896340735992468100189214L * log(x); } -/////////////////////////////////////////////////////////////////// -// -// misc functions -// -/////////////////////////////////////////////////////////////////// - -/* -// This function is commented out because the Windows implementation should never call munmap. -// If it is calling it, we have a bug. Please file a bugzilla. -int munmap(void *addr, size_t len) -{ -// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html - - return (int)VirtualAlloc( (LPVOID)addr, len, - MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS ); -} -*/ - -uint64_t ReadTime( void ) -{ - LARGE_INTEGER current; - QueryPerformanceCounter(¤t); - return (uint64_t)current.QuadPart; -} - -double SubtractTime( uint64_t endTime, uint64_t startTime ) -{ - static double PerformanceFrequency = 0.0; - - if (PerformanceFrequency == 0.0) { - LARGE_INTEGER frequency; - QueryPerformanceFrequency(&frequency); - PerformanceFrequency = (double) frequency.QuadPart; - } - - return (double)(endTime - startTime) / PerformanceFrequency * 1e9; -} - -float make_nan() -{ -/* This is the IEEE 754 single-precision format: - unsigned int mantissa: 22; - unsigned int quiet_nan: 1; - unsigned int exponent: 8; - unsigned int negative: 1; -*/ - //const static unsigned - static const int32_t _nan = 0x7fc00000; - return *(const float*)(&_nan); -} - -float nanf( const char* str) -{ - cl_uint u = atoi( str ); - u |= 0x7fc00000U; - return *( float*)(&u); -} - - -double nan( const char* str) -{ - cl_ulong u = atoi( str ); - u |= 0x7ff8000000000000ULL; - return *( double*)(&u); -} - -// double check this implementatation -long double nanl( const char* str) -{ - union - { - long double f; - struct { cl_ulong m; cl_ushort sexp; }u; - }u; - u.u.sexp = 0x7fff; - u.u.m = 0x8000000000000000ULL | atoi( str ); - - return u.f; -} - double trunc(double x) { double absx = fabs(x); @@ -589,7 +512,165 @@ long double roundl(long double x) return x; } -int signbit(double x) +float cbrtf( float x ) +{ + float z = pow( fabs((double) x), 1.0 / 3.0 ); + return copysignf( z, x ); +} + +double cbrt( double x ) +{ + return copysign( pow( fabs( x ), 1.0 / 3.0 ), x ); +} + +long int lrint (double x) +{ + double absx = fabs(x); + + if( x >= (double) LONG_MAX ) + return LONG_MAX; + + if( absx < 4503599627370496.0 /* 0x1.0p52 */ ) + { + double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x ); + double rounded = x + magic; + rounded -= magic; + return (long int) rounded; + } + + return (long int) x; +} + +long int lrintf (float x) +{ + float absx = fabsf(x); + + if( x >= (float) LONG_MAX ) + return LONG_MAX; + + if( absx < 8388608.0f /* 0x1.0p23f */ ) + { + float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x ); + float rounded = x + magic; + rounded -= magic; + return (long int) rounded; + } + + return (long int) x; +} + + +/////////////////////////////////////////////////////////////////// +// +// fenv functions +// +/////////////////////////////////////////////////////////////////// + +int fetestexcept(int excepts) +{ + unsigned int status = _statusfp(); + return excepts & ( + ((status & _SW_INEXACT) ? FE_INEXACT : 0) | + ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) | + ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) | + ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) | + ((status & _SW_INVALID) ? FE_INVALID : 0) + ); +} + +int feclearexcept(int excepts) +{ + _clearfp(); + return 0; +} + +#endif // __INTEL_COMPILER + +#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 + +float make_nan() +{ +/* This is the IEEE 754 single-precision format: + unsigned int mantissa: 22; + unsigned int quiet_nan: 1; + unsigned int exponent: 8; + unsigned int negative: 1; +*/ + //const static unsigned + static const int32_t _nan = 0x7fc00000; + return *(const float*)(&_nan); +} + +float nanf( const char* str) +{ + cl_uint u = atoi( str ); + u |= 0x7fc00000U; + return *( float*)(&u); +} + + +double nan( const char* str) +{ + cl_ulong u = atoi( str ); + u |= 0x7ff8000000000000ULL; + return *( double*)(&u); +} + +// double check this implementatation +long double nanl( const char* str) +{ + union + { + long double f; + struct { cl_ulong m; cl_ushort sexp; }u; + }u; + u.u.sexp = 0x7fff; + u.u.m = 0x8000000000000000ULL | atoi( str ); + + return u.f; +} + +#endif + +/////////////////////////////////////////////////////////////////// +// +// misc functions +// +/////////////////////////////////////////////////////////////////// + +/* +// This function is commented out because the Windows implementation should never call munmap. +// If it is calling it, we have a bug. Please file a bugzilla. +int munmap(void *addr, size_t len) +{ +// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html + + return (int)VirtualAlloc( (LPVOID)addr, len, + MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS ); +} +*/ + +uint64_t ReadTime( void ) +{ + LARGE_INTEGER current; + QueryPerformanceCounter(¤t); + return (uint64_t)current.QuadPart; +} + +double SubtractTime( uint64_t endTime, uint64_t startTime ) +{ + static double PerformanceFrequency = 0.0; + + if (PerformanceFrequency == 0.0) { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + PerformanceFrequency = (double) frequency.QuadPart; + } + + return (double)(endTime - startTime) / PerformanceFrequency * 1e9; +} + +int cf_signbit(double x) { union { @@ -600,7 +681,7 @@ int signbit(double x) return u.u >> 63; } -int signbitf(float x) +int cf_signbitf(float x) { union { @@ -611,17 +692,6 @@ int signbitf(float x) return u.u >> 31; } -float cbrtf( float x ) -{ - float z = pow( fabs((double) x), 1.0 / 3.0 ); - return copysignf( z, x ); -} - -double cbrt( double x ) -{ - return copysign( pow( fabs( x ), 1.0 / 3.0 ), x ); -} - float int2float (int32_t ix) { union { @@ -642,7 +712,7 @@ int32_t float2int (float fx) return u.i; } -#if defined(_MSC_VER) && !defined(_WIN64) +#if !defined(_WIN64) /** Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined. @@ -682,45 +752,10 @@ int __builtin_clz(unsigned int pattern) return count; } -#endif //defined(_MSC_VER) && !defined(_WIN64) +#endif // !defined(_WIN64) #include #include -long int lrint (double x) -{ - double absx = fabs(x); - - if( x >= (double) LONG_MAX ) - return LONG_MAX; - - if( absx < 4503599627370496.0 /* 0x1.0p52 */ ) - { - double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x ); - double rounded = x + magic; - rounded -= magic; - return (long int) rounded; - } - - return (long int) x; -} - -long int lrintf (float x) -{ - float absx = fabsf(x); - - if( x >= (float) LONG_MAX ) - return LONG_MAX; - - if( absx < 8388608.0f /* 0x1.0p23f */ ) - { - float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x ); - float rounded = x + magic; - rounded -= magic; - return (long int) rounded; - } - - return (long int) x; -} int usleep(int usec) { @@ -728,22 +763,10 @@ int usleep(int usec) return 0; } -int fetestexcept(int excepts) +unsigned int sleep( unsigned int sec ) { - unsigned int status = _statusfp(); - return excepts & ( - ((status & _SW_INEXACT) ? FE_INEXACT : 0) | - ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) | - ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) | - ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) | - ((status & _SW_INVALID) ? FE_INVALID : 0) - ); -} - -int feclearexcept(int excepts) -{ - _clearfp(); + Sleep( sec * 1000 ); return 0; } -#endif //defined(_WIN32) +#endif // defined( _MSC_VER ) diff --git a/test_conformance/compatibility/test_common/harness/testHarness.c b/test_conformance/compatibility/test_common/harness/testHarness.c index 1994399d..af05c8d5 100644 --- a/test_conformance/compatibility/test_common/harness/testHarness.c +++ b/test_conformance/compatibility/test_common/harness/testHarness.c @@ -439,10 +439,18 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, /* If we have a device checking function, run it */ - if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS ) + if( ( deviceCheckFn != NULL ) ) { - test_finish(); - return -1; + test_status status = deviceCheckFn( device ); + switch (status) + { + case TEST_PASS: + break; + case TEST_FAIL: + return 1; + case TEST_SKIP: + return 0; + } } if (num_elements <= 0) diff --git a/test_conformance/compatibility/test_common/harness/testHarness.h b/test_conformance/compatibility/test_common/harness/testHarness.h index c2620647..7c1d9cb2 100644 --- a/test_conformance/compatibility/test_common/harness/testHarness.h +++ b/test_conformance/compatibility/test_common/harness/testHarness.h @@ -23,6 +23,13 @@ extern "C" { #endif +typedef enum test_status +{ + TEST_PASS = 0, + TEST_FAIL = 1, + TEST_SKIP = 2, +} test_status; + extern cl_uint gReSeed; extern cl_uint gRandomSeed; @@ -32,8 +39,8 @@ extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns, basefn fnList[], const char *fnNames[], int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps ); -// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits. -typedef int (*DeviceCheckFn)( cl_device_id device ); +// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than TEST_PASS, the harness exits. +typedef test_status (*DeviceCheckFn)( cl_device_id device ); // Same as runTestHarness, but also supplies a function that checks the created device for required functionality. extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, diff --git a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt index 50dd64ef..4af76989 100644 --- a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt +++ b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt @@ -20,6 +20,7 @@ set(${MODULE_NAME}_SOURCES test_mem_object_info.cpp test_null_buffer_arg.c test_kernel_arg_info.c + test_queue_properties.cpp ../../test_common/harness/errorHelpers.c ../../test_common/harness/threadTesting.c ../../test_common/harness/testHarness.c diff --git a/test_conformance/compatibility/test_conformance/api/main.c b/test_conformance/compatibility/test_conformance/api/main.c index 1870bf8f..a8e9e993 100644 --- a/test_conformance/compatibility/test_conformance/api/main.c +++ b/test_conformance/compatibility/test_conformance/api/main.c @@ -113,6 +113,7 @@ basefn basefn_list[] = { test_get_image1d_info, test_get_image1d_array_info, test_get_image2d_array_info, + test_queue_properties, }; @@ -200,6 +201,7 @@ const char *basefn_names[] = { "get_image1d_info", "get_image1d_array_info", "get_image2d_array_info", + "queue_properties", }; ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); diff --git a/test_conformance/compatibility/test_conformance/api/procs.h b/test_conformance/compatibility/test_conformance/api/procs.h index ff796a38..e0a85356 100644 --- a/test_conformance/compatibility/test_conformance/api/procs.h +++ b/test_conformance/compatibility/test_conformance/api/procs.h @@ -105,4 +105,5 @@ extern int test_get_image1d_info( cl_device_id deviceID, cl_context context extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_queue_properties( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); diff --git a/test_conformance/compatibility/test_conformance/api/test_api_min_max.c b/test_conformance/compatibility/test_conformance/api/test_api_min_max.c index aa733996..2dc43d74 100644 --- a/test_conformance/compatibility/test_conformance/api/test_api_min_max.c +++ b/test_conformance/compatibility/test_conformance/api/test_api_min_max.c @@ -1319,12 +1319,12 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, log_info("Reported max constant buffer size of %lld bytes.\n", maxSize); - // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE + // Limit test buffer size to 1/8 of CL_DEVICE_GLOBAL_MEM_SIZE error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0); test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); - if (maxSize > maxGlobalSize / 4) - maxSize = maxGlobalSize / 4; + if (maxSize > maxGlobalSize / 8) + maxSize = maxGlobalSize / 8; error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0); test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE "); @@ -1422,7 +1422,7 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, if (allocPassed) { if (currentSize < maxSize/PASSING_FRACTION) { - log_error("Failed to allocate at least 1/4 of the reported constant size.\n"); + log_error("Failed to allocate at least 1/8 of the reported constant size.\n"); return -1; } else if (currentSize != maxSize) { log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize); diff --git a/test_conformance/compatibility/test_conformance/api/test_queue_properties.cpp b/test_conformance/compatibility/test_conformance/api/test_queue_properties.cpp new file mode 100644 index 00000000..3b368077 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_queue_properties.cpp @@ -0,0 +1,174 @@ +// +// Copyright (c) 2018 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + + +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include +#include +#include + +using namespace std; +/* +The test against cl_khr_create_command_queue extension. It validates if devices with Opencl 1.X can use clCreateCommandQueueWithPropertiesKHR function. +Based on device capabilities test will create queue with NULL properties, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property and +CL_QUEUE_PROFILING_ENABLE property. Finally simple kernel will be executed on such queue. +*/ + +const char *queue_test_kernel[] = { +"__kernel void vec_cpy(__global int *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src[tid];\n" +"\n" +"}\n" }; + +int enqueue_kernel(cl_context context, const cl_queue_properties_khr *queue_prop_def, cl_device_id deviceID, clKernelWrapper& kernel, size_t num_elements) +{ + clMemWrapper streams[2]; + int error; + std::vector buf(num_elements); + clCreateCommandQueueWithPropertiesKHR_fn clCreateCommandQueueWithPropertiesKHR = NULL; + cl_platform_id platform; + clEventWrapper event; + + error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); + + clCreateCommandQueueWithPropertiesKHR = (clCreateCommandQueueWithPropertiesKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clCreateCommandQueueWithPropertiesKHR"); + if (clCreateCommandQueueWithPropertiesKHR == NULL) + { + log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed\n"); + return -1; + } + + clCommandQueueWrapper queue = clCreateCommandQueueWithPropertiesKHR(context, deviceID, queue_prop_def, &error); + test_error(error, "clCreateCommandQueueWithPropertiesKHR failed"); + + for (int i = 0; i < num_elements; ++i) + { + buf[i] = i; + } + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, num_elements * sizeof(int), buf.data(), &error); + test_error( error, "clCreateBuffer failed." ); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, num_elements * sizeof(int), NULL, &error); + test_error( error, "clCreateBuffer failed." ); + + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error( error, "clSetKernelArg failed." ); + + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error( error, "clSetKernelArg failed." ); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &num_elements, NULL, 0, NULL, &event); + test_error( error, "clEnqueueNDRangeKernel failed." ); + + error = clWaitForEvents(1, &event); + test_error(error, "clWaitForEvents failed."); + + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, num_elements, buf.data(), 0, NULL, NULL); + test_error( error, "clEnqueueReadBuffer failed." ); + + for (int i = 0; i < num_elements; ++i) + { + if (buf[i] != i) + { + log_error("ERROR: Incorrect vector copy result."); + return -1; + } + } + + return 0; +} + +int test_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + if (num_elements <= 0) + { + num_elements = 128; + } + int error = 0; + + clProgramWrapper program; + clKernelWrapper kernel; + size_t strSize; + std::string strExt(0, '\0'); + cl_queue_properties_khr device_props = NULL; + cl_queue_properties_khr queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, 0 }; + + // Query extension + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &strSize); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed"); + strExt.resize(strSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, strExt.size(), &strExt[0], NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed"); + log_info("CL_DEVICE_EXTENSIONS:\n%s\n\n", strExt.c_str()); + + if (strExt.find("cl_khr_create_command_queue") == string::npos) + { + log_info("extension cl_khr_create_command_queue is not supported.\n"); + return 0; + } + + error = create_single_kernel_helper(context, &program, &kernel, 1, queue_test_kernel, "vec_cpy"); + test_error(error, "create_single_kernel_helper failed"); + + log_info("Queue property NULL. Testing ... \n"); + error = enqueue_kernel(context, NULL,deviceID, kernel, (size_t)num_elements); + test_error(error, "enqueue_kernel failed"); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed"); + + if (device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) + { + log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE supported. Testing ... \n"); + queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements); + test_error(error, "enqueue_kernel failed"); + } else + { + log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not supported \n"); + } + + if (device_props & CL_QUEUE_PROFILING_ENABLE) + { + log_info("Queue property CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n"); + queue_prop_def[1] = CL_QUEUE_PROFILING_ENABLE; + error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements); + test_error(error, "enqueue_kernel failed"); + } else + { + log_info("Queue property CL_QUEUE_PROFILING_ENABLE not supported \n"); + } + + if (device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE && device_props & CL_QUEUE_PROFILING_ENABLE) + { + log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE & CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n"); + queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_PROFILING_ENABLE; + error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements); + test_error(error, "enqueue_kernel failed"); + } + else + { + log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE or CL_QUEUE_PROFILING_ENABLE not supported \n"); + } + + return 0; +} \ No newline at end of file diff --git a/test_conformance/compatibility/test_conformance/basic/run_array b/test_conformance/compatibility/test_conformance/basic/run_array old mode 100644 new mode 100755 diff --git a/test_conformance/compatibility/test_conformance/basic/run_array_image_copy b/test_conformance/compatibility/test_conformance/basic/run_array_image_copy old mode 100644 new mode 100755 diff --git a/test_conformance/compatibility/test_conformance/basic/run_image b/test_conformance/compatibility/test_conformance/basic/run_image old mode 100644 new mode 100755 diff --git a/test_conformance/compatibility/test_conformance/basic/run_multi_read_image b/test_conformance/compatibility/test_conformance/basic/run_multi_read_image old mode 100644 new mode 100755 diff --git a/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp index 6db06398..dca52b5e 100644 --- a/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp +++ b/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp @@ -202,10 +202,10 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu log_error( "ERROR: Results of copy did not validate!\n" ); sprintf(values + strlen( values), "%d -> [", i); for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]); + sprintf(values + strlen( values), "%2x ", inchar[j]); sprintf(values + strlen(values), "] != ["); for (int j=0; j<(int)elementSize; j++) - sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]); + sprintf(values + strlen( values), "%2x ", outchar[j]); sprintf(values + strlen(values), "]"); log_error("%s\n", values); diff --git a/test_conformance/compatibility/test_conformance/basic/test_imagedim.c b/test_conformance/compatibility/test_conformance/basic/test_imagedim.c index 2dbaaa7b..81de4cbc 100644 --- a/test_conformance/compatibility/test_conformance/basic/test_imagedim.c +++ b/test_conformance/compatibility/test_conformance/basic/test_imagedim.c @@ -16,6 +16,7 @@ #include "../../test_common/harness/compat.h" #include +#include #include #include #include @@ -112,6 +113,10 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n", max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0)); + if (max_mem_size > (cl_ulong)SIZE_MAX) { + max_mem_size = (cl_ulong)SIZE_MAX; + } + cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); test_error(err, "clCreateSampler failed"); @@ -182,7 +187,7 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que size_t origin[3] = {0,0,0}; size_t region[3] = {img_width, img_height, 1}; - err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL); + err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL); if (err != CL_SUCCESS) { log_error("clWriteImage failed\n"); @@ -324,6 +329,10 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue max_img_width = (int)max_image2d_width; max_img_height = (int)max_image2d_height; + if (max_mem_size > (cl_ulong)SIZE_MAX) { + max_mem_size = (cl_ulong)SIZE_MAX; + } + // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel, // and we want to consume 1/4 of global memory (this is the minimum required to be // supported by the spec) @@ -351,6 +360,9 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0)); d = init_genrand( gRandomSeed ); + input_ptr = generate_8888_image(max_img_width, max_img_height, d); + output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height); + int plus_minus; for (plus_minus=0; plus_minus < 3; plus_minus++) { @@ -390,9 +402,6 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue break; } - input_ptr = generate_8888_image(effective_img_width, effective_img_height, d); - output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * effective_img_width * effective_img_height); - img_format.image_channel_order = CL_RGBA; img_format.image_channel_data_type = CL_UNORM_INT8; streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, effective_img_width, effective_img_height, 0, NULL, NULL); @@ -419,7 +428,7 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue size_t origin[3] = {0,0,0}; size_t region[3] = {effective_img_width, effective_img_height, 1}; - err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL); + err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL); if (err != CL_SUCCESS) { log_error("clWriteImage failed\n"); @@ -484,14 +493,14 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue clReleaseMemObject(streams[0]); clReleaseMemObject(streams[1]); - free(input_ptr); - free(output_ptr); } } } // cleanup + free(input_ptr); + free(output_ptr); free_mtdata(d); clReleaseSampler(sampler); clReleaseKernel(kernel); diff --git a/test_conformance/compatibility/test_conformance/basic/test_sizeof.c b/test_conformance/compatibility/test_conformance/basic/test_sizeof.c index bece1b94..9c56bb58 100644 --- a/test_conformance/compatibility/test_conformance/basic/test_sizeof.c +++ b/test_conformance/compatibility/test_conformance/basic/test_sizeof.c @@ -133,7 +133,7 @@ const size_table vector_table[] = const char *ptr_table[] = { - "void*", + "global void*", "size_t", "sizeof(int)", // check return type of sizeof "ptrdiff_t" diff --git a/test_conformance/compatibility/test_conformance/images/image_helpers.h b/test_conformance/compatibility/test_conformance/images/image_helpers.h index d2132c7e..cd0eff6c 100644 --- a/test_conformance/compatibility/test_conformance/images/image_helpers.h +++ b/test_conformance/compatibility/test_conformance/images/image_helpers.h @@ -481,7 +481,6 @@ extern char *create_random_image_data( ExplicitType dataType, image_descriptor * extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine ); extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler); extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter ); -extern int issubnormal(float); #define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) ) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 3a03e1d5..cfb01a34 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -54,6 +54,9 @@ const char *known_extensions[] = { "cl_khr_egl_image", "cl_khr_egl_event", "cl_khr_il_program", + "cl_khr_create_command_queue", + "cl_khr_throttle_hints", + "cl_khr_priority_hints", }; size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*); diff --git a/test_conformance/contractions/contractions.c b/test_conformance/contractions/contractions.c index 6fb444b9..cb9069b0 100644 --- a/test_conformance/contractions/contractions.c +++ b/test_conformance/contractions/contractions.c @@ -515,6 +515,8 @@ static void PrintArch( void ) vlog( "\tARCH:\tx86_64\n" ); #elif defined( __arm__ ) vlog( "\tARCH:\tarm\n" ); +#elif defined( __aarch64__ ) + vlog( "\tARCH:\taarch64\n" ); #else vlog( "\tARCH:\tunknown\n" ); #endif diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt index 35acc7a6..4c65c69d 100644 --- a/test_conformance/conversions/CMakeLists.txt +++ b/test_conformance/conversions/CMakeLists.txt @@ -19,6 +19,7 @@ set (${MODULE_NAME}_SOURCES ../../test_common/harness/errorHelpers.c ../../test_common/harness/kernelHelpers.c ../../test_common/harness/testHarness.c + ../../test_common/harness/parseParameters.cpp ) if(ANDROID) diff --git a/test_conformance/conversions/basic_test_conversions.c b/test_conformance/conversions/basic_test_conversions.c index 73f38633..bfd31b27 100644 --- a/test_conformance/conversions/basic_test_conversions.c +++ b/test_conformance/conversions/basic_test_conversions.c @@ -751,16 +751,11 @@ static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) (( static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; } static void ulong2float( void *out, void *in) { -#if defined(_MSC_VER) +#if defined(_MSC_VER) && defined(_M_X64) cl_ulong l = ((cl_ulong*) in)[0]; float result; - cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l; -#if defined(_M_X64) _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl)); -#else - result = sl; -#endif ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result)); #else cl_ulong l = ((cl_ulong*) in)[0]; diff --git a/test_conformance/conversions/test_conversions.c b/test_conformance/conversions/test_conversions.c index 5b835343..fa04909b 100644 --- a/test_conformance/conversions/test_conversions.c +++ b/test_conformance/conversions/test_conversions.c @@ -18,6 +18,7 @@ #include "../../test_common/harness/ThreadPool.h" #include "../../test_common/harness/testHarness.h" #include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/parseParameters.h" #if !defined(_WIN32) && !defined(__ANDROID__) #include #endif @@ -98,6 +99,7 @@ cl_mem gOutBuffers[ kCallStyleCount ]; size_t gComputeDevices = 0; uint32_t gDeviceFrequency = 0; int gWimpyMode = 0; +int gWimpyReductionFactor = 128; int gSkipTesting = 0; int gForceFTZ = 0; int gMultithread = 1; @@ -438,6 +440,9 @@ static int ParseArgs( int argc, const char **argv ) case 'w': gWimpyMode ^= 1; break; + case '[': + parseWimpyReductionFactor(arg, gWimpyReductionFactor); + break; case 'z': gForceFTZ ^= 1; break; @@ -540,6 +545,7 @@ static int ParseArgs( int argc, const char **argv ) vlog( "*** WARNING: Testing in Wimpy mode! ***\n" ); vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" ); + vlog("*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor); } return 0; @@ -566,6 +572,7 @@ static void PrintUsage( void ) vlog( "\t\t-l\tToggle link check mode. When on, testing is skipped, and we just check to see that the kernels build. (Off by default.)\n" ); vlog( "\t\t-m\tToggle Multithreading. (On by default.)\n" ); vlog( "\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very small subset of the tests for each fn. NOT A VALID TEST! (Off by default.)\n" ); + vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor); vlog( "\t\t-z\tToggle flush to zero mode (Default: per device)\n" ); vlog( "\t\t-#\tTest just vector size given by #, where # is an element of the set {1,2,3,4,8,16}\n" ); vlog( "\n" ); @@ -1243,15 +1250,12 @@ static int DoTest( Type outType, Type inType, SaturationMode sat, RoundingMode r if ( !gWimpyMode && gIsEmbedded ) step = blockCount * EMBEDDED_REDUCTION_FACTOR; + if ( gWimpyMode ) + step = (size_t)blockCount * (size_t)gWimpyReductionFactor; vlog( "Testing... " ); fflush(stdout); for( i = 0; i < (uint64_t)lastCase; i += step ) { - if (gWimpyMode) { - uint64_t blockIndex = (i / blockCount) & 0xFF; - if (blockIndex != 0 && blockIndex != 0xFF) - continue; - } if( 0 == ( i & ((lastCase >> 3) -1))) { vlog("."); diff --git a/test_conformance/d3d10/harness.cpp b/test_conformance/d3d10/harness.cpp index 5caee9bb..13829760 100644 --- a/test_conformance/d3d10/harness.cpp +++ b/test_conformance/d3d10/harness.cpp @@ -200,7 +200,7 @@ cl_int HarnessD3D10_CreateDevice(IDXGIAdapter* pAdapter, ID3D10Device **ppDevice pAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, - D3D10_CREATE_DEVICE_DEBUG, + 0, D3D10_SDK_VERSION, &sd, &HarnessD3D10_pSwapChain, diff --git a/test_conformance/device_execution/execute_block.cpp b/test_conformance/device_execution/execute_block.cpp index e10b7c6e..99589605 100644 --- a/test_conformance/device_execution/execute_block.cpp +++ b/test_conformance/device_execution/execute_block.cpp @@ -928,15 +928,16 @@ static const char* block_barrier[] = NL, " size_t gid = get_group_id(0);" NL, " size_t idx = gid*lsz;" NL, "" + NL, " res[tid]=lsz;" + NL, " barrier(CLK_GLOBAL_MEM_FENCE);" NL, " int (^kernelBlock)(int) = ^(int a)" NL, " {" - NL, " atomic_inc(res+idx);" + NL, " atomic_dec(res+idx);" NL, " barrier(CLK_GLOBAL_MEM_FENCE);" - NL, " return (int)abs(a - b) - (res[idx] != lsz ? 0 : 1);" + NL, " return (int)abs(a - b) - (res[idx] != 0 ? 0 : 1);" NL, " };" NL, "" NL, " int d = kernelBlock(2);" - NL, " barrier(CLK_GLOBAL_MEM_FENCE);" NL, " res[tid] = d;" NL, "}" NL diff --git a/test_conformance/device_execution/main.c b/test_conformance/device_execution/main.c index 3aa67de3..6d247d02 100644 --- a/test_conformance/device_execution/main.c +++ b/test_conformance/device_execution/main.c @@ -65,7 +65,7 @@ ct_assert(arr_size(commonfn_names) == arr_size(basefn_list)) static const int num_commonfns = arr_size(commonfn_names); -int deviceCheck(cl_device_id device) +test_status deviceCheck(cl_device_id device) { static const char expected_cl[] = "OpenCL 2.0"; static const char expected_clc[] = "OpenCL C 2.0"; @@ -77,7 +77,7 @@ int deviceCheck(cl_device_id device) if(res != CL_SUCCESS || ret_len < strlen(expected_cl) || strncmp(version, expected_cl, strlen(expected_cl))) { log_info("Device does not support '%s'. Skipping the test.\n", expected_cl); - return CL_INVALID_DEVICE; + return TEST_FAIL; } version[0] = 0; @@ -87,10 +87,10 @@ int deviceCheck(cl_device_id device) if(res != CL_SUCCESS || ret_len < strlen(expected_clc) || strncmp(version, expected_clc, strlen(expected_clc))) { log_info("Device does not support '%s'. Skipping the test.\n", expected_clc); - return CL_INVALID_DEVICE; + return TEST_FAIL; } - return CL_SUCCESS; + return TEST_PASS; } int diff --git a/test_conformance/device_partition/Jamfile b/test_conformance/device_partition/Jamfile index 0e1d4d6a..3bf4b29f 100644 --- a/test_conformance/device_partition/Jamfile +++ b/test_conformance/device_partition/Jamfile @@ -1,32 +1,32 @@ -project - : requirements - gcc:-xc++ - msvc:"/TP" - ; - -exe test_device_partition - : main.c - test_device_partition.cpp - ../../test_common/harness/errorHelpers.c - ../../test_common/harness/threadTesting.c - ../../test_common/harness/testHarness.c - ../../test_common/harness/kernelHelpers.c - ../../test_common/harness/genericThread.cpp - ../../test_common/harness/mt19937.c - ../../test_common/harness/conversions.c - ../../test_common/harness/typeWrappers.cpp - : windows:../../test_common/harness/msvc9.c - ; - -install dist - : test_device_partition - : debug:$(DIST)/debug/tests/conformance/1.2/x86/device_partition - release:$(DIST)/release/tests/conformance/1.2/x86/device_partition - ; - -install dist - : test_device_partition - : debug:$(DIST)/debug/tests/conformance/1.2/x86_64/device_partition - release:$(DIST)/release/tests/conformance/1.2/x86_64/device_partition - 64 - ; +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_device_partition + : main.c + test_device_partition.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/genericThread.cpp + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/typeWrappers.cpp + : windows:../../test_common/harness/msvc9.c + ; + +install dist + : test_device_partition + : debug:$(DIST)/debug/tests/conformance/1.2/x86/device_partition + release:$(DIST)/release/tests/conformance/1.2/x86/device_partition + ; + +install dist + : test_device_partition + : debug:$(DIST)/debug/tests/conformance/1.2/x86_64/device_partition + release:$(DIST)/release/tests/conformance/1.2/x86_64/device_partition + 64 + ; diff --git a/test_conformance/geometrics/test_geometrics_double.cpp b/test_conformance/geometrics/test_geometrics_double.cpp index 2d258d5d..34bd1933 100644 --- a/test_conformance/geometrics/test_geometrics_double.cpp +++ b/test_conformance/geometrics/test_geometrics_double.cpp @@ -203,7 +203,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command return -1; /* Generate some streams. Note: deliberately do some random data in w to verify that it gets ignored */ - for( i = 0; i < TEST_SIZE * vecsize; i++ ) + for( i = 0; i < size * vecsize; i++ ) { inDataA[ i ] = get_random_double( -512.f, 512.f, d ); inDataB[ i ] = get_random_double( -512.f, 512.f, d ); @@ -237,7 +237,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command } /* Run the kernel */ - threads[0] = TEST_SIZE; + threads[0] = size; error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); test_error( error, "Unable to get work group size to use" ); diff --git a/test_conformance/gl/test_image_methods.cpp b/test_conformance/gl/test_image_methods.cpp index fa0c00a6..06a3f393 100644 --- a/test_conformance/gl/test_image_methods.cpp +++ b/test_conformance/gl/test_image_methods.cpp @@ -34,7 +34,8 @@ typedef struct image_kernel_data cl_int numSamples; }; -static const char *methodTestKernelPattern = +static const char *methodTestKernelPattern = +"%s" "typedef struct {\n" " int width;\n" " int height;\n" @@ -75,6 +76,8 @@ static const char *channelOrderConstLine = " outData->expectedChannelOrder = CLK_%s;\n"; static const char *numSamplesKernelLine = " outData->numSamples = get_image_num_samples( input );\n"; +static const char *enableMSAAKernelLine = +"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"; static int verify(cl_int input, cl_int kernelOutput, const char * description) { @@ -185,6 +188,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma bool doImageChannelOrder = false; bool doImageDim = false; bool doNumSamples = false; + bool doMSAA = false; switch(target) { case GL_TEXTURE_2D: imageType = "image2d_depth_t"; @@ -206,6 +210,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma break; case GL_TEXTURE_2D_MULTISAMPLE: doNumSamples = true; + doMSAA = true; if(format.formattype == GL_DEPTH_COMPONENT) { doImageWidth = true; imageType = "image2d_msaa_depth_t"; @@ -214,6 +219,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma } break; case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + doMSAA = true; if(format.formattype == GL_DEPTH_COMPONENT) { doImageWidth = true; imageType = "image2d_msaa_array_depth_t"; @@ -244,9 +250,11 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma } } - // Create a program to run against - sprintf( programSrc, methodTestKernelPattern, - imageType, + // Create a program to run against + sprintf(programSrc, + methodTestKernelPattern, + ( doMSAA ) ? enableMSAAKernelLine : "", + imageType, ( doArraySize ) ? arraySizeKernelLine : "", ( doImageWidth ) ? imageWidthKernelLine : "", ( doImageHeight ) ? imageHeightKernelLine : "", @@ -265,7 +273,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma if (error) print_error(error, "clFinish failed.\n"); const char *ptr = programSrc; - error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" ); + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); test_error( error, "Unable to create kernel to test against" ); // Create an output buffer diff --git a/test_conformance/gl/test_images_read_common.cpp b/test_conformance/gl/test_images_read_common.cpp index 068b9a29..f9b3e302 100644 --- a/test_conformance/gl/test_images_read_common.cpp +++ b/test_conformance/gl/test_images_read_common.cpp @@ -107,6 +107,7 @@ static const char *kernelpattern_image_read_2darray_depth = "}\n"; static const char *kernelpattern_image_multisample_read_2d = +"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n" "__kernel void sample_test( read_only image2d_msaa_t source, sampler_t sampler, __global %s4 *results )\n" "{\n" " int tidX = get_global_id(0);\n" @@ -121,6 +122,7 @@ static const char *kernelpattern_image_multisample_read_2d = "}\n"; static const char *kernelpattern_image_multisample_read_2d_depth = + "#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n" "__kernel void sample_test( read_only image2d_msaa_depth_t source, sampler_t sampler, __global %s *results )\n" "{\n" " int tidX = get_global_id(0);\n" @@ -135,6 +137,7 @@ static const char *kernelpattern_image_multisample_read_2d_depth = "}\n"; static const char *kernelpattern_image_multisample_read_2darray = +"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n" "__kernel void sample_test( read_only image2d_array_msaa_t source, sampler_t sampler, __global %s4 *results )\n" "{\n" " int tidX = get_global_id(0);\n" @@ -151,6 +154,7 @@ static const char *kernelpattern_image_multisample_read_2darray = "}\n"; static const char *kernelpattern_image_multisample_read_2darray_depth = + "#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n" "__kernel void sample_test( read_only image2d_array_msaa_depth_t source, sampler_t sampler, __global %s *results )\n" "{\n" " int tidX = get_global_id(0);\n" @@ -248,8 +252,8 @@ int test_cl_image_read( cl_context context, cl_command_queue queue, get_kernel_suffix( outFormat ) ); programPtr = kernelSource; - if( create_single_kernel_helper( context, &program, &kernel, 1, - (const char **)&programPtr, "sample_test" ) ) + if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, + (const char **)&programPtr, "sample_test", "-cl-std=CL2.0" ) ) { return -1; } diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp index 7b91f585..1345f46f 100644 --- a/test_conformance/gl/test_images_write_common.cpp +++ b/test_conformance/gl/test_images_write_common.cpp @@ -333,8 +333,8 @@ int test_cl_image_write( cl_context context, cl_command_queue queue, get_explicit_type_name( *outType ), suffix, convert); programPtr = kernelSource; - if( create_single_kernel_helper( context, &program, &kernel, 1, - (const char **)&programPtr, "sample_test" ) ) + if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, + (const char **)&programPtr, "sample_test", "-cl-std=CL2.0" ) ) { return -1; } diff --git a/test_conformance/gles/main.cpp b/test_conformance/gles/main.cpp index e7aeb938..0b61702a 100644 --- a/test_conformance/gles/main.cpp +++ b/test_conformance/gles/main.cpp @@ -101,16 +101,14 @@ const char *basefn_names[] = { "images_write_cube", "renderbuffer_read", "renderbuffer_write", - "renderbuffer_getinfo", - "all" + "renderbuffer_getinfo" }; const char *basefn_names32[] = { - "fence_sync", - "all" + "fence_sync" }; -ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0]) - 1) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); int num_fns = sizeof(basefn_names) / sizeof(char *); int num_fns32 = sizeof(basefn_names32) / sizeof(char *); @@ -386,17 +384,17 @@ int main(int argc, const char *argv[]) // Intentional falling through cleanup: - // Cleanup EGL - glEnv->terminate_egl_display(); - // Always make sure that OpenCL context is released properly when the test exit if(sCurrentContext) { clReleaseContext( sCurrentContext ); sCurrentContext = NULL; } + + // Cleanup EGL + glEnv->terminate_egl_display(); + delete glEnv; - return error; -} \ No newline at end of file +} diff --git a/test_conformance/half/CMakeLists.txt b/test_conformance/half/CMakeLists.txt index 46931eb2..6e92a888 100644 --- a/test_conformance/half/CMakeLists.txt +++ b/test_conformance/half/CMakeLists.txt @@ -11,6 +11,7 @@ set(${MODULE_NAME}_SOURCES ../../test_common/harness/kernelHelpers.c ../../test_common/harness/ThreadPool.c ../../test_common/harness/testHarness.c + ../../test_common/harness/parseParameters.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/half/Test_roundTrip.c b/test_conformance/half/Test_roundTrip.c index d941405b..f55f65d3 100644 --- a/test_conformance/half/Test_roundTrip.c +++ b/test_conformance/half/Test_roundTrip.c @@ -161,7 +161,7 @@ int Test_roundTrip( cl_device_id deviceID, cl_context context, cl_command_queue // Figure out how many elements are in a work block size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); - size_t blockCount = (size_t)getBufferSize(gDevice) / elementSize; //elementSize is a power of two + size_t blockCount = (size_t)gBufferSize / elementSize; //elementSize is a power of two uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half size_t stride = blockCount; diff --git a/test_conformance/half/Test_vLoadHalf.c b/test_conformance/half/Test_vLoadHalf.c index 81c76ead..3f8b53cd 100644 --- a/test_conformance/half/Test_vLoadHalf.c +++ b/test_conformance/half/Test_vLoadHalf.c @@ -454,7 +454,7 @@ int Test_vLoadHalf_private( bool aligned ) // Figure out how many elements are in a work block size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); - size_t blockCount = getBufferSize(gDevice) / elementSize; // elementSize is power of 2 + size_t blockCount = gBufferSize / elementSize; // elementSize is power of 2 uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half // we handle 64-bit types a bit differently. @@ -504,7 +504,7 @@ int Test_vLoadHalf_private( bool aligned ) continue; } */ - memset_pattern4( gOut_single, &pattern, getBufferSize(gDevice)); + memset_pattern4( gOut_single, &pattern, gBufferSize); if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_single, CL_TRUE, 0, count * sizeof( float ), gOut_single, 0, NULL, NULL)) ) { vlog_error( "Failure in clWriteArray\n" ); diff --git a/test_conformance/half/Test_vStoreHalf.c b/test_conformance/half/Test_vStoreHalf.c index 80b74ea2..5d9a1b0a 100644 --- a/test_conformance/half/Test_vStoreHalf.c +++ b/test_conformance/half/Test_vStoreHalf.c @@ -1045,7 +1045,7 @@ int Test_vStoreHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const c size_t stride = blockCount; if (gWimpyMode) - stride = 0x10000000U; + stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor; // we handle 64-bit types a bit differently. if( lastCase == 0 ) @@ -1654,7 +1654,7 @@ int Test_vStoreaHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const size_t stride = blockCount; if (gWimpyMode) - stride = 0x10000000U; + stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor; // we handle 64-bit types a bit differently. if( lastCase == 0 ) diff --git a/test_conformance/half/cl_utils.c b/test_conformance/half/cl_utils.c index 0c587805..4a6123b7 100644 --- a/test_conformance/half/cl_utils.c +++ b/test_conformance/half/cl_utils.c @@ -61,8 +61,10 @@ size_t gMaxThreadGroupSize = 0; size_t gWorkGroupSize = 0; int gFailCount = 0; bool gWimpyMode = false; +int gWimpyReductionFactor = 512; int gTestDouble = 0; uint32_t gDeviceIndex = 0; +size_t gBufferSize = 0; #if defined( __APPLE__ ) int gReportTimes = 1; @@ -178,17 +180,19 @@ int InitCL( void ) #if defined( __APPLE__ ) // FIXME: use clProtectedArray #endif + gBufferSize = getBufferSize(gDevice); + //Allocate buffers - gIn_half = malloc( getBufferSize(gDevice)/2 ); + gIn_half = malloc( gBufferSize/2 ); gOut_half = malloc( BUFFER_SIZE/2 ); gOut_half_reference = malloc( BUFFER_SIZE/2 ); gOut_half_reference_double = malloc( BUFFER_SIZE/2 ); gIn_single = malloc( BUFFER_SIZE ); - gOut_single = malloc( getBufferSize(gDevice) ); - gOut_single_reference = malloc( getBufferSize(gDevice) ); + gOut_single = malloc( gBufferSize ); + gOut_single_reference = malloc( gBufferSize ); gIn_double = malloc( 2*BUFFER_SIZE ); - // gOut_double = malloc( (2*getBufferSize(gDevice)) ); - // gOut_double_reference = malloc( (2*getBufferSize(gDevice)) ); + // gOut_double = malloc( (2*gBufferSize) ); + // gOut_double_reference = malloc( (2*gBufferSize) ); if ( NULL == gIn_half || NULL == gOut_half || @@ -201,7 +205,7 @@ int InitCL( void ) ) return -3; - gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, getBufferSize(gDevice) / 2, NULL, &error); + gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, gBufferSize / 2, NULL, &error); if( gInBuffer_half == NULL ) { vlog_error( "clCreateArray failed for input (%d)\n", error ); @@ -229,7 +233,7 @@ int InitCL( void ) return -5; } - gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, getBufferSize(gDevice), NULL, &error ); + gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, gBufferSize, NULL, &error ); if( gOutBuffer_single == NULL ) { vlog_error( "clCreateArray failed for output (%d)\n", error ); @@ -237,7 +241,7 @@ int InitCL( void ) } #if 0 - gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*getBufferSize(gDevice)), NULL, &error ); + gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*gBufferSize), NULL, &error ); if( gOutBuffer_double == NULL ) { vlog_error( "clCreateArray failed for output (%d)\n", error ); @@ -310,6 +314,15 @@ void ReleaseCL(void) // clReleaseMemObject(gOutBuffer_double); clReleaseCommandQueue(gQueue); clReleaseContext(gContext); + + free(gIn_half); + free(gOut_half); + free(gOut_half_reference); + free(gOut_half_reference_double); + free(gIn_single); + free(gOut_single); + free(gOut_single_reference); + free(gIn_double); } cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned) { @@ -427,21 +440,30 @@ size_t getBufferSize(cl_device_id device_id) if(s_initialized == 0 || s_device_id != device_id) { - cl_ulong result; + cl_ulong result, maxGlobalSize; cl_int err = clGetDeviceInfo (device_id, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(result), (void *)&result, NULL); if(err) { - vlog_error("clGetDeviceInfo() failed\n"); + vlog_error("clGetDeviceInfo(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) failed\n"); s_result = 64*1024; goto exit; } + log_info("Const buffer size is %llx (%llu)\n", result, result); + err = clGetDeviceInfo (device_id, + CL_DEVICE_GLOBAL_MEM_SIZE, + sizeof(maxGlobalSize), (void *)&maxGlobalSize, + NULL); + if(err) + { + vlog_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed\n"); + goto exit; + } result = result / 2; - if (result > BUFFER_SIZE) - result = BUFFER_SIZE; - log_info("Using const buffer size 0x%lx (%lu)\n", (unsigned long)result, (unsigned long)result); + if(maxGlobalSize < result * 10) + result = result / 10; s_initialized = 1; s_device_id = device_id; s_result = result; diff --git a/test_conformance/half/cl_utils.h b/test_conformance/half/cl_utils.h index 6299d3de..aba31e2f 100644 --- a/test_conformance/half/cl_utils.h +++ b/test_conformance/half/cl_utils.h @@ -71,11 +71,13 @@ extern size_t gWorkGroupSize; extern int gFailCount; extern int gTestDouble; extern int gReportTimes; +extern size_t gBufferSize; // gWimpyMode indicates if we run the test in wimpy mode where we limit the // size of 32 bit ranges to a much smaller set. This is meant to be used // as a smoke test extern bool gWimpyMode; +extern int gWimpyReductionFactor; uint64_t ReadTime( void ); double SubtractTime( uint64_t endTime, uint64_t startTime ); diff --git a/test_conformance/half/main.c b/test_conformance/half/main.c index 21b2226f..a09639c3 100644 --- a/test_conformance/half/main.c +++ b/test_conformance/half/main.c @@ -28,6 +28,7 @@ #include "../../test_common/harness/testHarness.h" #include "../../test_common/harness/mingw_compat.h" +#include "../../test_common/harness/parseParameters.h" #if defined (__MINGW32__) #include #endif @@ -260,7 +261,9 @@ static int ParseArgs( int argc, const char **argv ) case 'w': // Wimpy mode gWimpyMode = true; break; - + case '[': + parseWimpyReductionFactor( arg, gWimpyReductionFactor); + break; default: vlog_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); PrintUsage(); @@ -302,6 +305,7 @@ static int ParseArgs( int argc, const char **argv ) vlog( "*** WARNING: Testing in Wimpy mode! ***\n" ); vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" ); + vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor); } return 0; } @@ -312,6 +316,7 @@ static void PrintUsage( void ) vlog( "\t\t-d\tToggle double precision testing (default: on if double supported)\n" ); vlog( "\t\t-t\tToggle reporting performance data.\n" ); vlog( "\t\t-w\tRun in wimpy mode\n" ); + vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor); vlog( "\t\t-h\tHelp\n" ); for( int i = 0; i < num_fns; i++ ) { @@ -334,6 +339,8 @@ static void PrintArch( void ) vlog( "ARCH:\tx86_64\n" ); #elif defined( __arm__ ) vlog( "ARCH:\tarm\n" ); +#elif defined( __aarch64__ ) + vlog( "\tARCH:\taarch64\n" ); #else #error unknown arch #endif diff --git a/test_conformance/headers/CMakeLists.txt b/test_conformance/headers/CMakeLists.txt index 1fdccaa6..82e2d397 100644 --- a/test_conformance/headers/CMakeLists.txt +++ b/test_conformance/headers/CMakeLists.txt @@ -3,6 +3,7 @@ set(HEADERS_SOURCES ../../test_common/harness/errorHelpers.c ../../test_common/harness/kernelHelpers.c ../../test_common/harness/testHarness.c + ../../test_common/harness/msvc9.c ) set_source_files_properties(${HEADERS_SOURCES} PROPERTIES LANGUAGE CXX) @@ -24,6 +25,8 @@ set(CL_H_SOURCES test_cl.h.c ) +set_source_files_properties(${CL_H_SOURCES} PROPERTIES LANGUAGE CXX) + set(CL_H_OUT ${CONFORMANCE_PREFIX}cl_h${CONFORMANCE_SUFFIX}) add_executable( @@ -41,6 +44,8 @@ set(CL_PLATFORM_H_SOURCES test_cl_platform.h.c ) +set_source_files_properties(${CL_PLATFORM_H_SOURCES} PROPERTIES LANGUAGE CXX) + set(CL_PLATFORM_H_OUT ${CONFORMANCE_PREFIX}cl_platform_h${CONFORMANCE_SUFFIX}) add_executable( @@ -58,6 +63,8 @@ set(CL_GL_H_SOURCES test_cl_gl.h.c ) +set_source_files_properties(${CL_GL_H_SOURCES} PROPERTIES LANGUAGE CXX) + set(CL_GL_H_OUT ${CONFORMANCE_PREFIX}cl_gl_h${CONFORMANCE_SUFFIX}) add_executable( @@ -75,6 +82,8 @@ set(OPENCL_H_SOURCES test_opencl.h.c ) +set_source_files_properties(${OPENCL_H_SOURCES} PROPERTIES LANGUAGE CXX) + set(OPENCL_H_OUT ${CONFORMANCE_PREFIX}opencl_h${CONFORMANCE_SUFFIX}) add_executable( @@ -88,5 +97,4 @@ TARGET_LINK_LIBRARIES(${OPENCL_H_OUT} ${CLConform_LIBRARIES}) ######################################################################################## - # end of file # diff --git a/test_conformance/images/clFillImage/test_fill_2D_array.cpp b/test_conformance/images/clFillImage/test_fill_2D_array.cpp index 2ec35f6b..d88a49f0 100644 --- a/test_conformance/images/clFillImage/test_fill_2D_array.cpp +++ b/test_conformance/images/clFillImage/test_fill_2D_array.cpp @@ -152,9 +152,27 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format, imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); - log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); - if ( gDebugTrace ) - log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + cl_ulong size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + + while (size > maxAllocSize || (size * 3) > memSize) { + if (imageInfo.arraySize == 1) { + // arraySize cannot be 0. + break; + } + imageInfo.arraySize--; + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + } + + while (size > maxAllocSize || (size * 3) > memSize) { + imageInfo.height--; + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + } + + log_info( "Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize); + if ( test_fill_image_2D_array( device, &imageInfo, outputType, seed ) ) return -1; } diff --git a/test_conformance/images/clGetInfo/main.cpp b/test_conformance/images/clGetInfo/main.cpp index df3a03e1..3deeed49 100644 --- a/test_conformance/images/clGetInfo/main.cpp +++ b/test_conformance/images/clGetInfo/main.cpp @@ -57,11 +57,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i return test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); } -int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); } -int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); } @@ -70,16 +70,16 @@ basefn basefn_list[] = { test_1D, test_2D, test_3D, - test_1DArray, - test_2DArray, + test_1Darray, + test_2Darray, }; const char *basefn_names[] = { "1D", "2D", "3D", - "1DArray", - "2DArray", + "1Darray", + "2Darray", }; ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); diff --git a/test_conformance/images/clReadWriteImage/main.cpp b/test_conformance/images/clReadWriteImage/main.cpp index 260ba791..cfea18e2 100644 --- a/test_conformance/images/clReadWriteImage/main.cpp +++ b/test_conformance/images/clReadWriteImage/main.cpp @@ -52,11 +52,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i { return test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); } -int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); } -int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); } @@ -65,16 +65,16 @@ basefn basefn_list[] = { test_1D, test_2D, test_3D, - test_1DArray, - test_2DArray, + test_1Darray, + test_2Darray, }; const char *basefn_names[] = { "1D", "2D", "3D", - "1DArray", - "2DArray", + "1Darray", + "2Darray", }; ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); diff --git a/test_conformance/images/kernel_image_methods/main.cpp b/test_conformance/images/kernel_image_methods/main.cpp index 22cdd698..e10da63d 100644 --- a/test_conformance/images/kernel_image_methods/main.cpp +++ b/test_conformance/images/kernel_image_methods/main.cpp @@ -52,11 +52,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i { return test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); } -int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); } -int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); } @@ -65,16 +65,16 @@ basefn basefn_list[] = { test_1D, test_2D, test_3D, - test_1DArray, - test_2DArray, + test_1Darray, + test_2Darray, }; const char *basefn_names[] = { "1D", "2D", "3D", - "1DArray", - "2DArray", + "1Darray", + "2Darray", }; ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); diff --git a/test_conformance/images/samplerlessReads/main.cpp b/test_conformance/images/samplerlessReads/main.cpp index 587e612d..f14d4e1f 100644 --- a/test_conformance/images/samplerlessReads/main.cpp +++ b/test_conformance/images/samplerlessReads/main.cpp @@ -65,11 +65,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i { return test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); } -int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); } -int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); } @@ -78,16 +78,16 @@ basefn basefn_list[] = { test_1D, test_2D, test_3D, - test_1DArray, - test_2DArray, + test_1Darray, + test_2Darray, }; const char *basefn_names[] = { "1D", "2D", "3D", - "1DArray", - "2DArray", + "1Darray", + "2Darray", }; ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 2f4900c2..9eb2f8ae 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -23,6 +23,10 @@ set(${MODULE_NAME}_SOURCES ../../test_common/harness/ThreadPool.c ../../test_common/harness/mt19937.c ../../test_common/harness/msvc9.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/parseParameters.cpp ) @@ -46,26 +50,7 @@ set_source_files_properties( endif(NOT ANDROID) set_source_files_properties( - FunctionList.c - Sleep.c - binary.c - binaryOperator.c - Utility.c - binary_i.c - binary_two_results_i.c - i_unary.c - macro_binary.c - macro_unary.c - mad.c - main.c - reference_math.c - ternary.c - unary.c - unary_two_results.c - unary_two_results_i.c unary_u.c - ../../test_common/harness/rounding_mode.c - ../../test_common/harness/ThreadPool.c - ../../test_common/harness/msvc9.c + ${MODULE_NAME}_SOURCES PROPERTIES LANGUAGE CXX) if(CMAKE_COMPILER_IS_GNUCC) diff --git a/test_conformance/math_brute_force/FunctionList.h b/test_conformance/math_brute_force/FunctionList.h index 346654fb..b5ddb715 100644 --- a/test_conformance/math_brute_force/FunctionList.h +++ b/test_conformance/math_brute_force/FunctionList.h @@ -85,7 +85,7 @@ typedef struct Func float relaxed_error; int ftz; int relaxed; - const ::vtbl *vtbl; + const vtbl *vtbl_ptr; }Func; diff --git a/test_conformance/math_brute_force/Utility.h b/test_conformance/math_brute_force/Utility.h index 13651037..0dd1826c 100644 --- a/test_conformance/math_brute_force/Utility.h +++ b/test_conformance/math_brute_force/Utility.h @@ -26,11 +26,7 @@ #include #include "../../test_common/harness/rounding_mode.h" #include "../../test_common/harness/fpcontrol.h" - -#if defined( _WIN32) && defined (_MSC_VER) #include "../../test_common/harness/testHarness.h" -#endif - #include "../../test_common/harness/ThreadPool.h" #define BUFFER_SIZE (1024*1024*2) @@ -112,7 +108,7 @@ extern "C" { float Abs_Error( float test, double reference ); float Ulp_Error( float test, double reference ); //float Ulp_Error_Half( float test, double reference ); -float Ulp_Error_Double( double test, long double reference ); +float Bruteforce_Ulp_Error_Double( double test, long double reference ); #ifdef __cplusplus } //extern "C" #endif diff --git a/test_conformance/math_brute_force/binary.c b/test_conformance/math_brute_force/binary.c index 5800aaf5..7bf136ff 100644 --- a/test_conformance/math_brute_force/binary.c +++ b/test_conformance/math_brute_force/binary.c @@ -233,6 +233,7 @@ typedef struct TestInfo cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] ThreadInfo *tinfo; // An array of thread specific information for each worker thread cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps @@ -268,6 +269,16 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter) test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -341,7 +352,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter) // Run the kernels if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) @@ -991,6 +1002,16 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; @@ -1063,7 +1084,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) @@ -1359,7 +1380,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) { cl_double test = ((cl_double*) q)[j]; long double correct = func.f_ff( s[j], s2[j] ); - float err = Ulp_Error_Double( test, correct ); + float err = Bruteforce_Ulp_Error_Double( test, correct ); int fail = ! (fabsf(err) <= ulps); if( fail && ftz ) @@ -1399,8 +1420,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) { long double correct2 = func.f_ff( 0.0, s2[j] ); long double correct3 = func.f_ff( -0.0, s2[j] ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; @@ -1422,10 +1443,10 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) correct3 = func.f_ff( -0.0, 0.0 ); long double correct4 = func.f_ff( 0.0, -0.0 ); long double correct5 = func.f_ff( -0.0, -0.0 ); - err2 = Ulp_Error_Double( test, correct2 ); - err3 = Ulp_Error_Double( test, correct3 ); - float err4 = Ulp_Error_Double( test, correct4 ); - float err5 = Ulp_Error_Double( test, correct5 ); + err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); + float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); + float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); if( fabsf( err2 ) < fabsf(err ) ) @@ -1451,8 +1472,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) { long double correct2 = func.f_ff( s[j], 0.0 ); long double correct3 = func.f_ff( s[j], -0.0 ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; diff --git a/test_conformance/math_brute_force/binaryOperator.c b/test_conformance/math_brute_force/binaryOperator.c index 718afd3d..09946cbe 100644 --- a/test_conformance/math_brute_force/binaryOperator.c +++ b/test_conformance/math_brute_force/binaryOperator.c @@ -207,6 +207,7 @@ typedef struct TestInfo cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] ThreadInfo *tinfo; // An array of thread specific information for each worker thread cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps @@ -260,6 +261,16 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d) } test_info.step = test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -329,7 +340,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) @@ -501,63 +512,51 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount; int indx = (totalSpecialValueCount - 1) / buffer_elements; - if( job_id <= (cl_uint)indx ) - { // test edge cases - float *fp = (float *)p; - float *fp2 = (float *)p2; + + if( job_id <= (cl_uint)indx ) { + // Insert special values uint32_t x, y; - x = (job_id * buffer_elements) % specialValuesFloatCount; - y = (job_id * buffer_elements) / specialValuesFloatCount; + x = (job_id * buffer_elements) % specialValuesFloatCount; + y = (job_id * buffer_elements) / specialValuesFloatCount; - for( ; j < buffer_elements; j++ ) - { - fp[j] = specialValuesFloat[x]; - fp2[j] = specialValuesFloat[y]; - if( ++x >= specialValuesFloatCount ) - { + for( ; j < buffer_elements; j++ ) { + p[j] = ((cl_uint *)specialValuesFloat)[x]; + p2[j] = ((cl_uint *)specialValuesFloat)[y]; + ++x; + if (x >= specialValuesFloatCount) { x = 0; y++; - if( y >= specialValuesFloatCount ) + if (y >= specialValuesFloatCount) break; } - if(gTestFastRelaxed && strcmp(name,"divide") == 0 ) - { - float fpj = *(float*)&fp[j]; - float fpj2 = *(float*)&fp2[j]; - if(fabs(fpj) > 0x5E800000 ) //[2^-62,2^62] - { - fp[j] = NAN; - } - if( fabs(fpj2) > 0x5E800000 ) //[2^-62,2^62] - { - fp2[j] = NAN; - } + if (gTestFastRelaxed && strcmp(name,"divide") == 0) { + cl_uint pj = p[j] & 0x7fffffff; + cl_uint p2j = p2[j] & 0x7fffffff; + // Replace values outside [2^-62, 2^62] with QNaN + if (pj < 0x20800000 || pj > 0x5e800000) + p[j] = 0x7fc00000; + if (p2j < 0x20800000 || p2j > 0x5e800000) + p2[j] = 0x7fc00000; + } } } - } - //Init any remaining values. + // Init any remaining values. for( ; j < buffer_elements; j++ ) { p[j] = genrand_int32(d); p2[j] = genrand_int32(d); - if(gTestFastRelaxed) - { - if( strcmp(name,"divide")==0){ - float pj = *(float*)&p[j]; - float pj2 = *(float*)&p2[j]; - if(fabs(pj) > 0x5E800000 ) //[2^-62,2^62] - { - p[j] = NAN; - } - if( fabs(pj2) > 0x5E800000 ) //[2^-62,2^62] - { - p2[j] = NAN; - } - } - } + if (gTestFastRelaxed && strcmp(name,"divide") == 0) { + cl_uint pj = p[j] & 0x7fffffff; + cl_uint p2j = p2[j] & 0x7fffffff; + // Replace values outside [2^-62, 2^62] with QNaN + if (pj < 0x20800000 || pj > 0x5e800000) + p[j] = 0x7fc00000; + if (p2j < 0x20800000 || p2j > 0x5e800000) + p2[j] = 0x7fc00000; + } } if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) @@ -950,6 +949,16 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d) } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; @@ -1020,7 +1029,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) @@ -1315,7 +1324,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) { cl_double test = ((cl_double*) q)[j]; long double correct = func.f_ff( s[j], s2[j] ); - float err = Ulp_Error_Double( test, correct ); + float err = Bruteforce_Ulp_Error_Double( test, correct ); int fail = ! (fabsf(err) <= ulps); if( fail && ftz ) @@ -1334,8 +1343,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) { long double correct2 = func.f_ff( 0.0, s2[j] ); long double correct3 = func.f_ff( -0.0, s2[j] ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; @@ -1357,10 +1366,10 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) correct3 = func.f_ff( -0.0, 0.0 ); long double correct4 = func.f_ff( 0.0, -0.0 ); long double correct5 = func.f_ff( -0.0, -0.0 ); - err2 = Ulp_Error_Double( test, correct2 ); - err3 = Ulp_Error_Double( test, correct3 ); - float err4 = Ulp_Error_Double( test, correct4 ); - float err5 = Ulp_Error_Double( test, correct5 ); + err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); + float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); + float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); if( fabsf( err2 ) < fabsf(err ) ) @@ -1386,8 +1395,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) { long double correct2 = func.f_ff( s[j], 0.0 ); long double correct3 = func.f_ff( s[j], -0.0 ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; diff --git a/test_conformance/math_brute_force/binary_i.c b/test_conformance/math_brute_force/binary_i.c index b72d117f..314d65dd 100644 --- a/test_conformance/math_brute_force/binary_i.c +++ b/test_conformance/math_brute_force/binary_i.c @@ -230,6 +230,7 @@ typedef struct TestInfo cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] ThreadInfo *tinfo; // An array of thread specific information for each worker thread cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps @@ -262,6 +263,16 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d) test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -330,7 +341,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d) } // Run the kernels - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors @@ -758,6 +769,16 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d) test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; @@ -831,7 +852,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d) // Run the kernels if( !gSkipCorrectnessTesting ) - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors @@ -1128,7 +1149,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) { cl_double test = ((cl_double*) q)[j]; long double correct = func.f_fi( s[j], s2[j] ); - float err = Ulp_Error_Double( test, correct ); + float err = Bruteforce_Ulp_Error_Double( test, correct ); int fail = ! (fabsf(err) <= ulps); if( fail && ftz ) @@ -1146,8 +1167,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) { long double correct2 = func.f_fi( 0.0, s2[j] ); long double correct3 = func.f_fi( -0.0, s2[j] ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; diff --git a/test_conformance/math_brute_force/binary_two_results_i.c b/test_conformance/math_brute_force/binary_two_results_i.c index ab06c450..b1e80ede 100644 --- a/test_conformance/math_brute_force/binary_two_results_i.c +++ b/test_conformance/math_brute_force/binary_two_results_i.c @@ -871,7 +871,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d) double test = ((double*) q)[j]; int correct2 = INT_MIN; long double correct = f->dfunc.f_ffpI( s[j], s2[j], &correct2 ); - float err = Ulp_Error_Double( test, correct ); + float err = Bruteforce_Ulp_Error_Double( test, correct ); int64_t iErr; // in case of remquo, we only care about the sign and last seven bits of @@ -907,8 +907,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d) int correct3i, correct4i; long double correct3 = f->dfunc.f_ffpI( 0.0, s2[j], &correct3i ); long double correct4 = f->dfunc.f_ffpI( -0.0, s2[j], &correct4i ); - float err2 = Ulp_Error_Double( test, correct3 ); - float err3 = Ulp_Error_Double( test, correct4 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct3 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct4 ); int64_t iErr3 = (long long) q2[j] - (long long) correct3i; int64_t iErr4 = (long long) q2[j] - (long long) correct4i; fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0))); @@ -937,10 +937,10 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d) correct4 = f->dfunc.f_ffpI( -0.0, 0.0, &correct4i ); long double correct7 = f->dfunc.f_ffpI( 0.0, -0.0, &correct7i ); long double correct8 = f->dfunc.f_ffpI( -0.0, -0.0, &correct8i ); - err2 = Ulp_Error_Double( test, correct3 ); - err3 = Ulp_Error_Double( test, correct4 ); - float err4 = Ulp_Error_Double( test, correct7 ); - float err5 = Ulp_Error_Double( test, correct8 ); + err2 = Bruteforce_Ulp_Error_Double( test, correct3 ); + err3 = Bruteforce_Ulp_Error_Double( test, correct4 ); + float err4 = Bruteforce_Ulp_Error_Double( test, correct7 ); + float err5 = Bruteforce_Ulp_Error_Double( test, correct8 ); iErr3 = (long long) q2[j] - (long long) correct3i; iErr4 = (long long) q2[j] - (long long) correct4i; int64_t iErr7 = (long long) q2[j] - (long long) correct7i; @@ -979,8 +979,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d) int correct3i, correct4i; long double correct3 = f->dfunc.f_ffpI( s[j], 0.0, &correct3i ); long double correct4 = f->dfunc.f_ffpI( s[j], -0.0, &correct4i ); - float err2 = Ulp_Error_Double( test, correct3 ); - float err3 = Ulp_Error_Double( test, correct4 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct3 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct4 ); int64_t iErr3 = (long long) q2[j] - (long long) correct3i; int64_t iErr4 = (long long) q2[j] - (long long) correct4i; fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0))); diff --git a/test_conformance/math_brute_force/macro_binary.c b/test_conformance/math_brute_force/macro_binary.c index e489c598..b65948ee 100644 --- a/test_conformance/math_brute_force/macro_binary.c +++ b/test_conformance/math_brute_force/macro_binary.c @@ -222,6 +222,7 @@ typedef struct TestInfo cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] ThreadInfo *tinfo; // An array of thread specific information for each worker thread cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode @@ -249,6 +250,16 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d) test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -319,7 +330,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d) // Run the kernels if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); if( error ) goto exit; @@ -749,6 +760,16 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d) } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ftz = f->ftz || gForceFTZ; @@ -820,7 +841,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); if( error ) goto exit; diff --git a/test_conformance/math_brute_force/macro_unary.c b/test_conformance/math_brute_force/macro_unary.c index 74e1dece..95af882a 100644 --- a/test_conformance/math_brute_force/macro_unary.c +++ b/test_conformance/math_brute_force/macro_unary.c @@ -193,6 +193,7 @@ typedef struct TestInfo cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] ThreadInfo *tinfo; // An array of thread specific information for each worker thread cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode @@ -220,6 +221,16 @@ int TestMacro_Int_Float(const Func *f, MTdata d) test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); // cl_kernels aren't thread safe, so we make one for each vector size for every thread @@ -279,7 +290,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); if( error ) goto exit; @@ -602,6 +613,16 @@ int TestMacro_Int_Double(const Func *f, MTdata d) } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ftz = f->ftz || gForceFTZ; @@ -664,7 +685,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); if( error ) goto exit; diff --git a/test_conformance/math_brute_force/mad.c b/test_conformance/math_brute_force/mad.c index 844da097..652ab360 100644 --- a/test_conformance/math_brute_force/mad.c +++ b/test_conformance/math_brute_force/mad.c @@ -785,7 +785,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d) { double test = ((double*) q)[j]; long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] ); - float err = Ulp_Error_Double( test, correct ); + float err = Bruteforce_Ulp_Error_Double( test, correct ); int fail = ! (fabsf(err) <= f->double_ulps); if( fail && ftz ) @@ -803,8 +803,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d) { // look at me, long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] ); long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; @@ -826,10 +826,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d) correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] ); long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] ); long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] ); - err2 = Ulp_Error_Double( test, correct2 ); - err3 = Ulp_Error_Double( test, correct3 ); - float err4 = Ulp_Error_Double( test, correct4 ); - float err5 = Ulp_Error_Double( test, correct5 ); + err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); + float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); + float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) @@ -860,14 +860,14 @@ int TestFunc_mad_Double(const Func *f, MTdata d) long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f ); long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f ); long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f ); - err2 = Ulp_Error_Double( test, correct2 ); - err3 = Ulp_Error_Double( test, correct3 ); - err4 = Ulp_Error_Double( test, correct4 ); - err5 = Ulp_Error_Double( test, correct5 ); - float err6 = Ulp_Error_Double( test, correct6 ); - float err7 = Ulp_Error_Double( test, correct7 ); - float err8 = Ulp_Error_Double( test, correct8 ); - float err9 = Ulp_Error_Double( test, correct9 ); + err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); + err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); + err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); + float err6 = Bruteforce_Ulp_Error_Double( test, correct6 ); + float err7 = Bruteforce_Ulp_Error_Double( test, correct7 ); + float err8 = Bruteforce_Ulp_Error_Double( test, correct8 ); + float err9 = Bruteforce_Ulp_Error_Double( test, correct9 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) && @@ -907,10 +907,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d) correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 ); long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 ); long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 ); - err2 = Ulp_Error_Double( test, correct2 ); - err3 = Ulp_Error_Double( test, correct3 ); - float err4 = Ulp_Error_Double( test, correct4 ); - float err5 = Ulp_Error_Double( test, correct5 ); + err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); + float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); + float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) @@ -936,8 +936,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d) { long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] ); long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; @@ -959,10 +959,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d) correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 ); long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 ); long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 ); - err2 = Ulp_Error_Double( test, correct2 ); - err3 = Ulp_Error_Double( test, correct3 ); - float err4 = Ulp_Error_Double( test, correct4 ); - float err5 = Ulp_Error_Double( test, correct5 ); + err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); + float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); + float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) @@ -988,8 +988,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d) { long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 ); long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; diff --git a/test_conformance/math_brute_force/main.c b/test_conformance/math_brute_force/main.c index 6e34071b..92600e9a 100644 --- a/test_conformance/math_brute_force/main.c +++ b/test_conformance/math_brute_force/main.c @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. // +#include "../../test_common/harness/parseParameters.h" #include "Utility.h" #include @@ -39,6 +40,8 @@ #include #endif +#include "../../test_common/harness/testHarness.h" + #define kPageSize 4096 #define DOUBLE_REQUIRED_FEATURES ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM ) @@ -51,10 +54,11 @@ cl_context gContext = NULL; cl_command_queue gQueue = NULL; int gTestCount = 0; int gFailCount = 0; -int32_t gStartTestNumber = -1; -int32_t gEndTestNumber = -1; +static int32_t gStartTestNumber; +static int32_t gEndTestNumber; int gSkipCorrectnessTesting = 0; int gStopOnError = 0; +static bool gSkipRestOfTests; #if defined( __APPLE__ ) int gMeasureTimes = 1; #else @@ -78,8 +82,6 @@ int gDeviceILogb0 = 1; int gDeviceILogbNaN = 1; int gCheckTininessBeforeRounding = 1; int gIsInRTZMode = 0; -int gInfNanSupport = 1; -int gIsEmbedded = 0; uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT; uint32_t gMinVectorSizeIndex = 0; const char *method[] = { "Best", "Average" }; @@ -100,7 +102,7 @@ uint32_t gSimdSize = 1; uint32_t gDeviceFrequency = 0; cl_uint chosen_device_index = 0; cl_uint chosen_platform_index = 0; -cl_uint gRandomSeed = 0; +static MTdata gMTdata; cl_device_fp_config gFloatCapabilities = 0; cl_device_fp_config gDoubleCapabilities = 0; int gWimpyReductionFactor = 32; @@ -136,11 +138,786 @@ static int IsTininessDetectedBeforeRounding( void ); static int IsInRTZMode( void ); //expensive. Please check gIsInRTZMode global instead. static void TestFinishAtExit(void); + +int doTest( const char* name ) +{ + if( gSkipRestOfTests ) + { + vlog( "Skipping function because of an earlier error.\n" ); + return 1; + } + + int error = 0; + const Func* func_data = NULL; + + for( size_t i = 0; i < functionListCount; i++ ) + { + const Func* const temp_func = functionList + i; + if( strcmp( temp_func->name, name ) == 0 ) + { + if( i < gStartTestNumber || i > gEndTestNumber ) + { + vlog( "Skipping function #%d\n", i ); + return 0; + } + + func_data = temp_func; + break; + } + } + + if( func_data == NULL ) + { + vlog( "Function '%s' doesn't exist!\n", name ); + exit( EXIT_FAILURE ); + } + + if( func_data->func.p == NULL ) + { + vlog( "'%s' is missing implementation, skipping function.\n", func_data->name ); + return 0; + } + + // if correctly rounded divide & sqrt are supported by the implementation + // then test it; otherwise skip the test + if( strcmp( func_data->name, "sqrt_cr" ) == 0 || strcmp( func_data->name, "divide_cr" ) == 0 ) + { + if( ( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) == 0 ) + { + vlog( "Correctly rounded divide and sqrt are not supported, skipping function.\n" ); + return 0; + } + } + + { + extern int my_ilogb(double); + if( 0 == strcmp( "ilogb", func_data->name ) ) + { + InitILogbConstants(); + } + + if ( gTestFastRelaxed ) + { + if( func_data->relaxed ) + { + gTestCount++; + vlog( "%3d: ", gTestCount ); + if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata ) ) + { + gFailCount++; + error++; + if( gStopOnError ) + { + gSkipRestOfTests = true; + return error; + } + } + } + } + + if( gTestFloat ) + { + int testFastRelaxedTmp = gTestFastRelaxed; + gTestFastRelaxed = 0; + + gTestCount++; + vlog( "%3d: ", gTestCount ); + if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata ) ) + { + gFailCount++; + error++; + if( gStopOnError ) + { + gTestFastRelaxed = testFastRelaxedTmp; + gSkipRestOfTests = true; + return error; + } + } + gTestFastRelaxed = testFastRelaxedTmp; + } + + if( gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc && NULL != func_data->dfunc.p ) + { + //Disable fast-relaxed-math for double precision floating-point + int testFastRelaxedTmp = gTestFastRelaxed; + gTestFastRelaxed = 0; + + gTestCount++; + vlog( "%3d: ", gTestCount ); + if( func_data->vtbl_ptr->DoubleTestFunc( func_data, gMTdata ) ) + { + gFailCount++; + error++; + if( gStopOnError ) + { + gTestFastRelaxed = testFastRelaxedTmp; + gSkipRestOfTests = true; + return error; + } + } + + //Re-enable testing fast-relaxed-math mode + gTestFastRelaxed = testFastRelaxedTmp; + } + +#if defined( __APPLE__ ) + { + if( gHasBasicDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc && NULL != func_data->dfunc.p) + { + //Disable fast-relaxed-math for double precision floating-point + int testFastRelaxedTmp = gTestFastRelaxed; + gTestFastRelaxed = 0; + + int isBasicTest = 0; + for( j = 0; j < gNumBasicDoubleFuncs; j++ ) { + if( 0 == strcmp(gBasicDoubleFuncs[j], func_data->name ) ) { + isBasicTest = 1; + break; + } + } + if (isBasicTest) { + gTestCount++; + if( gTestFloat ) + vlog( " " ); + if( func_data->vtbl->DoubleTestFunc( func_data, gMTdata ) ) + { + gFailCount++; + error++; + if( gStopOnError ) + { + gTestFastRelaxed = testFastRelaxedTmp; + gSkipRestOfTests = true; + return error; + } + } + } + + //Re-enable testing fast-relaxed-math mode + gTestFastRelaxed = testFastRelaxedTmp; + } + } +#endif + } + + return error; +} + +int test_acos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "acos" ); +} +int test_acosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "acosh" ); +} +int test_acospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "acospi" ); +} +int test_asin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "asin" ); +} +int test_asinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "asinh" ); +} +int test_asinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "asinpi" ); +} +int test_atan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "atan" ); +} +int test_atanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "atanh" ); +} +int test_atanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "atanpi" ); +} +int test_atan2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "atan2" ); +} +int test_atan2pi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "atan2pi" ); +} +int test_cbrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "cbrt" ); +} +int test_ceil( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "ceil" ); +} +int test_copysign( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "copysign" ); +} +int test_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "cos" ); +} +int test_cosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "cosh" ); +} +int test_cospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "cospi" ); +} +int test_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "exp" ); +} +int test_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "exp2" ); +} +int test_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "exp10" ); +} +int test_expm1( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "expm1" ); +} +int test_fabs( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "fabs" ); +} +int test_fdim( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "fdim" ); +} +int test_floor( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "floor" ); +} +int test_fma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "fma" ); +} +int test_fmax( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "fmax" ); +} +int test_fmin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "fmin" ); +} +int test_fmod( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "fmod" ); +} +int test_fract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "fract" ); +} +int test_frexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "frexp" ); +} +int test_hypot( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "hypot" ); +} +int test_ilogb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "ilogb" ); +} +int test_isequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "isequal" ); +} +int test_isfinite( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "isfinite" ); +} +int test_isgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "isgreater" ); +} +int test_isgreaterequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "isgreaterequal" ); +} +int test_isinf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "isinf" ); +} +int test_isless( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "isless" ); +} +int test_islessequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "islessequal" ); +} +int test_islessgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "islessgreater" ); +} +int test_isnan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "isnan" ); +} +int test_isnormal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "isnormal" ); +} +int test_isnotequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "isnotequal" ); +} +int test_isordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "isordered" ); +} +int test_isunordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "isunordered" ); +} +int test_ldexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "ldexp" ); +} +int test_lgamma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "lgamma" ); +} +int test_lgamma_r( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "lgamma_r" ); +} +int test_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "log" ); +} +int test_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "log2" ); +} +int test_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "log10" ); +} +int test_log1p( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "log1p" ); +} +int test_logb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "logb" ); +} +int test_mad( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "mad" ); +} +int test_maxmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "maxmag" ); +} +int test_minmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "minmag" ); +} +int test_modf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "modf" ); +} +int test_nan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "nan" ); +} +int test_nextafter( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "nextafter" ); +} +int test_pow( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "pow" ); +} +int test_pown( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "pown" ); +} +int test_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "powr" ); +} +int test_remainder( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "remainder" ); +} +int test_remquo( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "remquo" ); +} +int test_rint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "rint" ); +} +int test_rootn( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "rootn" ); +} +int test_round( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "round" ); +} +int test_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "rsqrt" ); +} +int test_signbit( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "signbit" ); +} +int test_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "sin" ); +} +int test_sincos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "sincos" ); +} +int test_sinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "sinh" ); +} +int test_sinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "sinpi" ); +} +int test_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "sqrt" ); +} +int test_sqrt_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "sqrt_cr" ); +} +int test_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "tan" ); +} +int test_tanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "tanh" ); +} +int test_tanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "tanpi" ); +} +int test_trunc( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "trunc" ); +} +int test_half_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_cos" ); +} +int test_half_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_divide" ); +} +int test_half_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_exp" ); +} +int test_half_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_exp2" ); +} +int test_half_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_exp10" ); +} +int test_half_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_log" ); +} +int test_half_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_log2" ); +} +int test_half_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_log10" ); +} +int test_half_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_powr" ); +} +int test_half_recip( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_recip" ); +} +int test_half_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_rsqrt" ); +} +int test_half_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_sin" ); +} +int test_half_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_sqrt" ); +} +int test_half_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "half_tan" ); +} +int test_add( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "add" ); +} +int test_subtract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "subtract" ); +} +int test_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "divide" ); +} +int test_divide_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "divide_cr" ); +} +int test_multiply( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "multiply" ); +} +int test_assignment( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "assignment" ); +} +int test_not( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + return doTest( "not" ); +} + +basefn basefn_list[] = { + test_acos, + test_acosh, + test_acospi, + test_asin, + test_asinh, + test_asinpi, + test_atan, + test_atanh, + test_atanpi, + test_atan2, + test_atan2pi, + test_cbrt, + test_ceil, + test_copysign, + test_cos, + test_cosh, + test_cospi, + test_exp, + test_exp2, + test_exp10, + test_expm1, + test_fabs, + test_fdim, + test_floor, + test_fma, + test_fmax, + test_fmin, + test_fmod, + test_fract, + test_frexp, + test_hypot, + test_ilogb, + test_isequal, + test_isfinite, + test_isgreater, + test_isgreaterequal, + test_isinf, + test_isless, + test_islessequal, + test_islessgreater, + test_isnan, + test_isnormal, + test_isnotequal, + test_isordered, + test_isunordered, + test_ldexp, + test_lgamma, + test_lgamma_r, + test_log, + test_log2, + test_log10, + test_log1p, + test_logb, + test_mad, + test_maxmag, + test_minmag, + test_modf, + test_nan, + test_nextafter, + test_pow, + test_pown, + test_powr, + test_remainder, + test_remquo, + test_rint, + test_rootn, + test_round, + test_rsqrt, + test_signbit, + test_sin, + test_sincos, + test_sinh, + test_sinpi, + test_sqrt, + test_sqrt_cr, + test_tan, + test_tanh, + test_tanpi, + test_trunc, + test_half_cos, + test_half_divide, + test_half_exp, + test_half_exp2, + test_half_exp10, + test_half_log, + test_half_log2, + test_half_log10, + test_half_powr, + test_half_recip, + test_half_rsqrt, + test_half_sin, + test_half_sqrt, + test_half_tan, + test_add, + test_subtract, + test_divide, + test_divide_cr, + test_multiply, + test_assignment, + test_not, +}; + +const char *basefn_names[] = { + "acos", + "acosh", + "acospi", + "asin", + "asinh", + "asinpi", + "atan", + "atanh", + "atanpi", + "atan2", + "atan2pi", + "cbrt", + "ceil", + "copysign", + "cos", + "cosh", + "cospi", + "exp", + "exp2", + "exp10", + "expm1", + "fabs", + "fdim", + "floor", + "fma", + "fmax", + "fmin", + "fmod", + "fract", + "frexp", + "hypot", + "ilogb", + "isequal", + "isfinite", + "isgreater", + "isgreaterequal", + "isinf", + "isless", + "islessequal", + "islessgreater", + "isnan", + "isnormal", + "isnotequal", + "isordered", + "isunordered", + "ldexp", + "lgamma", + "lgamma_r", + "log", + "log2", + "log10", + "log1p", + "logb", + "mad", + "maxmag", + "minmag", + "modf", + "nan", + "nextafter", + "pow", + "pown", + "powr", + "remainder", + "remquo", + "rint", + "rootn", + "round", + "rsqrt", + "signbit", + "sin", + "sincos", + "sinh", + "sinpi", + "sqrt", + "sqrt_cr", + "tan", + "tanh", + "tanpi", + "trunc", + "half_cos", + "half_divide", + "half_exp", + "half_exp2", + "half_exp10", + "half_log", + "half_log2", + "half_log10", + "half_powr", + "half_recip", + "half_rsqrt", + "half_sin", + "half_sqrt", + "half_tan", + "add", + "subtract", + "divide", + "divide_cr", + "multiply", + "assignment", + "not", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +const int num_fns = sizeof(basefn_names) / sizeof(char *); + #pragma mark - int main (int argc, const char * argv[]) { - unsigned int i, j, error = 0; + int error; test_start(); atexit(TestFinishAtExit); @@ -178,7 +955,7 @@ int main (int argc, const char * argv[]) vlog( " \t "); if( gWimpyMode ) vlog( " " ); - for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + for( int i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) vlog( "\t float%s", sizeNames[i] ); } else @@ -192,147 +969,21 @@ int main (int argc, const char * argv[]) vlog( "\n-----------------------------------------------------------------------------------------------------------\n" ); - uint32_t start = 0; - if( gStartTestNumber > (int) start ) + gMTdata = init_genrand( gRandomSeed ); + if( gEndTestNumber == 0 ) { - vlog( "Skipping to test %d...\n", gStartTestNumber ); - start = gStartTestNumber; + gEndTestNumber = functionListCount; } - uint32_t stop = (uint32_t) functionListCount; - MTdata d = init_genrand( gRandomSeed ); - if( gStartTestNumber <= gEndTestNumber && -1 != gEndTestNumber && (int) functionListCount > gEndTestNumber + 1) - stop = gEndTestNumber + 1; - FPU_mode_type oldMode; DisableFTZ( &oldMode ); - for( i = start; i < stop; i++ ) - { - const Func *f = functionList + i; - - // If the user passed a list of functions to run, make sure we are in that list - if( gTestNameCount ) - { - for( j = 0; j < gTestNameCount; j++ ) - if( 0 == strcmp(gTestNames[j], f->name ) ) - break; - - // If this function doesn't match any on the list skip to the next function - if( j == gTestNameCount ) - continue; - } - - // if correctly rounded divide & sqrt are supported by the implementation - // then test it; otherwise skip the test - if (!strcmp(f->name, "sqrt_cr") || !strcmp(f->name, "divide_cr")) - { - if(( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) == 0 ) - continue; - - } - - - { - extern int my_ilogb(double); - if( 0 == strcmp( "ilogb", f->name) ) - InitILogbConstants(); - - if ( gTestFastRelaxed ) - { - if( f->relaxed ) - { - gTestCount++; - vlog( "%3d: ", gTestCount ); - if( f->vtbl->TestFunc( f, d ) ) - { - gFailCount++; - error++; - if( gStopOnError ) - break; - } - } - } - - if( gTestFloat ) - { - int testFastRelaxedTmp = gTestFastRelaxed; - gTestFastRelaxed = 0; - gTestCount++; - vlog( "%3d: ", gTestCount ); - if( f->vtbl->TestFunc( f, d ) ) - { - gFailCount++; - error++; - if( gStopOnError ) - { - gTestFastRelaxed = testFastRelaxedTmp; - break; - } - } - gTestFastRelaxed = testFastRelaxedTmp; - } - - if( gHasDouble && NULL != f->vtbl->DoubleTestFunc && NULL != f->dfunc.p ) - { - //Disable fast-relaxed-math for double precision floating-point - int testFastRelaxedTmp = gTestFastRelaxed; - gTestFastRelaxed = 0; - - gTestCount++; - vlog( "%3d: ", gTestCount ); - if( f->vtbl->DoubleTestFunc( f, d ) ) - { - gFailCount++; - error++; - if( gStopOnError ) - break; - } - - //Re-enable testing fast-relaxed-math mode - gTestFastRelaxed = testFastRelaxedTmp; - } - -#if defined( __APPLE__ ) - { - if( gHasBasicDouble && NULL != f->vtbl->DoubleTestFunc && NULL != f->dfunc.p) - { - //Disable fast-relaxed-math for double precision floating-point - int testFastRelaxedTmp = gTestFastRelaxed; - gTestFastRelaxed = 0; - - int isBasicTest = 0; - for( j = 0; j < gNumBasicDoubleFuncs; j++ ) { - if( 0 == strcmp(gBasicDoubleFuncs[j], f->name ) ) { - isBasicTest = 1; - break; - } - } - if (isBasicTest) { - gTestCount++; - if( gTestFloat ) - vlog( " " ); - if( f->vtbl->DoubleTestFunc( f, d ) ) - { - gFailCount++; - error++; - if( gStopOnError ) - break; - } - } - - //Re-enable testing fast-relaxed-math mode - gTestFastRelaxed = testFastRelaxedTmp; - } - } -#endif - } - } + int ret = parseAndCallCommandLineTests( gTestNameCount, gTestNames, NULL, num_fns, basefn_list, basefn_names, true, 0, 0 ); RestoreFPState( &oldMode ); - free_mtdata(d); d = NULL; - vlog( "\ndone.\n" ); + free_mtdata(gMTdata); + free(gTestNames); int error_code = clFinish(gQueue); if (error_code) @@ -341,16 +992,16 @@ int main (int argc, const char * argv[]) if (gFailCount == 0) { if (gTestCount > 1) - vlog("PASSED %d of %d tests.\n", gTestCount, gTestCount); + vlog("PASSED %d of %d sub-tests.\n", gTestCount, gTestCount); else - vlog("PASSED test.\n"); + vlog("PASSED sub-test.\n"); } else if (gFailCount > 0) { if (gTestCount > 1) - vlog_error("FAILED %d of %d tests.\n", gFailCount, gTestCount); + vlog_error("FAILED %d of %d sub-tests.\n", gFailCount, gTestCount); else - vlog_error("FAILED test.\n"); + vlog_error("FAILED sub-test.\n"); } ReleaseCL(); @@ -363,23 +1014,22 @@ int main (int argc, const char * argv[]) vlog( "time: %f s\n", time ); #endif - - if (gFailCount > 0) - return -1; - return error; + return ret; } static int ParseArgs( int argc, const char **argv ) { int i; gTestNames = (const char**) calloc( argc - 1, sizeof( char*) ); - gTestNameCount = 0; + if( NULL == gTestNames ) + { + vlog( "Failed to allocate memory for gTestNames array.\n" ); + return 1; + } + gTestNames[0] = argv[0]; + gTestNameCount = 1; int singleThreaded = 0; - // Parse arg list - if( NULL == gTestNames && argc > 1 ) - return -1; - { // Extract the app name strncpy( appName, argv[0], MAXPATHLEN ); @@ -490,24 +1140,7 @@ static int ParseArgs( int argc, const char **argv ) break; case '[': - // wimpy reduction factor can be set with the option -[2^n] - // Default factor is 32, and n practically can be from 1 to 10 - { - const char *arg_temp = strchr(&arg[1], ']'); - if( arg_temp != 0) - { - int new_factor = atoi(&arg[1]); - arg=arg_temp; // Advance until ']' - if(new_factor && !(new_factor & (new_factor - 1))) - { - vlog( " WimpyReduction factor changed from %d to %d \n",gWimpyReductionFactor, new_factor); - gWimpyReductionFactor = new_factor; - }else - { - vlog( " Error in WimpyReduction factor %d, must be power of 2 \n",gWimpyReductionFactor); - } - } - } + parseWimpyReductionFactor(arg, gWimpyReductionFactor); break; case 'z': @@ -573,7 +1206,7 @@ static int ParseArgs( int argc, const char **argv ) long number = strtol( arg, &t, 0 ); if( t != arg ) { - if( -1 == gStartTestNumber ) + if( 0 == gStartTestNumber ) gStartTestNumber = (int32_t) number; else gEndTestNumber = gStartTestNumber + (int32_t) number; @@ -596,19 +1229,18 @@ static int ParseArgs( int argc, const char **argv ) if (k >= functionListCount) { //It may be a device type or rundomize parameter - if( 0 == strcmp(arg, "CL_DEVICE_TYPE_CPU")) { + if( 0 == strcmp(arg, "CL_DEVICE_TYPE_CPU")) { gDeviceType = CL_DEVICE_TYPE_CPU; - } else if( 0 == strcmp(arg, "CL_DEVICE_TYPE_GPU")) { + } else if( 0 == strcmp(arg, "CL_DEVICE_TYPE_GPU")) { gDeviceType = CL_DEVICE_TYPE_GPU; - } else if( 0 == strcmp(arg, "CL_DEVICE_TYPE_ACCELERATOR")) { + } else if( 0 == strcmp(arg, "CL_DEVICE_TYPE_ACCELERATOR")) { gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; - } else if( 0 == strcmp(arg, "randomize")) { + } else if( 0 == strcmp(arg, "randomize")) { gRandomSeed = (cl_uint) time( NULL ); vlog( "\nRandom seed: %u.\n", gRandomSeed ); - } else { - vlog_error("\nInvalid function name: %s\n", arg); - test_finish(); - exit(-1); + } else { + gTestNames[gTestNameCount] = arg; + gTestNameCount++; } } } @@ -690,6 +1322,8 @@ static void PrintArch( void ) vlog( "\tARCH:\tx86_64\n" ); #elif defined( __arm__ ) vlog( "\tARCH:\tarm\n" ); + #elif defined( __aarch64__ ) + vlog( "\tARCH:\taarch64\n"); #else vlog( "\tARCH:\tunknown\n" ); #endif @@ -770,7 +1404,7 @@ static void PrintUsage( void ) vlog( "\n" ); } -static void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data) +static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data) { vlog( "%s (%p, %zd, %p)\n", errinfo, private_info, cb, user_data ); } @@ -979,7 +1613,7 @@ static int InitCL( void ) else isEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE"); // we will verify this with a kernel below - gContext = clCreateContext( NULL, 1, &gDevice, notify_callback, NULL, &error ); + gContext = clCreateContext( NULL, 1, &gDevice, bruteforce_notify_callback, NULL, &error ); if( NULL == gContext || error ) { vlog_error( "clCreateContext failed. (%d) \n", error ); @@ -1137,7 +1771,7 @@ static int InitCL( void ) vlog( "\t\t All double results that do not match the reference result have their reported\n" ); vlog( "\t\t error inflated by 0.5 ulps to account for the fact that this system\n" ); vlog( "\t\t can not accurately represent the right result to an accuracy closer\n" ); - vlog( "\t\t than half an ulp. See comments in Ulp_Error_Double() for more details.\n\n" ); + vlog( "\t\t than half an ulp. See comments in Bruteforce_Ulp_Error_Double() for more details.\n\n" ); } #if defined( __APPLE__ ) vlog( "\tTesting basic double precision? %s\n", no_yes[0 != gHasBasicDouble] ); @@ -1512,7 +2146,7 @@ const char *sizeNames[ VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" }; const int sizeValues[ VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; // TODO: There is another version of Ulp_Error_Double defined in test_common/harness/errorHelpers.c -float Ulp_Error_Double( double test, long double reference ) +float Bruteforce_Ulp_Error_Double( double test, long double reference ) { //Check for Non-power-of-two and NaN @@ -1609,74 +2243,6 @@ float Ulp_Error_Double( double test, long double reference ) return result; } - -float Ulp_Error( float test, double reference ) -{ - union{ double d; uint64_t u; }u; u.d = reference; - double testVal = test; - - // Note: This function presumes that someone has already tested whether the result is correctly, - // rounded before calling this function. That test: - // - // if( (float) reference == test ) - // return 0.0f; - // - // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here. - // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded - // results. - - - if( isinf( reference ) ) - { - if( testVal == reference ) - return 0.0f; - - return (float) (testVal - reference ); - } - - if( isinf( testVal) ) - { // infinite test value, but finite (but possibly overflowing in float) reference. - // - // The function probably overflowed prematurely here. Formally, the spec says this is - // an infinite ulp error and should not be tolerated. Unfortunately, this would mean - // that the internal precision of some half_pow implementations would have to be 29+ bits - // at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 ) - // is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which - // after rounding to single is 4*32 = 128, which will ultimately result in premature - // overflow, even though a good faith representation would be correct to within 2**-29 - // interally. - - // In the interest of not requiring the implementation go to extraordinary lengths to - // deliver a half precision function, we allow premature overflow within the limit - // of the allowed ulp error. Towards, that end, we "pretend" the test value is actually - // 2**128, the next value that would appear in the number line if float had sufficient range. - testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal ); - - // Note that the same hack may not work in long double, which is not guaranteed to have - // more range than double. It is not clear that premature overflow should be tolerated for - // double. - } - - if( u.u & 0x000fffffffffffffULL ) - { // Non-power of two and NaN - if( isnan( reference ) && isnan( test ) ) - return 0.0f; // if we are expecting a NaN, any NaN is fine - - // The unbiased exponent of the ulp unit place - int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 ); - - // Scale the exponent of the error - return (float) scalbn( testVal - reference, ulp_exp ); - } - - // reference is a normal power of two or a zero - // The unbiased exponent of the ulp unit place - int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 ); - - // Scale the exponent of the error - return (float) scalbn( testVal - reference, ulp_exp ); -} - float Abs_Error( float test, double reference ) { if( isnan(test) && isnan(reference) ) @@ -1786,25 +2352,6 @@ cl_uint RoundUpToNextPowerOfTwo( cl_uint x ) return x+x; } -#if !defined( __APPLE__ ) -void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ) -{ - uint32_t pat = ((uint32_t*) src_pattern)[0]; - size_t count = bytes / 4; - size_t i; - uint32_t *d = (uint32_t*)dest; - - for( i = 0; i < count; i++ ) - d[i] = pat; - - d += i; - - bytes &= 3; - if( bytes ) - memcpy( d, src_pattern, bytes ); -} -#endif - void TestFinishAtExit(void) { test_finish(); } diff --git a/test_conformance/math_brute_force/reference_math.c b/test_conformance/math_brute_force/reference_math.c index d90f4508..6e8f79aa 100644 --- a/test_conformance/math_brute_force/reference_math.c +++ b/test_conformance/math_brute_force/reference_math.c @@ -1790,7 +1790,7 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000 // *signgamp = 1; ix = hx&0x7fffffff; if(ix>=0x7ff00000) return x*x; - if((ix|lx)==0) return one/zero; + if((ix|lx)==0) return INFINITY; if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */ if(hx<0) { // *signgamp = -1; @@ -1799,9 +1799,9 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000 } if(hx<0) { if(ix>=0x43300000) /* |x|>=2**52, must be -integer */ - return one/zero; + return INFINITY; t = reference_sinpi(x); - if(t==zero) return one/zero; /* -integer */ + if(t==zero) return INFINITY; /* -integer */ nadj = reference_log(pi/reference_fabs(t*x)); // if(tdfunc.f_fff( s[j], s2[j], s3[j] ); - float err = Ulp_Error_Double( test, correct ); + float err = Bruteforce_Ulp_Error_Double( test, correct ); int fail = ! (fabsf(err) <= f->double_ulps); if( fail && ftz ) @@ -1028,8 +1028,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d) { // look at me, long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] ); long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; @@ -1051,10 +1051,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d) correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] ); long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] ); long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] ); - err2 = Ulp_Error_Double( test, correct2 ); - err3 = Ulp_Error_Double( test, correct3 ); - float err4 = Ulp_Error_Double( test, correct4 ); - float err5 = Ulp_Error_Double( test, correct5 ); + err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); + float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); + float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) @@ -1085,14 +1085,14 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d) long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f ); long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f ); long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f ); - err2 = Ulp_Error_Double( test, correct2 ); - err3 = Ulp_Error_Double( test, correct3 ); - err4 = Ulp_Error_Double( test, correct4 ); - err5 = Ulp_Error_Double( test, correct5 ); - float err6 = Ulp_Error_Double( test, correct6 ); - float err7 = Ulp_Error_Double( test, correct7 ); - float err8 = Ulp_Error_Double( test, correct8 ); - float err9 = Ulp_Error_Double( test, correct9 ); + err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); + err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); + err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); + float err6 = Bruteforce_Ulp_Error_Double( test, correct6 ); + float err7 = Bruteforce_Ulp_Error_Double( test, correct7 ); + float err8 = Bruteforce_Ulp_Error_Double( test, correct8 ); + float err9 = Bruteforce_Ulp_Error_Double( test, correct9 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) && @@ -1132,10 +1132,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d) correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 ); long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 ); long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 ); - err2 = Ulp_Error_Double( test, correct2 ); - err3 = Ulp_Error_Double( test, correct3 ); - float err4 = Ulp_Error_Double( test, correct4 ); - float err5 = Ulp_Error_Double( test, correct5 ); + err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); + float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); + float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) @@ -1161,8 +1161,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d) { long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] ); long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; @@ -1184,10 +1184,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d) correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 ); long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 ); long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 ); - err2 = Ulp_Error_Double( test, correct2 ); - err3 = Ulp_Error_Double( test, correct3 ); - float err4 = Ulp_Error_Double( test, correct4 ); - float err5 = Ulp_Error_Double( test, correct5 ); + err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); + float err4 = Bruteforce_Ulp_Error_Double( test, correct4 ); + float err5 = Bruteforce_Ulp_Error_Double( test, correct5 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) @@ -1213,8 +1213,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d) { long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 ); long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; diff --git a/test_conformance/math_brute_force/unary.c b/test_conformance/math_brute_force/unary.c index d8b3dbae..fc802327 100644 --- a/test_conformance/math_brute_force/unary.c +++ b/test_conformance/math_brute_force/unary.c @@ -200,6 +200,7 @@ typedef struct TestInfo cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] ThreadInfo *tinfo; // An array of thread specific information for each worker thread cl_uint threadCount; // Number of worker threads + cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps @@ -234,6 +235,16 @@ int TestFunc_Float_Float(const Func *f, MTdata d) test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; } test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + test_info.f = f; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -309,7 +320,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d) if( !gSkipCorrectnessTesting || skipTestingRelaxed) { - error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) @@ -892,7 +903,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) { cl_double test = ((cl_double*) q)[j]; long double correct = func.f_f( s[j] ); - float err = Ulp_Error_Double( test, correct ); + float err = Bruteforce_Ulp_Error_Double( test, correct ); int fail = ! (fabsf(err) <= ulps); if( fail ) @@ -912,8 +923,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) { long double correct2 = func.f_f( 0.0L ); long double correct3 = func.f_f( -0.0L ); - float err2 = Ulp_Error_Double( test, correct2 ); - float err3 = Ulp_Error_Double( test, correct3 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct2 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct3 ); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); if( fabsf( err2 ) < fabsf(err ) ) err = err2; @@ -997,7 +1008,16 @@ int TestFunc_Double_Double(const Func *f, MTdata d) test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; } - test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + //there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } test_info.f = f; test_info.ulps = f->double_ulps; @@ -1062,7 +1082,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d) if( !gSkipCorrectnessTesting ) { - error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info ); // Accumulate the arithmetic errors for( i = 0; i < test_info.threadCount; i++ ) diff --git a/test_conformance/math_brute_force/unary_two_results.c b/test_conformance/math_brute_force/unary_two_results.c index a3fb307d..26756edf 100644 --- a/test_conformance/math_brute_force/unary_two_results.c +++ b/test_conformance/math_brute_force/unary_two_results.c @@ -800,8 +800,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d) double test2 = ((double*) q2)[j]; long double correct2; long double correct = f->dfunc.f_fpf( s[j], &correct2 ); - float err = Ulp_Error_Double( test, correct ); - float err2 = Ulp_Error_Double( test2, correct2 ); + float err = Bruteforce_Ulp_Error_Double( test, correct ); + float err2 = Bruteforce_Ulp_Error_Double( test2, correct2 ); int fail = ! (fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps); if( ftz ) { @@ -837,10 +837,10 @@ int TestFunc_Double2_Double(const Func *f, MTdata d) long double correct2p, correct2n; long double correctp = f->dfunc.f_fpf( 0.0, &correct2p ); long double correctn = f->dfunc.f_fpf( -0.0, &correct2n ); - float errp = Ulp_Error_Double( test, correctp ); - float err2p = Ulp_Error_Double( test, correct2p ); - float errn = Ulp_Error_Double( test, correctn ); - float err2n = Ulp_Error_Double( test, correct2n ); + float errp = Bruteforce_Ulp_Error_Double( test, correctp ); + float err2p = Bruteforce_Ulp_Error_Double( test, correct2p ); + float errn = Bruteforce_Ulp_Error_Double( test, correctn ); + float err2n = Bruteforce_Ulp_Error_Double( test, correct2n ); fail = fail && ((!(fabsf(errp) <= f->double_ulps)) && (!(fabsf(err2p) <= f->double_ulps)) && ((!(fabsf(errn) <= f->double_ulps)) && (!(fabsf(err2n) <= f->double_ulps))) ); if( fabsf( errp ) < fabsf(err ) ) diff --git a/test_conformance/math_brute_force/unary_two_results_i.c b/test_conformance/math_brute_force/unary_two_results_i.c index c4eea8f1..cdd24f8e 100644 --- a/test_conformance/math_brute_force/unary_two_results_i.c +++ b/test_conformance/math_brute_force/unary_two_results_i.c @@ -633,7 +633,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d) double test = ((double*) q)[j]; int correct2 = INT_MIN; long double correct = f->dfunc.f_fpI( s[j], &correct2 ); - float err = Ulp_Error_Double( test, correct ); + float err = Bruteforce_Ulp_Error_Double( test, correct ); cl_long iErr = (long long) q2[j] - (long long) correct2; int fail = ! (fabsf(err) <= f->double_ulps && abs_cl_long( iErr ) <= maxiError ); if( ftz ) @@ -652,8 +652,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d) int correct5, correct6; long double correct3 = f->dfunc.f_fpI( 0.0, &correct5 ); long double correct4 = f->dfunc.f_fpI( -0.0, &correct6 ); - float err2 = Ulp_Error_Double( test, correct3 ); - float err3 = Ulp_Error_Double( test, correct4 ); + float err2 = Bruteforce_Ulp_Error_Double( test, correct3 ); + float err3 = Bruteforce_Ulp_Error_Double( test, correct4 ); cl_long iErr2 = (long long) q2[j] - (long long) correct5; cl_long iErr3 = (long long) q2[j] - (long long) correct6; diff --git a/test_conformance/math_brute_force/unary_u.c b/test_conformance/math_brute_force/unary_u.c index 131d1e7e..336fdd24 100644 --- a/test_conformance/math_brute_force/unary_u.c +++ b/test_conformance/math_brute_force/unary_u.c @@ -567,7 +567,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d) { double test = ((double*) q)[j]; long double correct = f->dfunc.f_u( s[j] ); - float err = Ulp_Error_Double(test, correct); + float err = Bruteforce_Ulp_Error_Double(test, correct); int fail = ! (fabsf(err) <= f->double_ulps); // half_sin/cos/tan are only valid between +-2**16, Inf, NaN diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp index d9366293..16ddad44 100644 --- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp @@ -613,6 +613,28 @@ void TestNonUniformWorkGroup::showTestInfo () { } } +size_t TestNonUniformWorkGroup::adjustLocalArraySize (size_t localArraySize) { + // In case if localArraySize is too big, sometimes we can not run kernel because of lack + // of resources due to kernel itself requires some local memory to run + int err; + + cl_ulong kernelLocalMemSize = 0; + err = clGetKernelWorkGroupInfo(_testKernel, _device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernelLocalMemSize), &kernelLocalMemSize, NULL); + test_error(err, "clGetKernelWorkGroupInfo failed"); + + cl_ulong deviceLocalMemSize = 0; + err = clGetDeviceInfo(_device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(deviceLocalMemSize), &deviceLocalMemSize, NULL); + test_error(err, "clGetDeviceInfo failed"); + + if (kernelLocalMemSize + localArraySize > deviceLocalMemSize) { + size_t adjustedLocalArraySize = deviceLocalMemSize - kernelLocalMemSize; + log_info("localArraySize was adjusted from %lu to %lu\n", localArraySize, adjustedLocalArraySize); + localArraySize = adjustedLocalArraySize; + } + + return localArraySize; +} + int TestNonUniformWorkGroup::runKernel () { int err; @@ -630,7 +652,8 @@ int TestNonUniformWorkGroup::runKernel () { test_error(err, "clSetKernelArg failed"); //creating local buffer - err = clSetKernelArg(_testKernel, 1, localArraySize*sizeof(unsigned int), NULL); + localArraySize = adjustLocalArraySize(localArraySize*sizeof(unsigned int)); + err = clSetKernelArg(_testKernel, 1, localArraySize, NULL); test_error(err, "clSetKernelArg failed"); clMemWrapper testGlobalArray = clCreateBuffer(_context, CL_MEM_READ_WRITE, _numOfGlobalWorkItems*sizeof(cl_uint), NULL, &err); diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h index e7f261b5..a077296d 100644 --- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h @@ -116,6 +116,7 @@ private: void verifyData (DataContainerAttrib * reference, DataContainerAttrib * results, short regionNumber); void calculateExpectedValues (); void showTestInfo (); + size_t adjustLocalArraySize(size_t localArraySize); }; // Class responsible for running subtest scenarios in test function diff --git a/test_conformance/printf/test_printf.c b/test_conformance/printf/test_printf.c index a5c5d585..cd5f38cd 100644 --- a/test_conformance/printf/test_printf.c +++ b/test_conformance/printf/test_printf.c @@ -17,6 +17,7 @@ #include #include +#include #if ! defined( _WIN32) #if ! defined( __ANDROID__ ) @@ -356,9 +357,6 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont //----------------------------------------- static bool isLongSupported(cl_device_id device_id) { - //profile type && device extention for long support checking - char *profileType = NULL,*devExt = NULL; - size_t tempSize = 0; cl_int status; bool extSupport = true; @@ -377,7 +375,7 @@ static bool isLongSupported(cl_device_id device_id) return false; } - profileType = new char[tempSize]; + std::unique_ptr profileType(new char[tempSize]); if(profileType == NULL) { log_error("Failed to allocate memory(profileType)"); @@ -388,11 +386,11 @@ static bool isLongSupported(cl_device_id device_id) device_id, CL_DEVICE_PROFILE, sizeof(char) * tempSize, - profileType, + profileType.get(), NULL); - if(!strcmp("EMBEDDED_PROFILE",profileType)) + if(!strcmp("EMBEDDED_PROFILE",profileType.get())) { // Device extention status = clGetDeviceInfo( @@ -408,7 +406,7 @@ static bool isLongSupported(cl_device_id device_id) return false; } - devExt = new char[tempSize]; + std::unique_ptr devExt(new char[tempSize]); if(devExt == NULL) { log_error("Failed to allocate memory(devExt)"); @@ -419,13 +417,10 @@ static bool isLongSupported(cl_device_id device_id) device_id, CL_DEVICE_EXTENSIONS, sizeof(char) * tempSize, - devExt, + devExt.get(), NULL); - extSupport = (strstr(devExt,"cles_khr_int64") != NULL); - - delete devExt; - delete profileType; + extSupport = (strstr(devExt.get(),"cles_khr_int64") != NULL); } return extSupport; } @@ -501,7 +496,8 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int int err; cl_program program; cl_kernel kernel; - cl_mem d_out; + cl_mem d_out = NULL; + cl_mem d_a = NULL; char _analysisBuffer[ANALYSIS_BUFFER_SIZE]; cl_uint out32 = 0; cl_ulong out64 = 0; @@ -523,7 +519,7 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int if(isKernelArgument(allTestCase[testId],testNum)) { int a = 2; - cl_mem d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, + d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, sizeof(int), &a, &err); if(err!= CL_SUCCESS || d_a == NULL) { log_error("clCreateBuffer failed\n"); @@ -613,6 +609,10 @@ exit: log_error("clReleaseKernel failed\n"); if(clReleaseProgram(program) != CL_SUCCESS) log_error("clReleaseProgram failed\n"); + if(d_out) + clReleaseMemObject(d_out); + if(d_a) + clReleaseMemObject(d_a); ++s_test_cnt; @@ -639,6 +639,8 @@ static void printArch( void ) log_info( "ARCH:\tx86_64\n" ); #elif defined( __arm__ ) log_info( "ARCH:\tarm\n" ); +#elif defined( __aarch64__ ) + log_info( "ARCH:\taarch64\n" ); #else #error unknown arch #endif @@ -757,7 +759,6 @@ int test_float_15(cl_device_id deviceID, cl_context context, cl_command_queue qu { return doTest(gQueue, gContext, TYPE_FLOAT, 15, gDevice); } -#if ! defined( __ANDROID__ ) int test_float_16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return doTest(gQueue, gContext, TYPE_FLOAT, 16, gDevice); @@ -766,7 +767,6 @@ int test_float_17(cl_device_id deviceID, cl_context context, cl_command_queue qu { return doTest(gQueue, gContext, TYPE_FLOAT, 17, gDevice); } -#endif int test_float_18(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return doTest(gQueue, gContext, TYPE_FLOAT, 18, gDevice); @@ -857,10 +857,6 @@ int test_string_2(cl_device_id deviceID, cl_context context, cl_command_queue qu { return doTest(gQueue, gContext, TYPE_STRING, 2, gDevice); } -int test_string_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) -{ - return doTest(gQueue, gContext, TYPE_STRING, 3, gDevice); -} int test_vector_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) @@ -933,10 +929,8 @@ basefn basefn_list[] = { test_float_13, test_float_14, test_float_15, -#if ! defined( __ANDROID__ ) test_float_16, test_float_17, -#endif test_float_18, test_float_19, test_float_20, @@ -962,7 +956,6 @@ basefn basefn_list[] = { test_string_0, test_string_1, test_string_2, - test_string_3, test_vector_0, test_vector_1, @@ -1004,10 +997,8 @@ const char *basefn_names[] = { "float_13", "float_14", "float_15", -#if ! defined( __ANDROID__ ) "float_16", "float_17", -#endif "float_18", "float_19", "float_20", @@ -1033,7 +1024,6 @@ const char *basefn_names[] = { "string_0", "string_1", "string_2", - "string_3", "vector_0", "vector_1", diff --git a/test_conformance/printf/util_printf.c b/test_conformance/printf/util_printf.c index 5bd9be1a..544072a7 100644 --- a/test_conformance/printf/util_printf.c +++ b/test_conformance/printf/util_printf.c @@ -212,8 +212,6 @@ struct printDataGenParameters printFloatGenParameters[] = { {"%+#21.15E","789456123.0"}, -#if ! defined( __ANDROID__ ) - //Double argument representing floating-point,in [-]xh.hhhhpAd style {"%.6a","0.1"}, @@ -222,8 +220,6 @@ struct printDataGenParameters printFloatGenParameters[] = { {"%10.2a","9990.235"}, -#endif - //Infinity (1.0/0.0) {"%f","1.0f/0.0f"}, @@ -275,14 +271,10 @@ const char* correctBufferFloat[] = { "+7.894561230000000E+8", -#if ! defined( __ANDROID__ ) - "0x1.99999ap-4", "0x1.38p+13", -#endif - "inf", "-nan", @@ -622,10 +614,6 @@ struct printDataGenParameters printStringGenParameters[] = { {"%s","\"%%\""}, - //null string - - {"%s","(void*)0"} - }; //--------------------------------------------------------- @@ -887,7 +875,7 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId if(!strcmp(pTestCase->_correctBuffer[testId],"inf")) return strcmp(analysisBuffer,"inf")&&strcmp(analysisBuffer,"infinity")&&strcmp(analysisBuffer,"1.#INF00")&&strcmp(analysisBuffer,"Inf"); if(!strcmp(pTestCase->_correctBuffer[testId],"nan") || !strcmp(pTestCase->_correctBuffer[testId],"-nan")) { - return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)"); + return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)")&&strcmp(analysisBuffer,"-nan(ind)"); } return strcmp(analysisBuffer,pTestCase->_correctBuffer[testId]); } diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py old mode 100644 new mode 100755 diff --git a/test_conformance/select/CMakeLists.txt b/test_conformance/select/CMakeLists.txt index d8d90229..6f25c466 100644 --- a/test_conformance/select/CMakeLists.txt +++ b/test_conformance/select/CMakeLists.txt @@ -8,6 +8,7 @@ set(${MODULE_NAME}_SOURCES ../../test_common/harness/msvc9.c ../../test_common/harness/kernelHelpers.c ../../test_common/harness/errorHelpers.c + ../../test_common/harness/parseParameters.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/select/test_select.c b/test_conformance/select/test_select.c index 8aa39a2a..a94d7229 100644 --- a/test_conformance/select/test_select.c +++ b/test_conformance/select/test_select.c @@ -27,12 +27,10 @@ #include #include "test_select.h" - #include "../../test_common/harness/testHarness.h" - - #include "../../test_common/harness/kernelHelpers.h" #include "../../test_common/harness/mt19937.h" +#include "../../test_common/harness/parseParameters.h" //----------------------------------------- @@ -57,6 +55,9 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont static int doTest(cl_command_queue queue, cl_context context, Type stype, Type cmptype, cl_device_id device); + +static void printUsage( void ); + //----------------------------------------- // Definitions and initializations //----------------------------------------- @@ -71,6 +72,7 @@ static int doTest(cl_command_queue queue, cl_context context, // range. Otherwise, we test a subset of the range // [-min_short, min_short] static bool s_wimpy_mode = false; +static int s_wimpy_reduction_factor = 256; // Tests are broken into the major test which is based on the // src and cmp type and their corresponding vector types and @@ -344,7 +346,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; size_t block_elements = BUFFER_SIZE / type_size[stype]; - size_t step = s_wimpy_mode ? 256 : 1; + size_t step = s_wimpy_mode ? s_wimpy_reduction_factor : 1; cl_ulong cmp_stride = block_elements * step; // It is more efficient to create the tests all at once since we @@ -506,99 +508,152 @@ exit: return err; } -static void printUsage( void ) +int test_select_uchar_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { - log_info("test_select: [-cghw] [test_name|start_test_num] \n"); - log_info(" default is to run the full test on the default device\n"); - log_info(" -w run in wimpy mode (smoke test)\n"); - log_info(" test_name will run only one test of that name\n"); - log_info(" start_test_num will start running from that num\n"); + return doTest(queue, context, kuchar, kuchar, deviceID); +} +int test_select_uchar_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kuchar, kchar, deviceID); +} +int test_select_char_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kchar, kuchar, deviceID); +} +int test_select_char_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kchar, kchar, deviceID); +} +int test_select_ushort_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kushort, kushort, deviceID); +} +int test_select_ushort_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kushort, kshort, deviceID); +} +int test_select_short_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kshort, kushort, deviceID); +} +int test_select_short_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kshort, kshort, deviceID); +} +int test_select_uint_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kuint, kuint, deviceID); +} +int test_select_uint_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kuint, kint, deviceID); +} +int test_select_int_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kint, kuint, deviceID); +} +int test_select_int_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kint, kint, deviceID); +} +int test_select_float_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kfloat, kuint, deviceID); +} +int test_select_float_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kfloat, kint, deviceID); +} +int test_select_ulong_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kulong, kulong, deviceID); +} +int test_select_ulong_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kulong, klong, deviceID); +} +int test_select_long_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, klong, kulong, deviceID); +} +int test_select_long_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, klong, klong, deviceID); +} +int test_select_double_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kdouble, kulong, deviceID); +} +int test_select_double_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, kdouble, klong, deviceID); } -static void printArch( void ) +basefn basefn_list[] = { + test_select_uchar_uchar, + test_select_uchar_char, + test_select_char_uchar, + test_select_char_char, + test_select_ushort_ushort, + test_select_ushort_short, + test_select_short_ushort, + test_select_short_short, + test_select_uint_uint, + test_select_uint_int, + test_select_int_uint, + test_select_int_int, + test_select_float_uint, + test_select_float_int, + test_select_ulong_ulong, + test_select_ulong_long, + test_select_long_ulong, + test_select_long_long, + test_select_double_ulong, + test_select_double_long, +}; + +const char *basefn_names[] = { + "select_uchar_uchar", + "select_uchar_char", + "select_char_uchar", + "select_char_char", + "select_ushort_ushort", + "select_ushort_short", + "select_short_ushort", + "select_short_short", + "select_uint_uint", + "select_uint_int", + "select_int_uint", + "select_int_int", + "select_float_uint", + "select_float_int", + "select_ulong_ulong", + "select_ulong_long", + "select_long_ulong", + "select_long_long", + "select_double_ulong", + "select_double_long", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, char* argv[]) { - log_info( "sizeof( void*) = %d\n", (int) sizeof( void *) ); + const char ** argList = (const char **)calloc( argc, sizeof( char*) ); -#if defined( __APPLE__ ) - -#if defined( __ppc__ ) - log_info( "ARCH:\tppc\n" ); -#elif defined( __ppc64__ ) - log_info( "ARCH:\tppc64\n" ); -#elif defined( __i386__ ) - log_info( "ARCH:\ti386\n" ); -#elif defined( __x86_64__ ) - log_info( "ARCH:\tx86_64\n" ); -#elif defined( __arm__ ) - log_info( "ARCH:\tarm\n" ); -#else -#error unknown arch -#endif - - int type = 0; - size_t typeSize = sizeof( type ); - sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 ); - log_info( "cpu type:\t%d\n", type ); - typeSize = sizeof( type ); - sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 ); - log_info( "cpu subtype:\t%d\n", type ); - -#endif -} - - - - -//----------------------------------------- -// main -//----------------------------------------- -int main(int argc, char* argv[]) { - int i; - cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT; - cl_platform_id platform_id; - long test_start_num = 0; // start test number - const char* exec_testname = NULL; - cl_device_id device_id; - uint32_t device_frequency = 0; - uint32_t compute_devices = 0; - - - test_start(); - - // Maybe we want turn off sleep - - // Check the environmental to see if there is device preference - char *device_env = getenv("CL_DEVICE_TYPE"); - if (device_env != NULL) { - if( strcmp( device_env, "gpu" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_GPU" ) == 0 ) - device_type = CL_DEVICE_TYPE_GPU; - else if( strcmp( device_env, "cpu" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_CPU" ) == 0 ) - device_type = CL_DEVICE_TYPE_CPU; - else if( strcmp( device_env, "accelerator" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) - device_type = CL_DEVICE_TYPE_ACCELERATOR; - else if( strcmp( device_env, "default" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) - device_type = CL_DEVICE_TYPE_DEFAULT; - else - { - log_error( "Unknown CL_DEVICE_TYPE environment variable: %s.\nAborting...\n", device_env ); - abort(); - } + if( NULL == argList ) + { + log_error( "Failed to allocate memory for argList array.\n" ); + return 1; } - // Check for the wimpy mode environment variable - if (getenv("CL_WIMPY_MODE")) { - log_info("*** Detected CL_WIMPY_MODE env\n"); - s_wimpy_mode = 1; - } + argList[0] = argv[0]; + size_t argCount = 1; - // Determine if we want to run a particular test or if we want to - // start running from a certain point and if we want to run on cpu/gpu - // usage: test_selects [test_name] [start test num] [run_long] - // default is to run all tests on the gpu and be short - // test names are of the form select_[src/dest type]_[cmp_type] - // In the long test, we run the full range for any type >= 32 bits - // and 32 bits subset for the 64 bit value. - for (i=1; i < argc; ++i) { + for( int i = 1; i < argc; ++i ) + { const char *arg = argv[i]; if (arg == NULL) break; @@ -612,177 +667,55 @@ int main(int argc, char* argv[]) { case 'h': printUsage(); return 0; - case 'w': // Wimpy mode + case 'w': s_wimpy_mode = true; break; + case '[': + parseWimpyReductionFactor(arg, s_wimpy_reduction_factor); + break; default: - log_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); - printUsage(); - return 0; + break; } arg++; } } - else { - char* t = NULL; - long num = strtol(argv[i], &t, 0); - if (t != argv[i]) - test_start_num = num; - else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_CPU" ) ) - device_type = CL_DEVICE_TYPE_CPU; - else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_GPU" ) ) - device_type = CL_DEVICE_TYPE_GPU; - else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_ACCELERATOR" ) ) - device_type = CL_DEVICE_TYPE_ACCELERATOR; - else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_DEFAULT" ) ) - device_type = CL_DEVICE_TYPE_DEFAULT; - else if( 0 == strcmp( argv[i], "randomize" ) ) { - gRandomSeed = (cl_uint) time( NULL ); - log_info("\nRandom seed: %u.\n", gRandomSeed ); - } else { - // assume it is a test name that we want to execute - exec_testname = argv[i]; - } + else + { + argList[argCount] = arg; + argCount++; } } - - int err; - - // Get platform - err = clGetPlatformIDs(1, &platform_id, NULL); - checkErr(err,"clGetPlatformIDs failed"); - - // Get Device information - err = clGetDeviceIDs(platform_id, device_type, 1, &device_id, 0); - checkErr(err,"clGetComputeDevices"); - - err = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(cl_device_type), &device_type, NULL); - checkErr(err,"clGetComputeConfigInfo 1"); - - size_t config_size = sizeof( device_frequency ); -#if MULTITHREAD - if( (err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, config_size, &compute_devices, NULL )) ) -#endif - compute_devices = 1; - - config_size = sizeof(device_frequency); - if((err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, config_size, &device_frequency, NULL ))) - device_frequency = 1; - - //detect whether profile of the device is embedded - char profile[1024] = ""; - if( (err = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) ){} - else if( strstr(profile, "EMBEDDED_PROFILE" ) ) - { - gIsEmbedded = 1; + if (getenv("CL_WIMPY_MODE")) { + s_wimpy_mode = true; } - - log_info( "\nCompute Device info:\n" ); - log_info( "\tProcessing with %d devices\n", compute_devices ); - log_info( "\tDevice Frequency: %d MHz\n", device_frequency ); - - printDeviceHeader( device_id ); - printArch(); - log_info( "Test binary built %s %s\n", __DATE__, __TIME__ ); if (s_wimpy_mode) { log_info("\n"); log_info("*** WARNING: Testing in Wimpy mode! ***\n"); log_info("*** Wimpy mode is not sufficient to verify correctness. ***\n"); log_info("*** It gives warm fuzzy feelings and then nevers calls. ***\n\n"); + log_info("*** Wimpy Reduction Factor: %-27u ***\n\n", s_wimpy_reduction_factor); } - cl_context context = clCreateContext(NULL, 1, &device_id, notify_callback, NULL, NULL); - checkNull(context, "clCreateContext"); + int err = runTestHarness( argCount, argList, num_fns, basefn_list, basefn_names, false, false, 0 ); - cl_command_queue queue = clCreateCommandQueueWithProperties(context, device_id, 0, NULL); - checkNull(queue, "clCreateCommandQueue"); + free( argList ); - - if (exec_testname) { - // Parse name - // Skip the first part of the name - bool success = false; - if (strncmp(exec_testname, "select_", 7) == 0) { - int i; - Type src_type = kTypeCount; - Type cmp_type = kTypeCount; - char* sptr = (char *)strchr(exec_testname, '_'); - if (sptr) { - for (++sptr, i=0; i < kTypeCount; i++) { - if (strncmp(sptr, type_name[i], strlen(type_name[i])) == 0) { - src_type = (Type)i; - break; - } - } - sptr = strchr(sptr, '_'); - if (sptr) { - for (++sptr, i=0; i < kTypeCount; i++) { - if (strncmp(sptr, type_name[i], strlen(type_name[i])) == 0) { - cmp_type = (Type)i; - break; - } - } - } - } - if (src_type != kTypeCount && cmp_type != kTypeCount) { - success = true; - log_info("Testing only select_%s_%s\n", - type_name[src_type], type_name[cmp_type]); - if (doTest(queue, context, src_type, cmp_type, device_id) != 0) - log_error("*** select_%s_%s FAILED ***\n\n", - type_name[src_type], type_name[cmp_type]); - } - } - if (!success) { - log_error("can not find test:%s", exec_testname); - return -1; - } - } - else { - int src_type, j; - int test_num; - test_num = 0; - for (src_type = 0; src_type < kTypeCount; ++src_type) { - for (j = 0; j < 2; ++j) { - Type cmp_type = ctype[src_type][j]; - if (++test_num < test_start_num) { - log_info("%d) skipping select_%s_%s\n", test_num, - type_name[src_type], type_name[cmp_type]); - } - else { - log_info("%d) Testing select_%s_%s\n", - test_num, type_name[src_type], type_name[cmp_type]); - if (doTest(queue, context, (Type)src_type, cmp_type, device_id) != 0) - log_error("*** %d) select_%s_%s FAILED ***\n\n", test_num, - type_name[src_type], type_name[cmp_type]); - } - } - } - } - - int error = clFinish(queue); - if (error) { - log_error("clFinish failed: %d\n", error); - } - - clReleaseContext(context); - clReleaseCommandQueue(queue); - - if (s_test_fail == 0) { - if (s_test_cnt > 1) - log_info("PASSED %d of %d tests.\n", s_test_cnt, s_test_cnt); - else - log_info("PASSED test.\n"); - } else if (s_test_fail > 0) { - if (s_test_cnt > 1) - log_error("FAILED %d of %d tests.\n", s_test_fail, s_test_cnt); - else - log_error("FAILED test.\n"); - } - - test_finish(); - return s_test_fail; + return err; +} + +static void printUsage( void ) +{ + log_info("test_select: [-w] \n"); + log_info("\tdefault is to run the full test on the default device\n"); + log_info("\t-w run in wimpy mode (smoke test)\n"); + log_info("\t-[2^n] Set wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", s_wimpy_reduction_factor); + log_info("\n"); + log_info("Test names:\n"); + for( int i = 0; i < num_fns; i++ ) + { + log_info( "\t%s\n", basefn_names[i] ); + } } diff --git a/test_conformance/spir/basic.zip b/test_conformance/spir/basic.zip index d60636fb..14c7a3f4 100644 Binary files a/test_conformance/spir/basic.zip and b/test_conformance/spir/basic.zip differ diff --git a/test_conformance/spir/half.zip b/test_conformance/spir/half.zip index f0d3e8ff..0b1deca1 100644 Binary files a/test_conformance/spir/half.zip and b/test_conformance/spir/half.zip differ diff --git a/test_conformance/spir/images_kernel_read_write.zip b/test_conformance/spir/images_kernel_read_write.zip index cbb84d89..dc701137 100644 Binary files a/test_conformance/spir/images_kernel_read_write.zip and b/test_conformance/spir/images_kernel_read_write.zip differ diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp index 1c1ce6b3..52628e37 100644 --- a/test_conformance/spir/main.cpp +++ b/test_conformance/spir/main.cpp @@ -346,9 +346,12 @@ bool test_suite(cl_device_id device, cl_uint size_t_width, const char *folder, { std::cout << "Failed tests:" << std::endl; std::for_each(ErrList.begin(), ErrList.end(), printError); + std::cout << std::endl; + return false; } + std::cout << std::endl; - return tests_passed == number_of_tests; + return true; } static std::string getTestFolder(const std::string& TS) @@ -840,7 +843,6 @@ bool test_basic (cl_device_id device, cl_uint size_t_width, const char *folder) "test_sizeof.sizeof_uintptr_t", "test_sizeof.sizeof_image2d_t", "test_sizeof.sizeof_image3d_t", - "test_sizeof.sizeof_sampler_t", "test_sizeof.sizeof_double", "test_sizeof.sizeof_double2", "test_sizeof.sizeof_double4", diff --git a/test_conformance/spir/profiling.zip b/test_conformance/spir/profiling.zip index 17f1fbec..c792bbec 100644 Binary files a/test_conformance/spir/profiling.zip and b/test_conformance/spir/profiling.zip differ diff --git a/test_conformance/spir/run_build_test.cpp b/test_conformance/spir/run_build_test.cpp index 4dd3f0d4..b602303d 100644 --- a/test_conformance/spir/run_build_test.cpp +++ b/test_conformance/spir/run_build_test.cpp @@ -378,7 +378,10 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder, if (strstr(test_name, "div_cr") || strstr(test_name, "sqrt_cr")) { if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0) + { + std::cout << "Skipped. CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT is not supported." << std::endl; return true; + } else { bcoptions += " -cl-fp32-correctly-rounded-divide-sqrt"; cloptions += " -cl-fp32-correctly-rounded-divide-sqrt"; diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp index 5a289126..9a0bf037 100644 --- a/test_conformance/subgroups/main.cpp +++ b/test_conformance/subgroups/main.cpp @@ -40,15 +40,15 @@ ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_lis static const int num_fns = sizeof(basefn_names) / sizeof(char *); -static int +static test_status checkSubGroupsExtension(cl_device_id device) { if (!is_extension_available(device, "cl_khr_subgroups")) { log_info("Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); - return CL_INVALID_DEVICE; + return TEST_SKIP; } - return CL_SUCCESS; + return TEST_PASS; } int diff --git a/test_extensions/media_sharing/test_create_context.cpp b/test_extensions/media_sharing/test_create_context.cpp index 42deaa04..5637bc54 100644 --- a/test_extensions/media_sharing/test_create_context.cpp +++ b/test_extensions/media_sharing/test_create_context.cpp @@ -40,6 +40,13 @@ int context_create(cl_device_id deviceID, cl_context context, cl_command_queue q while (deviceWrapper->AdapterNext()) { + cl_int error; + //check if the test can be run on the adapter + if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) + { + return result.Result(); + } + if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) { std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; @@ -68,7 +75,6 @@ int context_create(cl_device_id deviceID, cl_context context, cl_command_queue q 0, }; - cl_int error; clContextWrapper ctx; switch(functionCreate) { @@ -230,13 +236,20 @@ int context_create(cl_device_id deviceID, cl_context context, cl_command_queue q } } - if (!deviceWrapper->Status()) + if (deviceWrapper->Status() != DEVICE_PASS) { std::string adapterName; AdapterToString(adapterType, adapterName); + if (deviceWrapper->Status() == DEVICE_FAIL) + { log_error("%s init failed\n", adapterName.c_str()); result.ResultSub(CResult::TEST_FAIL); - return result.Result(); + } + else + { + log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } } return result.Result(); diff --git a/test_extensions/media_sharing/test_functions_api.cpp b/test_extensions/media_sharing/test_functions_api.cpp index 21fe403d..cdc6ce86 100644 --- a/test_extensions/media_sharing/test_functions_api.cpp +++ b/test_extensions/media_sharing/test_functions_api.cpp @@ -51,6 +51,13 @@ int api_functions(cl_device_id deviceID, cl_context context, cl_command_queue qu //iterates through all devices while (deviceWrapper->AdapterNext()) { + cl_int error; + //check if the test can be run on the adapter + if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) + { + return result.Result(); + } + if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) { std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; @@ -81,7 +88,6 @@ int api_functions(cl_device_id deviceID, cl_context context, cl_command_queue qu 0, }; - cl_int error; clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); if (error != CL_SUCCESS) { @@ -506,13 +512,20 @@ int api_functions(cl_device_id deviceID, cl_context context, cl_command_queue qu } } - if (!deviceWrapper->Status()) + if (deviceWrapper->Status() != DEVICE_PASS) { std::string adapterName; AdapterToString(adapterType, adapterName); - log_error("%s init failed\n", adapterName.c_str()); - result.ResultSub(CResult::TEST_FAIL); - return result.Result(); + if (deviceWrapper->Status() == DEVICE_FAIL) + { + log_error("%s init failed\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + else + { + log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } } return result.Result(); diff --git a/test_extensions/media_sharing/test_functions_kernel.cpp b/test_extensions/media_sharing/test_functions_kernel.cpp index eff297dc..51ac0b70 100644 --- a/test_extensions/media_sharing/test_functions_kernel.cpp +++ b/test_extensions/media_sharing/test_functions_kernel.cpp @@ -71,6 +71,13 @@ int kernel_functions(cl_device_id deviceID, cl_context context, cl_command_queue while (deviceWrapper->AdapterNext()) { + cl_int error; + //check if the test can be run on the adapter + if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) + { + return result.Result(); + } + if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) { std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; @@ -109,7 +116,6 @@ int kernel_functions(cl_device_id deviceID, cl_context context, cl_command_queue 0, }; - cl_int error; clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); if (error != CL_SUCCESS) { @@ -335,13 +341,20 @@ int kernel_functions(cl_device_id deviceID, cl_context context, cl_command_queue } } - if (!deviceWrapper->Status()) + if (deviceWrapper->Status() != DEVICE_PASS) { - std::string adapter; - AdapterToString(adapterType, adapter); - log_error("%s init failed\n", adapter.c_str()); + std::string adapterName; + AdapterToString(adapterType, adapterName); + if (deviceWrapper->Status() == DEVICE_FAIL) + { + log_error("%s init failed\n", adapterName.c_str()); result.ResultSub(CResult::TEST_FAIL); - return result.Result(); + } + else + { + log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } } return result.Result(); diff --git a/test_extensions/media_sharing/test_get_device_ids.cpp b/test_extensions/media_sharing/test_get_device_ids.cpp index 68fdbf74..f8947ea6 100644 --- a/test_extensions/media_sharing/test_get_device_ids.cpp +++ b/test_extensions/media_sharing/test_get_device_ids.cpp @@ -53,6 +53,12 @@ int get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue q std::vector mediaDevices; mediaDevices.push_back(deviceWrapper->Device()); + //check if the test can be run on the adapter + if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result))) + { + return result.Result(); + } + cl_uint devicesAllNum = 0; error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum); @@ -140,13 +146,20 @@ int get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue q } } - if (!deviceWrapper->Status()) + if (deviceWrapper->Status() != DEVICE_PASS) { - std::string adapter; - AdapterToString(adapterType, adapter); - log_error("%s init failed\n", adapter.c_str()); + std::string adapterName; + AdapterToString(adapterType, adapterName); + if (deviceWrapper->Status() == DEVICE_FAIL) + { + log_error("%s init failed\n", adapterName.c_str()); result.ResultSub(CResult::TEST_FAIL); - return result.Result(); + } + else + { + log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } } return result.Result(); diff --git a/test_extensions/media_sharing/test_interop_sync.cpp b/test_extensions/media_sharing/test_interop_sync.cpp index d15e4fbf..6831a14d 100644 --- a/test_extensions/media_sharing/test_interop_sync.cpp +++ b/test_extensions/media_sharing/test_interop_sync.cpp @@ -40,6 +40,13 @@ int interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queu while (deviceWrapper->AdapterNext()) { + cl_int error; + //check if the test can be run on the adapter + if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) + { + return result.Result(); + } + if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) { @@ -71,7 +78,7 @@ int interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queu 0, }; - cl_int error; + clContextWrapper ctx; switch(functionCreate) { @@ -258,13 +265,21 @@ int interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queu } } - if (!deviceWrapper->Status()) + if (deviceWrapper->Status() != DEVICE_PASS) { std::string adapterName; AdapterToString(adapterType, adapterName); + + if (deviceWrapper->Status() == DEVICE_FAIL) + { log_error("%s init failed\n", adapterName.c_str()); result.ResultSub(CResult::TEST_FAIL); - return result.Result(); + } + else + { + log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } } return result.Result(); diff --git a/test_extensions/media_sharing/test_memory_access.cpp b/test_extensions/media_sharing/test_memory_access.cpp index 10fe7543..5aabaf6f 100644 --- a/test_extensions/media_sharing/test_memory_access.cpp +++ b/test_extensions/media_sharing/test_memory_access.cpp @@ -45,6 +45,13 @@ int memory_access(cl_device_id deviceID, cl_context context, cl_command_queue qu //iterates through all devices while (deviceWrapper->AdapterNext()) { + cl_int error; + //check if the test can be run on the adapter + if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) + { + return result.Result(); + } + if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) { std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; @@ -81,7 +88,6 @@ int memory_access(cl_device_id deviceID, cl_context context, cl_command_queue qu 0, }; - cl_int error; clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); if (error != CL_SUCCESS) { @@ -357,13 +363,20 @@ int memory_access(cl_device_id deviceID, cl_context context, cl_command_queue qu } } - if (!deviceWrapper->Status()) + if (deviceWrapper->Status() != DEVICE_PASS) { std::string adapterName; AdapterToString(adapterType, adapterName); + if (deviceWrapper->Status() == DEVICE_FAIL) + { log_error("%s init failed\n", adapterName.c_str()); result.ResultSub(CResult::TEST_FAIL); - return result.Result(); + } + else + { + log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } } return result.Result(); diff --git a/test_extensions/media_sharing/test_other_data_types.cpp b/test_extensions/media_sharing/test_other_data_types.cpp index 0e0bce35..a2aa7cbd 100644 --- a/test_extensions/media_sharing/test_other_data_types.cpp +++ b/test_extensions/media_sharing/test_other_data_types.cpp @@ -65,13 +65,19 @@ int other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue while (deviceWrapper->AdapterNext()) { + cl_int error; + //check if the test can be run on the adapter + if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle))) + { + return result.Result(); + } + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), 0, }; - cl_int error; clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); if (error != CL_SUCCESS) { @@ -419,13 +425,20 @@ int other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue } } - if (!deviceWrapper->Status()) + if (deviceWrapper->Status() != DEVICE_PASS) { - std::string adapter; - AdapterToString(adapterType, adapter); - log_error("%s init failed\n", adapter.c_str()); + std::string adapterName; + AdapterToString(adapterType, adapterName); + if (deviceWrapper->Status() == DEVICE_FAIL) + { + log_error("%s init failed\n", adapterName.c_str()); result.ResultSub(CResult::TEST_FAIL); - return result.Result(); + } + else + { + log_error("%s init incomplete due to unsupported device\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } } return result.Result(); diff --git a/test_extensions/media_sharing/utils.cpp b/test_extensions/media_sharing/utils.cpp index b32e9556..41899b76 100644 --- a/test_extensions/media_sharing/utils.cpp +++ b/test_extensions/media_sharing/utils.cpp @@ -1935,3 +1935,36 @@ float convert_half_to_float( unsigned short halfValue ) outFloat.bits = ( sign << 31 ) | ( exponent << 23 ) | mantissa; return outFloat.floatValue; } + +cl_int deviceExistForCLTest(cl_platform_id platform, + cl_dx9_media_adapter_type_khr media_adapters_type, + void *media_adapters, + CResult &result, + TSharedHandleType sharedHandle /*default SHARED_HANDLE_ENABLED*/ + ) +{ + cl_int _error; + cl_uint devicesAllNum = 0; + std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; + std::string adapterStr; + AdapterToString(media_adapters_type, adapterStr); + + _error = clGetDeviceIDsFromDX9MediaAdapterKHR(platform, 1, + &media_adapters_type, &media_adapters, CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum); + + if (_error != CL_SUCCESS) + { + if(_error != CL_DEVICE_NOT_FOUND) + { + log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(_error)); + result.ResultSub(CResult::TEST_ERROR); + } + else + { + log_info("Skipping test case, device type is not supported by a device (adapter type: %s, shared handle: %s)\n", adapterStr.c_str(), sharedHandleStr.c_str()); + result.ResultSub(CResult::TEST_NOTSUPPORTED); + } + } + + return _error; +} diff --git a/test_extensions/media_sharing/utils.h b/test_extensions/media_sharing/utils.h index 4731b268..295a2d25 100644 --- a/test_extensions/media_sharing/utils.h +++ b/test_extensions/media_sharing/utils.h @@ -15,17 +15,19 @@ // #ifndef __UTILS_KHR_MEDIA_H #define __UTILS_KHR_MEDIA_H - +#include "../../test_common/harness/compat.h" +#include #include "wrappers.h" #include "CL/cl_dx9_media_sharing.h" - #include "../../test_common/harness/typeWrappers.h" - -#include #include #include #include + + + + extern clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR; extern clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR; extern clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR; @@ -168,4 +170,5 @@ cl_ushort convert_float_to_half( float f ); float convert_half_to_float( unsigned short halfValue ); int DetectFloatToHalfRoundingMode( cl_command_queue ); +cl_int deviceExistForCLTest(cl_platform_id platform,cl_dx9_media_adapter_type_khr media_adapters_type,void *media_adapters,CResult &result,TSharedHandleType sharedHandle=SHARED_HANDLE_DISABLED); #endif // __UTILS_KHR_MEDIA_H diff --git a/test_extensions/media_sharing/wrappers.cpp b/test_extensions/media_sharing/wrappers.cpp index 27907ca3..af13633b 100644 --- a/test_extensions/media_sharing/wrappers.cpp +++ b/test_extensions/media_sharing/wrappers.cpp @@ -141,7 +141,7 @@ CDeviceWrapper::~CDeviceWrapper() #if defined(_WIN32) CD3D9Wrapper::CD3D9Wrapper(): -_d3d9(NULL), _d3dDevice(NULL), _status(true), _adapterIdx(0), _adapterFound(false) +_d3d9(NULL), _d3dDevice(NULL), _status(DEVICE_PASS), _adapterIdx(0), _adapterFound(false) { WindowInit(); @@ -149,7 +149,7 @@ _d3d9(NULL), _d3dDevice(NULL), _status(true), _adapterIdx(0), _adapterFound(fals if (!_d3d9) { log_error("Direct3DCreate9 failed\n"); - _status = false; + _status = DEVICE_FAIL; } } @@ -169,16 +169,16 @@ void CD3D9Wrapper::Destroy() _d3dDevice = 0; } -bool CD3D9Wrapper::Init() +cl_int CD3D9Wrapper::Init() { if (!WindowHandle()) { log_error("D3D9: Window is not created\n"); - _status = false; - return false; + _status = DEVICE_FAIL; + return DEVICE_FAIL; } - if(!_d3d9 || !_status || !_adapterFound) + if(!_d3d9 || DEVICE_PASS != _status || !_adapterFound) return false; _d3d9->GetAdapterDisplayMode(_adapterIdx - 1, &_d3ddm); @@ -201,8 +201,8 @@ bool CD3D9Wrapper::Init() processingType, &d3dParams, &_d3dDevice) ) ) { log_error("CreateDevice failed\n"); - _status = false; - return false; + _status = DEVICE_FAIL; + return DEVICE_FAIL; } _d3dDevice->BeginScene(); @@ -232,14 +232,14 @@ D3DADAPTER_IDENTIFIER9 CD3D9Wrapper::Adapter() return _adapter; } -bool CD3D9Wrapper::Status() const +TDeviceStatus CD3D9Wrapper::Status() const { return _status; } bool CD3D9Wrapper::AdapterNext() { - if (!_status) + if (DEVICE_PASS != _status) return false; _adapterFound = false; @@ -253,7 +253,7 @@ bool CD3D9Wrapper::AdapterNext() if(FAILED(_d3d9->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter))) { log_error("D3D9: GetAdapterIdentifier failed\n"); - _status = false; + _status = DEVICE_FAIL; return false; } @@ -262,7 +262,7 @@ bool CD3D9Wrapper::AdapterNext() Destroy(); if(!Init()) { - _status = false; + _status = DEVICE_FAIL; _adapterFound = false; } break; @@ -278,7 +278,7 @@ unsigned int CD3D9Wrapper::AdapterIdx() const CD3D9ExWrapper::CD3D9ExWrapper(): -_d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(true), _adapterIdx(0), _adapterFound(false) +_d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(DEVICE_PASS), _adapterIdx(0), _adapterFound(false) { WindowInit(); @@ -286,7 +286,7 @@ _d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(true), _adapterIdx(0), _adapterFound( if (FAILED(result) || !_d3d9Ex) { log_error("Direct3DCreate9Ex failed\n"); - _status = false; + _status = DEVICE_FAIL; } } @@ -319,17 +319,17 @@ D3DADAPTER_IDENTIFIER9 CD3D9ExWrapper::Adapter() return _adapter; } -bool CD3D9ExWrapper::Init() +cl_int CD3D9ExWrapper::Init() { if (!WindowHandle()) { log_error("D3D9EX: Window is not created\n"); - _status = false; - return false; + _status = DEVICE_FAIL; + return DEVICE_FAIL; } - if(!_d3d9Ex || !_status || !_adapterFound) - return false; + if(!_d3d9Ex || DEVICE_FAIL == _status || !_adapterFound) + return DEVICE_FAIL; RECT rect; GetClientRect(WindowHandle(),&rect); @@ -353,15 +353,15 @@ bool CD3D9ExWrapper::Init() processingType, &d3dParams, NULL, &_d3dDeviceEx) ) ) { log_error("CreateDeviceEx failed\n"); - _status = false; - return false; + _status = DEVICE_FAIL; + return DEVICE_FAIL; } _d3dDeviceEx->BeginScene(); _d3dDeviceEx->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0); _d3dDeviceEx->EndScene(); - return true; + return DEVICE_PASS; } void CD3D9ExWrapper::Destroy() @@ -371,14 +371,14 @@ void CD3D9ExWrapper::Destroy() _d3dDeviceEx = 0; } -bool CD3D9ExWrapper::Status() const +TDeviceStatus CD3D9ExWrapper::Status() const { return _status; } bool CD3D9ExWrapper::AdapterNext() { - if (!_status) + if (DEVICE_FAIL == _status) return false; _adapterFound = false; @@ -392,7 +392,7 @@ bool CD3D9ExWrapper::AdapterNext() if(FAILED(_d3d9Ex->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter))) { log_error("D3D9EX: GetAdapterIdentifier failed\n"); - _status = false; + _status = DEVICE_FAIL; return false; } @@ -400,8 +400,8 @@ bool CD3D9ExWrapper::AdapterNext() Destroy(); if(!Init()) { - _status = false; - _adapterFound = false; + _status = DEVICE_FAIL; + _adapterFound = _status; } break; @@ -416,7 +416,7 @@ unsigned int CD3D9ExWrapper::AdapterIdx() const } CDXVAWrapper::CDXVAWrapper(): -_dxvaDevice(NULL), _status(true), _adapterFound(false) +_dxvaDevice(NULL), _status(DEVICE_PASS), _adapterFound(false) { _status = _d3d9.Status(); } @@ -431,19 +431,24 @@ void * CDXVAWrapper::Device() const return _dxvaDevice; } -bool CDXVAWrapper::Status() const +TDeviceStatus CDXVAWrapper::Status() const { - return _status && _d3d9.Status(); + if(_status == DEVICE_FAIL || _d3d9.Status() == DEVICE_FAIL) + return DEVICE_FAIL; + else if(_status == DEVICE_NOTSUPPORTED || _d3d9.Status() == DEVICE_NOTSUPPORTED) + return DEVICE_NOTSUPPORTED; + else + return DEVICE_PASS; } bool CDXVAWrapper::AdapterNext() { - if (!_status) + if (DEVICE_PASS != _status) return false; _adapterFound = _d3d9.AdapterNext(); _status = _d3d9.Status(); - if (!_status) + if (DEVICE_PASS != _status) { _adapterFound = false; return false; @@ -454,7 +459,7 @@ bool CDXVAWrapper::AdapterNext() DXVAHDDestroy(); _status = DXVAHDInit(); - if (!_status) + if (DEVICE_PASS != _status) { _adapterFound = false; return false; @@ -463,10 +468,10 @@ bool CDXVAWrapper::AdapterNext() return true; } -bool CDXVAWrapper::DXVAHDInit() +TDeviceStatus CDXVAWrapper::DXVAHDInit() { - if (!_status || !_d3d9.Status() || !_adapterFound) - return false; + if ((_status == DEVICE_FAIL) || (_d3d9.Status() == DEVICE_FAIL) || !_adapterFound) + return DEVICE_FAIL; DXVAHD_RATIONAL fps = { VIDEO_FPS, 1 }; @@ -480,20 +485,27 @@ bool CDXVAWrapper::DXVAHDInit() desc.OutputHeight = WindowHeight(); #ifdef USE_SOFTWARE_PLUGIN - _status = false; - return false; + _status = DEVICE_FAIL; + return DEVICE_FAIL; #endif HRESULT hr = DXVAHD_CreateDevice(static_cast(_d3d9.Device()), &desc, DXVAHD_DEVICE_USAGE_PLAYBACK_NORMAL, NULL, &_dxvaDevice); if(FAILED(hr)) { + if (hr == E_NOINTERFACE) + { + log_error("DXVAHD_CreateDevice skipped due to no supported devices!\n"); + _status = DEVICE_NOTSUPPORTED; + } + else + { log_error("DXVAHD_CreateDevice failed\n"); - _status = false; - return false; + _status = DEVICE_FAIL; + } } - return true; + return _status; } void CDXVAWrapper::DXVAHDDestroy() diff --git a/test_extensions/media_sharing/wrappers.h b/test_extensions/media_sharing/wrappers.h index bafc8a0a..45b70326 100644 --- a/test_extensions/media_sharing/wrappers.h +++ b/test_extensions/media_sharing/wrappers.h @@ -18,10 +18,37 @@ #if defined(_WIN32) #include +#if defined (__MINGW32__) +#include +typedef unsigned char UINT8; +#define __out +#define __in +#define __inout +#define __out_bcount(size) +#define __out_bcount_opt(size) +#define __in_opt +#define __in_ecount(size) +#define __in_ecount_opt(size) +#define __out_opt +#define __out_ecount(size) +#define __out_ecount_opt(size) +#define __in_bcount_opt(size) +#define __inout_opt +#define __inout_bcount(size) +#define __in_bcount(size) +#define __deref_out +#endif #include #include #endif +enum TDeviceStatus +{ + DEVICE_NOTSUPPORTED, + DEVICE_PASS, + DEVICE_FAIL, +}; + class CDeviceWrapper { public: enum TAccelerationType @@ -36,7 +63,7 @@ public: virtual bool AdapterNext() = 0; virtual unsigned int AdapterIdx() const = 0; virtual void *Device() const = 0; - virtual bool Status() const = 0; + virtual TDeviceStatus Status() const = 0; virtual void *D3D() const = 0; #if defined(_WIN32) @@ -81,7 +108,7 @@ public: virtual bool AdapterNext(); virtual unsigned int AdapterIdx() const; virtual void *Device() const; - virtual bool Status() const; + virtual TDeviceStatus Status() const; virtual void *D3D() const; private: @@ -89,13 +116,13 @@ private: LPDIRECT3DDEVICE9 _d3dDevice; D3DDISPLAYMODE _d3ddm; D3DADAPTER_IDENTIFIER9 _adapter; - bool _status; + TDeviceStatus _status; unsigned int _adapterIdx; bool _adapterFound; D3DFORMAT Format(); D3DADAPTER_IDENTIFIER9 Adapter(); - bool Init(); + int Init(); void Destroy(); }; @@ -107,7 +134,7 @@ public: virtual bool AdapterNext(); virtual unsigned int AdapterIdx() const; virtual void *Device() const; - virtual bool Status() const; + virtual TDeviceStatus Status() const; virtual void *D3D() const; private: @@ -115,13 +142,13 @@ private: LPDIRECT3DDEVICE9EX _d3dDeviceEx; D3DDISPLAYMODEEX _d3ddmEx; D3DADAPTER_IDENTIFIER9 _adapter; - bool _status; + TDeviceStatus _status; unsigned int _adapterIdx; bool _adapterFound; D3DFORMAT Format(); D3DADAPTER_IDENTIFIER9 Adapter(); - bool Init(); + int Init(); void Destroy(); }; @@ -133,21 +160,21 @@ public: virtual bool AdapterNext(); virtual unsigned int AdapterIdx() const; virtual void *Device() const; - virtual bool Status() const; + virtual TDeviceStatus Status() const; virtual void *D3D() const; const CD3D9ExWrapper &D3D9() const; private: CD3D9ExWrapper _d3d9; IDXVAHD_Device *_dxvaDevice; - bool _status; + TDeviceStatus _status; bool _adapterFound; static const D3DFORMAT RENDER_TARGET_FORMAT; static const D3DFORMAT VIDEO_FORMAT; static const unsigned int VIDEO_FPS; - bool DXVAHDInit(); + TDeviceStatus DXVAHDInit(); void DXVAHDDestroy(); };