Synchronise with Khronos-private Gitlab branch

The maintenance of the conformance tests is moving to Github.

This commit contains all the changes that have been done in
Gitlab since the first public release of the conformance tests.

Signed-off-by: Kevin Petit <kevin.petit@arm.com>
This commit is contained in:
Kevin Petit
2019-02-20 16:10:04 +00:00
committed by Kévin Petit
parent b1603eb6ba
commit 53db6e7f9f
115 changed files with 2632 additions and 1304 deletions

View File

@@ -18,7 +18,7 @@ set(CLConform_VERSION "${CLConform_VERSION_MAJOR}.${CLConform_VERSION_MINOR}")
set(CLConform_VERSION_FULL set(CLConform_VERSION_FULL
"${CLConform_VERSION}.${CLConform_VERSION_MICRO}${CLConform_VERSION_EXTRA}") "${CLConform_VERSION}.${CLConform_VERSION_MICRO}${CLConform_VERSION_EXTRA}")
cmake_minimum_required(VERSION 2.8) cmake_minimum_required(VERSION 3.1)
add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_0_APIS=1) add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_0_APIS=1)
add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_1_APIS=1) add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_1_APIS=1)
@@ -77,6 +77,11 @@ else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
endif() endif()
# Clang emits C++11 narrowing diagnostics for this code base; suppress them for now.
# AppleClang is checked explicitly: with policy CMP0025 set to NEW (the default
# for cmake_minimum_required >= 3.0) Apple's Clang reports "AppleClang", which
# a plain comparison against "Clang" would not match.
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++11-narrowing")
endif()
list(APPEND CLConform_LIBRARIES ${OPENCL_LIBRARIES}) list(APPEND CLConform_LIBRARIES ${OPENCL_LIBRARIES})
if(ANDROID) if(ANDROID)
list(APPEND CLConform_LIBRARIES m) list(APPEND CLConform_LIBRARIES m)

4
build_android.py Normal file → Executable file
View File

@@ -108,7 +108,9 @@ def install_android_cmake():
print "input: " print "input: "
if get_input(): if get_input():
print("installing android-cmake") print("installing android-cmake")
subprocess.call(['git', 'clone', 'https://github.com/taka-no-me/android-cmake']) #subprocess.call(['git', 'clone', 'https://github.com/taka-no-me/android-cmake'])
# Use a newer fork of android-cmake which has been updated to support Clang. GCC is deprecated in newer NDKs and C11 atomics conformance doesn't build with NDK > 10.
subprocess.call(['git', 'clone', 'https://github.com/daewoong-jang/android-cmake'])
args.android_cmake = os.path.join(args.src_dir,"android-cmake") args.android_cmake = os.path.join(args.src_dir,"android-cmake")
else: else:
exit() exit()

0
build_lnx.sh Normal file → Executable file
View File

0
clean_tests.py Normal file → Executable file
View File

View File

@@ -207,7 +207,10 @@ long double roundl(long double x);
int cf_signbit(double x); int cf_signbit(double x);
int cf_signbitf(float x); int cf_signbitf(float x);
// signbit() was added to the MSVC runtime in _MSC_VER == 1800 (Visual Studio 2013)
#if _MSC_VER < 1800
static int signbit(double x) { return cf_signbit(x); } static int signbit(double x) { return cf_signbit(x); }
#endif
static int signbitf(float x) { return cf_signbitf(x); } static int signbitf(float x) { return cf_signbitf(x); }
long int lrint (double flt); long int lrint (double flt);
@@ -242,8 +245,11 @@ int32_t float2int (float fx);
// //
#if defined(_MSC_VER) #if defined(_MSC_VER)
// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
#if _MSC_VER < 1900
#define snprintf sprintf_s #define snprintf sprintf_s
#endif #endif
#endif

View File

@@ -479,16 +479,6 @@ struct AddressingTable
static AddressingTable sAddressingTable; static AddressingTable sAddressingTable;
bool alpha_is_x(cl_image_format *format){
switch (format->image_channel_order) {
case CL_RGBx:
case CL_sRGBx:
return true;
default:
return false;
}
}
bool is_sRGBA_order(cl_channel_order image_channel_order){ bool is_sRGBA_order(cl_channel_order image_channel_order){
switch (image_channel_order) { switch (image_channel_order) {
case CL_sRGB: case CL_sRGB:
@@ -508,19 +498,21 @@ int has_alpha(cl_image_format *format) {
case CL_R: case CL_R:
return 0; return 0;
case CL_A: case CL_A:
case CL_Rx:
return 1; return 1;
case CL_Rx:
return 0;
case CL_RG: case CL_RG:
return 0; return 0;
case CL_RA: case CL_RA:
case CL_RGx:
return 1; return 1;
case CL_RGx:
return 0;
case CL_RGB: case CL_RGB:
case CL_sRGB: case CL_sRGB:
return 0; return 0;
case CL_RGBx: case CL_RGBx:
case CL_sRGBx: case CL_sRGBx:
return 1; return 0;
case CL_RGBA: case CL_RGBA:
return 1; return 1;
case CL_BGRA: case CL_BGRA:
@@ -719,13 +711,6 @@ void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
} }
} }
int issubnormal(float a)
{
union { cl_int i; cl_float f; } u;
u.f = a;
return (u.i & 0x7f800000U) == 0;
}
float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) { float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) {
if (sampler->filter_mode == CL_FILTER_NEAREST) if (sampler->filter_mode == CL_FILTER_NEAREST)
return 0.0f; return 0.0f;
@@ -1412,16 +1397,9 @@ void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
|| ( depth_lod != 0 && z >= (int)depth_lod ) || ( depth_lod != 0 && z >= (int)depth_lod )
|| ( imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize ) ) || ( imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize ) )
{ {
// Border color
if (imageInfo->format->image_channel_order == CL_DEPTH)
{
outData[ 0 ] = 0;
}
else {
outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0; outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
if (!has_alpha(imageInfo->format)) if (!has_alpha(imageInfo->format))
outData[3] = alpha_is_x(imageInfo->format) ? 0 : 1; outData[3] = 1;
}
return; return;
} }

View File

@@ -136,8 +136,6 @@ extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *d
int has_alpha(cl_image_format *format); int has_alpha(cl_image_format *format);
extern bool alpha_is_x(cl_image_format *format);
extern bool is_sRGBA_order(cl_channel_order image_channel_order); extern bool is_sRGBA_order(cl_channel_order image_channel_order);
inline float calculate_array_index( float coord, float extent ); inline float calculate_array_index( float coord, float extent );
@@ -594,7 +592,6 @@ extern char *create_random_image_data( ExplicitType dataType, image_descriptor *
extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine ); extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler); extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter ); extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
extern int issubnormal(float);
#define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) ) #define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )

View File

@@ -645,14 +645,14 @@ size_t get_pixel_bytes( const cl_image_format *fmt )
return 0; return 0;
} }
int verifyImageSupport( cl_device_id device ) test_status verifyImageSupport( cl_device_id device )
{ {
if( checkForImageSupport( device ) ) if( checkForImageSupport( device ) )
{ {
log_error( "ERROR: Device does not supported images as required by this test!\n" ); log_error( "ERROR: Device does not supported images as required by this test!\n" );
return CL_IMAGE_FORMAT_NOT_SUPPORTED; return TEST_FAIL;
} }
return 0; return TEST_PASS;
} }
int checkForImageSupport( cl_device_id device ) int checkForImageSupport( cl_device_id device )

View File

@@ -17,6 +17,7 @@
#define _kernelHelpers_h #define _kernelHelpers_h
#include "compat.h" #include "compat.h"
#include "testHarness.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@@ -90,8 +91,8 @@ extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl
/* Helper to get pixel size for a pixel format */ /* Helper to get pixel size for a pixel format */
size_t get_pixel_bytes( const cl_image_format *fmt ); size_t get_pixel_bytes( const cl_image_format *fmt );
/* Verify the given device supports images. 0 means you're good to go, otherwise an error */ /* Verify the given device supports images. */
extern int verifyImageSupport( cl_device_id device ); extern test_status verifyImageSupport( cl_device_id device );
/* Checks that the given device supports images. Same as verify, but doesn't print an error */ /* Checks that the given device supports images. Same as verify, but doesn't print an error */
extern int checkForImageSupport( cl_device_id device ); extern int checkForImageSupport( cl_device_id device );

View File

@@ -566,6 +566,7 @@ long int lrintf (float x)
// //
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
#if _MSC_VER < 1900
int fetestexcept(int excepts) int fetestexcept(int excepts)
{ {
unsigned int status = _statusfp(); unsigned int status = _statusfp();
@@ -583,6 +584,7 @@ int feclearexcept(int excepts)
_clearfp(); _clearfp();
return 0; return 0;
} }
#endif
#endif // __INTEL_COMPILER #endif // __INTEL_COMPILER

View File

@@ -0,0 +1,42 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "parseParameters.h"
#include "errorHelpers.h"
#include <string.h>
// Returns true when `number` is a positive power of two (1, 2, 4, 8, ...).
//
// Zero and negative values are rejected before the bit test. This keeps
// `number - 1` from overflowing when `number` is INT_MIN (signed overflow is
// undefined behaviour) and stops negative inputs from being reported as
// powers of two.
bool is_power_of_two(int number)
{
    // A power of two has exactly one bit set, so clearing the lowest set bit
    // with `number & (number - 1)` must leave zero.
    return number > 0 && (number & (number - 1)) == 0;
}
// Reads an integer that starts at arg[1] and is followed somewhere by ']'.
//
// arg                  - in/out cursor into the argument string. When a ']' is
//                        found after arg[0], arg is advanced to point at it;
//                        otherwise arg is left untouched.
// wimpyReductionFactor - in/out. Overwritten with the parsed value only when
//                        that value passes is_power_of_two(); otherwise the
//                        current value is kept and a warning is logged.
extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor)
{
    const char *closing_bracket = strchr(arg + 1, ']');
    if (closing_bracket == NULL)
    {
        // No terminator: nothing is parsed and nothing is changed.
        return;
    }

    // atoi stops at the first non-digit, so the ']' itself ends the number.
    const int requested_factor = atoi(arg + 1);
    arg = closing_bracket; // Advance until ']'

    if (!is_power_of_two(requested_factor))
    {
        log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be power of 2. The default value will be used.\n", requested_factor);
        return;
    }

    log_info("\n Wimpy reduction factor changed from %d to %d \n", wimpyReductionFactor, requested_factor);
    wimpyReductionFactor = requested_factor;
}

View File

@@ -0,0 +1,24 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _parseParameters_h
#define _parseParameters_h
#include "compat.h"
#include <string>
// Parses an integer starting at arg[1]. If a ']' follows, arg is advanced to
// that ']' and, when the parsed value is a power of two, it is stored in
// wimpyReductionFactor; otherwise wimpyReductionFactor is left unchanged.
// If no ']' is found, neither argument is modified.
extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor);
#endif // _parseParameters_h

View File

@@ -135,7 +135,7 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" ); log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" ); log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );
for( i = 0; i < num_fns - 1; i++ ) for( i = 0; i < num_fns; i++ )
{ {
log_info( "\t\t%s\n", fnNames[ i ] ); log_info( "\t\t%s\n", fnNames[ i ] );
} }
@@ -431,10 +431,18 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
/* If we have a device checking function, run it */ /* If we have a device checking function, run it */
if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS ) if( ( deviceCheckFn != NULL ) )
{ {
test_finish(); test_status status = deviceCheckFn( device );
return -1; switch (status)
{
case TEST_PASS:
break;
case TEST_FAIL:
return 1;
case TEST_SKIP:
return 0;
}
} }
if (num_elements <= 0) if (num_elements <= 0)

View File

@@ -23,6 +23,13 @@
extern "C" { extern "C" {
#endif #endif
// Outcome reported by a device-checking function (see DeviceCheckFn).
// runTestHarnessWithCheck continues on TEST_PASS and returns early on the
// other two values.
typedef enum test_status
{
TEST_PASS = 0, // Check succeeded; the harness carries on with the tests.
TEST_FAIL = 1, // Check failed; the harness returns 1.
TEST_SKIP = 2, // Tests should not run on this device; the harness returns 0.
} test_status;
extern cl_uint gReSeed; extern cl_uint gReSeed;
extern cl_uint gRandomSeed; extern cl_uint gRandomSeed;
@@ -32,8 +39,8 @@ extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
basefn fnList[], const char *fnNames[], basefn fnList[], const char *fnNames[],
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps ); int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits. // Device checking function. See runTestHarnessWithCheck. If this function returns anything other than TEST_PASS, the harness exits.
typedef int (*DeviceCheckFn)( cl_device_id device ); typedef test_status (*DeviceCheckFn)( cl_device_id device );
// Same as runTestHarness, but also supplies a function that checks the created device for required functionality. // Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,

View File

@@ -1,3 +1,4 @@
set(CMAKE_CXX_STANDARD 11)
set(MODULE_NAME SVM) set(MODULE_NAME SVM)
set(${MODULE_NAME}_SOURCES set(${MODULE_NAME}_SOURCES

View File

@@ -226,7 +226,7 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt
} }
error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL); error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
test_error(error,"clGetDeviceInfo failed for CL_DEVICE_MEM_SHARING"); test_error(error,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS))) if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS)))
{ {
log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value"); log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value");

View File

@@ -97,6 +97,10 @@ int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_c
log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc"); log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc");
return -1; return -1;
} }
err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, NULL, NULL);
test_error(err, "clEnqueueUnmapMemObject failed");
err = clFinish(queues[0]);
test_error(err, "clFinish failed");
} }
} }

View File

@@ -17,10 +17,15 @@
#include "../../test_common/harness/mt19937.h" #include "../../test_common/harness/mt19937.h"
#include <vector> #include <vector>
#include <atomic>
#if !defined(_WIN32)
#include <unistd.h>
#endif
typedef struct typedef struct
{ {
cl_uint status; std::atomic<cl_uint> status;
cl_uint num_svm_pointers; cl_uint num_svm_pointers;
std::vector<void *> svm_pointers; std::vector<void *> svm_pointers;
} CallbackData; } CallbackData;
@@ -62,7 +67,7 @@ void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_point
clSVMFree(context, svm_pointers[i]); clSVMFree(context, svm_pointers[i]);
} }
data->status = 1; data->status.store(1, std::memory_order_release);
} }
int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
@@ -231,7 +236,9 @@ int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue
test_error(error, "clFinish failed"); test_error(error, "clFinish failed");
//wait for the callback //wait for the callback
while(data.status == 0) { } while(data.status.load(std::memory_order_acquire) == 0) {
usleep(1);
}
//check if number of SVM pointers returned in the callback matches with expected //check if number of SVM pointers returned in the callback matches with expected
if (data.num_svm_pointers != buffers.size()) if (data.num_svm_pointers != buffers.size())

View File

@@ -1303,7 +1303,6 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
int error; int error;
clProgramWrapper program; clProgramWrapper program;
clKernelWrapper kernel; clKernelWrapper kernel;
clMemWrapper streams[3];
size_t threads[1], localThreads[1]; size_t threads[1], localThreads[1];
cl_int *constantData, *resultData; cl_int *constantData, *resultData;
cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize; cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize;
@@ -1324,12 +1323,12 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
log_info("Reported max constant buffer size of %lld bytes.\n", maxSize); log_info("Reported max constant buffer size of %lld bytes.\n", maxSize);
// Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE // Limit test buffer size to 1/8 of CL_DEVICE_GLOBAL_MEM_SIZE
error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0); error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
if (maxSize > maxGlobalSize / 4) if (maxSize > maxGlobalSize / 8)
maxSize = maxGlobalSize / 4; maxSize = maxGlobalSize / 8;
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0); error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE "); test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
@@ -1358,6 +1357,7 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
for(i=0; i<(int)(numberOfInts); i++) for(i=0; i<(int)(numberOfInts); i++)
constantData[i] = (int)genrand_int32(d); constantData[i] = (int)genrand_int32(d);
clMemWrapper streams[3];
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, constantData, &error); streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, constantData, &error);
test_error( error, "Creating test array failed" ); test_error( error, "Creating test array failed" );
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeToAllocate, NULL, &error); streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeToAllocate, NULL, &error);
@@ -1427,7 +1427,7 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
if (allocPassed) { if (allocPassed) {
if (currentSize < maxSize/PASSING_FRACTION) { if (currentSize < maxSize/PASSING_FRACTION) {
log_error("Failed to allocate at least 1/4 of the reported constant size.\n"); log_error("Failed to allocate at least 1/8 of the reported constant size.\n");
return -1; return -1;
} else if (currentSize != maxSize) { } else if (currentSize != maxSize) {
log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize); log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize);

0
test_conformance/basic/run_array Normal file → Executable file
View File

0
test_conformance/basic/run_array_image_copy Normal file → Executable file
View File

0
test_conformance/basic/run_image Normal file → Executable file
View File

0
test_conformance/basic/run_multi_read_image Normal file → Executable file
View File

View File

@@ -209,13 +209,12 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
log_error( "ERROR: Results of copy did not validate!\n" ); log_error( "ERROR: Results of copy did not validate!\n" );
sprintf(values + strlen( values), "%d -> [", i); sprintf(values + strlen( values), "%d -> [", i);
for (int j=0; j<(int)elementSize; j++) for (int j=0; j<(int)elementSize; j++)
sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]); sprintf(values + strlen( values), "%2x ", inchar[j]);
sprintf(values + strlen(values), "] != ["); sprintf(values + strlen(values), "] != [");
for (int j=0; j<(int)elementSize; j++) for (int j=0; j<(int)elementSize; j++)
sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]); sprintf(values + strlen( values), "%2x ", outchar[j]);
sprintf(values + strlen(values), "]"); sprintf(values + strlen(values), "]");
log_error("%s\n", values); log_error("%s\n", values);
return -1; return -1;
} }
} }

View File

@@ -872,12 +872,14 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
// We need to create 5 random values of the given type, // We need to create 5 random values of the given type,
// and read 4 of them back. // and read 4 of them back.
cl_uchar CL_ALIGNED(ALIGNMENT) write_data[NUM_TESTED_VALUES * sizeof(cl_ulong16)]; const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
cl_uchar CL_ALIGNED(ALIGNMENT) read_data[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16);
cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(write_data), write_data, &status ) ); clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
test_error_ret(status,"Failed to allocate write buffer",status); test_error_ret(status,"Failed to allocate write buffer",status);
clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(read_data), read_data, &status ) ); clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
test_error_ret(status,"Failed to allocate read buffer",status); test_error_ret(status,"Failed to allocate read buffer",status);
status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
@@ -892,7 +894,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
// Generate new random data to push through. // Generate new random data to push through.
// Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, sizeof(write_data), 0, 0, 0, 0); cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
if ( ti.is_bool() ) { if ( ti.is_bool() ) {
// For boolean, random data cast to bool isn't very random. // For boolean, random data cast to bool isn't very random.
@@ -904,7 +906,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
} }
bool_iter++; bool_iter++;
} else { } else {
l_set_randomly( write_data, sizeof(write_data), rand_state ); l_set_randomly( write_data, write_data_size, rand_state );
} }
status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status); status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
@@ -913,7 +915,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status); status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
// Determine the expected values. // Determine the expected values.
cl_uchar expected[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; cl_uchar expected[read_data_size];
memset( expected, -1, sizeof(expected) ); memset( expected, -1, sizeof(expected) );
l_copy( expected, 0, write_data, 0, ti ); l_copy( expected, 0, write_data, 0, ti );
l_copy( expected, 1, write_data, 1, ti ); l_copy( expected, 1, write_data, 1, ti );
@@ -930,8 +932,8 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i]; for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
} }
cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
memset( read_data, -1, sizeof(read_data) ); memset(read_data, -1, read_data_size);
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
// Now run the kernel // Now run the kernel
@@ -940,7 +942,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
status = clFinish(queue); test_error_ret(status,"finish",status); status = clFinish(queue); test_error_ret(status,"finish",status);
read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
if ( ti.is_bool() ) { if ( ti.is_bool() ) {
// Collapse down to one bit. // Collapse down to one bit.
@@ -959,7 +961,8 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
} }
if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
align_free(write_data);
align_free(read_data);
return err; return err;
} }
@@ -1018,12 +1021,14 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
// We need to create 5 random values of the given type, // We need to create 5 random values of the given type,
// and read 4 of them back. // and read 4 of them back.
cl_uchar CL_ALIGNED(ALIGNMENT) write_data[NUM_TESTED_VALUES * sizeof(cl_ulong16)]; const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
cl_uchar CL_ALIGNED(ALIGNMENT) read_data[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; const size_t read_data_size = (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16);
clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(write_data), write_data, &status ) ); cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
test_error_ret(status,"Failed to allocate write buffer",status); test_error_ret(status,"Failed to allocate write buffer",status);
clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(read_data), read_data, &status ) ); clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
test_error_ret(status,"Failed to allocate read buffer",status); test_error_ret(status,"Failed to allocate read buffer",status);
status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
@@ -1043,7 +1048,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
// Generate new random data to push through. // Generate new random data to push through.
// Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, sizeof(write_data), 0, 0, 0, 0); cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
if ( ti.is_bool() ) { if ( ti.is_bool() ) {
// For boolean, random data cast to bool isn't very random. // For boolean, random data cast to bool isn't very random.
@@ -1055,7 +1060,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
} }
bool_iter++; bool_iter++;
} else { } else {
l_set_randomly( write_data, sizeof(write_data), rand_state ); l_set_randomly( write_data, write_data_size, rand_state );
} }
status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status); status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
@@ -1071,7 +1076,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status); status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
// Determine the expected values. // Determine the expected values.
cl_uchar expected[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; cl_uchar expected[read_data_size];
memset( expected, -1, sizeof(expected) ); memset( expected, -1, sizeof(expected) );
if ( iteration ) { if ( iteration ) {
l_copy( expected, 0, write_data, 0, ti ); l_copy( expected, 0, write_data, 0, ti );
@@ -1102,8 +1107,8 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0); clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
memset( read_data, -1, sizeof(read_data) ); memset( read_data, -1, read_data_size );
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0); clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
// Now run the kernel // Now run the kernel
@@ -1117,7 +1122,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
status = clFinish(queue); test_error_ret(status,"finish",status); status = clFinish(queue); test_error_ret(status,"finish",status);
read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0); read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
if ( ti.is_bool() ) { if ( ti.is_bool() ) {
// Collapse down to one bit. // Collapse down to one bit.
@@ -1139,6 +1144,8 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
} }
if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
align_free(write_data);
align_free(read_data);
return err; return err;
} }
@@ -1352,6 +1359,13 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS); print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
return status; return status;
} }
status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0);
if(check_error(status, "Failed to compile program for user type test (%s)", IGetErrorString(status)))
{
print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
return status;
}
} }
@@ -1372,12 +1386,12 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
test_error_ret(status,"Failed to create reader kernel for user type test",status); test_error_ret(status,"Failed to create reader kernel for user type test",status);
// Set up data. // Set up data.
cl_uchar CL_ALIGNED(ALIGNMENT) uchar_data; cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT);
cl_uint CL_ALIGNED(ALIGNMENT) uint_data; cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT);
clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(uchar_data), &uchar_data, &status ) ); clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status ) );
test_error_ret(status,"Failed to allocate uchar buffer",status); test_error_ret(status,"Failed to allocate uchar buffer",status);
clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(uint_data), &uint_data, &status ) ); clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uint), uint_data, &status ) );
test_error_ret(status,"Failed to allocate uint buffer",status); test_error_ret(status,"Failed to allocate uint buffer",status);
status = clSetKernelArg(reader,0,sizeof(cl_mem),&uchar_mem); test_error_ret(status,"set arg",status); status = clSetKernelArg(reader,0,sizeof(cl_mem),&uchar_mem); test_error_ret(status,"set arg",status);
@@ -1387,18 +1401,18 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
cl_uint expected_uint = 42; cl_uint expected_uint = 42;
for ( unsigned iter = 0; iter < 5 ; iter++ ) { // Must go around at least twice for ( unsigned iter = 0; iter < 5 ; iter++ ) { // Must go around at least twice
// Read back data // Read back data
uchar_data = -1; *uchar_data = -1;
uint_data = -1; *uint_data = -1;
const size_t one = 1; const size_t one = 1;
status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status); status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
status = clFinish(queue); test_error_ret(status,"finish",status); status = clFinish(queue); test_error_ret(status,"finish",status);
cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(uint_data), 0, 0, 0, 0); cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, 0, 0, 0);
cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(uchar_data), 0, 0, 0, 0); cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0, 0, 0);
if ( expected_uchar != uchar_data || expected_uint != uint_data ) { if ( expected_uchar != *uchar_data || expected_uint != *uint_data ) {
log_error("FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n", log_error("FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
iter, (int)uchar_data, uint_data, (int)expected_uchar, expected_uint ); iter, (int)*uchar_data, *uint_data, (int)expected_uchar, expected_uint );
err |= 1; err |= 1;
} }
@@ -1410,16 +1424,17 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
expected_uint++; expected_uint++;
// Write the new values into persistent store. // Write the new values into persistent store.
uchar_data = expected_uchar; *uchar_data = expected_uchar;
uint_data = expected_uint; *uint_data = expected_uint;
status = clSetKernelArg(writer,0,sizeof(uchar_data),&uchar_data); test_error_ret(status,"set arg",status); status = clSetKernelArg(writer,0,sizeof(cl_uchar),uchar_data); test_error_ret(status,"set arg",status);
status = clSetKernelArg(writer,1,sizeof(uint_data),&uint_data); test_error_ret(status,"set arg",status); status = clSetKernelArg(writer,1,sizeof(cl_uint),uint_data); test_error_ret(status,"set arg",status);
status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status); status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
status = clFinish(queue); test_error_ret(status,"finish",status); status = clFinish(queue); test_error_ret(status,"finish",status);
} }
if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
align_free(uchar_data);
align_free(uint_data);
return err; return err;
} }

View File

@@ -1566,6 +1566,7 @@ public:
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder; using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr; using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr; using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory; using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM) CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
{ {
@@ -1606,7 +1607,7 @@ public:
program += " atomic_work_item_fence(" + program += " atomic_work_item_fence(" +
std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
"memory_order_acquire," + "memory_order_acquire," +
std::string(LocalMemory() ? "memory_scope_work_group" : "memory_scope_device") + std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
");\n"; ");\n";
program += program +=
@@ -1632,7 +1633,7 @@ public:
program += " atomic_work_item_fence(" + program += " atomic_work_item_fence(" +
std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
"memory_order_release," + "memory_order_release," +
std::string(LocalMemory() ? "memory_scope_work_group" : "memory_scope_device") + std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
");\n"; ");\n";
program += program +=

View File

@@ -13,32 +13,63 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
/*
Header compat.h should be used instead of stdlib.h, stdbool.h, stdint.h, float.h, fenv.h,
math.h. It provides workarounds if these headers are not available or not complete.
Important: It should be included before math.h, directly or indirectly, because Intel mathimf.h
is not compatible with Microsoft math.h. Including math.h before mathimf.h causes compile-time
error.
*/
#ifndef _COMPAT_H_ #ifndef _COMPAT_H_
#define _COMPAT_H_ #define _COMPAT_H_
#if defined(_WIN32) && defined (_MSC_VER) #if defined(_WIN32) && defined (_MSC_VER)
#include <Windows.h> #include <Windows.h>
#include <Winbase.h>
#include <CL/cl.h>
#include <float.h>
#include <xmmintrin.h>
#include <math.h>
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
#define isfinite(x) _finite(x)
#if !defined(__cplusplus)
typedef char bool;
#define inline
#else
extern "C" {
#endif #endif
#ifdef __cplusplus
#define EXTERN_C extern "C"
#else
#define EXTERN_C
#endif
//
// stdlib.h
//
#include <stdlib.h> // On Windows, _MAX_PATH defined there.
// llabs appeared in MS C v16 (VS 10/2010).
#if defined( _MSC_VER ) && _MSC_VER <= 1500
EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
#endif
//
// stdbool.h
//
// stdbool.h appeared in MS C v18 (VS 12/2013).
// Only legacy Microsoft compilers (up to _MSC_VER 1700) lack <stdbool.h>; for
// those, provide a minimal C replacement. C++ has a built-in bool, so the
// fallback is skipped there. Every other compiler uses the real header.
// Note: the macro must be spelled _MSC_VER -- an undefined name evaluates to 0
// in #if, which would select the fallback on every Microsoft compiler.
#if defined( _MSC_VER ) && _MSC_VER <= 1700
    #if !defined(__cplusplus)
        typedef char bool;
        #define true 1
        #define false 0
    #endif
#else
    #include <stdbool.h>
#endif
//
// stdint.h
//
// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
typedef unsigned char uint8_t; typedef unsigned char uint8_t;
typedef char int8_t; typedef char int8_t;
typedef unsigned short uint16_t; typedef unsigned short uint16_t;
@@ -47,25 +78,83 @@ typedef unsigned int uint32_t;
typedef int int32_t; typedef int int32_t;
typedef unsigned long long uint64_t; typedef unsigned long long uint64_t;
typedef long long int64_t; typedef long long int64_t;
#else
#define MAXPATHLEN MAX_PATH #ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
typedef unsigned short ushort; #endif
typedef unsigned int uint; #include <stdint.h>
typedef unsigned long ulong; #endif
#define INFINITY (FLT_MAX + FLT_MAX)
//#define NAN (INFINITY | 1) //
//const static int PINFBITPATT_SP32 = INFINITY; // float.h
//
#include <float.h>
//
// fenv.h
//
// fenv.h appeared in MS C v18 (VS 12/2013).
#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
// reimplement fenv.h because windows doesn't have it
#define FE_INEXACT 0x0020
#define FE_UNDERFLOW 0x0010
#define FE_OVERFLOW 0x0008
#define FE_DIVBYZERO 0x0004
#define FE_INVALID 0x0001
#define FE_ALL_EXCEPT 0x003D
int fetestexcept(int excepts);
int feclearexcept(int excepts);
#else
#include <fenv.h>
#endif
//
// math.h
//
#if defined( __INTEL_COMPILER )
#include <mathimf.h>
#else
#include <math.h>
#endif
#if defined( _MSC_VER )
#ifdef __cplusplus
extern "C" {
#endif
#ifndef M_PI #ifndef M_PI
#define M_PI 3.14159265358979323846264338327950288 #define M_PI 3.14159265358979323846264338327950288
#endif #endif
#if ! defined( __INTEL_COMPILER )
#ifndef NAN
#define NAN (INFINITY - INFINITY)
#endif
#ifndef HUGE_VALF
#define HUGE_VALF (float)HUGE_VAL
#endif
#ifndef INFINITY
#define INFINITY (FLT_MAX + FLT_MAX)
#endif
#ifndef isfinite
#define isfinite(x) _finite(x)
#endif
#ifndef isnan
#define isnan( x ) ((x) != (x)) #define isnan( x ) ((x) != (x))
#endif
#ifndef isinf
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY) #define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)
#endif
double rint( double x); double rint( double x);
float rintf( float x); float rintf( float x);
@@ -99,27 +188,6 @@ long double remquol( long double x, long double y, int *quo);
long double scalblnl(long double x, long n); long double scalblnl(long double x, long n);
inline long long
llabs(long long __x) { return __x >= 0 ? __x : -__x; }
// end of math functions
uint64_t ReadTime( void );
double SubtractTime( uint64_t endTime, uint64_t startTime );
#define sleep(X) Sleep(1000*X)
#define snprintf sprintf_s
//#define hypotl _hypot
float make_nan();
float nanf( const char* str);
double nan( const char* str);
long double nanl( const char* str);
//#if defined USE_BOOST
//#include <boost/math/tr1.hpp>
//double hypot(double x, double y);
float hypotf(float x, float y); float hypotf(float x, float y);
long double hypotl(long double x, long double y) ; long double hypotl(long double x, long double y) ;
double lgamma(double x); double lgamma(double x);
@@ -144,67 +212,179 @@ double round(double x);
float roundf(float x); float roundf(float x);
long double roundl(long double x); long double roundl(long double x);
int signbit(double x); int cf_signbit(double x);
int signbitf(float x); int cf_signbitf(float x);
//bool signbitl(long double x) { return boost::math::tr1::signbit<long double>(x); } static int signbit(double x) { return cf_signbit(x); }
//#endif // USE_BOOST static int signbitf(float x) { return cf_signbitf(x); }
long int lrint (double flt); long int lrint (double flt);
long int lrintf (float flt); long int lrintf (float flt);
float int2float (int32_t ix); float int2float (int32_t ix);
int32_t float2int (float fx); int32_t float2int (float fx);
/** Returns the number of leading 0-bits in x, #endif
starting at the most significant bit position.
If x is 0, the result is undefined.
*/
int __builtin_clz(unsigned int pattern);
#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
static const double zero= 0.00000000000000000000e+00; // These functions appeared in Intel C v13.
#define NAN (INFINITY - INFINITY) float nanf( const char* str);
#define HUGE_VALF (float)HUGE_VAL double nan( const char* str);
long double nanl( const char* str);
int usleep(int usec); #endif
// reimplement fenv.h because windows doesn't have it
#define FE_INEXACT 0x0020
#define FE_UNDERFLOW 0x0010
#define FE_OVERFLOW 0x0008
#define FE_DIVBYZERO 0x0004
#define FE_INVALID 0x0001
#define FE_ALL_EXCEPT 0x003D
int fetestexcept(int excepts);
int feclearexcept(int excepts);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#else // !((defined(_WIN32) && defined(_MSC_VER) #endif
#if defined(__MINGW32__)
#include <windows.h> #if defined( __ANDROID__ )
#define sleep(X) Sleep(1000*X) #define log2(X) (log(X)/log(2))
#endif
//
// stdio.h
//
#if defined( _MSC_VER )
#define snprintf sprintf_s
#endif
//
// unistd.h
//
#if defined( _MSC_VER )
EXTERN_C unsigned int sleep( unsigned int sec );
EXTERN_C int usleep( int usec );
#endif
//
// syscall.h
//
#if defined( __ANDROID__ )
// Android's bionic does not provide SYS_sysctl wrappers.
#define SYS__sysctl __NR__sysctl
#endif
// Some tests use _malloca, which is defined in malloc.h.
#if !defined (__APPLE__)
#include <malloc.h>
#endif
//
// ???
//
#if defined( _MSC_VER )
#define MAXPATHLEN _MAX_PATH
EXTERN_C uint64_t ReadTime( void );
EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
/** Returns the number of leading 0-bits in x,
starting at the most significant bit position.
If x is 0, the result is undefined.
*/
EXTERN_C int __builtin_clz(unsigned int pattern);
#endif #endif
#if defined(__linux__) || defined(__MINGW32__) || defined(__APPLE__)
#ifndef __STDC_LIMIT_MACROS #ifndef MIN
#define __STDC_LIMIT_MACROS #define MIN(x,y) (((x)<(y))?(x):(y))
#endif #endif
#include <fenv.h> #ifndef MAX
#include <math.h> #define MAX(x,y) (((x)>(y))?(x):(y))
#include <float.h>
#include <stdint.h>
#endif #endif
/*
------------------------------------------------------------------------------------------------
WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
This is a typical usage of the macros:
double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
(taken from math_brute_force/reference_math.c). There are two problems:
1. There is an error here. On Windows it will produce the incorrect result
`0x1.5555555555555p+50'. To have a correct result it should be written as
`MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
third argument is not obvious -- sometimes it should be the same as exponent of the
first argument, but sometimes not.
2. Information is duplicated. It is easy to make a mistake.
Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
------------------------------------------------------------------------------------------------
*/
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
#else
// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
#define MAKE_HEX_FLOAT(x,y,z) x #define MAKE_HEX_FLOAT(x,y,z) x
#define MAKE_HEX_DOUBLE(x,y,z) x #define MAKE_HEX_DOUBLE(x,y,z) x
#define MAKE_HEX_LONG(x,y,z) x #define MAKE_HEX_LONG(x,y,z) x
#endif // !((defined(_WIN32) && defined(_MSC_VER) #endif
/*
------------------------------------------------------------------------------------------------
HEX_FLT, HEX_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
double respectively. Arguments:
sm -- sign of number,
int -- integer part of mantissa (without `0x' prefix),
fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
se -- sign of exponent,
exp -- absolute value of (binary) exponent.
Example:
double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
Note:
We have to pass signs as separate arguments because gcc passes negative integer values
(e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
`0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
literal.
------------------------------------------------------------------------------------------------
*/
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
// If compiler does not support hex floating point literals:
#define HEX_FLT( sm, int, fract, se, exp ) sm ldexpf( (float)( 0x ## int ## fract ## UL ), se exp + ilogbf( (float) 0x ## int ) - ilogbf( ( float )( 0x ## int ## fract ## UL ) ) )
#define HEX_DBL( sm, int, fract, se, exp ) sm ldexp( (double)( 0x ## int ## fract ## ULL ), se exp + ilogb( (double) 0x ## int ) - ilogb( ( double )( 0x ## int ## fract ## ULL ) ) )
#define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
#else
// If compiler supports hex floating point literals: just concatenate all the parts into a literal.
#define HEX_FLT( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
#define HEX_DBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
#define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
#endif
#if defined(__MINGW32__)
#include <Windows.h>
#define sleep(sec) Sleep((sec) * 1000)
#endif
#endif // _COMPAT_H_ #endif // _COMPAT_H_

View File

@@ -475,14 +475,14 @@ size_t get_pixel_bytes( const cl_image_format *fmt )
return 0; return 0;
} }
int verifyImageSupport( cl_device_id device ) test_status verifyImageSupport( cl_device_id device )
{ {
if( checkForImageSupport( device ) ) if( checkForImageSupport( device ) )
{ {
log_error( "ERROR: Device does not supported images as required by this test!\n" ); log_error( "ERROR: Device does not supported images as required by this test!\n" );
return CL_IMAGE_FORMAT_NOT_SUPPORTED; return TEST_FAIL;
} }
return 0; return TEST_PASS;
} }
int checkForImageSupport( cl_device_id device ) int checkForImageSupport( cl_device_id device )

View File

@@ -17,6 +17,7 @@
#define _kernelHelpers_h #define _kernelHelpers_h
#include "compat.h" #include "compat.h"
#include "testHarness.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@@ -84,8 +85,8 @@ extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl
/* Helper to get pixel size for a pixel format */ /* Helper to get pixel size for a pixel format */
size_t get_pixel_bytes( const cl_image_format *fmt ); size_t get_pixel_bytes( const cl_image_format *fmt );
/* Verify the given device supports images. 0 means you're good to go, otherwise an error */ /* Verify the given device supports images. */
extern int verifyImageSupport( cl_device_id device ); extern test_status verifyImageSupport( cl_device_id device );
/* Checks that the given device supports images. Same as verify, but doesn't print an error */ /* Checks that the given device supports images. Same as verify, but doesn't print an error */
extern int checkForImageSupport( cl_device_id device ); extern int checkForImageSupport( cl_device_id device );

View File

@@ -13,15 +13,18 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// //
#if defined(_WIN32) && defined (_MSC_VER)
#include "compat.h" #include "compat.h"
#include <math.h>
#include <float.h>
#include <assert.h>
#include <CL/cl_platform.h>
#if defined ( _MSC_VER )
#include <limits.h>
#include <stdlib.h>
#include <CL/cl.h>
#include <windows.h>
#if ! defined( __INTEL_COMPILER )
/////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////
// //
@@ -387,86 +390,6 @@ long double log2l(long double x)
return 1.44269504088896340735992468100189214L * log(x); return 1.44269504088896340735992468100189214L * log(x);
} }
///////////////////////////////////////////////////////////////////
//
// misc functions
//
///////////////////////////////////////////////////////////////////
/*
// This function is commented out because the Windows implementation should never call munmap.
// If it is calling it, we have a bug. Please file a bugzilla.
int munmap(void *addr, size_t len)
{
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
return (int)VirtualAlloc( (LPVOID)addr, len,
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
}
*/
uint64_t ReadTime( void )
{
LARGE_INTEGER current;
QueryPerformanceCounter(&current);
return (uint64_t)current.QuadPart;
}
double SubtractTime( uint64_t endTime, uint64_t startTime )
{
static double PerformanceFrequency = 0.0;
if (PerformanceFrequency == 0.0) {
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
PerformanceFrequency = (double) frequency.QuadPart;
}
return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
}
float make_nan()
{
/* This is the IEEE 754 single-precision format:
unsigned int mantissa: 22;
unsigned int quiet_nan: 1;
unsigned int exponent: 8;
unsigned int negative: 1;
*/
//const static unsigned
static const int32_t _nan = 0x7fc00000;
return *(const float*)(&_nan);
}
float nanf( const char* str)
{
cl_uint u = atoi( str );
u |= 0x7fc00000U;
return *( float*)(&u);
}
double nan( const char* str)
{
cl_ulong u = atoi( str );
u |= 0x7ff8000000000000ULL;
return *( double*)(&u);
}
// double check this implementatation
long double nanl( const char* str)
{
union
{
long double f;
struct { cl_ulong m; cl_ushort sexp; }u;
}u;
u.u.sexp = 0x7fff;
u.u.m = 0x8000000000000000ULL | atoi( str );
return u.f;
}
double trunc(double x) double trunc(double x)
{ {
double absx = fabs(x); double absx = fabs(x);
@@ -589,7 +512,165 @@ long double roundl(long double x)
return x; return x;
} }
int signbit(double x) float cbrtf( float x )
{
float z = pow( fabs((double) x), 1.0 / 3.0 );
return copysignf( z, x );
}
double cbrt( double x )
{
return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
}
/*
 * Fallback lrint(): rounds x to an integral value using the current
 * floating-point rounding mode and returns it as a long.
 *
 * Inputs at or above LONG_MAX saturate to LONG_MAX. Inputs at or below
 * LONG_MIN, and NaN, return LONG_MIN rather than reaching an out-of-range
 * floating-point-to-integer cast, which is undefined behaviour in C.
 */
long int lrint (double x)
{
    /* 2^52: every double of at least this magnitude is already integral. */
    const double two52 = 4503599627370496.0 /* 0x1.0p52 */;

    if( x >= (double) LONG_MAX )
        return LONG_MAX;

    /* Written as a negated '>' so that NaN (all comparisons false) lands here too. */
    if( !( x > (double) LONG_MIN ) )
        return LONG_MIN;

    if( fabs(x) < two52 )
    {
        /* Adding and then subtracting a sign-matched 2^52 forces the FPU to
           drop the fractional bits according to the active rounding mode. */
        double magic = copysign( two52, x );
        double rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    return (long int) x;
}
/*
 * Fallback lrintf(): single-precision counterpart of lrint(). Rounds x to an
 * integral value using the current floating-point rounding mode and returns
 * it as a long.
 *
 * Inputs at or above LONG_MAX saturate to LONG_MAX. Inputs at or below
 * LONG_MIN, and NaN, return LONG_MIN rather than reaching an out-of-range
 * floating-point-to-integer cast, which is undefined behaviour in C.
 */
long int lrintf (float x)
{
    /* 2^23: every float of at least this magnitude is already integral. */
    const float two23 = 8388608.0f /* 0x1.0p23f */;

    if( x >= (float) LONG_MAX )
        return LONG_MAX;

    /* Written as a negated '>' so that NaN (all comparisons false) lands here too. */
    if( !( x > (float) LONG_MIN ) )
        return LONG_MIN;

    if( fabsf(x) < two23 )
    {
        /* Adding and then subtracting a sign-matched 2^23 forces the FPU to
           drop the fractional bits according to the active rounding mode. */
        float magic = copysignf( two23, x );
        float rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    return (long int) x;
}
///////////////////////////////////////////////////////////////////
//
// fenv functions
//
///////////////////////////////////////////////////////////////////
/*
 * Minimal fetestexcept() replacement: reads the floating-point status word
 * with _statusfp(), translates each _SW_* flag into its FE_* counterpart and
 * returns only the flags that were requested in excepts.
 */
int fetestexcept(int excepts)
{
    const unsigned int sw = _statusfp();
    int raised = 0;

    if( sw & _SW_INEXACT )    raised |= FE_INEXACT;
    if( sw & _SW_UNDERFLOW )  raised |= FE_UNDERFLOW;
    if( sw & _SW_OVERFLOW )   raised |= FE_OVERFLOW;
    if( sw & _SW_ZERODIVIDE ) raised |= FE_DIVBYZERO;
    if( sw & _SW_INVALID )    raised |= FE_INVALID;

    return excepts & raised;
}
/*
 * Minimal feclearexcept() replacement. The excepts mask is not consulted:
 * the function simply calls _clearfp() and reports success (0).
 */
int feclearexcept(int excepts)
{
    (void) excepts; /* mask is intentionally unused */

    _clearfp();

    return 0;
}
#endif // __INTEL_COMPILER
#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
/*
 * Returns a single-precision quiet NaN with bit pattern 0x7fc00000.
 *
 * IEEE 754 single-precision layout, least significant bits first:
 *     mantissa:  22 bits
 *     quiet_nan:  1 bit
 *     exponent:   8 bits
 *     negative:   1 bit
 */
float make_nan()
{
    /* Reinterpret the bits through a union instead of dereferencing a cast
       pointer, which would violate the strict-aliasing rules. */
    union
    {
        int32_t i;
        float   f;
    } pun;

    pun.i = 0x7fc00000;
    return pun.f;
}
/*
 * Fallback nanf(): returns a single-precision quiet NaN whose low bits carry
 * the integer parsed from str with atoi().
 *
 * str must be a valid C string; an empty or non-numeric string yields the
 * plain quiet NaN 0x7fc00000.
 */
float nanf( const char* str)
{
    /* Reinterpret the bits through a union instead of dereferencing a cast
       pointer, which would violate the strict-aliasing rules. */
    union
    {
        uint32_t bits;
        float    value;
    } pun;

    pun.bits = (uint32_t) atoi( str );
    pun.bits |= 0x7fc00000U; /* all-ones exponent plus the quiet bit */
    return pun.value;
}
/*
 * Fallback nan(): returns a double-precision quiet NaN whose low bits carry
 * the integer parsed from str with atoi().
 *
 * str must be a valid C string; an empty or non-numeric string yields the
 * plain quiet NaN 0x7ff8000000000000.
 */
double nan( const char* str)
{
    /* Reinterpret the bits through a union instead of dereferencing a cast
       pointer, which would violate the strict-aliasing rules. */
    union
    {
        uint64_t bits;
        double   value;
    } pun;

    pun.bits = (uint64_t) atoi( str );
    pun.bits |= 0x7ff8000000000000ULL; /* all-ones exponent plus the quiet bit */
    return pun.value;
}
/*
 * Fallback nanl(): returns a long double quiet NaN whose low bits carry the
 * integer parsed from str with atoi().
 *
 * Two representations are handled:
 *   - 64-bit significand (x87 extended precision): the NaN is assembled
 *     directly. The top fraction bit (bit 62) must be set in addition to the
 *     explicit integer bit (bit 63); with only the integer bit set, a zero
 *     payload would encode infinity instead of a NaN.
 *   - anything else (e.g. long double identical to double): a double quiet
 *     NaN is built and converted, which preserves NaN-ness.
 */
long double nanl( const char* str)
{
#if LDBL_MANT_DIG == 64
    /* Assumes the little-endian x87 layout: 64-bit significand first, then
       the 15-bit exponent with the sign bit on top. */
    union
    {
        long double f;
        struct { uint64_t m; uint16_t sexp; } parts;
    } pun;

    pun.f = 0.0L; /* give any padding bytes a defined value */
    pun.parts.sexp = 0x7fff;
    pun.parts.m = 0xC000000000000000ULL | (uint64_t) atoi( str );
    return pun.f;
#else
    union
    {
        uint64_t bits;
        double   value;
    } pun;

    pun.bits = 0x7ff8000000000000ULL | (uint64_t) atoi( str );
    return (long double) pun.value;
#endif
}
#endif
///////////////////////////////////////////////////////////////////
//
// misc functions
//
///////////////////////////////////////////////////////////////////
/*
// This function is commented out because the Windows implementation should never call munmap.
// If it is calling it, we have a bug. Please file a bugzilla.
int munmap(void *addr, size_t len)
{
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
return (int)VirtualAlloc( (LPVOID)addr, len,
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
}
*/
/*
 * Returns the current reading of the performance counter obtained from
 * QueryPerformanceCounter(), in raw counter ticks.
 */
uint64_t ReadTime( void )
{
    LARGE_INTEGER ticks;

    QueryPerformanceCounter( &ticks );

    return (uint64_t) ticks.QuadPart;
}
/*
 * Converts the difference between two ReadTime()-style counter readings into
 * a duration: the tick delta is divided by the counter frequency and scaled
 * by 1e9.
 */
double SubtractTime( uint64_t endTime, uint64_t startTime )
{
    /* Counter frequency; queried on first use and cached for later calls. */
    static double ticksPerSecond = 0.0;

    if( ticksPerSecond == 0.0 )
    {
        LARGE_INTEGER freq;
        QueryPerformanceFrequency( &freq );
        ticksPerSecond = (double) freq.QuadPart;
    }

    return (double)( endTime - startTime ) / ticksPerSecond * 1e9;
}
int cf_signbit(double x)
{ {
union union
{ {
@@ -600,7 +681,7 @@ int signbit(double x)
return u.u >> 63; return u.u >> 63;
} }
int signbitf(float x) int cf_signbitf(float x)
{ {
union union
{ {
@@ -611,17 +692,6 @@ int signbitf(float x)
return u.u >> 31; return u.u >> 31;
} }
float cbrtf( float x )
{
float z = pow( fabs((double) x), 1.0 / 3.0 );
return copysignf( z, x );
}
double cbrt( double x )
{
return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
}
float int2float (int32_t ix) float int2float (int32_t ix)
{ {
union { union {
@@ -642,7 +712,7 @@ int32_t float2int (float fx)
return u.i; return u.i;
} }
#if defined(_MSC_VER) && !defined(_WIN64) #if !defined(_WIN64)
/** Returns the number of leading 0-bits in x, /** Returns the number of leading 0-bits in x,
starting at the most significant bit position. starting at the most significant bit position.
If x is 0, the result is undefined. If x is 0, the result is undefined.
@@ -682,45 +752,10 @@ int __builtin_clz(unsigned int pattern)
return count; return count;
} }
#endif //defined(_MSC_VER) && !defined(_WIN64) #endif // !defined(_WIN64)
#include <intrin.h> #include <intrin.h>
#include <emmintrin.h> #include <emmintrin.h>
long int lrint (double x)
{
double absx = fabs(x);
if( x >= (double) LONG_MAX )
return LONG_MAX;
if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
{
double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
double rounded = x + magic;
rounded -= magic;
return (long int) rounded;
}
return (long int) x;
}
long int lrintf (float x)
{
float absx = fabsf(x);
if( x >= (float) LONG_MAX )
return LONG_MAX;
if( absx < 8388608.0f /* 0x1.0p23f */ )
{
float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
float rounded = x + magic;
rounded -= magic;
return (long int) rounded;
}
return (long int) x;
}
int usleep(int usec) int usleep(int usec)
{ {
@@ -728,22 +763,10 @@ int usleep(int usec)
return 0; return 0;
} }
int fetestexcept(int excepts) unsigned int sleep( unsigned int sec )
{ {
unsigned int status = _statusfp(); Sleep( sec * 1000 );
return excepts & (
((status & _SW_INEXACT) ? FE_INEXACT : 0) |
((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
((status & _SW_INVALID) ? FE_INVALID : 0)
);
}
int feclearexcept(int excepts)
{
_clearfp();
return 0; return 0;
} }
#endif //defined(_WIN32) #endif // defined( _MSC_VER )

View File

@@ -439,10 +439,18 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
/* If we have a device checking function, run it */ /* If we have a device checking function, run it */
if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS ) if( ( deviceCheckFn != NULL ) )
{ {
test_finish(); test_status status = deviceCheckFn( device );
return -1; switch (status)
{
case TEST_PASS:
break;
case TEST_FAIL:
return 1;
case TEST_SKIP:
return 0;
}
} }
if (num_elements <= 0) if (num_elements <= 0)

View File

@@ -23,6 +23,13 @@
extern "C" { extern "C" {
#endif #endif
/* Outcome reported by a check or test: passed, failed, or skipped. */
typedef enum test_status
{
    TEST_PASS = 0, /* success */
    TEST_FAIL = 1, /* failure */
    TEST_SKIP = 2  /* not applicable; skipped */
} test_status;
extern cl_uint gReSeed; extern cl_uint gReSeed;
extern cl_uint gRandomSeed; extern cl_uint gRandomSeed;
@@ -32,8 +39,8 @@ extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
basefn fnList[], const char *fnNames[], basefn fnList[], const char *fnNames[],
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps ); int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits. // Device checking function. See runTestHarnessWithCheck. If this function returns anything other than TEST_PASS, the harness exits.
typedef int (*DeviceCheckFn)( cl_device_id device ); typedef test_status (*DeviceCheckFn)( cl_device_id device );
// Same as runTestHarness, but also supplies a function that checks the created device for required functionality. // Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,

View File

@@ -20,6 +20,7 @@ set(${MODULE_NAME}_SOURCES
test_mem_object_info.cpp test_mem_object_info.cpp
test_null_buffer_arg.c test_null_buffer_arg.c
test_kernel_arg_info.c test_kernel_arg_info.c
test_queue_properties.cpp
../../test_common/harness/errorHelpers.c ../../test_common/harness/errorHelpers.c
../../test_common/harness/threadTesting.c ../../test_common/harness/threadTesting.c
../../test_common/harness/testHarness.c ../../test_common/harness/testHarness.c

View File

@@ -113,6 +113,7 @@ basefn basefn_list[] = {
test_get_image1d_info, test_get_image1d_info,
test_get_image1d_array_info, test_get_image1d_array_info,
test_get_image2d_array_info, test_get_image2d_array_info,
test_queue_properties,
}; };
@@ -200,6 +201,7 @@ const char *basefn_names[] = {
"get_image1d_info", "get_image1d_info",
"get_image1d_array_info", "get_image1d_array_info",
"get_image2d_array_info", "get_image2d_array_info",
"queue_properties",
}; };
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));

View File

@@ -105,4 +105,5 @@ extern int test_get_image1d_info( cl_device_id deviceID, cl_context context
extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
extern int test_queue_properties( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );

View File

@@ -1319,12 +1319,12 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
log_info("Reported max constant buffer size of %lld bytes.\n", maxSize); log_info("Reported max constant buffer size of %lld bytes.\n", maxSize);
// Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE // Limit test buffer size to 1/8 of CL_DEVICE_GLOBAL_MEM_SIZE
error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0); error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
if (maxSize > maxGlobalSize / 4) if (maxSize > maxGlobalSize / 8)
maxSize = maxGlobalSize / 4; maxSize = maxGlobalSize / 8;
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0); error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE "); test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
@@ -1422,7 +1422,7 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
if (allocPassed) { if (allocPassed) {
if (currentSize < maxSize/PASSING_FRACTION) { if (currentSize < maxSize/PASSING_FRACTION) {
log_error("Failed to allocate at least 1/4 of the reported constant size.\n"); log_error("Failed to allocate at least 1/8 of the reported constant size.\n");
return -1; return -1;
} else if (currentSize != maxSize) { } else if (currentSize != maxSize) {
log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize); log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize);

View File

@@ -0,0 +1,174 @@
//
// Copyright (c) 2018 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
#include <sstream>
#include <string>
#include <vector>
using namespace std;
/*
This test covers the cl_khr_create_command_queue extension. It validates that devices with OpenCL 1.X can use the clCreateCommandQueueWithPropertiesKHR function.
Based on device capabilities, the test creates a queue with NULL properties, with the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property, with the
CL_QUEUE_PROFILING_ENABLE property, and with both properties combined. A simple kernel is executed on each such queue.
*/
// OpenCL C source of the vec_cpy kernel used by this test: each work-item
// copies the element at its global id (dimension 0) from src to dst.
// The array holds a single string, passed to create_single_kernel_helper
// with a line count of 1.
const char *queue_test_kernel[] = {
"__kernel void vec_cpy(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid];\n"
"\n"
"}\n" };
// Creates a command queue on deviceID through the
// clCreateCommandQueueWithPropertiesKHR extension entry point, runs the
// vec_cpy kernel on it and verifies the copied data.
//
//   context        - context in which the queue and buffers are created
//   queue_prop_def - property list handed to
//                    clCreateCommandQueueWithPropertiesKHR (NULL is allowed)
//   deviceID       - device the queue is created on
//   kernel         - vec_cpy kernel; its two arguments are set here
//   num_elements   - number of ints to copy; callers pass a positive value
//
// Returns 0 on success, -1 if the extension entry point is missing or the
// copy result is wrong, and otherwise whatever test_error returns for a
// failing OpenCL call.
int enqueue_kernel(cl_context context, const cl_queue_properties_khr *queue_prop_def, cl_device_id deviceID, clKernelWrapper& kernel, size_t num_elements)
{
    // Host storage is declared before the buffer wrappers so that it is
    // destroyed after them: streams[0] uses src's storage through
    // CL_MEM_USE_HOST_PTR, so that memory should outlive the buffer
    // (presumably the wrappers release their objects on destruction).
    std::vector<int> src(num_elements);
    // Separate result storage pre-filled with a sentinel, so the check below
    // cannot pass on data the kernel never wrote.
    std::vector<int> dst(num_elements, -1);
    clMemWrapper streams[2];
    int error;
    clCreateCommandQueueWithPropertiesKHR_fn clCreateCommandQueueWithPropertiesKHR = NULL;
    cl_platform_id platform;
    clEventWrapper event;
    const size_t num_bytes = num_elements * sizeof(int);

    // The extension function has to be looked up on the device's platform.
    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");

    clCreateCommandQueueWithPropertiesKHR = (clCreateCommandQueueWithPropertiesKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clCreateCommandQueueWithPropertiesKHR");
    if (clCreateCommandQueueWithPropertiesKHR == NULL)
    {
        log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
        return -1;
    }

    clCommandQueueWrapper queue = clCreateCommandQueueWithPropertiesKHR(context, deviceID, queue_prop_def, &error);
    test_error(error, "clCreateCommandQueueWithPropertiesKHR failed");

    // Source pattern: element i holds the value i.
    for (size_t i = 0; i < num_elements; ++i)
    {
        src[i] = (int)i;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, num_bytes, src.data(), &error);
    test_error( error, "clCreateBuffer failed." );
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, num_bytes, NULL, &error);
    test_error( error, "clCreateBuffer failed." );

    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
    test_error( error, "clSetKernelArg failed." );

    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &num_elements, NULL, 0, NULL, &event);
    test_error( error, "clEnqueueNDRangeKernel failed." );

    // Wait on the kernel's event explicitly: the queue may be out-of-order,
    // in which case the read below is not implicitly ordered after the kernel.
    error = clWaitForEvents(1, &event);
    test_error(error, "clWaitForEvents failed.");

    // The size argument of clEnqueueReadBuffer is in bytes, so read back the
    // whole destination buffer rather than just num_elements bytes.
    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, num_bytes, dst.data(), 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed." );

    for (size_t i = 0; i < num_elements; ++i)
    {
        if (dst[i] != (int)i)
        {
            log_error("ERROR: Incorrect vector copy result.\n");
            return -1;
        }
    }
    return 0;
}
// Conformance test for the cl_khr_create_command_queue extension.
//
// If the device does not list the extension, the test logs that fact and
// returns 0. Otherwise it builds the vec_cpy kernel and runs it through
// enqueue_kernel on queues created with:
//   - a NULL property list,
//   - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (if the device reports it),
//   - CL_QUEUE_PROFILING_ENABLE (if the device reports it),
//   - both properties together (if the device reports both).
//
//   deviceID     - device under test
//   context      - context used for the program, buffers and queues
//   queue        - unused; every sub-test creates its own queue
//   num_elements - number of ints copied per run; values <= 0 fall back to 128
//
// Returns 0 on success, or the value produced by test_error for the first
// failing step.
int test_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    if (num_elements <= 0)
    {
        num_elements = 128;
    }

    int error = 0;
    clProgramWrapper program;
    clKernelWrapper kernel;
    size_t strSize;
    std::string strExt;
    // CL_DEVICE_QUEUE_PROPERTIES is a bitfield, so start from 0 (not NULL).
    cl_command_queue_properties device_props = 0;
    // Zero-terminated { name, value } list; slot [1] is rewritten per sub-test.
    cl_queue_properties_khr queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, 0 };

    // Query extension: first the required size, then the string itself.
    error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &strSize);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed");
    strExt.resize(strSize);
    error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, strExt.size(), &strExt[0], NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed");
    log_info("CL_DEVICE_EXTENSIONS:\n%s\n\n", strExt.c_str());

    if (strExt.find("cl_khr_create_command_queue") == std::string::npos)
    {
        log_info("extension cl_khr_create_command_queue is not supported.\n");
        return 0;
    }

    error = create_single_kernel_helper(context, &program, &kernel, 1, queue_test_kernel, "vec_cpy");
    test_error(error, "create_single_kernel_helper failed");

    log_info("Queue property NULL. Testing ... \n");
    error = enqueue_kernel(context, NULL,deviceID, kernel, (size_t)num_elements);
    test_error(error, "enqueue_kernel failed");

    // Find out which optional queue properties the device reports.
    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");

    const bool has_out_of_order = (device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
    const bool has_profiling = (device_props & CL_QUEUE_PROFILING_ENABLE) != 0;

    if (has_out_of_order)
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    }
    else
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not supported \n");
    }

    if (has_profiling)
    {
        log_info("Queue property CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_PROFILING_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    }
    else
    {
        log_info("Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
    }

    if (has_out_of_order && has_profiling)
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE & CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_PROFILING_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    }
    else
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE or CL_QUEUE_PROFILING_ENABLE not supported \n");
    }

    return 0;
}

View File

View File

View File

@@ -202,10 +202,10 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
log_error( "ERROR: Results of copy did not validate!\n" ); log_error( "ERROR: Results of copy did not validate!\n" );
sprintf(values + strlen( values), "%d -> [", i); sprintf(values + strlen( values), "%d -> [", i);
for (int j=0; j<(int)elementSize; j++) for (int j=0; j<(int)elementSize; j++)
sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]); sprintf(values + strlen( values), "%2x ", inchar[j]);
sprintf(values + strlen(values), "] != ["); sprintf(values + strlen(values), "] != [");
for (int j=0; j<(int)elementSize; j++) for (int j=0; j<(int)elementSize; j++)
sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]); sprintf(values + strlen( values), "%2x ", outchar[j]);
sprintf(values + strlen(values), "]"); sprintf(values + strlen(values), "]");
log_error("%s\n", values); log_error("%s\n", values);

View File

@@ -16,6 +16,7 @@
#include "../../test_common/harness/compat.h" #include "../../test_common/harness/compat.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
@@ -112,6 +113,10 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que
log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n", log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n",
max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0)); max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0));
if (max_mem_size > (cl_ulong)SIZE_MAX) {
max_mem_size = (cl_ulong)SIZE_MAX;
}
cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
test_error(err, "clCreateSampler failed"); test_error(err, "clCreateSampler failed");
@@ -182,7 +187,7 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que
size_t origin[3] = {0,0,0}; size_t origin[3] = {0,0,0};
size_t region[3] = {img_width, img_height, 1}; size_t region[3] = {img_width, img_height, 1};
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL); err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS) if (err != CL_SUCCESS)
{ {
log_error("clWriteImage failed\n"); log_error("clWriteImage failed\n");
@@ -324,6 +329,10 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
max_img_width = (int)max_image2d_width; max_img_width = (int)max_image2d_width;
max_img_height = (int)max_image2d_height; max_img_height = (int)max_image2d_height;
if (max_mem_size > (cl_ulong)SIZE_MAX) {
max_mem_size = (cl_ulong)SIZE_MAX;
}
// determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel, // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
// and we want to consume 1/4 of global memory (this is the minimum required to be // and we want to consume 1/4 of global memory (this is the minimum required to be
// supported by the spec) // supported by the spec)
@@ -351,6 +360,9 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0)); max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0));
d = init_genrand( gRandomSeed ); d = init_genrand( gRandomSeed );
input_ptr = generate_8888_image(max_img_width, max_img_height, d);
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height);
int plus_minus; int plus_minus;
for (plus_minus=0; plus_minus < 3; plus_minus++) for (plus_minus=0; plus_minus < 3; plus_minus++)
{ {
@@ -390,9 +402,6 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
break; break;
} }
input_ptr = generate_8888_image(effective_img_width, effective_img_height, d);
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * effective_img_width * effective_img_height);
img_format.image_channel_order = CL_RGBA; img_format.image_channel_order = CL_RGBA;
img_format.image_channel_data_type = CL_UNORM_INT8; img_format.image_channel_data_type = CL_UNORM_INT8;
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, effective_img_width, effective_img_height, 0, NULL, NULL); streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, effective_img_width, effective_img_height, 0, NULL, NULL);
@@ -419,7 +428,7 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
size_t origin[3] = {0,0,0}; size_t origin[3] = {0,0,0};
size_t region[3] = {effective_img_width, effective_img_height, 1}; size_t region[3] = {effective_img_width, effective_img_height, 1};
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL); err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
if (err != CL_SUCCESS) if (err != CL_SUCCESS)
{ {
log_error("clWriteImage failed\n"); log_error("clWriteImage failed\n");
@@ -484,14 +493,14 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
clReleaseMemObject(streams[0]); clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[1]); clReleaseMemObject(streams[1]);
free(input_ptr);
free(output_ptr);
} }
} }
} }
// cleanup // cleanup
free(input_ptr);
free(output_ptr);
free_mtdata(d); free_mtdata(d);
clReleaseSampler(sampler); clReleaseSampler(sampler);
clReleaseKernel(kernel); clReleaseKernel(kernel);

View File

@@ -133,7 +133,7 @@ const size_table vector_table[] =
const char *ptr_table[] = const char *ptr_table[] =
{ {
"void*", "global void*",
"size_t", "size_t",
"sizeof(int)", // check return type of sizeof "sizeof(int)", // check return type of sizeof
"ptrdiff_t" "ptrdiff_t"

View File

@@ -481,7 +481,6 @@ extern char *create_random_image_data( ExplicitType dataType, image_descriptor *
extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine ); extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler); extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter ); extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
extern int issubnormal(float);
#define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) ) #define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )

View File

@@ -54,6 +54,9 @@ const char *known_extensions[] = {
"cl_khr_egl_image", "cl_khr_egl_image",
"cl_khr_egl_event", "cl_khr_egl_event",
"cl_khr_il_program", "cl_khr_il_program",
"cl_khr_create_command_queue",
"cl_khr_throttle_hints",
"cl_khr_priority_hints",
}; };
size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*); size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*);

View File

@@ -515,6 +515,8 @@ static void PrintArch( void )
vlog( "\tARCH:\tx86_64\n" ); vlog( "\tARCH:\tx86_64\n" );
#elif defined( __arm__ ) #elif defined( __arm__ )
vlog( "\tARCH:\tarm\n" ); vlog( "\tARCH:\tarm\n" );
#elif defined( __aarch64__ )
vlog( "\tARCH:\taarch64\n" );
#else #else
vlog( "\tARCH:\tunknown\n" ); vlog( "\tARCH:\tunknown\n" );
#endif #endif

View File

@@ -19,6 +19,7 @@ set (${MODULE_NAME}_SOURCES
../../test_common/harness/errorHelpers.c ../../test_common/harness/errorHelpers.c
../../test_common/harness/kernelHelpers.c ../../test_common/harness/kernelHelpers.c
../../test_common/harness/testHarness.c ../../test_common/harness/testHarness.c
../../test_common/harness/parseParameters.cpp
) )
if(ANDROID) if(ANDROID)

View File

@@ -751,16 +751,11 @@ static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((
static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; } static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; }
static void ulong2float( void *out, void *in) static void ulong2float( void *out, void *in)
{ {
#if defined(_MSC_VER) #if defined(_MSC_VER) && defined(_M_X64)
cl_ulong l = ((cl_ulong*) in)[0]; cl_ulong l = ((cl_ulong*) in)[0];
float result; float result;
cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l; cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l;
#if defined(_M_X64)
_mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl)); _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl));
#else
result = sl;
#endif
((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result)); ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result));
#else #else
cl_ulong l = ((cl_ulong*) in)[0]; cl_ulong l = ((cl_ulong*) in)[0];

View File

@@ -18,6 +18,7 @@
#include "../../test_common/harness/ThreadPool.h" #include "../../test_common/harness/ThreadPool.h"
#include "../../test_common/harness/testHarness.h" #include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/kernelHelpers.h" #include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/parseParameters.h"
#if !defined(_WIN32) && !defined(__ANDROID__) #if !defined(_WIN32) && !defined(__ANDROID__)
#include <sys/sysctl.h> #include <sys/sysctl.h>
#endif #endif
@@ -98,6 +99,7 @@ cl_mem gOutBuffers[ kCallStyleCount ];
size_t gComputeDevices = 0; size_t gComputeDevices = 0;
uint32_t gDeviceFrequency = 0; uint32_t gDeviceFrequency = 0;
int gWimpyMode = 0; int gWimpyMode = 0;
int gWimpyReductionFactor = 128;
int gSkipTesting = 0; int gSkipTesting = 0;
int gForceFTZ = 0; int gForceFTZ = 0;
int gMultithread = 1; int gMultithread = 1;
@@ -438,6 +440,9 @@ static int ParseArgs( int argc, const char **argv )
case 'w': case 'w':
gWimpyMode ^= 1; gWimpyMode ^= 1;
break; break;
case '[':
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
break;
case 'z': case 'z':
gForceFTZ ^= 1; gForceFTZ ^= 1;
break; break;
@@ -540,6 +545,7 @@ static int ParseArgs( int argc, const char **argv )
vlog( "*** WARNING: Testing in Wimpy mode! ***\n" ); vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" ); vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
vlog("*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
} }
return 0; return 0;
@@ -566,6 +572,7 @@ static void PrintUsage( void )
vlog( "\t\t-l\tToggle link check mode. When on, testing is skipped, and we just check to see that the kernels build. (Off by default.)\n" ); vlog( "\t\t-l\tToggle link check mode. When on, testing is skipped, and we just check to see that the kernels build. (Off by default.)\n" );
vlog( "\t\t-m\tToggle Multithreading. (On by default.)\n" ); vlog( "\t\t-m\tToggle Multithreading. (On by default.)\n" );
vlog( "\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very small subset of the tests for each fn. NOT A VALID TEST! (Off by default.)\n" ); vlog( "\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very small subset of the tests for each fn. NOT A VALID TEST! (Off by default.)\n" );
vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor);
vlog( "\t\t-z\tToggle flush to zero mode (Default: per device)\n" ); vlog( "\t\t-z\tToggle flush to zero mode (Default: per device)\n" );
vlog( "\t\t-#\tTest just vector size given by #, where # is an element of the set {1,2,3,4,8,16}\n" ); vlog( "\t\t-#\tTest just vector size given by #, where # is an element of the set {1,2,3,4,8,16}\n" );
vlog( "\n" ); vlog( "\n" );
@@ -1243,15 +1250,12 @@ static int DoTest( Type outType, Type inType, SaturationMode sat, RoundingMode r
if ( !gWimpyMode && gIsEmbedded ) if ( !gWimpyMode && gIsEmbedded )
step = blockCount * EMBEDDED_REDUCTION_FACTOR; step = blockCount * EMBEDDED_REDUCTION_FACTOR;
if ( gWimpyMode )
step = (size_t)blockCount * (size_t)gWimpyReductionFactor;
vlog( "Testing... " ); vlog( "Testing... " );
fflush(stdout); fflush(stdout);
for( i = 0; i < (uint64_t)lastCase; i += step ) for( i = 0; i < (uint64_t)lastCase; i += step )
{ {
if (gWimpyMode) {
uint64_t blockIndex = (i / blockCount) & 0xFF;
if (blockIndex != 0 && blockIndex != 0xFF)
continue;
}
if( 0 == ( i & ((lastCase >> 3) -1))) { if( 0 == ( i & ((lastCase >> 3) -1))) {
vlog("."); vlog(".");

View File

@@ -200,7 +200,7 @@ cl_int HarnessD3D10_CreateDevice(IDXGIAdapter* pAdapter, ID3D10Device **ppDevice
pAdapter, pAdapter,
D3D10_DRIVER_TYPE_HARDWARE, D3D10_DRIVER_TYPE_HARDWARE,
NULL, NULL,
D3D10_CREATE_DEVICE_DEBUG, 0,
D3D10_SDK_VERSION, D3D10_SDK_VERSION,
&sd, &sd,
&HarnessD3D10_pSwapChain, &HarnessD3D10_pSwapChain,

View File

@@ -928,15 +928,16 @@ static const char* block_barrier[] =
NL, " size_t gid = get_group_id(0);" NL, " size_t gid = get_group_id(0);"
NL, " size_t idx = gid*lsz;" NL, " size_t idx = gid*lsz;"
NL, "" NL, ""
NL, " res[tid]=lsz;"
NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
NL, " int (^kernelBlock)(int) = ^(int a)" NL, " int (^kernelBlock)(int) = ^(int a)"
NL, " {" NL, " {"
NL, " atomic_inc(res+idx);" NL, " atomic_dec(res+idx);"
NL, " barrier(CLK_GLOBAL_MEM_FENCE);" NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
NL, " return (int)abs(a - b) - (res[idx] != lsz ? 0 : 1);" NL, " return (int)abs(a - b) - (res[idx] != 0 ? 0 : 1);"
NL, " };" NL, " };"
NL, "" NL, ""
NL, " int d = kernelBlock(2);" NL, " int d = kernelBlock(2);"
NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
NL, " res[tid] = d;" NL, " res[tid] = d;"
NL, "}" NL, "}"
NL NL

View File

@@ -65,7 +65,7 @@ ct_assert(arr_size(commonfn_names) == arr_size(basefn_list))
static const int num_commonfns = arr_size(commonfn_names); static const int num_commonfns = arr_size(commonfn_names);
int deviceCheck(cl_device_id device) test_status deviceCheck(cl_device_id device)
{ {
static const char expected_cl[] = "OpenCL 2.0"; static const char expected_cl[] = "OpenCL 2.0";
static const char expected_clc[] = "OpenCL C 2.0"; static const char expected_clc[] = "OpenCL C 2.0";
@@ -77,7 +77,7 @@ int deviceCheck(cl_device_id device)
if(res != CL_SUCCESS || ret_len < strlen(expected_cl) || strncmp(version, expected_cl, strlen(expected_cl))) if(res != CL_SUCCESS || ret_len < strlen(expected_cl) || strncmp(version, expected_cl, strlen(expected_cl)))
{ {
log_info("Device does not support '%s'. Skipping the test.\n", expected_cl); log_info("Device does not support '%s'. Skipping the test.\n", expected_cl);
return CL_INVALID_DEVICE; return TEST_FAIL;
} }
version[0] = 0; version[0] = 0;
@@ -87,10 +87,10 @@ int deviceCheck(cl_device_id device)
if(res != CL_SUCCESS || ret_len < strlen(expected_clc) || strncmp(version, expected_clc, strlen(expected_clc))) if(res != CL_SUCCESS || ret_len < strlen(expected_clc) || strncmp(version, expected_clc, strlen(expected_clc)))
{ {
log_info("Device does not support '%s'. Skipping the test.\n", expected_clc); log_info("Device does not support '%s'. Skipping the test.\n", expected_clc);
return CL_INVALID_DEVICE; return TEST_FAIL;
} }
return CL_SUCCESS; return TEST_PASS;
} }
int int

View File

@@ -203,7 +203,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command
return -1; return -1;
/* Generate some streams. Note: deliberately do some random data in w to verify that it gets ignored */ /* Generate some streams. Note: deliberately do some random data in w to verify that it gets ignored */
for( i = 0; i < TEST_SIZE * vecsize; i++ ) for( i = 0; i < size * vecsize; i++ )
{ {
inDataA[ i ] = get_random_double( -512.f, 512.f, d ); inDataA[ i ] = get_random_double( -512.f, 512.f, d );
inDataB[ i ] = get_random_double( -512.f, 512.f, d ); inDataB[ i ] = get_random_double( -512.f, 512.f, d );
@@ -237,7 +237,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command
} }
/* Run the kernel */ /* Run the kernel */
threads[0] = TEST_SIZE; threads[0] = size;
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
test_error( error, "Unable to get work group size to use" ); test_error( error, "Unable to get work group size to use" );

View File

@@ -35,6 +35,7 @@ typedef struct image_kernel_data
}; };
static const char *methodTestKernelPattern = static const char *methodTestKernelPattern =
"%s"
"typedef struct {\n" "typedef struct {\n"
" int width;\n" " int width;\n"
" int height;\n" " int height;\n"
@@ -75,6 +76,8 @@ static const char *channelOrderConstLine =
" outData->expectedChannelOrder = CLK_%s;\n"; " outData->expectedChannelOrder = CLK_%s;\n";
static const char *numSamplesKernelLine = static const char *numSamplesKernelLine =
" outData->numSamples = get_image_num_samples( input );\n"; " outData->numSamples = get_image_num_samples( input );\n";
static const char *enableMSAAKernelLine =
"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n";
static int verify(cl_int input, cl_int kernelOutput, const char * description) static int verify(cl_int input, cl_int kernelOutput, const char * description)
{ {
@@ -185,6 +188,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma
bool doImageChannelOrder = false; bool doImageChannelOrder = false;
bool doImageDim = false; bool doImageDim = false;
bool doNumSamples = false; bool doNumSamples = false;
bool doMSAA = false;
switch(target) { switch(target) {
case GL_TEXTURE_2D: case GL_TEXTURE_2D:
imageType = "image2d_depth_t"; imageType = "image2d_depth_t";
@@ -206,6 +210,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma
break; break;
case GL_TEXTURE_2D_MULTISAMPLE: case GL_TEXTURE_2D_MULTISAMPLE:
doNumSamples = true; doNumSamples = true;
doMSAA = true;
if(format.formattype == GL_DEPTH_COMPONENT) { if(format.formattype == GL_DEPTH_COMPONENT) {
doImageWidth = true; doImageWidth = true;
imageType = "image2d_msaa_depth_t"; imageType = "image2d_msaa_depth_t";
@@ -214,6 +219,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma
} }
break; break;
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
doMSAA = true;
if(format.formattype == GL_DEPTH_COMPONENT) { if(format.formattype == GL_DEPTH_COMPONENT) {
doImageWidth = true; doImageWidth = true;
imageType = "image2d_msaa_array_depth_t"; imageType = "image2d_msaa_array_depth_t";
@@ -245,7 +251,9 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma
} }
// Create a program to run against // Create a program to run against
sprintf( programSrc, methodTestKernelPattern, sprintf(programSrc,
methodTestKernelPattern,
( doMSAA ) ? enableMSAAKernelLine : "",
imageType, imageType,
( doArraySize ) ? arraySizeKernelLine : "", ( doArraySize ) ? arraySizeKernelLine : "",
( doImageWidth ) ? imageWidthKernelLine : "", ( doImageWidth ) ? imageWidthKernelLine : "",
@@ -265,7 +273,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma
if (error) if (error)
print_error(error, "clFinish failed.\n"); print_error(error, "clFinish failed.\n");
const char *ptr = programSrc; const char *ptr = programSrc;
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" ); error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
test_error( error, "Unable to create kernel to test against" ); test_error( error, "Unable to create kernel to test against" );
// Create an output buffer // Create an output buffer

View File

@@ -107,6 +107,7 @@ static const char *kernelpattern_image_read_2darray_depth =
"}\n"; "}\n";
static const char *kernelpattern_image_multisample_read_2d = static const char *kernelpattern_image_multisample_read_2d =
"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"
"__kernel void sample_test( read_only image2d_msaa_t source, sampler_t sampler, __global %s4 *results )\n" "__kernel void sample_test( read_only image2d_msaa_t source, sampler_t sampler, __global %s4 *results )\n"
"{\n" "{\n"
" int tidX = get_global_id(0);\n" " int tidX = get_global_id(0);\n"
@@ -121,6 +122,7 @@ static const char *kernelpattern_image_multisample_read_2d =
"}\n"; "}\n";
static const char *kernelpattern_image_multisample_read_2d_depth = static const char *kernelpattern_image_multisample_read_2d_depth =
"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"
"__kernel void sample_test( read_only image2d_msaa_depth_t source, sampler_t sampler, __global %s *results )\n" "__kernel void sample_test( read_only image2d_msaa_depth_t source, sampler_t sampler, __global %s *results )\n"
"{\n" "{\n"
" int tidX = get_global_id(0);\n" " int tidX = get_global_id(0);\n"
@@ -135,6 +137,7 @@ static const char *kernelpattern_image_multisample_read_2d_depth =
"}\n"; "}\n";
static const char *kernelpattern_image_multisample_read_2darray = static const char *kernelpattern_image_multisample_read_2darray =
"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"
"__kernel void sample_test( read_only image2d_array_msaa_t source, sampler_t sampler, __global %s4 *results )\n" "__kernel void sample_test( read_only image2d_array_msaa_t source, sampler_t sampler, __global %s4 *results )\n"
"{\n" "{\n"
" int tidX = get_global_id(0);\n" " int tidX = get_global_id(0);\n"
@@ -151,6 +154,7 @@ static const char *kernelpattern_image_multisample_read_2darray =
"}\n"; "}\n";
static const char *kernelpattern_image_multisample_read_2darray_depth = static const char *kernelpattern_image_multisample_read_2darray_depth =
"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"
"__kernel void sample_test( read_only image2d_array_msaa_depth_t source, sampler_t sampler, __global %s *results )\n" "__kernel void sample_test( read_only image2d_array_msaa_depth_t source, sampler_t sampler, __global %s *results )\n"
"{\n" "{\n"
" int tidX = get_global_id(0);\n" " int tidX = get_global_id(0);\n"
@@ -248,8 +252,8 @@ int test_cl_image_read( cl_context context, cl_command_queue queue,
get_kernel_suffix( outFormat ) ); get_kernel_suffix( outFormat ) );
programPtr = kernelSource; programPtr = kernelSource;
if( create_single_kernel_helper( context, &program, &kernel, 1, if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1,
(const char **)&programPtr, "sample_test" ) ) (const char **)&programPtr, "sample_test", "-cl-std=CL2.0" ) )
{ {
return -1; return -1;
} }

View File

@@ -333,8 +333,8 @@ int test_cl_image_write( cl_context context, cl_command_queue queue,
get_explicit_type_name( *outType ), suffix, convert); get_explicit_type_name( *outType ), suffix, convert);
programPtr = kernelSource; programPtr = kernelSource;
if( create_single_kernel_helper( context, &program, &kernel, 1, if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1,
(const char **)&programPtr, "sample_test" ) ) (const char **)&programPtr, "sample_test", "-cl-std=CL2.0" ) )
{ {
return -1; return -1;
} }

View File

@@ -101,16 +101,14 @@ const char *basefn_names[] = {
"images_write_cube", "images_write_cube",
"renderbuffer_read", "renderbuffer_read",
"renderbuffer_write", "renderbuffer_write",
"renderbuffer_getinfo", "renderbuffer_getinfo"
"all"
}; };
const char *basefn_names32[] = { const char *basefn_names32[] = {
"fence_sync", "fence_sync"
"all"
}; };
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0]) - 1) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
int num_fns = sizeof(basefn_names) / sizeof(char *); int num_fns = sizeof(basefn_names) / sizeof(char *);
int num_fns32 = sizeof(basefn_names32) / sizeof(char *); int num_fns32 = sizeof(basefn_names32) / sizeof(char *);
@@ -386,17 +384,17 @@ int main(int argc, const char *argv[])
// Intentional falling through // Intentional falling through
cleanup: cleanup:
// Cleanup EGL
glEnv->terminate_egl_display();
// Always make sure that OpenCL context is released properly when the test exit // Always make sure that OpenCL context is released properly when the test exit
if(sCurrentContext) if(sCurrentContext)
{ {
clReleaseContext( sCurrentContext ); clReleaseContext( sCurrentContext );
sCurrentContext = NULL; sCurrentContext = NULL;
} }
delete glEnv;
// Cleanup EGL
glEnv->terminate_egl_display();
delete glEnv;
return error; return error;
} }

View File

@@ -11,6 +11,7 @@ set(${MODULE_NAME}_SOURCES
../../test_common/harness/kernelHelpers.c ../../test_common/harness/kernelHelpers.c
../../test_common/harness/ThreadPool.c ../../test_common/harness/ThreadPool.c
../../test_common/harness/testHarness.c ../../test_common/harness/testHarness.c
../../test_common/harness/parseParameters.cpp
) )
include(../CMakeCommon.txt) include(../CMakeCommon.txt)

View File

@@ -161,7 +161,7 @@ int Test_roundTrip( cl_device_id deviceID, cl_context context, cl_command_queue
// Figure out how many elements are in a work block // Figure out how many elements are in a work block
size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float));
size_t blockCount = (size_t)getBufferSize(gDevice) / elementSize; //elementSize is a power of two size_t blockCount = (size_t)gBufferSize / elementSize; //elementSize is a power of two
uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half
size_t stride = blockCount; size_t stride = blockCount;

View File

@@ -454,7 +454,7 @@ int Test_vLoadHalf_private( bool aligned )
// Figure out how many elements are in a work block // Figure out how many elements are in a work block
size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float));
size_t blockCount = getBufferSize(gDevice) / elementSize; // elementSize is power of 2 size_t blockCount = gBufferSize / elementSize; // elementSize is power of 2
uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half
// we handle 64-bit types a bit differently. // we handle 64-bit types a bit differently.
@@ -504,7 +504,7 @@ int Test_vLoadHalf_private( bool aligned )
continue; continue;
} }
*/ */
memset_pattern4( gOut_single, &pattern, getBufferSize(gDevice)); memset_pattern4( gOut_single, &pattern, gBufferSize);
if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_single, CL_TRUE, 0, count * sizeof( float ), gOut_single, 0, NULL, NULL)) ) if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_single, CL_TRUE, 0, count * sizeof( float ), gOut_single, 0, NULL, NULL)) )
{ {
vlog_error( "Failure in clWriteArray\n" ); vlog_error( "Failure in clWriteArray\n" );

View File

@@ -1045,7 +1045,7 @@ int Test_vStoreHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const c
size_t stride = blockCount; size_t stride = blockCount;
if (gWimpyMode) if (gWimpyMode)
stride = 0x10000000U; stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor;
// we handle 64-bit types a bit differently. // we handle 64-bit types a bit differently.
if( lastCase == 0 ) if( lastCase == 0 )
@@ -1654,7 +1654,7 @@ int Test_vStoreaHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const
size_t stride = blockCount; size_t stride = blockCount;
if (gWimpyMode) if (gWimpyMode)
stride = 0x10000000U; stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor;
// we handle 64-bit types a bit differently. // we handle 64-bit types a bit differently.
if( lastCase == 0 ) if( lastCase == 0 )

View File

@@ -61,8 +61,10 @@ size_t gMaxThreadGroupSize = 0;
size_t gWorkGroupSize = 0; size_t gWorkGroupSize = 0;
int gFailCount = 0; int gFailCount = 0;
bool gWimpyMode = false; bool gWimpyMode = false;
int gWimpyReductionFactor = 512;
int gTestDouble = 0; int gTestDouble = 0;
uint32_t gDeviceIndex = 0; uint32_t gDeviceIndex = 0;
size_t gBufferSize = 0;
#if defined( __APPLE__ ) #if defined( __APPLE__ )
int gReportTimes = 1; int gReportTimes = 1;
@@ -178,17 +180,19 @@ int InitCL( void )
#if defined( __APPLE__ ) #if defined( __APPLE__ )
// FIXME: use clProtectedArray // FIXME: use clProtectedArray
#endif #endif
gBufferSize = getBufferSize(gDevice);
//Allocate buffers //Allocate buffers
gIn_half = malloc( getBufferSize(gDevice)/2 ); gIn_half = malloc( gBufferSize/2 );
gOut_half = malloc( BUFFER_SIZE/2 ); gOut_half = malloc( BUFFER_SIZE/2 );
gOut_half_reference = malloc( BUFFER_SIZE/2 ); gOut_half_reference = malloc( BUFFER_SIZE/2 );
gOut_half_reference_double = malloc( BUFFER_SIZE/2 ); gOut_half_reference_double = malloc( BUFFER_SIZE/2 );
gIn_single = malloc( BUFFER_SIZE ); gIn_single = malloc( BUFFER_SIZE );
gOut_single = malloc( getBufferSize(gDevice) ); gOut_single = malloc( gBufferSize );
gOut_single_reference = malloc( getBufferSize(gDevice) ); gOut_single_reference = malloc( gBufferSize );
gIn_double = malloc( 2*BUFFER_SIZE ); gIn_double = malloc( 2*BUFFER_SIZE );
// gOut_double = malloc( (2*getBufferSize(gDevice)) ); // gOut_double = malloc( (2*gBufferSize) );
// gOut_double_reference = malloc( (2*getBufferSize(gDevice)) ); // gOut_double_reference = malloc( (2*gBufferSize) );
if ( NULL == gIn_half || if ( NULL == gIn_half ||
NULL == gOut_half || NULL == gOut_half ||
@@ -201,7 +205,7 @@ int InitCL( void )
) )
return -3; return -3;
gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, getBufferSize(gDevice) / 2, NULL, &error); gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, gBufferSize / 2, NULL, &error);
if( gInBuffer_half == NULL ) if( gInBuffer_half == NULL )
{ {
vlog_error( "clCreateArray failed for input (%d)\n", error ); vlog_error( "clCreateArray failed for input (%d)\n", error );
@@ -229,7 +233,7 @@ int InitCL( void )
return -5; return -5;
} }
gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, getBufferSize(gDevice), NULL, &error ); gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, gBufferSize, NULL, &error );
if( gOutBuffer_single == NULL ) if( gOutBuffer_single == NULL )
{ {
vlog_error( "clCreateArray failed for output (%d)\n", error ); vlog_error( "clCreateArray failed for output (%d)\n", error );
@@ -237,7 +241,7 @@ int InitCL( void )
} }
#if 0 #if 0
gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*getBufferSize(gDevice)), NULL, &error ); gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*gBufferSize), NULL, &error );
if( gOutBuffer_double == NULL ) if( gOutBuffer_double == NULL )
{ {
vlog_error( "clCreateArray failed for output (%d)\n", error ); vlog_error( "clCreateArray failed for output (%d)\n", error );
@@ -310,6 +314,15 @@ void ReleaseCL(void)
// clReleaseMemObject(gOutBuffer_double); // clReleaseMemObject(gOutBuffer_double);
clReleaseCommandQueue(gQueue); clReleaseCommandQueue(gQueue);
clReleaseContext(gContext); clReleaseContext(gContext);
free(gIn_half);
free(gOut_half);
free(gOut_half_reference);
free(gOut_half_reference_double);
free(gIn_single);
free(gOut_single);
free(gOut_single_reference);
free(gIn_double);
} }
cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned) { cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned) {
@@ -427,21 +440,30 @@ size_t getBufferSize(cl_device_id device_id)
if(s_initialized == 0 || s_device_id != device_id) if(s_initialized == 0 || s_device_id != device_id)
{ {
cl_ulong result; cl_ulong result, maxGlobalSize;
cl_int err = clGetDeviceInfo (device_id, cl_int err = clGetDeviceInfo (device_id,
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
sizeof(result), (void *)&result, sizeof(result), (void *)&result,
NULL); NULL);
if(err) if(err)
{ {
vlog_error("clGetDeviceInfo() failed\n"); vlog_error("clGetDeviceInfo(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) failed\n");
s_result = 64*1024; s_result = 64*1024;
goto exit; goto exit;
} }
log_info("Const buffer size is %llx (%llu)\n", result, result);
err = clGetDeviceInfo (device_id,
CL_DEVICE_GLOBAL_MEM_SIZE,
sizeof(maxGlobalSize), (void *)&maxGlobalSize,
NULL);
if(err)
{
vlog_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed\n");
goto exit;
}
result = result / 2; result = result / 2;
if (result > BUFFER_SIZE) if(maxGlobalSize < result * 10)
result = BUFFER_SIZE; result = result / 10;
log_info("Using const buffer size 0x%lx (%lu)\n", (unsigned long)result, (unsigned long)result);
s_initialized = 1; s_initialized = 1;
s_device_id = device_id; s_device_id = device_id;
s_result = result; s_result = result;

View File

@@ -71,11 +71,13 @@ extern size_t gWorkGroupSize;
extern int gFailCount; extern int gFailCount;
extern int gTestDouble; extern int gTestDouble;
extern int gReportTimes; extern int gReportTimes;
extern size_t gBufferSize;
// gWimpyMode indicates if we run the test in wimpy mode where we limit the // gWimpyMode indicates if we run the test in wimpy mode where we limit the
// size of 32 bit ranges to a much smaller set. This is meant to be used // size of 32 bit ranges to a much smaller set. This is meant to be used
// as a smoke test // as a smoke test
extern bool gWimpyMode; extern bool gWimpyMode;
extern int gWimpyReductionFactor;
uint64_t ReadTime( void ); uint64_t ReadTime( void );
double SubtractTime( uint64_t endTime, uint64_t startTime ); double SubtractTime( uint64_t endTime, uint64_t startTime );

View File

@@ -28,6 +28,7 @@
#include "../../test_common/harness/testHarness.h" #include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/mingw_compat.h" #include "../../test_common/harness/mingw_compat.h"
#include "../../test_common/harness/parseParameters.h"
#if defined (__MINGW32__) #if defined (__MINGW32__)
#include <sys/param.h> #include <sys/param.h>
#endif #endif
@@ -260,7 +261,9 @@ static int ParseArgs( int argc, const char **argv )
case 'w': // Wimpy mode case 'w': // Wimpy mode
gWimpyMode = true; gWimpyMode = true;
break; break;
case '[':
parseWimpyReductionFactor( arg, gWimpyReductionFactor);
break;
default: default:
vlog_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); vlog_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
PrintUsage(); PrintUsage();
@@ -302,6 +305,7 @@ static int ParseArgs( int argc, const char **argv )
vlog( "*** WARNING: Testing in Wimpy mode! ***\n" ); vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" ); vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
} }
return 0; return 0;
} }
@@ -312,6 +316,7 @@ static void PrintUsage( void )
vlog( "\t\t-d\tToggle double precision testing (default: on if double supported)\n" ); vlog( "\t\t-d\tToggle double precision testing (default: on if double supported)\n" );
vlog( "\t\t-t\tToggle reporting performance data.\n" ); vlog( "\t\t-t\tToggle reporting performance data.\n" );
vlog( "\t\t-w\tRun in wimpy mode\n" ); vlog( "\t\t-w\tRun in wimpy mode\n" );
vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor);
vlog( "\t\t-h\tHelp\n" ); vlog( "\t\t-h\tHelp\n" );
for( int i = 0; i < num_fns; i++ ) for( int i = 0; i < num_fns; i++ )
{ {
@@ -334,6 +339,8 @@ static void PrintArch( void )
vlog( "ARCH:\tx86_64\n" ); vlog( "ARCH:\tx86_64\n" );
#elif defined( __arm__ ) #elif defined( __arm__ )
vlog( "ARCH:\tarm\n" ); vlog( "ARCH:\tarm\n" );
#elif defined( __aarch64__ )
vlog( "\tARCH:\taarch64\n" );
#else #else
#error unknown arch #error unknown arch
#endif #endif

View File

@@ -3,6 +3,7 @@ set(HEADERS_SOURCES
../../test_common/harness/errorHelpers.c ../../test_common/harness/errorHelpers.c
../../test_common/harness/kernelHelpers.c ../../test_common/harness/kernelHelpers.c
../../test_common/harness/testHarness.c ../../test_common/harness/testHarness.c
../../test_common/harness/msvc9.c
) )
set_source_files_properties(${HEADERS_SOURCES} PROPERTIES LANGUAGE CXX) set_source_files_properties(${HEADERS_SOURCES} PROPERTIES LANGUAGE CXX)
@@ -24,6 +25,8 @@ set(CL_H_SOURCES
test_cl.h.c test_cl.h.c
) )
set_source_files_properties(${CL_H_SOURCES} PROPERTIES LANGUAGE CXX)
set(CL_H_OUT ${CONFORMANCE_PREFIX}cl_h${CONFORMANCE_SUFFIX}) set(CL_H_OUT ${CONFORMANCE_PREFIX}cl_h${CONFORMANCE_SUFFIX})
add_executable( add_executable(
@@ -41,6 +44,8 @@ set(CL_PLATFORM_H_SOURCES
test_cl_platform.h.c test_cl_platform.h.c
) )
set_source_files_properties(${CL_PLATFORM_H_SOURCES} PROPERTIES LANGUAGE CXX)
set(CL_PLATFORM_H_OUT ${CONFORMANCE_PREFIX}cl_platform_h${CONFORMANCE_SUFFIX}) set(CL_PLATFORM_H_OUT ${CONFORMANCE_PREFIX}cl_platform_h${CONFORMANCE_SUFFIX})
add_executable( add_executable(
@@ -58,6 +63,8 @@ set(CL_GL_H_SOURCES
test_cl_gl.h.c test_cl_gl.h.c
) )
set_source_files_properties(${CL_GL_H_SOURCES} PROPERTIES LANGUAGE CXX)
set(CL_GL_H_OUT ${CONFORMANCE_PREFIX}cl_gl_h${CONFORMANCE_SUFFIX}) set(CL_GL_H_OUT ${CONFORMANCE_PREFIX}cl_gl_h${CONFORMANCE_SUFFIX})
add_executable( add_executable(
@@ -75,6 +82,8 @@ set(OPENCL_H_SOURCES
test_opencl.h.c test_opencl.h.c
) )
set_source_files_properties(${OPENCL_H_SOURCES} PROPERTIES LANGUAGE CXX)
set(OPENCL_H_OUT ${CONFORMANCE_PREFIX}opencl_h${CONFORMANCE_SUFFIX}) set(OPENCL_H_OUT ${CONFORMANCE_PREFIX}opencl_h${CONFORMANCE_SUFFIX})
add_executable( add_executable(
@@ -88,5 +97,4 @@ TARGET_LINK_LIBRARIES(${OPENCL_H_OUT} ${CLConform_LIBRARIES})
######################################################################################## ########################################################################################
# end of file # # end of file #

View File

@@ -152,9 +152,27 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding);
log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
if ( gDebugTrace ) // image, the result array, plus offset arrays, will fit in the global ram space
log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); cl_ulong size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
while (size > maxAllocSize || (size * 3) > memSize) {
if (imageInfo.arraySize == 1) {
// arraySize cannot be 0.
break;
}
imageInfo.arraySize--;
size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
}
while (size > maxAllocSize || (size * 3) > memSize) {
imageInfo.height--;
imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
}
log_info( "Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize);
if ( test_fill_image_2D_array( device, &imageInfo, outputType, seed ) ) if ( test_fill_image_2D_array( device, &imageInfo, outputType, seed ) )
return -1; return -1;
} }

View File

@@ -57,11 +57,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i
return test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); return test_image_set( device, CL_MEM_OBJECT_IMAGE3D );
} }
int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY );
} }
int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY );
} }
@@ -70,16 +70,16 @@ basefn basefn_list[] = {
test_1D, test_1D,
test_2D, test_2D,
test_3D, test_3D,
test_1DArray, test_1Darray,
test_2DArray, test_2Darray,
}; };
const char *basefn_names[] = { const char *basefn_names[] = {
"1D", "1D",
"2D", "2D",
"3D", "3D",
"1DArray", "1Darray",
"2DArray", "2Darray",
}; };
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));

View File

@@ -52,11 +52,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i
{ {
return test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); return test_image_set( device, CL_MEM_OBJECT_IMAGE3D );
} }
int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY );
} }
int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY );
} }
@@ -65,16 +65,16 @@ basefn basefn_list[] = {
test_1D, test_1D,
test_2D, test_2D,
test_3D, test_3D,
test_1DArray, test_1Darray,
test_2DArray, test_2Darray,
}; };
const char *basefn_names[] = { const char *basefn_names[] = {
"1D", "1D",
"2D", "2D",
"3D", "3D",
"1DArray", "1Darray",
"2DArray", "2Darray",
}; };
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));

View File

@@ -52,11 +52,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i
{ {
return test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); return test_image_set( device, CL_MEM_OBJECT_IMAGE3D );
} }
int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY );
} }
int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY );
} }
@@ -65,16 +65,16 @@ basefn basefn_list[] = {
test_1D, test_1D,
test_2D, test_2D,
test_3D, test_3D,
test_1DArray, test_1Darray,
test_2DArray, test_2Darray,
}; };
const char *basefn_names[] = { const char *basefn_names[] = {
"1D", "1D",
"2D", "2D",
"3D", "3D",
"1DArray", "1Darray",
"2DArray", "2Darray",
}; };
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));

View File

@@ -65,11 +65,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i
{ {
return test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); return test_image_set( device, CL_MEM_OBJECT_IMAGE3D );
} }
int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY );
} }
int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY );
} }
@@ -78,16 +78,16 @@ basefn basefn_list[] = {
test_1D, test_1D,
test_2D, test_2D,
test_3D, test_3D,
test_1DArray, test_1Darray,
test_2DArray, test_2Darray,
}; };
const char *basefn_names[] = { const char *basefn_names[] = {
"1D", "1D",
"2D", "2D",
"3D", "3D",
"1DArray", "1Darray",
"2DArray", "2Darray",
}; };
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));

View File

@@ -23,6 +23,10 @@ set(${MODULE_NAME}_SOURCES
../../test_common/harness/ThreadPool.c ../../test_common/harness/ThreadPool.c
../../test_common/harness/mt19937.c ../../test_common/harness/mt19937.c
../../test_common/harness/msvc9.c ../../test_common/harness/msvc9.c
../../test_common/harness/kernelHelpers.c
../../test_common/harness/errorHelpers.c
../../test_common/harness/testHarness.c
../../test_common/harness/parseParameters.cpp
) )
@@ -46,26 +50,7 @@ set_source_files_properties(
endif(NOT ANDROID) endif(NOT ANDROID)
set_source_files_properties( set_source_files_properties(
FunctionList.c ${MODULE_NAME}_SOURCES
Sleep.c
binary.c
binaryOperator.c
Utility.c
binary_i.c
binary_two_results_i.c
i_unary.c
macro_binary.c
macro_unary.c
mad.c
main.c
reference_math.c
ternary.c
unary.c
unary_two_results.c
unary_two_results_i.c unary_u.c
../../test_common/harness/rounding_mode.c
../../test_common/harness/ThreadPool.c
../../test_common/harness/msvc9.c
PROPERTIES LANGUAGE CXX) PROPERTIES LANGUAGE CXX)
if(CMAKE_COMPILER_IS_GNUCC) if(CMAKE_COMPILER_IS_GNUCC)

View File

@@ -85,7 +85,7 @@ typedef struct Func
float relaxed_error; float relaxed_error;
int ftz; int ftz;
int relaxed; int relaxed;
const ::vtbl *vtbl; const vtbl *vtbl_ptr;
}Func; }Func;

View File

@@ -26,11 +26,7 @@
#include <stdio.h> #include <stdio.h>
#include "../../test_common/harness/rounding_mode.h" #include "../../test_common/harness/rounding_mode.h"
#include "../../test_common/harness/fpcontrol.h" #include "../../test_common/harness/fpcontrol.h"
#if defined( _WIN32) && defined (_MSC_VER)
#include "../../test_common/harness/testHarness.h" #include "../../test_common/harness/testHarness.h"
#endif
#include "../../test_common/harness/ThreadPool.h" #include "../../test_common/harness/ThreadPool.h"
#define BUFFER_SIZE (1024*1024*2) #define BUFFER_SIZE (1024*1024*2)
@@ -112,7 +108,7 @@ extern "C" {
float Abs_Error( float test, double reference ); float Abs_Error( float test, double reference );
float Ulp_Error( float test, double reference ); float Ulp_Error( float test, double reference );
//float Ulp_Error_Half( float test, double reference ); //float Ulp_Error_Half( float test, double reference );
float Ulp_Error_Double( double test, long double reference ); float Bruteforce_Ulp_Error_Double( double test, long double reference );
#ifdef __cplusplus #ifdef __cplusplus
} //extern "C" } //extern "C"
#endif #endif

View File

@@ -233,6 +233,7 @@ typedef struct TestInfo
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
ThreadInfo *tinfo; // An array of thread specific information for each worker thread ThreadInfo *tinfo; // An array of thread specific information for each worker thread
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values cl_uint scale; // stride between individual test values
float ulps; // max_allowed ulps float ulps; // max_allowed ulps
@@ -268,6 +269,16 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
} }
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -341,7 +352,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
// Run the kernels // Run the kernels
if( !gSkipCorrectnessTesting ) if( !gSkipCorrectnessTesting )
{ {
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
// Accumulate the arithmetic errors // Accumulate the arithmetic errors
for( i = 0; i < test_info.threadCount; i++ ) for( i = 0; i < test_info.threadCount; i++ )
@@ -991,6 +1002,16 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
} }
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ulps = f->double_ulps; test_info.ulps = f->double_ulps;
test_info.ftz = f->ftz || gForceFTZ; test_info.ftz = f->ftz || gForceFTZ;
@@ -1063,7 +1084,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
if( !gSkipCorrectnessTesting ) if( !gSkipCorrectnessTesting )
{ {
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
// Accumulate the arithmetic errors // Accumulate the arithmetic errors
for( i = 0; i < test_info.threadCount; i++ ) for( i = 0; i < test_info.threadCount; i++ )
@@ -1359,7 +1380,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
{ {
cl_double test = ((cl_double*) q)[j]; cl_double test = ((cl_double*) q)[j];
long double correct = func.f_ff( s[j], s2[j] ); long double correct = func.f_ff( s[j], s2[j] );
float err = Ulp_Error_Double( test, correct ); float err = Bruteforce_Ulp_Error_Double( test, correct );
int fail = ! (fabsf(err) <= ulps); int fail = ! (fabsf(err) <= ulps);
if( fail && ftz ) if( fail && ftz )
@@ -1399,8 +1420,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
{ {
long double correct2 = func.f_ff( 0.0, s2[j] ); long double correct2 = func.f_ff( 0.0, s2[j] );
long double correct3 = func.f_ff( -0.0, s2[j] ); long double correct3 = func.f_ff( -0.0, s2[j] );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;
@@ -1422,10 +1443,10 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
correct3 = func.f_ff( -0.0, 0.0 ); correct3 = func.f_ff( -0.0, 0.0 );
long double correct4 = func.f_ff( 0.0, -0.0 ); long double correct4 = func.f_ff( 0.0, -0.0 );
long double correct5 = func.f_ff( -0.0, -0.0 ); long double correct5 = func.f_ff( -0.0, -0.0 );
err2 = Ulp_Error_Double( test, correct2 ); err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
err3 = Ulp_Error_Double( test, correct3 ); err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
float err4 = Ulp_Error_Double( test, correct4 ); float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
float err5 = Ulp_Error_Double( test, correct5 ); float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
(!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
@@ -1451,8 +1472,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
{ {
long double correct2 = func.f_ff( s[j], 0.0 ); long double correct2 = func.f_ff( s[j], 0.0 );
long double correct3 = func.f_ff( s[j], -0.0 ); long double correct3 = func.f_ff( s[j], -0.0 );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;

View File

@@ -207,6 +207,7 @@ typedef struct TestInfo
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
ThreadInfo *tinfo; // An array of thread specific information for each worker thread ThreadInfo *tinfo; // An array of thread specific information for each worker thread
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values cl_uint scale; // stride between individual test values
float ulps; // max_allowed ulps float ulps; // max_allowed ulps
@@ -260,6 +261,16 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
} }
test_info.step = test_info.subBufferSize * test_info.scale; test_info.step = test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -329,7 +340,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
if( !gSkipCorrectnessTesting ) if( !gSkipCorrectnessTesting )
{ {
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
// Accumulate the arithmetic errors // Accumulate the arithmetic errors
for( i = 0; i < test_info.threadCount; i++ ) for( i = 0; i < test_info.threadCount; i++ )
@@ -501,38 +512,32 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount; int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
int indx = (totalSpecialValueCount - 1) / buffer_elements; int indx = (totalSpecialValueCount - 1) / buffer_elements;
if( job_id <= (cl_uint)indx )
{ // test edge cases if( job_id <= (cl_uint)indx ) {
float *fp = (float *)p; // Insert special values
float *fp2 = (float *)p2;
uint32_t x, y; uint32_t x, y;
x = (job_id * buffer_elements) % specialValuesFloatCount; x = (job_id * buffer_elements) % specialValuesFloatCount;
y = (job_id * buffer_elements) / specialValuesFloatCount; y = (job_id * buffer_elements) / specialValuesFloatCount;
for( ; j < buffer_elements; j++ ) for( ; j < buffer_elements; j++ ) {
{ p[j] = ((cl_uint *)specialValuesFloat)[x];
fp[j] = specialValuesFloat[x]; p2[j] = ((cl_uint *)specialValuesFloat)[y];
fp2[j] = specialValuesFloat[y]; ++x;
if( ++x >= specialValuesFloatCount ) if (x >= specialValuesFloatCount) {
{
x = 0; x = 0;
y++; y++;
if (y >= specialValuesFloatCount) if (y >= specialValuesFloatCount)
break; break;
} }
if(gTestFastRelaxed && strcmp(name,"divide") == 0 ) if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
{ cl_uint pj = p[j] & 0x7fffffff;
float fpj = *(float*)&fp[j]; cl_uint p2j = p2[j] & 0x7fffffff;
float fpj2 = *(float*)&fp2[j]; // Replace values outside [2^-62, 2^62] with QNaN
if(fabs(fpj) > 0x5E800000 ) //[2^-62,2^62] if (pj < 0x20800000 || pj > 0x5e800000)
{ p[j] = 0x7fc00000;
fp[j] = NAN; if (p2j < 0x20800000 || p2j > 0x5e800000)
} p2[j] = 0x7fc00000;
if( fabs(fpj2) > 0x5E800000 ) //[2^-62,2^62]
{
fp2[j] = NAN;
}
} }
} }
} }
@@ -543,20 +548,14 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
p[j] = genrand_int32(d); p[j] = genrand_int32(d);
p2[j] = genrand_int32(d); p2[j] = genrand_int32(d);
if(gTestFastRelaxed) if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
{ cl_uint pj = p[j] & 0x7fffffff;
if( strcmp(name,"divide")==0){ cl_uint p2j = p2[j] & 0x7fffffff;
float pj = *(float*)&p[j]; // Replace values outside [2^-62, 2^62] with QNaN
float pj2 = *(float*)&p2[j]; if (pj < 0x20800000 || pj > 0x5e800000)
if(fabs(pj) > 0x5E800000 ) //[2^-62,2^62] p[j] = 0x7fc00000;
{ if (p2j < 0x20800000 || p2j > 0x5e800000)
p[j] = NAN; p2[j] = 0x7fc00000;
}
if( fabs(pj2) > 0x5E800000 ) //[2^-62,2^62]
{
p2[j] = NAN;
}
}
} }
} }
@@ -950,6 +949,16 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
} }
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ulps = f->double_ulps; test_info.ulps = f->double_ulps;
test_info.ftz = f->ftz || gForceFTZ; test_info.ftz = f->ftz || gForceFTZ;
@@ -1020,7 +1029,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
if( !gSkipCorrectnessTesting ) if( !gSkipCorrectnessTesting )
{ {
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
// Accumulate the arithmetic errors // Accumulate the arithmetic errors
for( i = 0; i < test_info.threadCount; i++ ) for( i = 0; i < test_info.threadCount; i++ )
@@ -1315,7 +1324,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
{ {
cl_double test = ((cl_double*) q)[j]; cl_double test = ((cl_double*) q)[j];
long double correct = func.f_ff( s[j], s2[j] ); long double correct = func.f_ff( s[j], s2[j] );
float err = Ulp_Error_Double( test, correct ); float err = Bruteforce_Ulp_Error_Double( test, correct );
int fail = ! (fabsf(err) <= ulps); int fail = ! (fabsf(err) <= ulps);
if( fail && ftz ) if( fail && ftz )
@@ -1334,8 +1343,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
{ {
long double correct2 = func.f_ff( 0.0, s2[j] ); long double correct2 = func.f_ff( 0.0, s2[j] );
long double correct3 = func.f_ff( -0.0, s2[j] ); long double correct3 = func.f_ff( -0.0, s2[j] );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;
@@ -1357,10 +1366,10 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
correct3 = func.f_ff( -0.0, 0.0 ); correct3 = func.f_ff( -0.0, 0.0 );
long double correct4 = func.f_ff( 0.0, -0.0 ); long double correct4 = func.f_ff( 0.0, -0.0 );
long double correct5 = func.f_ff( -0.0, -0.0 ); long double correct5 = func.f_ff( -0.0, -0.0 );
err2 = Ulp_Error_Double( test, correct2 ); err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
err3 = Ulp_Error_Double( test, correct3 ); err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
float err4 = Ulp_Error_Double( test, correct4 ); float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
float err5 = Ulp_Error_Double( test, correct5 ); float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
(!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
@@ -1386,8 +1395,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
{ {
long double correct2 = func.f_ff( s[j], 0.0 ); long double correct2 = func.f_ff( s[j], 0.0 );
long double correct3 = func.f_ff( s[j], -0.0 ); long double correct3 = func.f_ff( s[j], -0.0 );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;

View File

@@ -230,6 +230,7 @@ typedef struct TestInfo
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
ThreadInfo *tinfo; // An array of thread specific information for each worker thread ThreadInfo *tinfo; // An array of thread specific information for each worker thread
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values cl_uint scale; // stride between individual test values
float ulps; // max_allowed ulps float ulps; // max_allowed ulps
@@ -262,6 +263,16 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
} }
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -330,7 +341,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
} }
// Run the kernels // Run the kernels
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
// Accumulate the arithmetic errors // Accumulate the arithmetic errors
@@ -758,6 +769,16 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
} }
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ulps = f->double_ulps; test_info.ulps = f->double_ulps;
test_info.ftz = f->ftz || gForceFTZ; test_info.ftz = f->ftz || gForceFTZ;
@@ -831,7 +852,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
// Run the kernels // Run the kernels
if( !gSkipCorrectnessTesting ) if( !gSkipCorrectnessTesting )
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
// Accumulate the arithmetic errors // Accumulate the arithmetic errors
@@ -1128,7 +1149,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
{ {
cl_double test = ((cl_double*) q)[j]; cl_double test = ((cl_double*) q)[j];
long double correct = func.f_fi( s[j], s2[j] ); long double correct = func.f_fi( s[j], s2[j] );
float err = Ulp_Error_Double( test, correct ); float err = Bruteforce_Ulp_Error_Double( test, correct );
int fail = ! (fabsf(err) <= ulps); int fail = ! (fabsf(err) <= ulps);
if( fail && ftz ) if( fail && ftz )
@@ -1146,8 +1167,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
{ {
long double correct2 = func.f_fi( 0.0, s2[j] ); long double correct2 = func.f_fi( 0.0, s2[j] );
long double correct3 = func.f_fi( -0.0, s2[j] ); long double correct3 = func.f_fi( -0.0, s2[j] );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;

View File

@@ -871,7 +871,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
double test = ((double*) q)[j]; double test = ((double*) q)[j];
int correct2 = INT_MIN; int correct2 = INT_MIN;
long double correct = f->dfunc.f_ffpI( s[j], s2[j], &correct2 ); long double correct = f->dfunc.f_ffpI( s[j], s2[j], &correct2 );
float err = Ulp_Error_Double( test, correct ); float err = Bruteforce_Ulp_Error_Double( test, correct );
int64_t iErr; int64_t iErr;
// in case of remquo, we only care about the sign and last seven bits of // in case of remquo, we only care about the sign and last seven bits of
@@ -907,8 +907,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
int correct3i, correct4i; int correct3i, correct4i;
long double correct3 = f->dfunc.f_ffpI( 0.0, s2[j], &correct3i ); long double correct3 = f->dfunc.f_ffpI( 0.0, s2[j], &correct3i );
long double correct4 = f->dfunc.f_ffpI( -0.0, s2[j], &correct4i ); long double correct4 = f->dfunc.f_ffpI( -0.0, s2[j], &correct4i );
float err2 = Ulp_Error_Double( test, correct3 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct3 );
float err3 = Ulp_Error_Double( test, correct4 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct4 );
int64_t iErr3 = (long long) q2[j] - (long long) correct3i; int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
int64_t iErr4 = (long long) q2[j] - (long long) correct4i; int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0))); fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));
@@ -937,10 +937,10 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
correct4 = f->dfunc.f_ffpI( -0.0, 0.0, &correct4i ); correct4 = f->dfunc.f_ffpI( -0.0, 0.0, &correct4i );
long double correct7 = f->dfunc.f_ffpI( 0.0, -0.0, &correct7i ); long double correct7 = f->dfunc.f_ffpI( 0.0, -0.0, &correct7i );
long double correct8 = f->dfunc.f_ffpI( -0.0, -0.0, &correct8i ); long double correct8 = f->dfunc.f_ffpI( -0.0, -0.0, &correct8i );
err2 = Ulp_Error_Double( test, correct3 ); err2 = Bruteforce_Ulp_Error_Double( test, correct3 );
err3 = Ulp_Error_Double( test, correct4 ); err3 = Bruteforce_Ulp_Error_Double( test, correct4 );
float err4 = Ulp_Error_Double( test, correct7 ); float err4 = Bruteforce_Ulp_Error_Double( test, correct7 );
float err5 = Ulp_Error_Double( test, correct8 ); float err5 = Bruteforce_Ulp_Error_Double( test, correct8 );
iErr3 = (long long) q2[j] - (long long) correct3i; iErr3 = (long long) q2[j] - (long long) correct3i;
iErr4 = (long long) q2[j] - (long long) correct4i; iErr4 = (long long) q2[j] - (long long) correct4i;
int64_t iErr7 = (long long) q2[j] - (long long) correct7i; int64_t iErr7 = (long long) q2[j] - (long long) correct7i;
@@ -979,8 +979,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
int correct3i, correct4i; int correct3i, correct4i;
long double correct3 = f->dfunc.f_ffpI( s[j], 0.0, &correct3i ); long double correct3 = f->dfunc.f_ffpI( s[j], 0.0, &correct3i );
long double correct4 = f->dfunc.f_ffpI( s[j], -0.0, &correct4i ); long double correct4 = f->dfunc.f_ffpI( s[j], -0.0, &correct4i );
float err2 = Ulp_Error_Double( test, correct3 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct3 );
float err3 = Ulp_Error_Double( test, correct4 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct4 );
int64_t iErr3 = (long long) q2[j] - (long long) correct3i; int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
int64_t iErr4 = (long long) q2[j] - (long long) correct4i; int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0))); fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));

View File

@@ -222,6 +222,7 @@ typedef struct TestInfo
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
ThreadInfo *tinfo; // An array of thread specific information for each worker thread ThreadInfo *tinfo; // An array of thread specific information for each worker thread
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values cl_uint scale; // stride between individual test values
int ftz; // non-zero if running in flush to zero mode int ftz; // non-zero if running in flush to zero mode
@@ -249,6 +250,16 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
} }
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -319,7 +330,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
// Run the kernels // Run the kernels
if( !gSkipCorrectnessTesting ) if( !gSkipCorrectnessTesting )
{ {
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
if( error ) if( error )
goto exit; goto exit;
@@ -749,6 +760,16 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
} }
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ftz = f->ftz || gForceFTZ; test_info.ftz = f->ftz || gForceFTZ;
@@ -820,7 +841,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
if( !gSkipCorrectnessTesting ) if( !gSkipCorrectnessTesting )
{ {
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
if( error ) if( error )
goto exit; goto exit;

View File

@@ -193,6 +193,7 @@ typedef struct TestInfo
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
ThreadInfo *tinfo; // An array of thread specific information for each worker thread ThreadInfo *tinfo; // An array of thread specific information for each worker thread
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values cl_uint scale; // stride between individual test values
int ftz; // non-zero if running in flush to zero mode int ftz; // non-zero if running in flush to zero mode
@@ -220,6 +221,16 @@ int TestMacro_Int_Float(const Func *f, MTdata d)
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
} }
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
// cl_kernels aren't thread safe, so we make one for each vector size for every thread // cl_kernels aren't thread safe, so we make one for each vector size for every thread
@@ -279,7 +290,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d)
if( !gSkipCorrectnessTesting ) if( !gSkipCorrectnessTesting )
{ {
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
if( error ) if( error )
goto exit; goto exit;
@@ -602,6 +613,16 @@ int TestMacro_Int_Double(const Func *f, MTdata d)
} }
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ftz = f->ftz || gForceFTZ; test_info.ftz = f->ftz || gForceFTZ;
@@ -664,7 +685,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d)
if( !gSkipCorrectnessTesting ) if( !gSkipCorrectnessTesting )
{ {
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
if( error ) if( error )
goto exit; goto exit;

View File

@@ -785,7 +785,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
{ {
double test = ((double*) q)[j]; double test = ((double*) q)[j];
long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] ); long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
float err = Ulp_Error_Double( test, correct ); float err = Bruteforce_Ulp_Error_Double( test, correct );
int fail = ! (fabsf(err) <= f->double_ulps); int fail = ! (fabsf(err) <= f->double_ulps);
if( fail && ftz ) if( fail && ftz )
@@ -803,8 +803,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
{ // look at me, { // look at me,
long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] ); long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] ); long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;
@@ -826,10 +826,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] ); correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] ); long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] ); long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
err2 = Ulp_Error_Double( test, correct2 ); err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
err3 = Ulp_Error_Double( test, correct3 ); err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
float err4 = Ulp_Error_Double( test, correct4 ); float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
float err5 = Ulp_Error_Double( test, correct5 ); float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
@@ -860,14 +860,14 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f ); long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f ); long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f ); long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
err2 = Ulp_Error_Double( test, correct2 ); err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
err3 = Ulp_Error_Double( test, correct3 ); err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
err4 = Ulp_Error_Double( test, correct4 ); err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
err5 = Ulp_Error_Double( test, correct5 ); err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
float err6 = Ulp_Error_Double( test, correct6 ); float err6 = Bruteforce_Ulp_Error_Double( test, correct6 );
float err7 = Ulp_Error_Double( test, correct7 ); float err7 = Bruteforce_Ulp_Error_Double( test, correct7 );
float err8 = Ulp_Error_Double( test, correct8 ); float err8 = Bruteforce_Ulp_Error_Double( test, correct8 );
float err9 = Ulp_Error_Double( test, correct9 ); float err9 = Bruteforce_Ulp_Error_Double( test, correct9 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
(!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) &&
@@ -907,10 +907,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 ); correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 ); long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 );
long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 ); long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
err2 = Ulp_Error_Double( test, correct2 ); err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
err3 = Ulp_Error_Double( test, correct3 ); err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
float err4 = Ulp_Error_Double( test, correct4 ); float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
float err5 = Ulp_Error_Double( test, correct5 ); float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
@@ -936,8 +936,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
{ {
long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] ); long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] ); long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;
@@ -959,10 +959,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 ); correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 ); long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 ); long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
err2 = Ulp_Error_Double( test, correct2 ); err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
err3 = Ulp_Error_Double( test, correct3 ); err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
float err4 = Ulp_Error_Double( test, correct4 ); float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
float err5 = Ulp_Error_Double( test, correct5 ); float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
@@ -988,8 +988,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
{ {
long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 ); long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 ); long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;

File diff suppressed because it is too large Load Diff

View File

@@ -1790,7 +1790,7 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
// *signgamp = 1; // *signgamp = 1;
ix = hx&0x7fffffff; ix = hx&0x7fffffff;
if(ix>=0x7ff00000) return x*x; if(ix>=0x7ff00000) return x*x;
if((ix|lx)==0) return one/zero; if((ix|lx)==0) return INFINITY;
if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */ if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */
if(hx<0) { if(hx<0) {
// *signgamp = -1; // *signgamp = -1;
@@ -1799,9 +1799,9 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
} }
if(hx<0) { if(hx<0) {
if(ix>=0x43300000) /* |x|>=2**52, must be -integer */ if(ix>=0x43300000) /* |x|>=2**52, must be -integer */
return one/zero; return INFINITY;
t = reference_sinpi(x); t = reference_sinpi(x);
if(t==zero) return one/zero; /* -integer */ if(t==zero) return INFINITY; /* -integer */
nadj = reference_log(pi/reference_fabs(t*x)); nadj = reference_log(pi/reference_fabs(t*x));
// if(t<zero) *signgamp = -1; // if(t<zero) *signgamp = -1;
x = -x; x = -x;

View File

View File

@@ -1010,7 +1010,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
{ {
double test = ((double*) q)[j]; double test = ((double*) q)[j];
long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] ); long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
float err = Ulp_Error_Double( test, correct ); float err = Bruteforce_Ulp_Error_Double( test, correct );
int fail = ! (fabsf(err) <= f->double_ulps); int fail = ! (fabsf(err) <= f->double_ulps);
if( fail && ftz ) if( fail && ftz )
@@ -1028,8 +1028,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
{ // look at me, { // look at me,
long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] ); long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] ); long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;
@@ -1051,10 +1051,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] ); correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] ); long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] ); long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
err2 = Ulp_Error_Double( test, correct2 ); err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
err3 = Ulp_Error_Double( test, correct3 ); err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
float err4 = Ulp_Error_Double( test, correct4 ); float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
float err5 = Ulp_Error_Double( test, correct5 ); float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
@@ -1085,14 +1085,14 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f ); long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f ); long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f ); long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
err2 = Ulp_Error_Double( test, correct2 ); err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
err3 = Ulp_Error_Double( test, correct3 ); err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
err4 = Ulp_Error_Double( test, correct4 ); err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
err5 = Ulp_Error_Double( test, correct5 ); err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
float err6 = Ulp_Error_Double( test, correct6 ); float err6 = Bruteforce_Ulp_Error_Double( test, correct6 );
float err7 = Ulp_Error_Double( test, correct7 ); float err7 = Bruteforce_Ulp_Error_Double( test, correct7 );
float err8 = Ulp_Error_Double( test, correct8 ); float err8 = Bruteforce_Ulp_Error_Double( test, correct8 );
float err9 = Ulp_Error_Double( test, correct9 ); float err9 = Bruteforce_Ulp_Error_Double( test, correct9 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
(!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) &&
@@ -1132,10 +1132,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 ); correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 ); long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 );
long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 ); long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
err2 = Ulp_Error_Double( test, correct2 ); err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
err3 = Ulp_Error_Double( test, correct3 ); err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
float err4 = Ulp_Error_Double( test, correct4 ); float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
float err5 = Ulp_Error_Double( test, correct5 ); float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
@@ -1161,8 +1161,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
{ {
long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] ); long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] ); long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;
@@ -1184,10 +1184,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 ); correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 ); long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 ); long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
err2 = Ulp_Error_Double( test, correct2 ); err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
err3 = Ulp_Error_Double( test, correct3 ); err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
float err4 = Ulp_Error_Double( test, correct4 ); float err4 = Bruteforce_Ulp_Error_Double( test, correct4 );
float err5 = Ulp_Error_Double( test, correct5 ); float err5 = Bruteforce_Ulp_Error_Double( test, correct5 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
(!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
@@ -1213,8 +1213,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
{ {
long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 ); long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 ); long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;

View File

@@ -200,6 +200,7 @@ typedef struct TestInfo
cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id]
ThreadInfo *tinfo; // An array of thread specific information for each worker thread ThreadInfo *tinfo; // An array of thread specific information for each worker thread
cl_uint threadCount; // Number of worker threads cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next. cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values cl_uint scale; // stride between individual test values
float ulps; // max_allowed ulps float ulps; // max_allowed ulps
@@ -234,6 +235,16 @@ int TestFunc_Float_Float(const Func *f, MTdata d)
test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
} }
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -309,7 +320,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d)
if( !gSkipCorrectnessTesting || skipTestingRelaxed) if( !gSkipCorrectnessTesting || skipTestingRelaxed)
{ {
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
// Accumulate the arithmetic errors // Accumulate the arithmetic errors
for( i = 0; i < test_info.threadCount; i++ ) for( i = 0; i < test_info.threadCount; i++ )
@@ -892,7 +903,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
{ {
cl_double test = ((cl_double*) q)[j]; cl_double test = ((cl_double*) q)[j];
long double correct = func.f_f( s[j] ); long double correct = func.f_f( s[j] );
float err = Ulp_Error_Double( test, correct ); float err = Bruteforce_Ulp_Error_Double( test, correct );
int fail = ! (fabsf(err) <= ulps); int fail = ! (fabsf(err) <= ulps);
if( fail ) if( fail )
@@ -912,8 +923,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
{ {
long double correct2 = func.f_f( 0.0L ); long double correct2 = func.f_f( 0.0L );
long double correct3 = func.f_f( -0.0L ); long double correct3 = func.f_f( -0.0L );
float err2 = Ulp_Error_Double( test, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct2 );
float err3 = Ulp_Error_Double( test, correct3 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct3 );
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
if( fabsf( err2 ) < fabsf(err ) ) if( fabsf( err2 ) < fabsf(err ) )
err = err2; err = err2;
@@ -998,6 +1009,15 @@ int TestFunc_Double_Double(const Func *f, MTdata d)
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
} }
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
if (test_info.step / test_info.subBufferSize != test_info.scale)
{
//there was overflow
test_info.jobCount = 1;
}
else
{
test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
}
test_info.f = f; test_info.f = f;
test_info.ulps = f->double_ulps; test_info.ulps = f->double_ulps;
@@ -1062,7 +1082,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d)
if( !gSkipCorrectnessTesting ) if( !gSkipCorrectnessTesting )
{ {
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
// Accumulate the arithmetic errors // Accumulate the arithmetic errors
for( i = 0; i < test_info.threadCount; i++ ) for( i = 0; i < test_info.threadCount; i++ )

View File

@@ -800,8 +800,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d)
double test2 = ((double*) q2)[j]; double test2 = ((double*) q2)[j];
long double correct2; long double correct2;
long double correct = f->dfunc.f_fpf( s[j], &correct2 ); long double correct = f->dfunc.f_fpf( s[j], &correct2 );
float err = Ulp_Error_Double( test, correct ); float err = Bruteforce_Ulp_Error_Double( test, correct );
float err2 = Ulp_Error_Double( test2, correct2 ); float err2 = Bruteforce_Ulp_Error_Double( test2, correct2 );
int fail = ! (fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps); int fail = ! (fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps);
if( ftz ) if( ftz )
{ {
@@ -837,10 +837,10 @@ int TestFunc_Double2_Double(const Func *f, MTdata d)
long double correct2p, correct2n; long double correct2p, correct2n;
long double correctp = f->dfunc.f_fpf( 0.0, &correct2p ); long double correctp = f->dfunc.f_fpf( 0.0, &correct2p );
long double correctn = f->dfunc.f_fpf( -0.0, &correct2n ); long double correctn = f->dfunc.f_fpf( -0.0, &correct2n );
float errp = Ulp_Error_Double( test, correctp ); float errp = Bruteforce_Ulp_Error_Double( test, correctp );
float err2p = Ulp_Error_Double( test, correct2p ); float err2p = Bruteforce_Ulp_Error_Double( test, correct2p );
float errn = Ulp_Error_Double( test, correctn ); float errn = Bruteforce_Ulp_Error_Double( test, correctn );
float err2n = Ulp_Error_Double( test, correct2n ); float err2n = Bruteforce_Ulp_Error_Double( test, correct2n );
fail = fail && ((!(fabsf(errp) <= f->double_ulps)) && (!(fabsf(err2p) <= f->double_ulps)) && fail = fail && ((!(fabsf(errp) <= f->double_ulps)) && (!(fabsf(err2p) <= f->double_ulps)) &&
((!(fabsf(errn) <= f->double_ulps)) && (!(fabsf(err2n) <= f->double_ulps))) ); ((!(fabsf(errn) <= f->double_ulps)) && (!(fabsf(err2n) <= f->double_ulps))) );
if( fabsf( errp ) < fabsf(err ) ) if( fabsf( errp ) < fabsf(err ) )

View File

@@ -633,7 +633,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d)
double test = ((double*) q)[j]; double test = ((double*) q)[j];
int correct2 = INT_MIN; int correct2 = INT_MIN;
long double correct = f->dfunc.f_fpI( s[j], &correct2 ); long double correct = f->dfunc.f_fpI( s[j], &correct2 );
float err = Ulp_Error_Double( test, correct ); float err = Bruteforce_Ulp_Error_Double( test, correct );
cl_long iErr = (long long) q2[j] - (long long) correct2; cl_long iErr = (long long) q2[j] - (long long) correct2;
int fail = ! (fabsf(err) <= f->double_ulps && abs_cl_long( iErr ) <= maxiError ); int fail = ! (fabsf(err) <= f->double_ulps && abs_cl_long( iErr ) <= maxiError );
if( ftz ) if( ftz )
@@ -652,8 +652,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d)
int correct5, correct6; int correct5, correct6;
long double correct3 = f->dfunc.f_fpI( 0.0, &correct5 ); long double correct3 = f->dfunc.f_fpI( 0.0, &correct5 );
long double correct4 = f->dfunc.f_fpI( -0.0, &correct6 ); long double correct4 = f->dfunc.f_fpI( -0.0, &correct6 );
float err2 = Ulp_Error_Double( test, correct3 ); float err2 = Bruteforce_Ulp_Error_Double( test, correct3 );
float err3 = Ulp_Error_Double( test, correct4 ); float err3 = Bruteforce_Ulp_Error_Double( test, correct4 );
cl_long iErr2 = (long long) q2[j] - (long long) correct5; cl_long iErr2 = (long long) q2[j] - (long long) correct5;
cl_long iErr3 = (long long) q2[j] - (long long) correct6; cl_long iErr3 = (long long) q2[j] - (long long) correct6;

View File

@@ -567,7 +567,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d)
{ {
double test = ((double*) q)[j]; double test = ((double*) q)[j];
long double correct = f->dfunc.f_u( s[j] ); long double correct = f->dfunc.f_u( s[j] );
float err = Ulp_Error_Double(test, correct); float err = Bruteforce_Ulp_Error_Double(test, correct);
int fail = ! (fabsf(err) <= f->double_ulps); int fail = ! (fabsf(err) <= f->double_ulps);
// half_sin/cos/tan are only valid between +-2**16, Inf, NaN // half_sin/cos/tan are only valid between +-2**16, Inf, NaN

View File

@@ -613,6 +613,28 @@ void TestNonUniformWorkGroup::showTestInfo () {
} }
} }
// Clamps the requested local buffer size so that, added to the local memory
// the kernel itself reports using, it does not exceed the device's local
// memory size.
//
// localArraySize: requested size of the local buffer; compared directly
//                 against CL_DEVICE_LOCAL_MEM_SIZE, so it is treated as bytes.
// Returns the (possibly reduced) size. Returns 0 when the kernel's own local
// memory usage already meets or exceeds the device limit.
size_t TestNonUniformWorkGroup::adjustLocalArraySize (size_t localArraySize) {
  // If localArraySize is too big we sometimes cannot run the kernel for lack
  // of resources, because the kernel itself also requires some local memory.
  int err;

  cl_ulong kernelLocalMemSize = 0;
  err = clGetKernelWorkGroupInfo(_testKernel, _device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernelLocalMemSize), &kernelLocalMemSize, NULL);
  // NOTE(review): test_error presumably returns err from this function on
  // failure; since the return type is size_t that value would be seen by the
  // caller as a size — confirm against the harness macro.
  test_error(err, "clGetKernelWorkGroupInfo failed");

  cl_ulong deviceLocalMemSize = 0;
  err = clGetDeviceInfo(_device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(deviceLocalMemSize), &deviceLocalMemSize, NULL);
  test_error(err, "clGetDeviceInfo failed");

  if (kernelLocalMemSize + localArraySize > deviceLocalMemSize) {
    // Guard the unsigned subtraction: if the kernel alone already uses at
    // least the whole device budget there is no room left, so report 0
    // instead of letting the difference wrap around to a huge value.
    size_t adjustedLocalArraySize = 0;
    if (kernelLocalMemSize < deviceLocalMemSize) {
      adjustedLocalArraySize = (size_t)(deviceLocalMemSize - kernelLocalMemSize);
    }

    // size_t is not guaranteed to match unsigned long (e.g. 64-bit Windows),
    // so widen explicitly and print with %llu.
    log_info("localArraySize was adjusted from %llu to %llu\n",
             (unsigned long long)localArraySize,
             (unsigned long long)adjustedLocalArraySize);
    localArraySize = adjustedLocalArraySize;
  }

  return localArraySize;
}
int TestNonUniformWorkGroup::runKernel () { int TestNonUniformWorkGroup::runKernel () {
int err; int err;
@@ -630,7 +652,8 @@ int TestNonUniformWorkGroup::runKernel () {
test_error(err, "clSetKernelArg failed"); test_error(err, "clSetKernelArg failed");
//creating local buffer //creating local buffer
err = clSetKernelArg(_testKernel, 1, localArraySize*sizeof(unsigned int), NULL); localArraySize = adjustLocalArraySize(localArraySize*sizeof(unsigned int));
err = clSetKernelArg(_testKernel, 1, localArraySize, NULL);
test_error(err, "clSetKernelArg failed"); test_error(err, "clSetKernelArg failed");
clMemWrapper testGlobalArray = clCreateBuffer(_context, CL_MEM_READ_WRITE, _numOfGlobalWorkItems*sizeof(cl_uint), NULL, &err); clMemWrapper testGlobalArray = clCreateBuffer(_context, CL_MEM_READ_WRITE, _numOfGlobalWorkItems*sizeof(cl_uint), NULL, &err);

View File

@@ -116,6 +116,7 @@ private:
void verifyData (DataContainerAttrib * reference, DataContainerAttrib * results, short regionNumber); void verifyData (DataContainerAttrib * reference, DataContainerAttrib * results, short regionNumber);
void calculateExpectedValues (); void calculateExpectedValues ();
void showTestInfo (); void showTestInfo ();
size_t adjustLocalArraySize(size_t localArraySize);
}; };
// Class responsible for running subtest scenarios in test function // Class responsible for running subtest scenarios in test function

View File

@@ -17,6 +17,7 @@
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
#include <memory>
#if ! defined( _WIN32) #if ! defined( _WIN32)
#if ! defined( __ANDROID__ ) #if ! defined( __ANDROID__ )
@@ -356,9 +357,6 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
//----------------------------------------- //-----------------------------------------
static bool isLongSupported(cl_device_id device_id) static bool isLongSupported(cl_device_id device_id)
{ {
//profile type && device extention for long support checking
char *profileType = NULL,*devExt = NULL;
size_t tempSize = 0; size_t tempSize = 0;
cl_int status; cl_int status;
bool extSupport = true; bool extSupport = true;
@@ -377,7 +375,7 @@ static bool isLongSupported(cl_device_id device_id)
return false; return false;
} }
profileType = new char[tempSize]; std::unique_ptr<char[]> profileType(new char[tempSize]);
if(profileType == NULL) if(profileType == NULL)
{ {
log_error("Failed to allocate memory(profileType)"); log_error("Failed to allocate memory(profileType)");
@@ -388,11 +386,11 @@ static bool isLongSupported(cl_device_id device_id)
device_id, device_id,
CL_DEVICE_PROFILE, CL_DEVICE_PROFILE,
sizeof(char) * tempSize, sizeof(char) * tempSize,
profileType, profileType.get(),
NULL); NULL);
if(!strcmp("EMBEDDED_PROFILE",profileType)) if(!strcmp("EMBEDDED_PROFILE",profileType.get()))
{ {
// Device extention // Device extention
status = clGetDeviceInfo( status = clGetDeviceInfo(
@@ -408,7 +406,7 @@ static bool isLongSupported(cl_device_id device_id)
return false; return false;
} }
devExt = new char[tempSize]; std::unique_ptr<char[]> devExt(new char[tempSize]);
if(devExt == NULL) if(devExt == NULL)
{ {
log_error("Failed to allocate memory(devExt)"); log_error("Failed to allocate memory(devExt)");
@@ -419,13 +417,10 @@ static bool isLongSupported(cl_device_id device_id)
device_id, device_id,
CL_DEVICE_EXTENSIONS, CL_DEVICE_EXTENSIONS,
sizeof(char) * tempSize, sizeof(char) * tempSize,
devExt, devExt.get(),
NULL); NULL);
extSupport = (strstr(devExt,"cles_khr_int64") != NULL); extSupport = (strstr(devExt.get(),"cles_khr_int64") != NULL);
delete devExt;
delete profileType;
} }
return extSupport; return extSupport;
} }
@@ -501,7 +496,8 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
int err; int err;
cl_program program; cl_program program;
cl_kernel kernel; cl_kernel kernel;
cl_mem d_out; cl_mem d_out = NULL;
cl_mem d_a = NULL;
char _analysisBuffer[ANALYSIS_BUFFER_SIZE]; char _analysisBuffer[ANALYSIS_BUFFER_SIZE];
cl_uint out32 = 0; cl_uint out32 = 0;
cl_ulong out64 = 0; cl_ulong out64 = 0;
@@ -523,7 +519,7 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
if(isKernelArgument(allTestCase[testId],testNum)) if(isKernelArgument(allTestCase[testId],testNum))
{ {
int a = 2; int a = 2;
cl_mem d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
sizeof(int), &a, &err); sizeof(int), &a, &err);
if(err!= CL_SUCCESS || d_a == NULL) { if(err!= CL_SUCCESS || d_a == NULL) {
log_error("clCreateBuffer failed\n"); log_error("clCreateBuffer failed\n");
@@ -613,6 +609,10 @@ exit:
log_error("clReleaseKernel failed\n"); log_error("clReleaseKernel failed\n");
if(clReleaseProgram(program) != CL_SUCCESS) if(clReleaseProgram(program) != CL_SUCCESS)
log_error("clReleaseProgram failed\n"); log_error("clReleaseProgram failed\n");
if(d_out)
clReleaseMemObject(d_out);
if(d_a)
clReleaseMemObject(d_a);
++s_test_cnt; ++s_test_cnt;
@@ -639,6 +639,8 @@ static void printArch( void )
log_info( "ARCH:\tx86_64\n" ); log_info( "ARCH:\tx86_64\n" );
#elif defined( __arm__ ) #elif defined( __arm__ )
log_info( "ARCH:\tarm\n" ); log_info( "ARCH:\tarm\n" );
#elif defined( __aarch64__ )
log_info( "ARCH:\taarch64\n" );
#else #else
#error unknown arch #error unknown arch
#endif #endif
@@ -757,7 +759,6 @@ int test_float_15(cl_device_id deviceID, cl_context context, cl_command_queue qu
{ {
return doTest(gQueue, gContext, TYPE_FLOAT, 15, gDevice); return doTest(gQueue, gContext, TYPE_FLOAT, 15, gDevice);
} }
#if ! defined( __ANDROID__ )
int test_float_16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_float_16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
return doTest(gQueue, gContext, TYPE_FLOAT, 16, gDevice); return doTest(gQueue, gContext, TYPE_FLOAT, 16, gDevice);
@@ -766,7 +767,6 @@ int test_float_17(cl_device_id deviceID, cl_context context, cl_command_queue qu
{ {
return doTest(gQueue, gContext, TYPE_FLOAT, 17, gDevice); return doTest(gQueue, gContext, TYPE_FLOAT, 17, gDevice);
} }
#endif
int test_float_18(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_float_18(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
return doTest(gQueue, gContext, TYPE_FLOAT, 18, gDevice); return doTest(gQueue, gContext, TYPE_FLOAT, 18, gDevice);
@@ -857,10 +857,6 @@ int test_string_2(cl_device_id deviceID, cl_context context, cl_command_queue qu
{ {
return doTest(gQueue, gContext, TYPE_STRING, 2, gDevice); return doTest(gQueue, gContext, TYPE_STRING, 2, gDevice);
} }
int test_string_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return doTest(gQueue, gContext, TYPE_STRING, 3, gDevice);
}
int test_vector_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_vector_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
@@ -933,10 +929,8 @@ basefn basefn_list[] = {
test_float_13, test_float_13,
test_float_14, test_float_14,
test_float_15, test_float_15,
#if ! defined( __ANDROID__ )
test_float_16, test_float_16,
test_float_17, test_float_17,
#endif
test_float_18, test_float_18,
test_float_19, test_float_19,
test_float_20, test_float_20,
@@ -962,7 +956,6 @@ basefn basefn_list[] = {
test_string_0, test_string_0,
test_string_1, test_string_1,
test_string_2, test_string_2,
test_string_3,
test_vector_0, test_vector_0,
test_vector_1, test_vector_1,
@@ -1004,10 +997,8 @@ const char *basefn_names[] = {
"float_13", "float_13",
"float_14", "float_14",
"float_15", "float_15",
#if ! defined( __ANDROID__ )
"float_16", "float_16",
"float_17", "float_17",
#endif
"float_18", "float_18",
"float_19", "float_19",
"float_20", "float_20",
@@ -1033,7 +1024,6 @@ const char *basefn_names[] = {
"string_0", "string_0",
"string_1", "string_1",
"string_2", "string_2",
"string_3",
"vector_0", "vector_0",
"vector_1", "vector_1",

View File

@@ -212,8 +212,6 @@ struct printDataGenParameters printFloatGenParameters[] = {
{"%+#21.15E","789456123.0"}, {"%+#21.15E","789456123.0"},
#if ! defined( __ANDROID__ )
//Double argument representing floating-point,in [-]xh.hhhhpAd style //Double argument representing floating-point,in [-]xh.hhhhpAd style
{"%.6a","0.1"}, {"%.6a","0.1"},
@@ -222,8 +220,6 @@ struct printDataGenParameters printFloatGenParameters[] = {
{"%10.2a","9990.235"}, {"%10.2a","9990.235"},
#endif
//Infinity (1.0/0.0) //Infinity (1.0/0.0)
{"%f","1.0f/0.0f"}, {"%f","1.0f/0.0f"},
@@ -275,14 +271,10 @@ const char* correctBufferFloat[] = {
"+7.894561230000000E+8", "+7.894561230000000E+8",
#if ! defined( __ANDROID__ )
"0x1.99999ap-4", "0x1.99999ap-4",
"0x1.38p+13", "0x1.38p+13",
#endif
"inf", "inf",
"-nan", "-nan",
@@ -622,10 +614,6 @@ struct printDataGenParameters printStringGenParameters[] = {
{"%s","\"%%\""}, {"%s","\"%%\""},
//null string
{"%s","(void*)0"}
}; };
//--------------------------------------------------------- //---------------------------------------------------------
@@ -887,7 +875,7 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
if(!strcmp(pTestCase->_correctBuffer[testId],"inf")) if(!strcmp(pTestCase->_correctBuffer[testId],"inf"))
return strcmp(analysisBuffer,"inf")&&strcmp(analysisBuffer,"infinity")&&strcmp(analysisBuffer,"1.#INF00")&&strcmp(analysisBuffer,"Inf"); return strcmp(analysisBuffer,"inf")&&strcmp(analysisBuffer,"infinity")&&strcmp(analysisBuffer,"1.#INF00")&&strcmp(analysisBuffer,"Inf");
if(!strcmp(pTestCase->_correctBuffer[testId],"nan") || !strcmp(pTestCase->_correctBuffer[testId],"-nan")) { if(!strcmp(pTestCase->_correctBuffer[testId],"nan") || !strcmp(pTestCase->_correctBuffer[testId],"-nan")) {
return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)"); return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)")&&strcmp(analysisBuffer,"-nan(ind)");
} }
return strcmp(analysisBuffer,pTestCase->_correctBuffer[testId]); return strcmp(analysisBuffer,pTestCase->_correctBuffer[testId]);
} }

0
test_conformance/run_conformance.py Normal file → Executable file
View File

View File

@@ -8,6 +8,7 @@ set(${MODULE_NAME}_SOURCES
../../test_common/harness/msvc9.c ../../test_common/harness/msvc9.c
../../test_common/harness/kernelHelpers.c ../../test_common/harness/kernelHelpers.c
../../test_common/harness/errorHelpers.c ../../test_common/harness/errorHelpers.c
../../test_common/harness/parseParameters.cpp
) )
include(../CMakeCommon.txt) include(../CMakeCommon.txt)

View File

@@ -27,12 +27,10 @@
#include <limits.h> #include <limits.h>
#include "test_select.h" #include "test_select.h"
#include "../../test_common/harness/testHarness.h" #include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/kernelHelpers.h" #include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/mt19937.h" #include "../../test_common/harness/mt19937.h"
#include "../../test_common/harness/parseParameters.h"
//----------------------------------------- //-----------------------------------------
@@ -57,6 +55,9 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont
static int doTest(cl_command_queue queue, cl_context context, static int doTest(cl_command_queue queue, cl_context context,
Type stype, Type cmptype, cl_device_id device); Type stype, Type cmptype, cl_device_id device);
static void printUsage( void );
//----------------------------------------- //-----------------------------------------
// Definitions and initializations // Definitions and initializations
//----------------------------------------- //-----------------------------------------
@@ -71,6 +72,7 @@ static int doTest(cl_command_queue queue, cl_context context,
// range. Otherwise, we test a subset of the range // range. Otherwise, we test a subset of the range
// [-min_short, min_short] // [-min_short, min_short]
static bool s_wimpy_mode = false; static bool s_wimpy_mode = false;
static int s_wimpy_reduction_factor = 256;
// Tests are broken into the major test which is based on the // Tests are broken into the major test which is based on the
// src and cmp type and their corresponding vector types and // src and cmp type and their corresponding vector types and
@@ -344,7 +346,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE;
size_t block_elements = BUFFER_SIZE / type_size[stype]; size_t block_elements = BUFFER_SIZE / type_size[stype];
size_t step = s_wimpy_mode ? 256 : 1; size_t step = s_wimpy_mode ? s_wimpy_reduction_factor : 1;
cl_ulong cmp_stride = block_elements * step; cl_ulong cmp_stride = block_elements * step;
// It is more efficient to create the tests all at once since we // It is more efficient to create the tests all at once since we
@@ -506,99 +508,152 @@ exit:
return err; return err;
} }
static void printUsage( void ) int test_select_uchar_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
log_info("test_select: [-cghw] [test_name|start_test_num] \n"); return doTest(queue, context, kuchar, kuchar, deviceID);
log_info(" default is to run the full test on the default device\n");
log_info(" -w run in wimpy mode (smoke test)\n");
log_info(" test_name will run only one test of that name\n");
log_info(" start_test_num will start running from that num\n");
} }
int test_select_uchar_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
static void printArch( void )
{ {
log_info( "sizeof( void*) = %d\n", (int) sizeof( void *) ); return doTest(queue, context, kuchar, kchar, deviceID);
#if defined( __APPLE__ )
#if defined( __ppc__ )
log_info( "ARCH:\tppc\n" );
#elif defined( __ppc64__ )
log_info( "ARCH:\tppc64\n" );
#elif defined( __i386__ )
log_info( "ARCH:\ti386\n" );
#elif defined( __x86_64__ )
log_info( "ARCH:\tx86_64\n" );
#elif defined( __arm__ )
log_info( "ARCH:\tarm\n" );
#else
#error unknown arch
#endif
int type = 0;
size_t typeSize = sizeof( type );
sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 );
log_info( "cpu type:\t%d\n", type );
typeSize = sizeof( type );
sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 );
log_info( "cpu subtype:\t%d\n", type );
#endif
} }
int test_select_char_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
//-----------------------------------------
// main
//-----------------------------------------
int main(int argc, char* argv[]) {
int i;
cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;
cl_platform_id platform_id;
long test_start_num = 0; // start test number
const char* exec_testname = NULL;
cl_device_id device_id;
uint32_t device_frequency = 0;
uint32_t compute_devices = 0;
test_start();
// Maybe we want turn off sleep
// Check the environmental to see if there is device preference
char *device_env = getenv("CL_DEVICE_TYPE");
if (device_env != NULL) {
if( strcmp( device_env, "gpu" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_GPU" ) == 0 )
device_type = CL_DEVICE_TYPE_GPU;
else if( strcmp( device_env, "cpu" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_CPU" ) == 0 )
device_type = CL_DEVICE_TYPE_CPU;
else if( strcmp( device_env, "accelerator" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
device_type = CL_DEVICE_TYPE_ACCELERATOR;
else if( strcmp( device_env, "default" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
device_type = CL_DEVICE_TYPE_DEFAULT;
else
{ {
log_error( "Unknown CL_DEVICE_TYPE environment variable: %s.\nAborting...\n", device_env ); return doTest(queue, context, kchar, kuchar, deviceID);
abort();
} }
// select_char_char: forwards to doTest() with stype = kchar and cmptype = kchar.
// num_elements is part of the signature shared by the basefn_list entries but
// is not used by this wrapper.
int test_select_char_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kchar, kchar, deviceID);
    return status;
}
// select_ushort_ushort: forwards to doTest() with stype = kushort and
// cmptype = kushort. num_elements is part of the signature shared by the
// basefn_list entries but is not used by this wrapper.
int test_select_ushort_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kushort, kushort, deviceID);
    return status;
}
// select_ushort_short: forwards to doTest() with stype = kushort and
// cmptype = kshort. num_elements is part of the signature shared by the
// basefn_list entries but is not used by this wrapper.
int test_select_ushort_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kushort, kshort, deviceID);
    return status;
}
// select_short_ushort: forwards to doTest() with stype = kshort and
// cmptype = kushort. num_elements is part of the signature shared by the
// basefn_list entries but is not used by this wrapper.
int test_select_short_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kshort, kushort, deviceID);
    return status;
}
// select_short_short: forwards to doTest() with stype = kshort and
// cmptype = kshort. num_elements is part of the signature shared by the
// basefn_list entries but is not used by this wrapper.
int test_select_short_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kshort, kshort, deviceID);
    return status;
}
// select_uint_uint: forwards to doTest() with stype = kuint and cmptype = kuint.
// num_elements is part of the signature shared by the basefn_list entries but
// is not used by this wrapper.
int test_select_uint_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kuint, kuint, deviceID);
    return status;
}
// select_uint_int: forwards to doTest() with stype = kuint and cmptype = kint.
// num_elements is part of the signature shared by the basefn_list entries but
// is not used by this wrapper.
int test_select_uint_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kuint, kint, deviceID);
    return status;
}
// select_int_uint: forwards to doTest() with stype = kint and cmptype = kuint.
// num_elements is part of the signature shared by the basefn_list entries but
// is not used by this wrapper.
int test_select_int_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kint, kuint, deviceID);
    return status;
}
// select_int_int: forwards to doTest() with stype = kint and cmptype = kint.
// num_elements is part of the signature shared by the basefn_list entries but
// is not used by this wrapper.
int test_select_int_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kint, kint, deviceID);
    return status;
}
// select_float_uint: forwards to doTest() with stype = kfloat and
// cmptype = kuint. num_elements is part of the signature shared by the
// basefn_list entries but is not used by this wrapper.
int test_select_float_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kfloat, kuint, deviceID);
    return status;
}
// select_float_int: forwards to doTest() with stype = kfloat and cmptype = kint.
// num_elements is part of the signature shared by the basefn_list entries but
// is not used by this wrapper.
int test_select_float_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kfloat, kint, deviceID);
    return status;
}
// select_ulong_ulong: forwards to doTest() with stype = kulong and
// cmptype = kulong. num_elements is part of the signature shared by the
// basefn_list entries but is not used by this wrapper.
int test_select_ulong_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kulong, kulong, deviceID);
    return status;
}
// select_ulong_long: forwards to doTest() with stype = kulong and
// cmptype = klong. num_elements is part of the signature shared by the
// basefn_list entries but is not used by this wrapper.
int test_select_ulong_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kulong, klong, deviceID);
    return status;
}
// select_long_ulong: forwards to doTest() with stype = klong and
// cmptype = kulong. num_elements is part of the signature shared by the
// basefn_list entries but is not used by this wrapper.
int test_select_long_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, klong, kulong, deviceID);
    return status;
}
// select_long_long: forwards to doTest() with stype = klong and cmptype = klong.
// num_elements is part of the signature shared by the basefn_list entries but
// is not used by this wrapper.
int test_select_long_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, klong, klong, deviceID);
    return status;
}
// select_double_ulong: forwards to doTest() with stype = kdouble and
// cmptype = kulong. num_elements is part of the signature shared by the
// basefn_list entries but is not used by this wrapper.
int test_select_double_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    (void)num_elements; // intentionally unused
    const int status = doTest(queue, context, kdouble, kulong, deviceID);
    return status;
}
int test_select_double_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return doTest(queue, context, kdouble, klong, deviceID);
} }
// Check for the wimpy mode environment variable basefn basefn_list[] = {
if (getenv("CL_WIMPY_MODE")) { test_select_uchar_uchar,
log_info("*** Detected CL_WIMPY_MODE env\n"); test_select_uchar_char,
s_wimpy_mode = 1; test_select_char_uchar,
test_select_char_char,
test_select_ushort_ushort,
test_select_ushort_short,
test_select_short_ushort,
test_select_short_short,
test_select_uint_uint,
test_select_uint_int,
test_select_int_uint,
test_select_int_int,
test_select_float_uint,
test_select_float_int,
test_select_ulong_ulong,
test_select_ulong_long,
test_select_long_ulong,
test_select_long_long,
test_select_double_ulong,
test_select_double_long,
};
// Human-readable test names, one per entry of basefn_list and in the same
// order (select_<stype>_<cmptype>). main() hands both tables to
// runTestHarness(), and printUsage() prints this list under "Test names:".
const char *basefn_names[] = {
"select_uchar_uchar",
"select_uchar_char",
"select_char_uchar",
"select_char_char",
"select_ushort_ushort",
"select_ushort_short",
"select_short_ushort",
"select_short_short",
"select_uint_uint",
"select_uint_int",
"select_int_uint",
"select_int_int",
"select_float_uint",
"select_float_int",
"select_ulong_ulong",
"select_ulong_long",
"select_long_ulong",
"select_long_long",
"select_double_ulong",
"select_double_long",
};
// Requires basefn_names and basefn_list to have the same number of entries.
// NOTE(review): ct_assert is presumably a compile-time assertion macro from the
// harness headers -- confirm.
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
// Number of entries in basefn_names (array byte size divided by pointer size).
int num_fns = sizeof(basefn_names) / sizeof(char *);
int main(int argc, char* argv[])
{
const char ** argList = (const char **)calloc( argc, sizeof( char*) );
if( NULL == argList )
{
log_error( "Failed to allocate memory for argList array.\n" );
return 1;
} }
// Determine if we want to run a particular test or if we want to argList[0] = argv[0];
// start running from a certain point and if we want to run on cpu/gpu size_t argCount = 1;
// usage: test_selects [test_name] [start test num] [run_long]
// default is to run all tests on the gpu and be short for( int i = 1; i < argc; ++i )
// test names are of the form select_[src/dest type]_[cmp_type] {
// In the long test, we run the full range for any type >= 32 bits
// and 32 bits subset for the 64 bit value.
for (i=1; i < argc; ++i) {
const char *arg = argv[i]; const char *arg = argv[i];
if (arg == NULL) if (arg == NULL)
break; break;
@@ -612,79 +667,28 @@ int main(int argc, char* argv[]) {
case 'h': case 'h':
printUsage(); printUsage();
return 0; return 0;
case 'w': // Wimpy mode case 'w':
s_wimpy_mode = true; s_wimpy_mode = true;
break; break;
case '[':
parseWimpyReductionFactor(arg, s_wimpy_reduction_factor);
break;
default: default:
log_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); break;
printUsage();
return 0;
} }
arg++; arg++;
} }
} }
else { else
char* t = NULL;
long num = strtol(argv[i], &t, 0);
if (t != argv[i])
test_start_num = num;
else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_CPU" ) )
device_type = CL_DEVICE_TYPE_CPU;
else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_GPU" ) )
device_type = CL_DEVICE_TYPE_GPU;
else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_ACCELERATOR" ) )
device_type = CL_DEVICE_TYPE_ACCELERATOR;
else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_DEFAULT" ) )
device_type = CL_DEVICE_TYPE_DEFAULT;
else if( 0 == strcmp( argv[i], "randomize" ) ) {
gRandomSeed = (cl_uint) time( NULL );
log_info("\nRandom seed: %u.\n", gRandomSeed );
} else {
// assume it is a test name that we want to execute
exec_testname = argv[i];
}
}
}
int err;
// Get platform
err = clGetPlatformIDs(1, &platform_id, NULL);
checkErr(err,"clGetPlatformIDs failed");
// Get Device information
err = clGetDeviceIDs(platform_id, device_type, 1, &device_id, 0);
checkErr(err,"clGetComputeDevices");
err = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(cl_device_type), &device_type, NULL);
checkErr(err,"clGetComputeConfigInfo 1");
size_t config_size = sizeof( device_frequency );
#if MULTITHREAD
if( (err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, config_size, &compute_devices, NULL )) )
#endif
compute_devices = 1;
config_size = sizeof(device_frequency);
if((err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, config_size, &device_frequency, NULL )))
device_frequency = 1;
//detect whether profile of the device is embedded
char profile[1024] = "";
if( (err = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) ){}
else if( strstr(profile, "EMBEDDED_PROFILE" ) )
{ {
gIsEmbedded = 1; argList[argCount] = arg;
argCount++;
}
} }
if (getenv("CL_WIMPY_MODE")) {
log_info( "\nCompute Device info:\n" ); s_wimpy_mode = true;
log_info( "\tProcessing with %d devices\n", compute_devices ); }
log_info( "\tDevice Frequency: %d MHz\n", device_frequency );
printDeviceHeader( device_id );
printArch();
log_info( "Test binary built %s %s\n", __DATE__, __TIME__ ); log_info( "Test binary built %s %s\n", __DATE__, __TIME__ );
if (s_wimpy_mode) { if (s_wimpy_mode) {
@@ -692,97 +696,26 @@ int main(int argc, char* argv[]) {
log_info("*** WARNING: Testing in Wimpy mode! ***\n"); log_info("*** WARNING: Testing in Wimpy mode! ***\n");
log_info("*** Wimpy mode is not sufficient to verify correctness. ***\n"); log_info("*** Wimpy mode is not sufficient to verify correctness. ***\n");
log_info("*** It gives warm fuzzy feelings and then nevers calls. ***\n\n"); log_info("*** It gives warm fuzzy feelings and then nevers calls. ***\n\n");
log_info("*** Wimpy Reduction Factor: %-27u ***\n\n", s_wimpy_reduction_factor);
} }
cl_context context = clCreateContext(NULL, 1, &device_id, notify_callback, NULL, NULL); int err = runTestHarness( argCount, argList, num_fns, basefn_list, basefn_names, false, false, 0 );
checkNull(context, "clCreateContext");
cl_command_queue queue = clCreateCommandQueueWithProperties(context, device_id, 0, NULL); free( argList );
checkNull(queue, "clCreateCommandQueue");
return err;
if (exec_testname) {
// Parse name
// Skip the first part of the name
bool success = false;
if (strncmp(exec_testname, "select_", 7) == 0) {
int i;
Type src_type = kTypeCount;
Type cmp_type = kTypeCount;
char* sptr = (char *)strchr(exec_testname, '_');
if (sptr) {
for (++sptr, i=0; i < kTypeCount; i++) {
if (strncmp(sptr, type_name[i], strlen(type_name[i])) == 0) {
src_type = (Type)i;
break;
}
}
sptr = strchr(sptr, '_');
if (sptr) {
for (++sptr, i=0; i < kTypeCount; i++) {
if (strncmp(sptr, type_name[i], strlen(type_name[i])) == 0) {
cmp_type = (Type)i;
break;
}
}
}
}
if (src_type != kTypeCount && cmp_type != kTypeCount) {
success = true;
log_info("Testing only select_%s_%s\n",
type_name[src_type], type_name[cmp_type]);
if (doTest(queue, context, src_type, cmp_type, device_id) != 0)
log_error("*** select_%s_%s FAILED ***\n\n",
type_name[src_type], type_name[cmp_type]);
}
}
if (!success) {
log_error("can not find test:%s", exec_testname);
return -1;
}
}
else {
int src_type, j;
int test_num;
test_num = 0;
for (src_type = 0; src_type < kTypeCount; ++src_type) {
for (j = 0; j < 2; ++j) {
Type cmp_type = ctype[src_type][j];
if (++test_num < test_start_num) {
log_info("%d) skipping select_%s_%s\n", test_num,
type_name[src_type], type_name[cmp_type]);
}
else {
log_info("%d) Testing select_%s_%s\n",
test_num, type_name[src_type], type_name[cmp_type]);
if (doTest(queue, context, (Type)src_type, cmp_type, device_id) != 0)
log_error("*** %d) select_%s_%s FAILED ***\n\n", test_num,
type_name[src_type], type_name[cmp_type]);
}
}
}
} }
int error = clFinish(queue); static void printUsage( void )
if (error) { {
log_error("clFinish failed: %d\n", error); log_info("test_select: [-w] <optional: test_names> \n");
log_info("\tdefault is to run the full test on the default device\n");
log_info("\t-w run in wimpy mode (smoke test)\n");
log_info("\t-[2^n] Set wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", s_wimpy_reduction_factor);
log_info("\n");
log_info("Test names:\n");
for( int i = 0; i < num_fns; i++ )
{
log_info( "\t%s\n", basefn_names[i] );
} }
clReleaseContext(context);
clReleaseCommandQueue(queue);
if (s_test_fail == 0) {
if (s_test_cnt > 1)
log_info("PASSED %d of %d tests.\n", s_test_cnt, s_test_cnt);
else
log_info("PASSED test.\n");
} else if (s_test_fail > 0) {
if (s_test_cnt > 1)
log_error("FAILED %d of %d tests.\n", s_test_fail, s_test_cnt);
else
log_error("FAILED test.\n");
}
test_finish();
return s_test_fail;
} }

Binary file not shown.

Binary file not shown.

View File

@@ -346,9 +346,12 @@ bool test_suite(cl_device_id device, cl_uint size_t_width, const char *folder,
{ {
std::cout << "Failed tests:" << std::endl; std::cout << "Failed tests:" << std::endl;
std::for_each(ErrList.begin(), ErrList.end(), printError); std::for_each(ErrList.begin(), ErrList.end(), printError);
}
std::cout << std::endl; std::cout << std::endl;
return tests_passed == number_of_tests; return false;
}
std::cout << std::endl;
return true;
} }
static std::string getTestFolder(const std::string& TS) static std::string getTestFolder(const std::string& TS)
@@ -840,7 +843,6 @@ bool test_basic (cl_device_id device, cl_uint size_t_width, const char *folder)
"test_sizeof.sizeof_uintptr_t", "test_sizeof.sizeof_uintptr_t",
"test_sizeof.sizeof_image2d_t", "test_sizeof.sizeof_image2d_t",
"test_sizeof.sizeof_image3d_t", "test_sizeof.sizeof_image3d_t",
"test_sizeof.sizeof_sampler_t",
"test_sizeof.sizeof_double", "test_sizeof.sizeof_double",
"test_sizeof.sizeof_double2", "test_sizeof.sizeof_double2",
"test_sizeof.sizeof_double4", "test_sizeof.sizeof_double4",

Some files were not shown because too many files have changed in this diff Show More