Initial open source release of OpenCL 2.1 CTS.

This commit is contained in:
Kedar Patil
2017-05-16 18:48:39 +05:30
parent 6911ba5116
commit c3a61c6bdc
902 changed files with 319106 additions and 0 deletions

View File

@@ -0,0 +1,18 @@
project
: requirements <include>.
<toolset>gcc:<cflags>"-xc++"
<toolset>msvc:<cflags>"/TP"
<warnings-as-errors>off
: usage-requirements <include>.
;
local harness.objs ;
for source in [ glob *.c *.cpp ]
{
harness.objs += [ obj $(source:B).obj : $(source) ] ;
}
alias harness : $(harness.objs)
: <use>/Runtime//OpenCL.lib :
: <library>/Runtime//OpenCL.lib
;

View File

@@ -0,0 +1,41 @@
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif
SRCS = conversions.c \
errorHelpers.c \
genericThread.cpp \
imageHelpers.cpp \
kernelHelpers.c \
mt19937.c \
rounding_mode.c \
testHarness.c \
testHarness.cpp \
ThreadPool.c \
threadTesting.c \
typeWrappers.cpp
DEFINES = DONT_TEST_GARBAGE_POINTERS
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
all: $(OBJECTS)
clean:
rm -f $(OBJECTS)
.DEFAULT:
@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,931 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "ThreadPool.h"
#include "errorHelpers.h"
#include "fpcontrol.h"
#include <stdio.h>
#include <stdlib.h>
#if defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 ) // or any other POSIX system
#if defined( _WIN32 )
#include <windows.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif
#include "mingw_compat.h"
#include <process.h>
#else // !_WIN32
#include <pthread.h>
#include <unistd.h>
#include <sys/errno.h>
#ifdef __linux__
#include <sched.h>
#endif
#endif // !_WIN32
// declarations
#ifdef _WIN32
void ThreadPool_WorkerFunc( void *p );
#else
void *ThreadPool_WorkerFunc( void *p );
#endif
void ThreadPool_Init(void);
void ThreadPool_Exit(void);
#if defined (__MINGW32__)
// Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC
CRITICAL_SECTION gAtomicLock;
#elif defined( __GNUC__ ) || defined( _MSC_VER)
#else
pthread_mutex_t gAtomicLock;
#endif
// Atomic add operator with mem barrier. Mem barrier needed to protect state modified by the worker functions.
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
{
#if defined (__MINGW32__)
// No atomics on Mingw32
EnterCriticalSection(&gAtomicLock);
cl_int old = *a;
*a = old + b;
LeaveCriticalSection(&gAtomicLock);
return old;
#elif defined( __GNUC__ )
// GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
return __sync_fetch_and_add( a, b );
// do we need __sync_synchronize() here, too? GCC docs are unclear whether __sync_fetch_and_add does a synchronize
#elif defined( _MSC_VER )
return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b );
#else
#warning Please add a atomic add implementation here, with memory barrier. Fallback code is slow.
if( pthread_mutex_lock(&gAtomicLock) )
log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
cl_int old = *a;
*a = old + b;
if( pthread_mutex_unlock(&gAtomicLock) )
log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n");
return old;
#endif
}
#if defined( _WIN32 )
// Uncomment the following line if Windows XP support is not required.
// #define HAS_INIT_ONCE_EXECUTE_ONCE 1
#if defined(HAS_INIT_ONCE_EXECUTE_ONCE)
#define _INIT_ONCE INIT_ONCE
#define _PINIT_ONCE PINIT_ONCE
#define _InitOnceExecuteOnce InitOnceExecuteOnce
#else // !HAS_INIT_ONCE_EXECUTE_ONCE
typedef volatile LONG _INIT_ONCE;
typedef _INIT_ONCE *_PINIT_ONCE;
typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
#define _INIT_ONCE_UNINITIALIZED 0
#define _INIT_ONCE_IN_PROGRESS 1
#define _INIT_ONCE_DONE 2
static BOOL _InitOnceExecuteOnce(
_PINIT_ONCE InitOnce,
_PINIT_ONCE_FN InitFn,
PVOID Parameter,
LPVOID *Context
)
{
while ( *InitOnce != _INIT_ONCE_DONE )
{
if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED )
{
InitFn( InitOnce, Parameter, Context );
*InitOnce = _INIT_ONCE_DONE;
return TRUE;
}
Sleep( 1 );
}
return TRUE;
}
#endif // !HAS_INIT_ONCE_EXECUTE_ONCE
// Uncomment the following line if Windows XP support is not required.
// #define HAS_CONDITION_VARIABLE 1
#if defined(HAS_CONDITION_VARIABLE)
#define _CONDITION_VARIABLE CONDITION_VARIABLE
#define _InitializeConditionVariable InitializeConditionVariable
#define _SleepConditionVariableCS SleepConditionVariableCS
#define _WakeAllConditionVariable WakeAllConditionVariable
#else // !HAS_CONDITION_VARIABLE
typedef struct
{
HANDLE mEvent; // Used to park the thread.
CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount.
volatile cl_int mWaiters; // Number of threads waiting on this cond var.
volatile cl_int mGeneration; // Wait generation count.
volatile cl_int mReleaseCount; // Number of releases to execute before reseting the event.
} _CONDITION_VARIABLE;
typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE;
static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var )
{
cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
InitializeCriticalSection( cond_var->mLock );
cond_var->mWaiters = 0;
cond_var->mGeneration = 0;
#if !defined ( NDEBUG )
cond_var->mReleaseCount = 0;
#endif // !NDEBUG
}
static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored)
{
EnterCriticalSection( cond_var->mLock );
cl_int generation = cond_var->mGeneration;
++cond_var->mWaiters;
LeaveCriticalSection( cond_var->mLock );
LeaveCriticalSection( cond_lock );
while ( TRUE )
{
WaitForSingleObject( cond_var->mEvent, INFINITE );
EnterCriticalSection( cond_var->mLock );
BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;
LeaveCriticalSection( cond_var->mLock );
if ( done )
{
break;
}
}
EnterCriticalSection( cond_lock );
EnterCriticalSection( cond_var->mLock );
if ( --cond_var->mReleaseCount == 0 )
{
ResetEvent( cond_var->mEvent );
}
--cond_var->mWaiters;
LeaveCriticalSection( cond_var->mLock );
}
static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var )
{
EnterCriticalSection( cond_var->mLock );
if (cond_var->mWaiters > 0 )
{
++cond_var->mGeneration;
cond_var->mReleaseCount = cond_var->mWaiters;
SetEvent( cond_var->mEvent );
}
LeaveCriticalSection( cond_var->mLock );
}
#endif // !HAS_CONDITION_VARIABLE
#endif // _WIN32
#define MAX_COUNT (1<<29)
// Global state to coordinate whether the threads have been launched successfully or not
#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600)
static _INIT_ONCE threadpool_init_control;
#elif defined (_WIN32) // MingW of XP
static int threadpool_init_control;
#else // Posix platforms
pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
#endif
cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch
// critical region lock around ThreadPool_Do. We can only run one ThreadPool_Do at a time,
// because we are too lazy to set up a queue here, and don't expect to need one.
#if defined( _WIN32 )
CRITICAL_SECTION gThreadPoolLock[1];
#else // !_WIN32
pthread_mutex_t gThreadPoolLock;
#endif // !_WIN32
// Condition variable to park ThreadPool threads when not working
#if defined( _WIN32 )
CRITICAL_SECTION cond_lock[1];
_CONDITION_VARIABLE cond_var[1];
#else // !_WIN32
pthread_mutex_t cond_lock;
pthread_cond_t cond_var;
#endif // !_WIN32
volatile cl_int gRunCount = 0; // Condition variable state. How many iterations on the function left to run.
// set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative.
// State that only changes when the threadpool is not working.
volatile TPFuncPtr gFunc_ptr = NULL;
volatile void *gUserInfo = NULL;
volatile cl_int gJobCount = 0;
// State that may change while the thread pool is working
volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole
// Condition variable to park caller while waiting
#if defined( _WIN32 )
HANDLE caller_event;
#else // !_WIN32
pthread_mutex_t caller_cond_lock;
pthread_cond_t caller_cond_var;
#endif // !_WIN32
volatile cl_int gRunning = 0; // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do.
// The total number of threads launched.
volatile cl_int gThreadCount = 0;
#ifdef _WIN32
void ThreadPool_WorkerFunc( void *p )
#else
void *ThreadPool_WorkerFunc( void *p )
#endif
{
cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
// log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
while( MAX_COUNT > item )
{
cl_int err;
// check for more work to do
if( 0 >= item )
{
// log_info( "Thread %d has run out of work.\n", threadID );
// No work to do. Attempt to block waiting for work
#if defined( _WIN32 )
EnterCriticalSection( cond_lock );
#else // !_WIN32
if((err = pthread_mutex_lock( &cond_lock) ))
{
log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID );
goto exit;
}
#endif // !_WIN32
cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 );
// log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 );
if( 1 == remaining )
{ // last thread out signal the main thread to wake up
#if defined( _WIN32 )
SetEvent( caller_event );
#else // !_WIN32
if((err = pthread_mutex_lock( &caller_cond_lock) ))
{
log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
goto exit;
}
if( (err = pthread_cond_broadcast( &caller_cond_var )))
{
log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err );
goto exit;
}
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
{
log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
goto exit;
}
#endif // !_WIN32
}
// loop in case we are woken only to discover that some other thread already did all the work
while( 0 >= item )
{
#if defined( _WIN32 )
_SleepConditionVariableCS( cond_var, cond_lock, INFINITE );
#else // !_WIN32
if((err = pthread_cond_wait( &cond_var, &cond_lock) ))
{
log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
pthread_mutex_unlock( &cond_lock);
goto exit;
}
#endif // !_WIN32
// try again to get a valid item id
item = ThreadPool_AtomicAdd( &gRunCount, -1 );
if( MAX_COUNT <= item ) // exit if we are done
{
#if defined( _WIN32 )
LeaveCriticalSection( cond_lock );
#else // !_WIN32
pthread_mutex_unlock( &cond_lock);
#endif // !_WIN32
goto exit;
}
}
ThreadPool_AtomicAdd( &gRunning, 1 );
// log_info( "Thread %d has found work.\n", threadID);
#if defined( _WIN32 )
LeaveCriticalSection( cond_lock );
#else // !_WIN32
if((err = pthread_mutex_unlock( &cond_lock) ))
{
log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
goto exit;
}
#endif // !_WIN32
}
// we have a valid item, so do the work
if( CL_SUCCESS == jobError ) // but only if we haven't already encountered an error
{
// log_info( "Thread %d doing job %d\n", threadID, item - 1);
#if defined(__APPLE__) && defined(__arm__)
// On most platforms which support denorm, default is FTZ off. However,
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
// This creates issues in result verification. Since spec allows the implementation to either flush or
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
// where reference is being computed to make sure we get non-flushed reference result. If implementation
// returns flushed result, we correctly take care of that in verification code.
FPU_mode_type oldMode;
DisableFTZ( &oldMode );
#endif
// Call the user's function with this item ID
err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo );
#if defined(__APPLE__) && defined(__arm__)
// Restore FP state
RestoreFPState( &oldMode );
#endif
if( err )
{
#if (__MINGW32__)
EnterCriticalSection(&gAtomicLock);
if( jobError == CL_SUCCESS );
jobError = err;
gRunCount = 0;
LeaveCriticalSection(&gAtomicLock);
#elif defined( __GNUC__ )
// GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
// set the new error if we are the first one there.
__sync_val_compare_and_swap( &jobError, CL_SUCCESS, err );
// drop run count to 0
gRunCount = 0;
__sync_synchronize();
#elif defined( _MSC_VER )
// set the new error if we are the first one there.
_InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS );
// drop run count to 0
gRunCount = 0;
_mm_mfence();
#else
if( pthread_mutex_lock(&gAtomicLock) )
log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
if( jobError == CL_SUCCESS );
jobError = err;
gRunCount = 0;
if( pthread_mutex_unlock(&gAtomicLock) )
log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n");
#endif
}
}
// get the next item
item = ThreadPool_AtomicAdd( &gRunCount, -1 );
}
exit:
log_info( "ThreadPool: thread %d exiting.\n", threadID );
ThreadPool_AtomicAdd( &gThreadCount, -1 );
#if !defined(_WIN32)
return NULL;
#endif
}
// SetThreadCount() may be used to artifically set the number of worker threads
// If the value is 0 (the default) the number of threads will be determined based on
// the number of CPU cores. If it is a unicore machine, then 2 will be used, so
// that we still get some testing for thread safety.
//
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
// code will run single threaded, but will report an error to indicate that the test
// is invalid. This option is intended for debugging purposes only. It is suggested
// as a convention that test apps set the thread count to 1 in response to the -m flag.
//
// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
// otherwise the behavior is indefined.
void SetThreadCount( int count )
{
if( threadPoolInitErr == CL_SUCCESS )
{
log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" );
abort();
}
gThreadCount = count;
}
void ThreadPool_Init(void)
{
cl_int i;
int err;
volatile cl_uint threadID = 0;
// Check for manual override of multithreading code. We add this for better debuggability.
if( getenv( "CL_TEST_SINGLE_THREADED" ) )
{
gThreadCount = 1;
return;
}
// Figure out how many threads to run -- check first for non-zero to give the implementation the chance
if( 0 == gThreadCount )
{
#if defined(_MSC_VER) || defined (__MINGW64__)
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
DWORD length = 0;
GetLogicalProcessorInformation( NULL, &length );
buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
if( buffer != NULL )
{
if ( GetLogicalProcessorInformation( buffer, &length ) == TRUE )
{
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
{
if( ptr->Relationship == RelationProcessorCore )
{
// Count the number of bits in ProcessorMask (number of logical cores)
ULONG mask = ptr->ProcessorMask;
while( mask )
{
++gThreadCount;
mask &= mask - 1; // Remove 1 bit at a time
}
}
++ptr;
}
}
free(buffer);
}
#elif defined (__MINGW32__)
{
#warning How about this, instead of hard coding it to 2?
SYSTEM_INFO sysinfo;
GetSystemInfo( &sysinfo );
gThreadCount = sysinfo.dwNumberOfProcessors;
}
#elif defined (__linux__) && !defined(__ANDROID__)
cpu_set_t affinity;
if ( 0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity) )
{
#if !(defined(CPU_COUNT))
gThreadCount = 1;
#else
gThreadCount = CPU_COUNT(&affinity);
#endif
}
else
{
gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
}
#else // !_WIN32
gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
#endif // !_WIN32
// Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety
if( 1 == gThreadCount )
gThreadCount = 2;
}
// When working in 32 bit limit the thread number to 12
// This fix was made due to memory issues in integer_ops test
// When running integer_ops, the test opens as many threads as the
// machine has and each thread allocates a fixed amount of memory
// When running this test on dual socket machine in 32-bit, the
// process memory is not sufficient and the test fails
#if defined(_WIN32) && !defined(_M_X64)
if (gThreadCount > 12) {
gThreadCount = 12;
}
#endif
//Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded.
if( gThreadCount < 2 )
{
log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n");
gThreadCount = 1;
return;
}
#if defined( _WIN32 )
InitializeCriticalSection( gThreadPoolLock );
InitializeCriticalSection( cond_lock );
_InitializeConditionVariable( cond_var );
caller_event = CreateEvent( NULL, FALSE, FALSE, NULL );
#elif defined (__GNUC__)
// Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since it might cause problem
// with some flavors of gcc compilers.
pthread_cond_init(&cond_var, NULL);
pthread_mutex_init(&cond_lock ,NULL);
pthread_cond_init(&caller_cond_var, NULL);
pthread_mutex_init(&caller_cond_lock, NULL);
pthread_mutex_init(&gThreadPoolLock, NULL);
#endif
#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
pthread_mutex_initialize(gAtomicLock);
#elif defined (__MINGW32__)
InitializeCriticalSection(&gAtomicLock);
#endif
// Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
// That would cause a deadlock.
#if !defined( _WIN32 )
if((err = pthread_mutex_lock( &caller_cond_lock) ))
{
log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
gThreadCount = 1;
return;
}
#endif // !_WIN32
gRunning = gThreadCount;
// init threads
for( i = 0; i < gThreadCount; i++ )
{
#if defined( _WIN32 )
uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID);
err = ( handle == 0 );
#else // !_WIN32
pthread_t tid = 0;
err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID );
#endif // !_WIN32
if( err )
{
log_error( "Error %d launching thread %d\n", err, i );
threadPoolInitErr = err;
gThreadCount = i;
break;
}
}
atexit( ThreadPool_Exit );
// block until they are done launching.
do
{
#if defined( _WIN32 )
WaitForSingleObject( caller_event, INFINITE );
#else // !_WIN32
if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
{
log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
pthread_mutex_unlock( &caller_cond_lock);
return;
}
#endif // !_WIN32
}
while( gRunCount != -gThreadCount );
#if !defined( _WIN32 )
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
{
log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
return;
}
#endif // !_WIN32
threadPoolInitErr = CL_SUCCESS;
}
#if defined(_MSC_VER)
static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex)
{
ThreadPool_Init();
return TRUE;
}
#endif
void ThreadPool_Exit(void)
{
int err, count;
gRunCount = CL_INT_MAX;
#if defined( __GNUC__ )
// GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
__sync_synchronize();
#elif defined( _MSC_VER )
_mm_mfence();
#else
#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
#endif
// spin waiting for threads to die
for (count = 0; 0 != gThreadCount && count < 1000; count++)
{
#if defined( _WIN32 )
_WakeAllConditionVariable( cond_var );
Sleep(1);
#else // !_WIN32
if( (err = pthread_cond_broadcast( &cond_var )))
{
log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err );
break;
}
usleep(1000);
#endif // !_WIN32
}
if( gThreadCount )
log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount );
else
log_info( "Thread pool exited in a orderly fashion.\n" );
}
// Blocking API that farms out count jobs to a thread pool.
// It may return with some work undone if func_ptr() returns a non-zero
// result.
//
// This function obviously has its shortcommings. Only one call to ThreadPool_Do
// can be running at a time. It is not intended for general purpose use.
// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
// all available then it would make more sense to use those features.
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
cl_uint count,
void *userInfo )
{
cl_int newErr;
cl_int err = 0;
// Lazily set up our threads
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
#elif defined (_WIN32)
if (threadpool_init_control == 0) {
#warning This is buggy and race prone. Find a better way.
ThreadPool_Init();
threadpool_init_control = 1;
}
#else //posix platform
err = pthread_once( &threadpool_init_control, ThreadPool_Init );
if( err )
{
log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
return err;
}
#endif
// Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable
if( threadPoolInitErr )
{
cl_uint currentJob = 0;
cl_int result = CL_SUCCESS;
#if defined(__APPLE__) && defined(__arm__)
// On most platforms which support denorm, default is FTZ off. However,
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
// This creates issues in result verification. Since spec allows the implementation to either flush or
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
// where reference is being computed to make sure we get non-flushed reference result. If implementation
// returns flushed result, we correctly take care of that in verification code.
FPU_mode_type oldMode;
DisableFTZ( &oldMode );
#endif
for( currentJob = 0; currentJob < count; currentJob++ )
if((result = func_ptr( currentJob, 0, userInfo )))
{
#if defined(__APPLE__) && defined(__arm__)
// Restore FP state before leaving
RestoreFPState( &oldMode );
#endif
return result;
}
#if defined(__APPLE__) && defined(__arm__)
// Restore FP state before leaving
RestoreFPState( &oldMode );
#endif
return CL_SUCCESS;
}
if( count >= MAX_COUNT )
{
log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT );
return -1;
}
// Enter critical region
#if defined( _WIN32 )
EnterCriticalSection( gThreadPoolLock );
#else // !_WIN32
if( (err = pthread_mutex_lock( &gThreadPoolLock )))
{
switch (err)
{
case EDEADLK:
log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" );
break;
case EINVAL:
log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" );
break;
default:
break;
}
return err;
}
#endif // !_WIN32
// Start modifying the job state observable by worker threads
#if defined( _WIN32 )
EnterCriticalSection( cond_lock );
#else // !_WIN32
if((err = pthread_mutex_lock( &cond_lock) ))
{
log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
goto exit;
}
#endif // !_WIN32
// Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
// That would cause a deadlock.
#if !defined( _WIN32 )
if((err = pthread_mutex_lock( &caller_cond_lock) ))
{
log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
goto exit;
}
#endif // !_WIN32
// Prime the worker threads to get going
jobError = CL_SUCCESS;
gRunCount = gJobCount = count;
gFunc_ptr = func_ptr;
gUserInfo = userInfo;
#if defined( _WIN32 )
ResetEvent(caller_event);
_WakeAllConditionVariable( cond_var );
LeaveCriticalSection( cond_lock );
#else // !_WIN32
if( (err = pthread_cond_broadcast( &cond_var )))
{
log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
goto exit;
}
if((err = pthread_mutex_unlock( &cond_lock) ))
{
log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
goto exit;
}
#endif // !_WIN32
// block until they are done. It would be slightly more efficient to do some of the work here though.
do
{
#if defined( _WIN32 )
WaitForSingleObject( caller_event, INFINITE );
#else // !_WIN32
if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
{
log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
pthread_mutex_unlock( &caller_cond_lock);
goto exit;
}
#endif // !_WIN32
}
while( gRunning );
#if !defined(_WIN32)
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
{
log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
goto exit;
}
#endif // !_WIN32
err = jobError;
exit:
// exit critical region
#if defined( _WIN32 )
LeaveCriticalSection( gThreadPoolLock );
#else // !_WIN32
newErr = pthread_mutex_unlock( &gThreadPoolLock );
if( newErr)
{
log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr );
return err;
}
#endif // !_WIN32
return err;
}
cl_uint GetThreadCount( void )
{
// Lazily set up our threads
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
#elif defined (_WIN32)
if (threadpool_init_control == 0) {
#warning This is buggy and race prone. Find a better way.
ThreadPool_Init();
threadpool_init_control = 1;
}
#else
cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init );
if( err )
{
log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
return err;
}
#endif // !_WIN32
if( gThreadCount < 1 )
return 1;
return gThreadCount;
}
#else
#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
#error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
#endif
//
// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements
// of OpenCL API, while also checking
//
// A sample single threaded implementation follows, for documentation / bootstrapping purposes.
// It is not okay to use this for conformance testing!!!
//
// Exception: If your operating system does not support multithreaded execution of any kind, then you may use this code.
//
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
{
cl_uint r = *a;
// since this fallback code path is not multithreaded, we just do a regular add here
// If your operating system supports memory-barrier-atomics, use those here
*a = r + b;
return r;
}
// Blocking API that farms out count jobs to a thread pool.
// It may return with some work undone if func_ptr() returns a non-zero
// result.
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
cl_uint count,
void *userInfo )
{
cl_uint currentJob = 0;
cl_int result = CL_SUCCESS;
#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
// THIS FUNCTION IS NOT INTENDED FOR USE!!
log_error( "ERROR: Test must be multithreaded!\n" );
exit(-1);
#else
static int spewCount = 0;
if( 0 == spewCount )
{
log_info( "\nWARNING: The operating system is claimed not to support threads of any sort. Running single threaded.\n" );
spewCount = 1;
}
#endif
// The multithreaded code should mimic this behavior:
for( currentJob = 0; currentJob < count; currentJob++ )
if((result = func_ptr( currentJob, 0, userInfo )))
return result;
return CL_SUCCESS;
}
cl_uint GetThreadCount( void )
{
return 1;
}
void SetThreadCount( int count )
{
if( count > 1 )
log_info( "WARNING: SetThreadCount(%d) ignored\n", count );
}
#endif

View File

@@ -0,0 +1,76 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef THREAD_POOL_H
#define THREAD_POOL_H
#if defined( __APPLE__ )
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#if defined(__cplusplus)
extern "C" {
#endif
//
// An atomic add operator
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ); // returns old value
// Your function prototype
//
// A function pointer to the function you want to execute in a multithreaded context. No
// synchronization primitives are provided, other than the atomic add above. You may not
// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should
// work, however.
//
// job ids and thread ids are 0 based. If number of jobs or threads was 8, they will numbered be 0 through 7.
// Note that while every job will be run, it is not guaranteed that every thread will wake up before
// the work is done.
typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo );
// returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
// Some workitems may not run if a non-zero result is returned from func_ptr().
// This function may not be called from a TPFuncPtr.
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
cl_uint count,
void *userInfo );
// Returns the number of worker threads that underlie the threadpool. The value passed
// as the TPFuncPtrs thread_id will be between 0 and this value less one, inclusive.
// This is safe to call from a TPFuncPtr.
cl_uint GetThreadCount( void );
// SetThreadCount() may be used to artifically set the number of worker threads
// If the value is 0 (the default) the number of threads will be determined based on
// the number of CPU cores. If it is a unicore machine, then 2 will be used, so
// that we still get some testing for thread safety.
//
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
// code will run single threaded, but will report an error to indicate that the test
// is invalid. This option is intended for debugging purposes only. It is suggested
// as a convention that test apps set the thread count to 1 in response to the -m flag.
//
// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
// otherwise the behavior is indefined. It may not be called from a TPFuncPtr.
void SetThreadCount( int count );
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* THREAD_POOL_H */

View File

@@ -0,0 +1,290 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef test_conformance_clImageHelper_h
#define test_conformance_clImageHelper_h
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include <stdio.h>
#include "errorHelpers.h"
#ifdef __cplusplus
extern "C" {
#endif
// helper function to replace clCreateImage2D , to make the existing code use
// the functions of version 1.2 and veriosn 1.1 respectively
static inline cl_mem create_image_2d (cl_context context,
cl_mem_flags flags,
const cl_image_format *image_format,
size_t image_width,
size_t image_height,
size_t image_row_pitch,
void *host_ptr,
cl_int *errcode_ret)
{
cl_mem mImage = NULL;
#ifdef CL_VERSION_1_2
cl_image_desc image_desc_dest;
image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
image_desc_dest.image_width = image_width;
image_desc_dest.image_height = image_height;
image_desc_dest.image_depth= 0;// not usedfor 2d
image_desc_dest.image_array_size = 0;// not used for 2d
image_desc_dest.image_row_pitch = image_row_pitch;
image_desc_dest.image_slice_pitch = 0;
image_desc_dest.num_mip_levels = 0;
image_desc_dest.num_samples = 0;
image_desc_dest.mem_object = NULL;// no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
mImage = clCreateImage( context, flags, image_format, &image_desc_dest, host_ptr, errcode_ret );
if (errcode_ret && (*errcode_ret)) {
// Log an info message and rely on the calling function to produce an error
// if necessary.
log_info("clCreateImage failed (%d)\n", *errcode_ret);
}
#else
mImage = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret );
if (errcode_ret && (*errcode_ret)) {
// Log an info message and rely on the calling function to produce an error
// if necessary.
log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
}
#endif
return mImage;
}
// helper function to replace clCreateImage2D , to make the existing code use
// the functions of version 1.2 and veriosn 1.1 respectively
static inline cl_mem create_image_2d_buffer (cl_context context,
cl_mem_flags flags,
const cl_image_format *image_format,
size_t image_width,
size_t image_height,
size_t image_row_pitch,
cl_mem buffer,
cl_int *errcode_ret)
{
cl_mem mImage = NULL;
cl_image_desc image_desc_dest;
image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
image_desc_dest.image_width = image_width;
image_desc_dest.image_height = image_height;
image_desc_dest.image_depth= 0;// not usedfor 2d
image_desc_dest.image_array_size = 0;// not used for 2d
image_desc_dest.image_row_pitch = image_row_pitch;
image_desc_dest.image_slice_pitch = 0;
image_desc_dest.num_mip_levels = 0;
image_desc_dest.num_samples = 0;
image_desc_dest.mem_object = buffer;
mImage = clCreateImage( context, flags, image_format, &image_desc_dest, NULL, errcode_ret );
if (errcode_ret && (*errcode_ret)) {
// Log an info message and rely on the calling function to produce an error
// if necessary.
log_info("clCreateImage failed (%d)\n", *errcode_ret);
}
return mImage;
}
static inline cl_mem create_image_3d (cl_context context,
cl_mem_flags flags,
const cl_image_format *image_format,
size_t image_width,
size_t image_height,
size_t image_depth,
size_t image_row_pitch,
size_t image_slice_pitch,
void *host_ptr,
cl_int *errcode_ret)
{
cl_mem mImage;
#ifdef CL_VERSION_1_2
cl_image_desc image_desc;
image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
image_desc.image_width = image_width;
image_desc.image_height = image_height;
image_desc.image_depth = image_depth;
image_desc.image_array_size = 0;// not used for one image
image_desc.image_row_pitch = image_row_pitch;
image_desc.image_slice_pitch = image_slice_pitch;
image_desc.num_mip_levels = 0;
image_desc.num_samples = 0;
image_desc.mem_object = NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
mImage = clCreateImage( context,
flags,
image_format,
&image_desc,
host_ptr,
errcode_ret );
if (errcode_ret && (*errcode_ret)) {
// Log an info message and rely on the calling function to produce an error
// if necessary.
log_info("clCreateImage failed (%d)\n", *errcode_ret);
}
#else
mImage = clCreateImage3D( context,
flags, image_format,
image_width,
image_height,
image_depth,
image_row_pitch,
image_slice_pitch,
host_ptr,
errcode_ret );
if (errcode_ret && (*errcode_ret)) {
// Log an info message and rely on the calling function to produce an error
// if necessary.
log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
}
#endif
return mImage;
}
static inline cl_mem create_image_2d_array (cl_context context,
cl_mem_flags flags,
const cl_image_format *image_format,
size_t image_width,
size_t image_height,
size_t image_array_size,
size_t image_row_pitch,
size_t image_slice_pitch,
void *host_ptr,
cl_int *errcode_ret)
{
cl_mem mImage;
cl_image_desc image_desc;
image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
image_desc.image_width = image_width;
image_desc.image_height = image_height;
image_desc.image_depth = 1;
image_desc.image_array_size = image_array_size;
image_desc.image_row_pitch = image_row_pitch;
image_desc.image_slice_pitch = image_slice_pitch;
image_desc.num_mip_levels = 0;
image_desc.num_samples = 0;
image_desc.mem_object = NULL;
mImage = clCreateImage( context,
flags,
image_format,
&image_desc,
host_ptr,
errcode_ret );
if (errcode_ret && (*errcode_ret)) {
// Log an info message and rely on the calling function to produce an error
// if necessary.
log_info("clCreateImage failed (%d)\n", *errcode_ret);
}
return mImage;
}
static inline cl_mem create_image_1d_array (cl_context context,
cl_mem_flags flags,
const cl_image_format *image_format,
size_t image_width,
size_t image_array_size,
size_t image_row_pitch,
size_t image_slice_pitch,
void *host_ptr,
cl_int *errcode_ret)
{
cl_mem mImage;
cl_image_desc image_desc;
image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
image_desc.image_width = image_width;
image_desc.image_height = 1;
image_desc.image_depth = 1;
image_desc.image_array_size = image_array_size;
image_desc.image_row_pitch = image_row_pitch;
image_desc.image_slice_pitch = image_slice_pitch;
image_desc.num_mip_levels = 0;
image_desc.num_samples = 0;
image_desc.mem_object = NULL;
mImage = clCreateImage( context,
flags,
image_format,
&image_desc,
host_ptr,
errcode_ret );
if (errcode_ret && (*errcode_ret)) {
// Log an info message and rely on the calling function to produce an error
// if necessary.
log_info("clCreateImage failed (%d)\n", *errcode_ret);
}
return mImage;
}
static inline cl_mem create_image_1d (cl_context context,
cl_mem_flags flags,
const cl_image_format *image_format,
size_t image_width,
size_t image_row_pitch,
void *host_ptr,
cl_mem buffer,
cl_int *errcode_ret)
{
cl_mem mImage;
cl_image_desc image_desc;
image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D;
image_desc.image_width = image_width;
image_desc.image_height = 1;
image_desc.image_depth = 1;
image_desc.image_row_pitch = image_row_pitch;
image_desc.image_slice_pitch = 0;
image_desc.num_mip_levels = 0;
image_desc.num_samples = 0;
image_desc.mem_object = buffer;
mImage = clCreateImage( context,
flags,
image_format,
&image_desc,
host_ptr,
errcode_ret );
if (errcode_ret && (*errcode_ret)) {
// Log an info message and rely on the calling function to produce an error
// if necessary.
log_info("clCreateImage failed (%d)\n", *errcode_ret);
}
return mImage;
}
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,382 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _COMPAT_H_
#define _COMPAT_H_
#if defined(_WIN32) && defined (_MSC_VER)
#include <Windows.h>
#endif
#ifdef __cplusplus
#define EXTERN_C extern "C"
#else
#define EXTERN_C
#endif
//
// stdlib.h
//
#include <stdlib.h> // On Windows, _MAX_PATH defined there.
// llabs appeared in MS C v16 (VS 10/2010).
#if defined( _MSC_VER ) && _MSC_VER <= 1500
EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
#endif
//
// stdbool.h
//
// stdbool.h appeared in MS C v18 (VS 12/2013).
#if defined( _MSC_VER ) && MSC_VER <= 1700
#if !defined(__cplusplus)
typedef char bool;
#define true 1
#define false 0
#endif
#else
#include <stdbool.h>
#endif
//
// stdint.h
//
// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
typedef unsigned char uint8_t;
typedef char int8_t;
typedef unsigned short uint16_t;
typedef short int16_t;
typedef unsigned int uint32_t;
typedef int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;
#else
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#include <stdint.h>
#endif
//
// float.h
//
#include <float.h>
//
// fenv.h
//
// fenv.h appeared in MS C v18 (VS 12/2013).
#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
// reimplement fenv.h because windows doesn't have it
#define FE_INEXACT 0x0020
#define FE_UNDERFLOW 0x0010
#define FE_OVERFLOW 0x0008
#define FE_DIVBYZERO 0x0004
#define FE_INVALID 0x0001
#define FE_ALL_EXCEPT 0x003D
int fetestexcept(int excepts);
int feclearexcept(int excepts);
#else
#include <fenv.h>
#endif
//
// math.h
//
#if defined( __INTEL_COMPILER )
#include <mathimf.h>
#else
#include <math.h>
#endif
#if defined( _MSC_VER )
#ifdef __cplusplus
extern "C" {
#endif
#ifndef M_PI
#define M_PI 3.14159265358979323846264338327950288
#endif
#if ! defined( __INTEL_COMPILER )
#ifndef NAN
#define NAN (INFINITY - INFINITY)
#endif
#ifndef HUGE_VALF
#define HUGE_VALF (float)HUGE_VAL
#endif
#ifndef INFINITY
#define INFINITY (FLT_MAX + FLT_MAX)
#endif
#ifndef isfinite
#define isfinite(x) _finite(x)
#endif
#ifndef isnan
#define isnan( x ) ((x) != (x))
#endif
#ifndef isinf
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)
#endif
double rint( double x);
float rintf( float x);
long double rintl( long double x);
float cbrtf( float );
double cbrt( double );
int ilogb( double x);
int ilogbf (float x);
int ilogbl(long double x);
double fmax(double x, double y);
double fmin(double x, double y);
float fmaxf( float x, float y );
float fminf(float x, float y);
double log2(double x);
long double log2l(long double x);
double exp2(double x);
long double exp2l(long double x);
double fdim(double x, double y);
float fdimf(float x, float y);
long double fdiml(long double x, long double y);
double remquo( double x, double y, int *quo);
float remquof( float x, float y, int *quo);
long double remquol( long double x, long double y, int *quo);
long double scalblnl(long double x, long n);
float hypotf(float x, float y);
long double hypotl(long double x, long double y) ;
double lgamma(double x);
float lgammaf(float x);
double trunc(double x);
float truncf(float x);
double log1p(double x);
float log1pf(float x);
long double log1pl(long double x);
double copysign(double x, double y);
float copysignf(float x, float y);
long double copysignl(long double x, long double y);
long lround(double x);
long lroundf(float x);
//long lroundl(long double x)
double round(double x);
float roundf(float x);
long double roundl(long double x);
int cf_signbit(double x);
int cf_signbitf(float x);
static int signbit(double x) { return cf_signbit(x); }
static int signbitf(float x) { return cf_signbitf(x); }
long int lrint (double flt);
long int lrintf (float flt);
float int2float (int32_t ix);
int32_t float2int (float fx);
#endif
#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
// These functions appeared in Intel C v13.
float nanf( const char* str);
double nan( const char* str);
long double nanl( const char* str);
#endif
#ifdef __cplusplus
}
#endif
#endif
#if defined( __ANDROID__ )
#define log2(X) (log(X)/log(2))
#endif
//
// stdio.h
//
#if defined( _MSC_VER )
#define snprintf sprintf_s
#endif
//
// unistd.h
//
#if defined( _MSC_VER )
EXTERN_C unsigned int sleep( unsigned int sec );
EXTERN_C int usleep( int usec );
#endif
//
// syscall.h
//
#if defined( __ANDROID__ )
// Android bionic's isn't providing SYS_sysctl wrappers.
#define SYS__sysctl __NR__sysctl
#endif
// Some tests use _malloca which defined in malloc.h.
#if !defined (__APPLE__)
#include <malloc.h>
#endif
//
// ???
//
#if defined( _MSC_VER )
#define MAXPATHLEN _MAX_PATH
EXTERN_C uint64_t ReadTime( void );
EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
/** Returns the number of leading 0-bits in x,
starting at the most significant bit position.
If x is 0, the result is undefined.
*/
EXTERN_C int __builtin_clz(unsigned int pattern);
#endif
#ifndef MIN
#define MIN(x,y) (((x)<(y))?(x):(y))
#endif
#ifndef MAX
#define MAX(x,y) (((x)>(y))?(x):(y))
#endif
/*
------------------------------------------------------------------------------------------------
WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
This is a typical usage of the macros:
double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
(taken from math_brute_force/reference_math.c). There are two problems:
1. There is an error here. On Windows in will produce incorrect result
`0x1.5555555555555p+50'. To have a correct result it should be written as
`MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
third argument is not obvious -- sometimes it should be the same as exponent of the
first argument, but sometimes not.
2. Information is duplicated. It is easy to make a mistake.
Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
------------------------------------------------------------------------------------------------
*/
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
#else
// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
#define MAKE_HEX_FLOAT(x,y,z) x
#define MAKE_HEX_DOUBLE(x,y,z) x
#define MAKE_HEX_LONG(x,y,z) x
#endif
/*
------------------------------------------------------------------------------------------------
HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
double respectively. Arguments:
sm -- sign of number,
int -- integer part of mantissa (without `0x' prefix),
fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
se -- sign of exponent,
exp -- absolute value of (binary) exponent.
Example:
double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
Note:
We have to pass signs as separate arguments because gcc pass negative integer values
(e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
`0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
literal.
------------------------------------------------------------------------------------------------
*/
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
// If compiler does not support hex floating point literals:
#define HEX_FLT( sm, int, fract, se, exp ) sm ldexpf( (float)( 0x ## int ## fract ## UL ), se exp + ilogbf( (float) 0x ## int ) - ilogbf( ( float )( 0x ## int ## fract ## UL ) ) )
#define HEX_DBL( sm, int, fract, se, exp ) sm ldexp( (double)( 0x ## int ## fract ## ULL ), se exp + ilogb( (double) 0x ## int ) - ilogb( ( double )( 0x ## int ## fract ## ULL ) ) )
#define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
#else
// If compiler supports hex floating point literals: just concatenate all the parts into a literal.
#define HEX_FLT( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
#define HEX_DBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
#define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
#endif
#if defined(__MINGW32__)
#include <Windows.h>
#define sleep(sec) Sleep((sec) * 1000)
#endif
#endif // _COMPAT_H_

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,126 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _conversions_h
#define _conversions_h
#include "compat.h"
#include "errorHelpers.h"
#include "mt19937.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#if defined(__cplusplus)
extern "C" {
#endif
/* Note: the next three all have to match in size and order!! */
enum ExplicitTypes
{
kBool = 0,
kChar,
kUChar,
kUnsignedChar,
kShort,
kUShort,
kUnsignedShort,
kInt,
kUInt,
kUnsignedInt,
kLong,
kULong,
kUnsignedLong,
kFloat,
kHalf,
kDouble,
kNumExplicitTypes
};
typedef enum ExplicitTypes ExplicitType;
enum RoundingTypes
{
kRoundToEven = 0,
kRoundToZero,
kRoundToPosInf,
kRoundToNegInf,
kRoundToNearest,
kNumRoundingTypes,
kDefaultRoundingType = kRoundToNearest
};
typedef enum RoundingTypes RoundingType;
extern void print_type_to_string(ExplicitType type, void *data, char* string);
extern size_t get_explicit_type_size( ExplicitType type );
extern const char * get_explicit_type_name( ExplicitType type );
extern void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
extern void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
extern void * create_random_data( ExplicitType type, MTdata d, size_t count );
extern cl_long read_upscale_signed( void *inRaw, ExplicitType inType );
extern cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType );
extern float read_as_float( void *inRaw, ExplicitType inType );
extern float get_random_float(float low, float high, MTdata d);
extern double get_random_double(double low, double high, MTdata d);
extern float any_float( MTdata d );
extern double any_double( MTdata d );
extern int random_in_range( int minV, int maxV, MTdata d );
size_t get_random_size_t(size_t low, size_t high, MTdata d);
// Note: though this takes a double, this is for use with single precision tests
static inline int IsFloatSubnormal( float x )
{
#if 2 == FLT_RADIX
// Do this in integer to avoid problems with FTZ behavior
union{ float d; uint32_t u;}u;
u.d = fabsf(x);
return (u.u-1) < 0x007fffffU;
#else
// rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
return fabs(x) < (double) FLT_MIN && x != 0.0;
#endif
}
static inline int IsDoubleSubnormal( double x )
{
#if 2 == FLT_RADIX
// Do this in integer to avoid problems with FTZ behavior
union{ double d; uint64_t u;}u;
u.d = fabs( x);
return (u.u-1) < 0x000fffffffffffffULL;
#else
// rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
return fabs(x) < (double) DBL_MIN && x != 0.0;
#endif
}
#if defined(__cplusplus)
}
#endif
#endif // _conversions_h

View File

@@ -0,0 +1,813 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "errorHelpers.h"
extern bool gOfflineCompiler;
const char *IGetErrorString( int clErrorCode )
{
switch( clErrorCode )
{
case CL_SUCCESS: return "CL_SUCCESS";
case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE";
case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION";
case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL";
case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE";
case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR";
case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS";
case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS";
case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT";
default: return "(unknown)";
}
}
const char *GetChannelOrderName( cl_channel_order order )
{
switch( order )
{
case CL_R: return "CL_R";
case CL_A: return "CL_A";
case CL_Rx: return "CL_Rx";
case CL_RG: return "CL_RG";
case CL_RA: return "CL_RA";
case CL_RGx: return "CL_RGx";
case CL_RGB: return "CL_RGB";
case CL_RGBx: return "CL_RGBx";
case CL_RGBA: return "CL_RGBA";
case CL_ARGB: return "CL_ARGB";
case CL_BGRA: return "CL_BGRA";
case CL_INTENSITY: return "CL_INTENSITY";
case CL_LUMINANCE: return "CL_LUMINANCE";
#if defined CL_1RGB_APPLE
case CL_1RGB_APPLE: return "CL_1RGB_APPLE";
#endif
#if defined CL_BGR1_APPLE
case CL_BGR1_APPLE: return "CL_BGR1_APPLE";
#endif
#if defined CL_ABGR_APPLE
case CL_ABGR_APPLE: return "CL_ABGR_APPLE";
#endif
case CL_DEPTH: return "CL_DEPTH";
case CL_DEPTH_STENCIL: return "CL_DEPTH_STENCIL";
case CL_sRGB: return "CL_sRGB";
case CL_sRGBA: return "CL_sRGBA";
case CL_sRGBx: return "CL_sRGBx";
case CL_sBGRA: return "CL_sBGRA";
case CL_ABGR: return "CL_ABGR";
default: return NULL;
}
}
int IsChannelOrderSupported( cl_channel_order order )
{
switch( order )
{
case CL_R:
case CL_A:
case CL_Rx:
case CL_RG:
case CL_RA:
case CL_RGx:
case CL_RGB:
case CL_RGBx:
case CL_RGBA:
case CL_ARGB:
case CL_BGRA:
case CL_INTENSITY:
case CL_LUMINANCE:
case CL_ABGR:
case CL_sRGB:
case CL_sRGBx:
case CL_sBGRA:
case CL_sRGBA:
case CL_DEPTH:
return 1;
#if defined CL_1RGB_APPLE
case CL_1RGB_APPLE:
return 1;
#endif
#if defined CL_BGR1_APPLE
case CL_BGR1_APPLE:
return 1;
#endif
default:
return 0;
}
}
const char *GetChannelTypeName( cl_channel_type type )
{
switch( type )
{
case CL_SNORM_INT8: return "CL_SNORM_INT8";
case CL_SNORM_INT16: return "CL_SNORM_INT16";
case CL_UNORM_INT8: return "CL_UNORM_INT8";
case CL_UNORM_INT16: return "CL_UNORM_INT16";
case CL_UNORM_SHORT_565: return "CL_UNORM_SHORT_565";
case CL_UNORM_SHORT_555: return "CL_UNORM_SHORT_555";
case CL_UNORM_INT_101010: return "CL_UNORM_INT_101010";
case CL_SIGNED_INT8: return "CL_SIGNED_INT8";
case CL_SIGNED_INT16: return "CL_SIGNED_INT16";
case CL_SIGNED_INT32: return "CL_SIGNED_INT32";
case CL_UNSIGNED_INT8: return "CL_UNSIGNED_INT8";
case CL_UNSIGNED_INT16: return "CL_UNSIGNED_INT16";
case CL_UNSIGNED_INT32: return "CL_UNSIGNED_INT32";
case CL_HALF_FLOAT: return "CL_HALF_FLOAT";
case CL_FLOAT: return "CL_FLOAT";
#ifdef CL_SFIXED14_APPLE
case CL_SFIXED14_APPLE: return "CL_SFIXED14_APPLE";
#endif
case CL_UNORM_INT24: return "CL_UNORM_INT24";
default: return NULL;
}
}
int IsChannelTypeSupported( cl_channel_type type )
{
switch( type )
{
case CL_SNORM_INT8:
case CL_SNORM_INT16:
case CL_UNORM_INT8:
case CL_UNORM_INT16:
case CL_UNORM_INT24:
case CL_UNORM_SHORT_565:
case CL_UNORM_SHORT_555:
case CL_UNORM_INT_101010:
case CL_SIGNED_INT8:
case CL_SIGNED_INT16:
case CL_SIGNED_INT32:
case CL_UNSIGNED_INT8:
case CL_UNSIGNED_INT16:
case CL_UNSIGNED_INT32:
case CL_HALF_FLOAT:
case CL_FLOAT:
return 1;
#ifdef CL_SFIXED14_APPLE
case CL_SFIXED14_APPLE:
return 1;
#endif
default:
return 0;
}
}
const char *GetAddressModeName( cl_addressing_mode mode )
{
switch( mode )
{
case CL_ADDRESS_NONE: return "CL_ADDRESS_NONE";
case CL_ADDRESS_CLAMP_TO_EDGE: return "CL_ADDRESS_CLAMP_TO_EDGE";
case CL_ADDRESS_CLAMP: return "CL_ADDRESS_CLAMP";
case CL_ADDRESS_REPEAT: return "CL_ADDRESS_REPEAT";
case CL_ADDRESS_MIRRORED_REPEAT: return "CL_ADDRESS_MIRRORED_REPEAT";
default: return NULL;
}
}
const char *GetDeviceTypeName( cl_device_type type )
{
switch( type )
{
case CL_DEVICE_TYPE_GPU: return "CL_DEVICE_TYPE_GPU";
case CL_DEVICE_TYPE_CPU: return "CL_DEVICE_TYPE_CPU";
case CL_DEVICE_TYPE_ACCELERATOR: return "CL_DEVICE_TYPE_ACCELERATOR";
case CL_DEVICE_TYPE_ALL: return "CL_DEVICE_TYPE_ALL";
default: return NULL;
}
}
const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer )
{
static char scratch[ 1024 ];
size_t i, j;
if( buffer == NULL )
buffer = scratch;
unsigned char *p = (unsigned char *)dataBuffer;
char *bPtr;
buffer[ 0 ] = 0;
bPtr = buffer;
for( i = 0; i < vecSize; i++ )
{
if( i > 0 )
{
bPtr[ 0 ] = ' ';
bPtr++;
}
for( j = 0; j < typeSize; j++ )
{
sprintf( bPtr, "%02x", (unsigned int)p[ typeSize - j - 1 ] );
bPtr += 2;
}
p += typeSize;
}
bPtr[ 0 ] = 0;
return buffer;
}
#ifndef MAX
#define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b))
#endif
#if defined( _MSC_VER )
#define scalbnf(_a, _i ) ldexpf( _a, _i )
#define scalbn(_a, _i ) ldexp( _a, _i )
#define scalbnl(_a, _i ) ldexpl( _a, _i )
#endif
static float Ulp_Error_Half_Float( float test, double reference );
static inline float half2float( cl_ushort half );
// taken from math tests
#define HALF_MIN_EXP -13
#define HALF_MANT_DIG 11
static float Ulp_Error_Half_Float( float test, double reference )
{
union{ double d; uint64_t u; }u; u.d = reference;
// Note: This function presumes that someone has already tested whether the result is correctly,
// rounded before calling this function. That test:
//
// if( (float) reference == test )
// return 0.0f;
//
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
// results.
double testVal = test;
if( u.u & 0x000fffffffffffffULL )
{ // Non-power of two and NaN
if( isnan( reference ) && isnan( test ) )
return 0.0f; // if we are expecting a NaN, any NaN is fine
// The unbiased exponent of the ulp unit place
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
if( isinf( reference ) )
{
if( (double) test == reference )
return 0.0f;
return (float) (testVal - reference );
}
// reference is a normal power of two or a zero
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
// Taken from vLoadHalf test
static inline float half2float( cl_ushort us )
{
uint32_t u = us;
uint32_t sign = (u << 16) & 0x80000000;
int32_t exponent = (u & 0x7c00) >> 10;
uint32_t mantissa = (u & 0x03ff) << 13;
union{ unsigned int u; float f;}uu;
if( exponent == 0 )
{
if( mantissa == 0 )
return sign ? -0.0f : 0.0f;
int shift = __builtin_clz( mantissa ) - 8;
exponent -= shift-1;
mantissa <<= shift;
mantissa &= 0x007fffff;
}
else
if( exponent == 31)
{
uu.u = mantissa | sign;
if( mantissa )
uu.u |= 0x7fc00000;
else
uu.u |= 0x7f800000;
return uu.f;
}
exponent += 127 - 15;
exponent <<= 23;
exponent |= mantissa;
uu.u = exponent | sign;
return uu.f;
}
float Ulp_Error_Half( cl_ushort test, float reference )
{
return Ulp_Error_Half_Float( half2float(test), reference );
}
float Ulp_Error( float test, double reference )
{
union{ double d; uint64_t u; }u; u.d = reference;
double testVal = test;
// Note: This function presumes that someone has already tested whether the result is correctly,
// rounded before calling this function. That test:
//
// if( (float) reference == test )
// return 0.0f;
//
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
// results.
if( isinf( reference ) )
{
if( testVal == reference )
return 0.0f;
return (float) (testVal - reference );
}
if( isinf( testVal) )
{ // infinite test value, but finite (but possibly overflowing in float) reference.
//
// The function probably overflowed prematurely here. Formally, the spec says this is
// an infinite ulp error and should not be tolerated. Unfortunately, this would mean
// that the internal precision of some half_pow implementations would have to be 29+ bits
// at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
// is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which
// after rounding to single is 4*32 = 128, which will ultimately result in premature
// overflow, even though a good faith representation would be correct to within 2**-29
// interally.
// In the interest of not requiring the implementation go to extraordinary lengths to
// deliver a half precision function, we allow premature overflow within the limit
// of the allowed ulp error. Towards, that end, we "pretend" the test value is actually
// 2**128, the next value that would appear in the number line if float had sufficient range.
testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal );
// Note that the same hack may not work in long double, which is not guaranteed to have
// more range than double. It is not clear that premature overflow should be tolerated for
// double.
}
if( u.u & 0x000fffffffffffffULL )
{ // Non-power of two and NaN
if( isnan( reference ) && isnan( test ) )
return 0.0f; // if we are expecting a NaN, any NaN is fine
// The unbiased exponent of the ulp unit place
int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
float Ulp_Error_Double( double test, long double reference )
{
// Deal with long double = double
// On most systems long double is a higher precision type than double. They provide either
// a 80-bit or greater floating point type, or they provide a head-tail double double format.
// That is sufficient to represent the accuracy of a floating point result to many more bits
// than double and we can calculate sub-ulp errors. This is the standard system for which this
// test suite is designed.
//
// On some systems double and long double are the same thing. Then we run into a problem,
// because our representation of the infinitely precise result (passed in as reference above)
// can be off by as much as a half double precision ulp itself. In this case, we inflate the
// reported error by half an ulp to take this into account. A more correct and permanent fix
// would be to undertake refactoring the reference code to return results in this format:
//
// typedef struct DoubleReference
// { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult) (infinitely precise)
// double correctlyRoundedResult; // as best we can
// double ulps; // plus a fractional amount to account for the difference
// }DoubleReference; // between infinitely precise result and correctlyRoundedResult, in units of ulps.
//
// This would provide a useful higher-than-double precision format for everyone that we can use,
// and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
// that use a head to tail double double for long double.
// Note: This function presumes that someone has already tested whether the result is correctly,
// rounded before calling this function. That test:
//
// if( (float) reference == test )
// return 0.0f;
//
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
// results.
int x;
long double testVal = test;
if( 0.5L != frexpl( reference, &x) )
{ // Non-power of two and NaN
if( isinf( reference ) )
{
if( testVal == reference )
return 0.0f;
return (float) ( testVal - reference );
}
if( isnan( reference ) && isnan( test ) )
return 0.0f; // if we are expecting a NaN, any NaN is fine
// The unbiased exponent of the ulp unit place
int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
// Scale the exponent of the error
float result = (float) scalbnl( testVal - reference, ulp_exp );
// account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
if( sizeof(long double) == sizeof( double ) )
result += copysignf( 0.5f, result);
return result;
}
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
// Scale the exponent of the error
float result = (float) scalbnl( testVal - reference, ulp_exp );
// account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
if( sizeof(long double) == sizeof( double ) )
result += copysignf( 0.5f, result);
return result;
}
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
{
int error;
size_t size_ret;
// Does the program object exist?
if (program != NULL) {
// Was the number of devices given
if (num_devices == 0) {
// If zero devices were specified then allocate and query the device list from the context
cl_context context;
error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL);
test_error( error, "Unable to query program's context" );
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret);
test_error( error, "Unable to query context's device size" );
num_devices = size_ret / sizeof(cl_device_id);
device_list = (cl_device_id *) malloc(size_ret);
if (device_list == NULL) {
print_error( error, "malloc failed" );
return CL_OUT_OF_HOST_MEMORY;
}
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, device_list, NULL);
test_error( error, "Unable to query context's devices" );
}
// For each device in the device_list
unsigned int i;
for (i = 0; i < num_devices; i++) {
// Get the build status
cl_build_status build_status;
error = clGetProgramBuildInfo(program,
device_list[i],
CL_PROGRAM_BUILD_STATUS,
sizeof(build_status),
&build_status,
&size_ret);
test_error( error, "Unable to query build status" );
// If the build failed then log the status, and allocate the build log, log it and free it
if (build_status != CL_BUILD_SUCCESS) {
log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);
error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
test_error( error, "Unable to query build log size" );
char *build_log = (char *) malloc(size_ret);
error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret);
test_error( error, "Unable to query build log" );
log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
free(build_log);
}
}
// Was the number of devices given
if (num_devices == 0) {
// If zero devices were specified then free the device list
free(device_list);
}
}
return CL_SUCCESS;
}
const char * subtests_requiring_opencl_1_2[] = {
"device_partition_equally",
"device_partition_by_counts",
"device_partition_by_affinity_domain_numa",
"device_partition_by_affinity_domain_l4_cache",
"device_partition_by_affinity_domain_l3_cache",
"device_partition_by_affinity_domain_l2_cache",
"device_partition_by_affinity_domain_l1_cache",
"device_partition_by_affinity_domain_next_partitionable",
"device_partition_all",
"buffer_fill_int",
"buffer_fill_uint",
"buffer_fill_short",
"buffer_fill_ushort",
"buffer_fill_char",
"buffer_fill_uchar",
"buffer_fill_long",
"buffer_fill_ulong",
"buffer_fill_float",
"buffer_fill_struct",
"test_mem_host_write_only_buffer",
"test_mem_host_write_only_subbuffer",
"test_mem_host_no_access_buffer",
"test_mem_host_no_access_subbuffer",
"test_mem_host_read_only_image",
"test_mem_host_write_only_image",
"test_mem_host_no_access_image",
// CL_MEM_HOST_{READ|WRITE}_ONLY api/
"get_buffer_info",
"get_image1d_info",
"get_image1d_array_info",
"get_image2d_array_info",
// gl/
"images_read_1D",
"images_write_1D",
"images_1D_getinfo",
"images_read_1Darray",
"images_write_1Darray",
"images_1Darray_getinfo",
"images_read_2Darray",
"images_write_2Darray",
"images_2Darray_getinfo",
"buffer_migrate",
"image_migrate",
// compiler/
"load_program_source",
"load_multistring_source",
"load_two_kernel_source",
"load_null_terminated_source",
"load_null_terminated_multi_line_source",
"load_null_terminated_partial_multi_line_source",
"load_discreet_length_source",
"get_program_source",
"get_program_build_info",
"get_program_info",
"large_compile",
"async_build",
"options_build_optimizations",
"options_build_macro",
"options_build_macro_existence",
"options_include_directory",
"options_denorm_cache",
"preprocessor_define_udef",
"preprocessor_include",
"preprocessor_line_error",
"preprocessor_pragma",
"compiler_defines_for_extensions",
"image_macro",
"simple_compile_only",
"simple_static_compile_only",
"simple_extern_compile_only",
"simple_compile_with_callback",
"simple_embedded_header_compile",
"simple_link_only",
"two_file_regular_variable_access",
"two_file_regular_struct_access",
"two_file_regular_function_access",
"simple_link_with_callback",
"simple_embedded_header_link",
"execute_after_simple_compile_and_link",
"execute_after_simple_compile_and_link_no_device_info",
"execute_after_simple_compile_and_link_with_defines",
"execute_after_simple_compile_and_link_with_callbacks",
"execute_after_simple_library_with_link",
"execute_after_two_file_link",
"execute_after_two_file_link",
"execute_after_embedded_header_link",
"execute_after_included_header_link",
"execute_after_serialize_reload_object",
"execute_after_serialize_reload_library",
"simple_library_only",
"simple_library_with_callback",
"simple_library_with_link",
"two_file_link",
"multi_file_libraries",
"multiple_files",
"multiple_libraries",
"multiple_files_multiple_libraries",
"multiple_embedded_headers",
"program_binary_type",
"compile_and_link_status_options_log",
// CL_PROGRAM_NUM_KERNELS, in api/
"get_kernel_arg_info",
"create_kernels_in_program",
// clEnqueue..WithWaitList, in events/
"event_enqueue_marker_with_event_list",
"event_enqueue_barrier_with_event_list",
"popcount"
};
const char * subtests_to_skip_with_offline_compiler[] = {
"get_kernel_arg_info",
"binary_create",
"load_program_source",
"load_multistring_source",
"load_two_kernel_source",
"load_null_terminated_source",
"load_null_terminated_multi_line_source",
"load_null_terminated_partial_multi_line_source",
"load_discreet_length_source",
"get_program_source",
"get_program_build_info",
"options_build_optimizations",
"options_build_macro",
"options_build_macro_existence",
"options_include_directory",
"options_denorm_cache",
"preprocessor_define_udef",
"preprocessor_include",
"preprocessor_line_error",
"preprocessor_pragma",
"compiler_defines_for_extensions",
"image_macro",
"simple_extern_compile_only",
"simple_embedded_header_compile",
"two_file_regular_variable_access",
"two_file_regular_struct_access",
"two_file_regular_function_access",
"simple_embedded_header_link",
"execute_after_simple_compile_and_link_with_defines",
"execute_after_simple_compile_and_link_with_callbacks",
"execute_after_embedded_header_link",
"execute_after_included_header_link",
"multi_file_libraries",
"multiple_files",
"multiple_libraries",
"multiple_files_multiple_libraries",
"multiple_embedded_headers",
"program_binary_type",
"compile_and_link_status_options_log",
};
int check_opencl_version_with_testname(const char *subtestname, cl_device_id device)
{
int nRequiring12 = sizeof(subtests_requiring_opencl_1_2)/sizeof(char *);
size_t i;
for(i=0; i < nRequiring12; ++i) {
if(!strcmp(subtestname, subtests_requiring_opencl_1_2[i])) {
return check_opencl_version(device, 1, 2);
}
}
return 0;
}
int check_opencl_version(cl_device_id device, cl_uint requestedMajorVersion, cl_uint requestedMinorVersion) {
int error;
char device_version[1024];
cl_uint majorVersion = 0, minorVersion = 0;
const char * required_version_ocl_12="OpenCL 1.2 ";
memset( device_version, 0, sizeof( device_version ) );
error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL );
test_error(error, "unable to get CL_DEVICE_VERSION");
if ( strncmp( device_version, "OpenCL 1.2", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) {
majorVersion = 1;
minorVersion = 2;
} else if ( strncmp( device_version, "OpenCL 1.1", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) {
majorVersion = 1;
minorVersion = 1;
} else if ( strncmp( device_version, "OpenCL 2.0", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) {
majorVersion = 2;
minorVersion = 0;
} else if ( strncmp( device_version, "OpenCL 2.1", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) {
majorVersion = 2;
minorVersion = 1;
} else {
log_error( "ERROR: Unexpected version string: `%s'.\n", device_version );
return 1;
};
if (majorVersion >= requestedMajorVersion)
return 0;
if (minorVersion >= requestedMinorVersion)
return 0;
return 1;
}
int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device)
{
if(gOfflineCompiler)
{
int nNotRequiredWithOfflineCompiler = sizeof(subtests_to_skip_with_offline_compiler)/sizeof(char *);
size_t i;
for(i=0; i < nNotRequiredWithOfflineCompiler; ++i) {
if(!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i])) {
return 1;
}
}
}
return 0;
}

View File

@@ -0,0 +1,164 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _errorHelpers_h
#define _errorHelpers_h
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
#define LOWER_IS_BETTER 0
#define HIGHER_IS_BETTER 1
// If USE_ATF is defined, all log_error and log_info calls can be routed to test library
// functions as described below. This is helpful for integration into an automated testing
// system.
#if USE_ATF
// export BUILD_WITH_ATF=1
#include <ATF/ATF.h>
#define test_start() ATFTestStart()
#define log_info ATFLogInfo
#define log_error ATFLogError
#define log_missing_feature ATFLogMissingFeature
#define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__)
#define test_finish() ATFTestFinish()
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__)
#define vlog ATFLogInfo
#define vlog_error ATFLogError
#else
#include <stdio.h>
#define test_start()
#define log_info printf
#define log_error printf
#define log_missing_feature printf
#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
_higherBetter?"higher is better":"lower is better", _number )
#define test_finish()
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
_higherBetter?"higher is better":"lower is better" , _number)
#ifdef _WIN32
#ifdef __MINGW32__
// Use __mingw_printf since it supports "%a" format specifier
#define vlog __mingw_printf
#define vlog_error __mingw_printf
#else
// Use home-baked function that treats "%a" as "%f"
static int vlog_win32(const char *format, ...);
#define vlog vlog_win32
#define vlog_error vlog_win32
#endif
#else
#define vlog_error printf
#define vlog printf
#endif
#endif
#define ct_assert(b) ct_assert_i(b, __LINE__)
#define ct_assert_i(b, line) ct_assert_ii(b, line)
#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode)
#define test_error_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
#define test_missing_feature(errCode, msg) test_missing_feature_ret(errCode, msg, errCode)
// this macro should always return CL_SUCCESS, but print the missing feature message
#define test_missing_feature_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_missing_feature( errCode, msg ); return CL_SUCCESS ; } }
#define print_missing_feature(errCode, msg) log_missing_feature("ERROR: Subtest %s tests a feature not supported by the device version! (from %s:%d)\n", msg, __FILE__, __LINE__ );
#define test_missing_support_offline_cmpiler(errCode, msg) test_missing_support_offline_cmpiler_ret(errCode, msg, errCode)
// this macro should always return CL_SUCCESS, but print the skip message on test not supported with offline compiler
#define test_missing_support_offline_cmpiler_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { log_info( "INFO: Subtest %s tests is not supported in offline compiler execution path! (from %s:%d)\n", msg, __FILE__, __LINE__ ); return CL_SUCCESS ; } }
// expected error code vs. what we got
#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
extern const char *IGetErrorString( int clErrorCode );
extern float Ulp_Error_Half( cl_ushort test, float reference );
extern float Ulp_Error( float test, double reference );
extern float Ulp_Error_Double( double test, long double reference );
extern const char *GetChannelTypeName( cl_channel_type type );
extern int IsChannelTypeSupported( cl_channel_type type );
extern const char *GetChannelOrderName( cl_channel_order order );
extern int IsChannelOrderSupported( cl_channel_order order );
extern const char *GetAddressModeName( cl_addressing_mode mode );
extern const char *GetDeviceTypeName( cl_device_type type );
int check_opencl_version_with_testname(const char *subtestname, cl_device_id device);
int check_opencl_version(cl_device_id device, cl_uint requestedMajorVersion, cl_uint requestedMinorVersion);
int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device);
// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
#if defined (_WIN32) && !defined(__MINGW32__)
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
static int vlog_win32(const char *format, ...)
{
const char *new_format = format;
if (strstr(format, "%a")) {
char *temp;
if ((temp = strdup(format)) == NULL) {
printf("vlog_win32: Failed to allocate memory for strdup\n");
return -1;
}
new_format = temp;
while (*temp) {
// replace %a with %f
if ((*temp == '%') && (*(temp+1) == 'a')) {
*(temp+1) = 'f';
}
temp++;
}
}
va_list args;
va_start(args, format);
vprintf(new_format, args);
va_end(args);
if (new_format != format) {
free((void*)new_format);
}
return 0;
}
#endif
#ifdef __cplusplus
}
#endif
#endif // _errorHelpers_h

View File

@@ -0,0 +1,104 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _fpcontrol_h
#define _fpcontrol_h
// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations
// in integer code, and have found this is the only way to correctly verify operation.
//
// Non-Apple implementations will need to provide their own implentation for these features. If the reference hardware and device are both
// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
typedef int FPU_mode_type;
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
#include <xmmintrin.h>
#elif defined( __PPC__ )
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#endif
// Set the reference hardware floating point unit to FTZ mode
static inline void ForceFTZ( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
*mode = _mm_getcsr();
_mm_setcsr( *mode | 0x8040);
#elif defined( __PPC__ )
*mode = fpu_control;
fpu_control |= _FPU_MASK_NI;
#elif defined ( __arm__ )
unsigned fpscr;
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
// Add 64 bit support
#elif defined (__aarch64__)
unsigned fpscr;
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24)));
#else
#error ForceFTZ needs an implentation
#endif
}
// Disable the denorm flush to zero
static inline void DisableFTZ( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
*mode = _mm_getcsr();
_mm_setcsr( *mode & ~0x8040);
#elif defined( __PPC__ )
*mode = fpu_control;
fpu_control &= ~_FPU_MASK_NI;
#elif defined ( __arm__ )
unsigned fpscr;
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
// Add 64 bit support
#elif defined (__aarch64__)
unsigned fpscr;
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24)));
#else
#error DisableFTZ needs an implentation
#endif
}
// Restore the reference hardware to floating point state indicated by *mode
static inline void RestoreFPState( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
_mm_setcsr( *mode );
#elif defined( __PPC__)
fpu_control = *mode;
#elif defined (__arm__)
__asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
// Add 64 bit support
#elif defined (__aarch64__)
__asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
#else
#error RestoreFPState needs an implementation
#endif
}
#else
#error ForceFTZ and RestoreFPState need implentations
#endif
#endif

View File

@@ -0,0 +1,53 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "genericThread.h"
#if defined(_WIN32)
#include <windows.h>
#else // !_WIN32
#include <pthread.h>
#endif
void * genericThread::IStaticReflector( void * data )
{
genericThread *t = (genericThread *)data;
return t->IRun();
}
bool genericThread::Start( void )
{
#if defined(_WIN32)
mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL );
return ( mHandle != NULL );
#else // !_WIN32
int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this );
return ( error == 0 );
#endif // !_WIN32
}
void * genericThread::Join( void )
{
#if defined(_WIN32)
WaitForSingleObject( (HANDLE)mHandle, INFINITE );
return NULL;
#else // !_WIN32
void * retVal;
int error = pthread_join( (pthread_t)mHandle, &retVal );
if( error != 0 )
retVal = NULL;
return retVal;
#endif // !_WIN32
}

View File

@@ -0,0 +1,42 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _genericThread_h
#define _genericThread_h
#include <stdio.h>
class genericThread
{
public:
virtual ~genericThread() {}
bool Start( void );
void * Join( void );
protected:
virtual void * IRun( void ) = 0;
private:
void* mHandle;
static void * IStaticReflector( void * data );
};
#endif // _genericThread_h

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,646 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _imageHelpers_h
#define _imageHelpers_h
#include "compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include <time.h>
#include "errorHelpers.h"
#include "conversions.h"
#include "typeWrappers.h"
#include "kernelHelpers.h"
#include "errorHelpers.h"
#include "mt19937.h"
#include "rounding_mode.h"
#include "clImageHelper.h"
extern int gTestCount;
extern int gTestFailure;
extern cl_device_type gDeviceType;
// Number of iterations per image format to test if not testing max images, rounding, or small images
#define NUM_IMAGE_ITERATIONS 3
// Definition for our own sampler type, to mirror the cl_sampler internals
#define MAX_sRGB_TO_lRGB_CONVERSION_ERROR 0.5
#define MAX_lRGB_TO_sRGB_CONVERSION_ERROR 0.6
// Definition for our own sampler type, to mirror the cl_sampler internals
typedef struct {
cl_addressing_mode addressing_mode;
cl_filter_mode filter_mode;
bool normalized_coords;
} image_sampler_data;
int round_to_even( float v );
#define NORMALIZE( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max : round_to_even( v * max ) ) )
#define NORMALIZE_UNROUNDED( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max : v * max ) )
#define NORMALIZE_SIGNED( v, min, max ) ( v < -1.0f ? min : ( v > 1.f ? max : round_to_even( v * max ) ) )
#define NORMALIZE_SIGNED_UNROUNDED( v, min, max ) ( v < -1.0f ? min : ( v > 1.f ? max : v * max ) )
#define CONVERT_INT( v, min, max, max_val) ( v < min ? min : ( v > max ? max_val : round_to_even( v ) ) )
#define CONVERT_UINT( v, max, max_val) ( v < 0 ? 0 : ( v > max ? max_val : round_to_even( v ) ) )
extern void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err = false, int t = 0 );
extern void print_write_header( cl_image_format *format, bool err);
extern void print_header( cl_image_format *format, bool err );
extern bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind );
extern bool check_minimum_supported( cl_image_format *formatList, unsigned int numFormats, cl_mem_flags flags );
extern size_t get_format_type_size( const cl_image_format *format );
extern size_t get_channel_data_type_size( cl_channel_type channelType );
extern size_t get_format_channel_count( const cl_image_format *format );
extern size_t get_channel_order_channel_count( cl_channel_order order );
cl_channel_type get_channel_type_from_name( const char *name );
cl_channel_order get_channel_order_from_name( const char *name );
extern int is_format_signed( const cl_image_format *format );
extern size_t get_pixel_size( cl_image_format *format );
/* Helper to get any ol image format as long as it is 8-bits-per-channel */
extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
/* Helper to get any ol image format as long as it is 32-bits-per-channel */
extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
int random_in_range( int minV, int maxV, MTdata d );
int random_log_in_range( int minV, int maxV, MTdata d );
typedef struct
{
size_t width;
size_t height;
size_t depth;
size_t rowPitch;
size_t slicePitch;
size_t arraySize;
cl_image_format *format;
cl_mem buffer;
cl_mem_object_type type;
cl_uint num_mip_levels;
} image_descriptor;
typedef struct
{
float p[4];
}FloatPixel;
void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
const cl_ulong maxIndividualAllocSize, const cl_ulong maxTotalAllocSize, cl_mem_object_type image_type, cl_image_format *format, int usingMaxPixelSize=0);
extern size_t get_format_max_int( cl_image_format *format );
extern cl_ulong get_image_size( image_descriptor const *imageInfo );
extern cl_ulong get_image_size_mb( image_descriptor const *imageInfo );
extern char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr<char> &Owner, MTdata d );
extern int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo,
void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ, size_t lod = 0 );
extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
unsigned int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
float *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues,
const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] );
int has_alpha(cl_image_format *format);
extern bool alpha_is_x(cl_image_format *format);
extern bool is_sRGBA_order(cl_channel_order image_channel_order);
inline float calculate_array_index( float coord, float extent );
cl_uint compute_max_mip_levels( size_t width, size_t height, size_t depth);
cl_ulong compute_mipmapped_image_size( image_descriptor imageInfo);
size_t compute_mip_level_offset( image_descriptor * imageInfo , size_t lod);
template <class T> void read_image_pixel( void *imageData, image_descriptor *imageInfo,
int x, int y, int z, T *outData, int lod )
{
float convert_half_to_float( unsigned short halfValue );
size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth, slice_pitch_lod = 0/*imageInfo->slicePitch*/ , row_pitch_lod = 0/*imageInfo->rowPitch*/;
width_lod = ( imageInfo->width >> lod) ?( imageInfo->width >> lod):1;
if ( imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
height_lod = ( imageInfo->height >> lod) ?( imageInfo->height >> lod):1;
if(imageInfo->type == CL_MEM_OBJECT_IMAGE3D)
depth_lod = ( imageInfo->depth >> lod) ? ( imageInfo->depth >> lod) : 1;
row_pitch_lod = (imageInfo->num_mip_levels > 0)? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch;
slice_pitch_lod = (imageInfo->num_mip_levels > 0)? (row_pitch_lod * height_lod): imageInfo->slicePitch;
// correct depth_lod and height_lod for array image types in order to avoid
// return
if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY && height_lod == 1 && depth_lod == 1) {
depth_lod = 0;
height_lod = 0;
}
if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && depth_lod == 1) {
depth_lod = 0;
}
if ( x < 0 || x >= (int)width_lod
|| ( height_lod != 0 && ( y < 0 || y >= (int)height_lod ) )
|| ( depth_lod != 0 && ( z < 0 || z >= (int)depth_lod ) )
|| ( imageInfo->arraySize != 0 && ( z < 0 || z >= (int)imageInfo->arraySize ) ) )
{
// Border color
if (imageInfo->format->image_channel_order == CL_DEPTH)
{
outData[ 0 ] = 1;
}
else {
outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
if (!has_alpha(imageInfo->format))
outData[3] = 1;
}
return;
}
cl_image_format *format = imageInfo->format;
unsigned int i;
T tempData[ 4 ];
// Advance to the right spot
char *ptr = (char *)imageData;
size_t pixelSize = get_pixel_size( format );
ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
// OpenCL only supports reading floats from certain formats
switch( format->image_channel_data_type )
{
case CL_SNORM_INT8:
{
cl_char *dPtr = (cl_char *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ];
break;
}
case CL_UNORM_INT8:
{
cl_uchar *dPtr = (cl_uchar *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ];
break;
}
case CL_SIGNED_INT8:
{
cl_char *dPtr = (cl_char *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ];
break;
}
case CL_UNSIGNED_INT8:
{
cl_uchar *dPtr = (cl_uchar*)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ];
break;
}
case CL_SNORM_INT16:
{
cl_short *dPtr = (cl_short *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ];
break;
}
case CL_UNORM_INT16:
{
cl_ushort *dPtr = (cl_ushort *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ];
break;
}
case CL_SIGNED_INT16:
{
cl_short *dPtr = (cl_short *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ];
break;
}
case CL_UNSIGNED_INT16:
{
cl_ushort *dPtr = (cl_ushort *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ];
break;
}
case CL_HALF_FLOAT:
{
cl_ushort *dPtr = (cl_ushort *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)convert_half_to_float( dPtr[ i ] );
break;
}
case CL_SIGNED_INT32:
{
cl_int *dPtr = (cl_int *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ];
break;
}
case CL_UNSIGNED_INT32:
{
cl_uint *dPtr = (cl_uint *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ];
break;
}
case CL_UNORM_SHORT_565:
{
cl_ushort *dPtr = (cl_ushort*)ptr;
tempData[ 0 ] = (T)( dPtr[ 0 ] >> 11 );
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
break;
}
#ifdef OBSOLETE_FORMAT
case CL_UNORM_SHORT_565_REV:
{
unsigned short *dPtr = (unsigned short *)ptr;
tempData[ 2 ] = (T)( dPtr[ 0 ] >> 11 );
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
break;
}
case CL_UNORM_SHORT_555_REV:
{
unsigned short *dPtr = (unsigned short *)ptr;
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
break;
}
case CL_UNORM_INT_8888:
{
unsigned int *dPtr = (unsigned int *)ptr;
tempData[ 3 ] = (T)( dPtr[ 0 ] >> 24 );
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
tempData[ 0 ] = (T)( dPtr[ 0 ] & 0xff );
break;
}
case CL_UNORM_INT_8888_REV:
{
unsigned int *dPtr = (unsigned int *)ptr;
tempData[ 0 ] = (T)( dPtr[ 0 ] >> 24 );
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
tempData[ 3 ] = (T)( dPtr[ 0 ] & 0xff );
break;
}
case CL_UNORM_INT_101010_REV:
{
unsigned int *dPtr = (unsigned int *)ptr;
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
tempData[ 0 ] = (T)( dPtr[ 0 ] & 0x3ff );
break;
}
#endif
case CL_UNORM_SHORT_555:
{
cl_ushort *dPtr = (cl_ushort *)ptr;
tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
break;
}
case CL_UNORM_INT_101010:
{
cl_uint *dPtr = (cl_uint *)ptr;
tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
tempData[ 2 ] = (T)( dPtr[ 0 ] & 0x3ff );
break;
}
case CL_FLOAT:
{
cl_float *dPtr = (cl_float *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ];
break;
}
#ifdef CL_SFIXED14_APPLE
case CL_SFIXED14_APPLE:
{
cl_float *dPtr = (cl_float *)ptr;
for( i = 0; i < get_format_channel_count( format ); i++ )
tempData[ i ] = (T)dPtr[ i ] + 0x4000;
break;
}
#endif
}
outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
outData[ 3 ] = 1;
if( format->image_channel_order == CL_A )
{
outData[ 3 ] = tempData[ 0 ];
}
else if( format->image_channel_order == CL_R )
{
outData[ 0 ] = tempData[ 0 ];
}
else if( format->image_channel_order == CL_Rx )
{
outData[ 0 ] = tempData[ 0 ];
}
else if( format->image_channel_order == CL_RA )
{
outData[ 0 ] = tempData[ 0 ];
outData[ 3 ] = tempData[ 1 ];
}
else if( format->image_channel_order == CL_RG )
{
outData[ 0 ] = tempData[ 0 ];
outData[ 1 ] = tempData[ 1 ];
}
else if( format->image_channel_order == CL_RGx )
{
outData[ 0 ] = tempData[ 0 ];
outData[ 1 ] = tempData[ 1 ];
}
else if(( format->image_channel_order == CL_RGB ) || ( format->image_channel_order == CL_sRGB ))
{
outData[ 0 ] = tempData[ 0 ];
outData[ 1 ] = tempData[ 1 ];
outData[ 2 ] = tempData[ 2 ];
}
else if(( format->image_channel_order == CL_RGBx ) || ( format->image_channel_order == CL_sRGBx ))
{
outData[ 0 ] = tempData[ 0 ];
outData[ 1 ] = tempData[ 1 ];
outData[ 2 ] = tempData[ 2 ];
outData[ 3 ] = 0;
}
else if(( format->image_channel_order == CL_RGBA ) || ( format->image_channel_order == CL_sRGBA ))
{
outData[ 0 ] = tempData[ 0 ];
outData[ 1 ] = tempData[ 1 ];
outData[ 2 ] = tempData[ 2 ];
outData[ 3 ] = tempData[ 3 ];
}
else if( format->image_channel_order == CL_ARGB )
{
outData[ 0 ] = tempData[ 1 ];
outData[ 1 ] = tempData[ 2 ];
outData[ 2 ] = tempData[ 3 ];
outData[ 3 ] = tempData[ 0 ];
}
else if(( format->image_channel_order == CL_BGRA ) || ( format->image_channel_order == CL_sBGRA ))
{
outData[ 0 ] = tempData[ 2 ];
outData[ 1 ] = tempData[ 1 ];
outData[ 2 ] = tempData[ 0 ];
outData[ 3 ] = tempData[ 3 ];
}
else if( format->image_channel_order == CL_INTENSITY )
{
outData[ 1 ] = tempData[ 0 ];
outData[ 2 ] = tempData[ 0 ];
outData[ 3 ] = tempData[ 0 ];
}
else if( format->image_channel_order == CL_LUMINANCE )
{
outData[ 1 ] = tempData[ 0 ];
outData[ 2 ] = tempData[ 0 ];
}
else if( format->image_channel_order == CL_DEPTH )
{
outData[ 0 ] = tempData[ 0 ];
}
#ifdef CL_1RGB_APPLE
else if( format->image_channel_order == CL_1RGB_APPLE )
{
outData[ 0 ] = tempData[ 1 ];
outData[ 1 ] = tempData[ 2 ];
outData[ 2 ] = tempData[ 3 ];
outData[ 3 ] = 0xff;
}
#endif
#ifdef CL_BGR1_APPLE
else if( format->image_channel_order == CL_BGR1_APPLE )
{
outData[ 0 ] = tempData[ 2 ];
outData[ 1 ] = tempData[ 1 ];
outData[ 2 ] = tempData[ 0 ];
outData[ 3 ] = 0xff;
}
#endif
else
{
log_error("Invalid format:");
print_header(format, true);
}
}
template <class T> void read_image_pixel( void *imageData, image_descriptor *imageInfo,
int x, int y, int z, T *outData )
{
read_image_pixel<T>( imageData, imageInfo, x, y, z, outData, 0);
}
// Stupid template rules
bool get_integer_coords( float x, float y, float z,
size_t width, size_t height, size_t depth,
image_sampler_data *imageSampler, image_descriptor *imageInfo,
int &outX, int &outY, int &outZ );
bool get_integer_coords_offset( float x, float y, float z,
float xAddressOffset, float yAddressOffset, float zAddressOffset,
size_t width, size_t height, size_t depth,
image_sampler_data *imageSampler, image_descriptor *imageInfo,
int &outX, int &outY, int &outZ );
template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
image_sampler_data *imageSampler, T *outData, int lod )
{
int iX = 0, iY = 0, iZ = 0;
float max_w = imageInfo->width;
float max_h;
float max_d;
switch (imageInfo->type) {
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
max_h = imageInfo->arraySize;
max_d = 0;
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
max_h = imageInfo->height;
max_d = imageInfo->arraySize;
break;
default:
max_h = imageInfo->height;
max_d = imageInfo->depth;
break;
}
if( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
{
switch (imageInfo->type) {
case CL_MEM_OBJECT_IMAGE3D:
max_d = (float)((imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1);
case CL_MEM_OBJECT_IMAGE2D:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
max_h = (float)((imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1);
break;
default:
;
}
max_w = (float)((imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1);
}
get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, max_w, max_h, max_d, imageSampler, imageInfo, iX, iY, iZ );
read_image_pixel<T>( imageData, imageInfo, iX, iY, iZ, outData, lod );
}
template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
image_sampler_data *imageSampler, T *outData)
{
sample_image_pixel_offset<T>( imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset, zAddressOffset,
imageSampler, outData, 0);
}
template <class T> void sample_image_pixel( void *imageData, image_descriptor *imageInfo,
float x, float y, float z, image_sampler_data *imageSampler, T *outData )
{
return sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData);
}
FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms );
FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms );
FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
extern void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData );
extern void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData );
extern void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData );
extern void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results, float *errors );
extern char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d, bool image2DFromBuffer = false );
// deprecated
//extern bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue );
extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
extern int issubnormal(float);
#define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )
static inline cl_uint abs_diff_uint( cl_uint x, cl_uint y )
{
return y > x ? y - x : x - y;
}
static inline cl_uint abs_diff_int( cl_int x, cl_int y )
{
return (cl_uint) (y > x ? y - x : x - y);
}
static inline cl_float relative_error( float test, float expected )
{
// 0-0/0 is 0 in this case, not NaN
if( test == 0.0f && expected == 0.0f )
return 0.0f;
return (test - expected) / expected;
}
extern float random_float(float low, float high);
class CoordWalker
{
public:
CoordWalker( void * coords, bool useFloats, size_t vecSize );
~CoordWalker();
cl_float Get( size_t idx, size_t el );
protected:
cl_float * mFloatCoords;
cl_int * mIntCoords;
size_t mVecSize;
};
extern int DetectFloatToHalfRoundingMode( cl_command_queue ); // Returns CL_SUCCESS on success
int inline is_half_nan( cl_ushort half ){ return (half & 0x7fff) > 0x7c00; }
cl_ushort convert_float_to_half( cl_float f );
cl_float convert_half_to_float( cl_ushort h );
extern double sRGBmap(float fc);
#endif // _imageHelpers_h

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,133 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _kernelHelpers_h
#define _kernelHelpers_h
#include "compat.h"
#include <stdio.h>
#include <stdlib.h>
#if defined (__MINGW32__)
#include <malloc.h>
#endif
#include <string.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
/*
* The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel:
*
* const char *source = {
* INIT_OPENCL_DEBUG_INFO
* "__kernel void foo( int x )\n"
* "{\n"
* " ...\n"
* "}\n"
* };
*/
#define INIT_OPENCL_DEBUG_INFO SET_OPENCL_LINE_INFO( __LINE__, __FILE__ )
#define SET_OPENCL_LINE_INFO(_line, _file) "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
#ifndef STRINGIFY_VALUE
#define STRINGIFY_VALUE(_x) STRINGIFY(_x)
#endif
#ifndef STRINGIFY
#define STRINGIFY(_x) #_x
#endif
const int MAX_LEN_FOR_KERNEL_LIST = 20;
/* Helper that creates a single program and kernel from a single-kernel program source */
extern int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, const char *buildOptions=NULL );
extern int create_single_kernel_helper_with_build_options( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines,
const char **kernelProgram, const char *kernelName, const char *buildOptions );
extern int create_single_kernel_helper_create_program(cl_context context, cl_program *outProgram, unsigned int numKernelLines, const char **kernelProgram, const char *buildOptions = NULL);
/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize );
/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
/* Helper to get major/minor number for a device */
extern int get_device_version( cl_device_id id, size_t* major, size_t* minor);
/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
/* Helper to determine if an extension is supported by a device */
extern int is_extension_available( cl_device_id device, const char *extensionName );
/* Helper to determine if a device supports an image format */
extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt );
/* Helper to get pixel size for a pixel format */
size_t get_pixel_bytes( const cl_image_format *fmt );
/* Verify the given device supports images. 0 means you're good to go, otherwise an error */
extern int verifyImageSupport( cl_device_id device );
/* Checks that the given device supports images. Same as verify, but doesn't print an error */
extern int checkForImageSupport( cl_device_id device );
extern int checkFor3DImageSupport( cl_device_id device );
/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */
extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop );
/* Helper for aligned memory allocation */
void * align_malloc(size_t size, size_t alignment);
void align_free(void *);
/* Helper to obtain the min alignment for a given context, i.e the max of all min alignments for devices attached to the context*/
size_t get_min_alignment(cl_context context);
/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
cl_device_fp_config get_default_rounding_mode( cl_device_id device );
#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) \
if( checkForImageSupport( device ) ) \
{ \
log_info( "\n\tNote: device does not support images. Skipping test...\n" ); \
return 0; \
}
#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) \
if( checkFor3DImageSupport( device ) ) \
{ \
log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" ); \
return 0; \
}
/* Prints out the standard device header for all tests given the device to print for */
extern int printDeviceHeader( cl_device_id device );
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // _kernelHelpers_h

View File

@@ -0,0 +1,59 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#if defined(__MINGW32__)
#include "mingw_compat.h"
#include <stdio.h>
#include <string.h>
//This function is unavailable on various mingw compilers,
//especially 64 bit so implementing it here
const char *basename_dot=".";
char*
basename(char *path)
{
char *p = path, *b = NULL;
int len = strlen(path);
if (path == NULL) {
return (char*)basename_dot;
}
// Not absolute path on windows
if (path[1] != ':') {
return path;
}
// Trim trailing path seperators
if (path[len - 1] == '\\' ||
path[len - 1] == '/' ) {
len--;
path[len] = '\0';
}
while (len) {
while((*p != '\\' || *p != '/') && len) {
p++;
len--;
}
p++;
b = p;
}
return b;
}
#endif

View File

@@ -0,0 +1,31 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef MINGW_COMPAT_H
#define MINGW_COMPAT_H
#if defined(__MINGW32__)
char *basename(char *path);
#include <malloc.h>
#if defined(__MINGW64__)
//mingw-w64 doesnot have __mingw_aligned_malloc, instead it has _aligned_malloc
#define __mingw_aligned_malloc _aligned_malloc
#define __mingw_aligned_free _aligned_free
#include <stddef.h>
#endif //(__MINGW64__)
#endif //(__MINGW32__)
#endif // MINGW_COMPAT_H

772
test_common/harness/msvc9.c Normal file
View File

@@ -0,0 +1,772 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "compat.h"
#if defined ( _MSC_VER )
#include <limits.h>
#include <stdlib.h>
#include <CL/cl.h>
#include <windows.h>
#if ! defined( __INTEL_COMPILER )
///////////////////////////////////////////////////////////////////
//
// rint, rintf
//
///////////////////////////////////////////////////////////////////
float copysignf( float x, float y )
{
union{ cl_uint u; float f; }ux, uy;
ux.f = x;
uy.f = y;
ux.u = (ux.u & 0x7fffffffU) | (uy.u & 0x80000000U);
return ux.f;
}
double copysign( double x, double y )
{
union{ cl_ulong u; double f; }ux, uy;
ux.f = x;
uy.f = y;
ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL);
return ux.f;
}
long double copysignl( long double x, long double y )
{
union
{
long double f;
struct{ cl_ulong m; cl_ushort sexp; }u;
}ux, uy;
ux.f = x;
uy.f = y;
ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000);
return ux.f;
}
float rintf(float x)
{
float absx = fabsf(x);
if( absx < 8388608.0f /* 0x1.0p23f */ )
{
float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
float rounded = x + magic;
rounded -= magic;
x = copysignf( rounded, x );
}
return x;
}
double rint(double x)
{
double absx = fabs(x);
if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
{
double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
double rounded = x + magic;
rounded -= magic;
x = copysign( rounded, x );
}
return x;
}
long double rintl(long double x)
{
double absx = fabs(x);
if( absx < 9223372036854775808.0L /* 0x1.0p64f */ )
{
long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
long double rounded = x + magic;
rounded -= magic;
x = copysignl( rounded, x );
}
return x;
}
///////////////////////////////////////////////////////////////////
//
// ilogb, ilogbf, ilogbl
//
///////////////////////////////////////////////////////////////////
#ifndef FP_ILOGB0
#define FP_ILOGB0 INT_MIN
#endif
#ifndef FP_ILOGBNAN
#define FP_ILOGBNAN INT_MIN
#endif
int ilogb (double x)
{
union{ double f; cl_ulong u;} u;
u.f = x;
cl_ulong absx = u.u & CL_LONG_MAX;
if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
{
switch( absx )
{
case 0:
return FP_ILOGB0;
case 0x7ff0000000000000ULL:
return INT_MAX;
default:
if( absx > 0x7ff0000000000000ULL )
return FP_ILOGBNAN;
// subnormal
u.u = absx | 0x3ff0000000000000ULL;
u.f -= 1.0;
return (u.u >> 52) - (1023 + 1022);
}
}
return (absx >> 52) - 1023;
}
int ilogbf (float x)
{
union{ float f; cl_uint u;} u;
u.f = x;
cl_uint absx = u.u & 0x7fffffff;
if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
{
switch( absx )
{
case 0:
return FP_ILOGB0;
case 0x7f800000U:
return INT_MAX;
default:
if( absx > 0x7f800000 )
return FP_ILOGBNAN;
// subnormal
u.u = absx | 0x3f800000U;
u.f -= 1.0f;
return (u.u >> 23) - (127 + 126);
}
}
return (absx >> 23) - 127;
}
int ilogbl (long double x)
{
union
{
long double f;
struct{ cl_ulong m; cl_ushort sexp; }u;
} u;
u.f = x;
int exp = u.u.sexp & 0x7fff;
if( 0 == exp )
{
if( 0 == u.u.m )
return FP_ILOGB0;
//subnormal
u.u.sexp = 0x3fff;
u.f -= 1.0f;
exp = u.u.sexp & 0x7fff;
return exp - (0x3fff + 0x3ffe);
}
else if( 0x7fff == exp )
{
if( u.u.m & CL_LONG_MAX )
return FP_ILOGBNAN;
return INT_MAX;
}
return exp - 0x3fff;
}
///////////////////////////////////////////////////////////////////
//
// fmax, fmin, fmaxf, fminf
//
///////////////////////////////////////////////////////////////////
static void GET_BITS_SP32(float fx, unsigned int* ux)
{
volatile union {float f; unsigned int u;} _bitsy;
_bitsy.f = (fx);
*ux = _bitsy.u;
}
/* static void GET_BITS_SP32(float fx, unsigned int* ux) */
/* { */
/* volatile union {float f; unsigned int i;} _bitsy; */
/* _bitsy.f = (fx); */
/* *ux = _bitsy.i; */
/* } */
static void PUT_BITS_SP32(unsigned int ux, float* fx)
{
volatile union {float f; unsigned int u;} _bitsy;
_bitsy.u = (ux);
*fx = _bitsy.f;
}
/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
/* { */
/* volatile union {float f; unsigned int i;} _bitsy; */
/* _bitsy.i = (ux); */
/* *fx = _bitsy.f; */
/* } */
static void GET_BITS_DP64(double dx, unsigned __int64* lx)
{
volatile union {double d; unsigned __int64 l;} _bitsy;
_bitsy.d = (dx);
*lx = _bitsy.l;
}
static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
{
volatile union {double d; unsigned __int64 l;} _bitsy;
_bitsy.l = (lx);
*dx = _bitsy.d;
}
#if 0
int SIGNBIT_DP64(double x )
{
int hx;
_GET_HIGH_WORD(hx,x);
return((hx>>31));
}
#endif
/* fmax(x, y) returns the larger (more positive) of x and y.
NaNs are treated as missing values: if one argument is NaN,
the other argument is returned. If both arguments are NaN,
the first argument is returned. */
/* This works so long as the compiler knows that (x != x) means
that x is NaN; gcc does. */
double fmax(double x, double y)
{
if( isnan(y) )
return x;
return x >= y ? x : y;
}
/* fmin(x, y) returns the smaller (more negative) of x and y.
NaNs are treated as missing values: if one argument is NaN,
the other argument is returned. If both arguments are NaN,
the first argument is returned. */
double fmin(double x, double y)
{
if( isnan(y) )
return x;
return x <= y ? x : y;
}
float fmaxf( float x, float y )
{
if( isnan(y) )
return x;
return x >= y ? x : y;
}
/* fminf(x, y) returns the smaller (more negative) of x and y.
NaNs are treated as missing values: if one argument is NaN,
the other argument is returned. If both arguments are NaN,
the first argument is returned. */
float fminf(float x, float y)
{
if( isnan(y) )
return x;
return x <= y ? x : y;
}
long double scalblnl(long double x, long n)
{
union
{
long double d;
struct{ cl_ulong m; cl_ushort sexp;}u;
}u;
u.u.m = CL_LONG_MIN;
if( x == 0.0L || n < -2200)
return copysignl( 0.0L, x );
if( n > 2200 )
return INFINITY;
if( n < 0 )
{
u.u.sexp = 0x3fff - 1022;
while( n <= -1022 )
{
x *= u.d;
n += 1022;
}
u.u.sexp = 0x3fff + n;
x *= u.d;
return x;
}
if( n > 0 )
{
u.u.sexp = 0x3fff + 1023;
while( n >= 1023 )
{
x *= u.d;
n -= 1023;
}
u.u.sexp = 0x3fff + n;
x *= u.d;
return x;
}
return x;
}
///////////////////////////////////////////////////////////////////
//
// log2
//
///////////////////////////////////////////////////////////////////
const static cl_double log_e_base2 = 1.4426950408889634074;
const static cl_double log_10_base2 = 3.3219280948873623478;
//double log10(double x);
double log2(double x)
{
return 1.44269504088896340735992468100189214 * log(x);
}
long double log2l(long double x)
{
return 1.44269504088896340735992468100189214L * log(x);
}
double trunc(double x)
{
double absx = fabs(x);
if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
{
cl_long rounded = x;
x = copysign( (double) rounded, x );
}
return x;
}
float truncf(float x)
{
float absx = fabsf(x);
if( absx < 8388608.0f /* 0x1.0p23f */ )
{
cl_int rounded = x;
x = copysignf( (float) rounded, x );
}
return x;
}
long lround(double x)
{
double absx = fabs(x);
if( absx < 0.5 )
return 0;
if( absx < 4503599627370496.0 /* 0x1.0p52 */)
{
absx += 0.5;
cl_long rounded = absx;
absx = rounded;
x = copysign( absx, x );
}
if( x >= (double) LONG_MAX )
return LONG_MAX;
return (long) x;
}
long lroundf(float x)
{
float absx = fabsf(x);
if( absx < 0.5f )
return 0;
if( absx < 8388608.0f )
{
absx += 0.5f;
cl_int rounded = absx;
absx = rounded;
x = copysignf( absx, x );
}
if( x >= (float) LONG_MAX )
return LONG_MAX;
return (long) x;
}
double round(double x)
{
double absx = fabs(x);
if( absx < 0.5 )
return copysign( 0.0, x);
if( absx < 4503599627370496.0 /* 0x1.0p52 */)
{
absx += 0.5;
cl_long rounded = absx;
absx = rounded;
x = copysign( absx, x );
}
return x;
}
float roundf(float x)
{
float absx = fabsf(x);
if( absx < 0.5f )
return copysignf( 0.0f, x);
if( absx < 8388608.0f )
{
absx += 0.5f;
cl_int rounded = absx;
absx = rounded;
x = copysignf( absx, x );
}
return x;
}
long double roundl(long double x)
{
long double absx = fabsl(x);
if( absx < 0.5L )
return copysignl( 0.0L, x);
if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
{
absx += 0.5L;
cl_ulong rounded = absx;
absx = rounded;
x = copysignl( absx, x );
}
return x;
}
float cbrtf( float x )
{
float z = pow( fabs((double) x), 1.0 / 3.0 );
return copysignf( z, x );
}
double cbrt( double x )
{
return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
}
long int lrint (double x)
{
double absx = fabs(x);
if( x >= (double) LONG_MAX )
return LONG_MAX;
if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
{
double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
double rounded = x + magic;
rounded -= magic;
return (long int) rounded;
}
return (long int) x;
}
long int lrintf (float x)
{
float absx = fabsf(x);
if( x >= (float) LONG_MAX )
return LONG_MAX;
if( absx < 8388608.0f /* 0x1.0p23f */ )
{
float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
float rounded = x + magic;
rounded -= magic;
return (long int) rounded;
}
return (long int) x;
}
///////////////////////////////////////////////////////////////////
//
// fenv functions
//
///////////////////////////////////////////////////////////////////
int fetestexcept(int excepts)
{
unsigned int status = _statusfp();
return excepts & (
((status & _SW_INEXACT) ? FE_INEXACT : 0) |
((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
((status & _SW_INVALID) ? FE_INVALID : 0)
);
}
int feclearexcept(int excepts)
{
_clearfp();
return 0;
}
#endif // __INTEL_COMPILER
#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
float make_nan()
{
/* This is the IEEE 754 single-precision format:
unsigned int mantissa: 22;
unsigned int quiet_nan: 1;
unsigned int exponent: 8;
unsigned int negative: 1;
*/
//const static unsigned
static const int32_t _nan = 0x7fc00000;
return *(const float*)(&_nan);
}
float nanf( const char* str)
{
cl_uint u = atoi( str );
u |= 0x7fc00000U;
return *( float*)(&u);
}
double nan( const char* str)
{
cl_ulong u = atoi( str );
u |= 0x7ff8000000000000ULL;
return *( double*)(&u);
}
// double check this implementatation
long double nanl( const char* str)
{
union
{
long double f;
struct { cl_ulong m; cl_ushort sexp; }u;
}u;
u.u.sexp = 0x7fff;
u.u.m = 0x8000000000000000ULL | atoi( str );
return u.f;
}
#endif
///////////////////////////////////////////////////////////////////
//
// misc functions
//
///////////////////////////////////////////////////////////////////
/*
// This function is commented out because the Windows implementation should never call munmap.
// If it is calling it, we have a bug. Please file a bugzilla.
int munmap(void *addr, size_t len)
{
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
return (int)VirtualAlloc( (LPVOID)addr, len,
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
}
*/
uint64_t ReadTime( void )
{
LARGE_INTEGER current;
QueryPerformanceCounter(&current);
return (uint64_t)current.QuadPart;
}
double SubtractTime( uint64_t endTime, uint64_t startTime )
{
static double PerformanceFrequency = 0.0;
if (PerformanceFrequency == 0.0) {
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
PerformanceFrequency = (double) frequency.QuadPart;
}
return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
}
int cf_signbit(double x)
{
union
{
double f;
cl_ulong u;
}u;
u.f = x;
return u.u >> 63;
}
int cf_signbitf(float x)
{
union
{
float f;
cl_uint u;
}u;
u.f = x;
return u.u >> 31;
}
float int2float (int32_t ix)
{
union {
float f;
int32_t i;
} u;
u.i = ix;
return u.f;
}
int32_t float2int (float fx)
{
union {
float f;
int32_t i;
} u;
u.f = fx;
return u.i;
}
#if !defined(_WIN64)
/** Returns the number of leading 0-bits in x,
starting at the most significant bit position.
If x is 0, the result is undefined.
*/
int __builtin_clz(unsigned int pattern)
{
#if 0
int res;
__asm {
mov eax, pattern
bsr eax, eax
mov res, eax
}
return 31 - res;
#endif
unsigned long index;
unsigned char res = _BitScanReverse( &index, pattern);
if (res) {
return 8*sizeof(int) - 1 - index;
} else {
return 8*sizeof(int);
}
}
#else
int __builtin_clz(unsigned int pattern)
{
int count;
if (pattern == 0u) {
return 32;
}
count = 31;
if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; }
if (pattern >= 1u<<8) { pattern >>= 8; count -= 8; }
if (pattern >= 1u<<4) { pattern >>= 4; count -= 4; }
if (pattern >= 1u<<2) { pattern >>= 2; count -= 2; }
if (pattern >= 1u<<1) { count -= 1; }
return count;
}
#endif // !defined(_WIN64)
#include <intrin.h>
#include <emmintrin.h>
int usleep(int usec)
{
Sleep((usec + 999) / 1000);
return 0;
}
unsigned int sleep( unsigned int sec )
{
Sleep( sec * 1000 );
return 0;
}
#endif // defined( _MSC_VER )

View File

@@ -0,0 +1,280 @@
/*
A C-program for MT19937, with initialization improved 2002/1/26.
Coded by Takuji Nishimura and Makoto Matsumoto.
Before using, initialize the state by using init_genrand(seed)
or init_by_array(init_key, key_length).
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The names of its contributors may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Any feedback is very welcome.
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
Modifications for use in OpenCL by Ian Ollmann, Apple Inc.
*/
#include <stdio.h>
#include <stdlib.h>
#include "mt19937.h"
#include "mingw_compat.h"
#ifdef __SSE2__
#include <emmintrin.h>
#endif
static void * align_malloc(size_t size, size_t alignment)
{
#if defined(_WIN32) && defined(_MSC_VER)
return _aligned_malloc(size, alignment);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
void * ptr = NULL;
#if defined(__ANDROID__)
ptr = memalign(alignment, size);
if ( ptr )
return ptr;
#else
if (0 == posix_memalign(&ptr, alignment, size))
return ptr;
#endif
return NULL;
#elif defined(__MINGW32__)
return __mingw_aligned_malloc(size, alignment);
#else
#error "Please add support OS for aligned malloc"
#endif
}
static void align_free(void * ptr)
{
#if defined(_WIN32) && defined(_MSC_VER)
_aligned_free(ptr);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
return free(ptr);
#elif defined(__MINGW32__)
return __mingw_aligned_free(ptr);
#else
#error "Please add support OS for aligned free"
#endif
}
/* Period parameters */
#define N 624 /* vector code requires multiple of 4 here */
#define M 397
#define MATRIX_A (cl_uint) 0x9908b0dfUL /* constant vector a */
#define UPPER_MASK (cl_uint) 0x80000000UL /* most significant w-r bits */
#define LOWER_MASK (cl_uint) 0x7fffffffUL /* least significant r bits */
typedef struct _MTdata
{
cl_uint mt[N];
#ifdef __SSE2__
cl_uint cache[N];
#endif
cl_int mti;
}_MTdata;
/* initializes mt[N] with a seed */
MTdata init_genrand(cl_uint s)
{
MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
if( NULL != r )
{
cl_uint *mt = r->mt;
int mti = 0;
mt[0]= s; // & 0xffffffffUL;
for (mti=1; mti<N; mti++) {
mt[mti] = (cl_uint)
(1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
/* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
/* In the previous versions, MSBs of the seed affect */
/* only MSBs of the array mt[]. */
/* 2002/01/09 modified by Makoto Matsumoto */
// mt[mti] &= 0xffffffffUL;
/* for >32 bit machines */
}
r->mti = mti;
}
return r;
}
void free_mtdata( MTdata d )
{
if(d)
align_free(d);
}
/* generates a random number on [0,0xffffffff]-interval */
cl_uint genrand_int32( MTdata d)
{
/* mag01[x] = x * MATRIX_A for x=0,1 */
static const cl_uint mag01[2]={0x0UL, MATRIX_A};
#ifdef __SSE2__
static volatile int init = 0;
static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
#endif
cl_uint *mt = d->mt;
cl_uint y;
if (d->mti == N)
{ /* generate N words at one time */
int kk;
#ifdef __SSE2__
if( 0 == init )
{
upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
init = 1;
}
#endif
kk = 0;
#ifdef __SSE2__
// vector loop
for( ; kk + 4 <= N-M; kk += 4 )
{
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) ); // mt[kk+M] ^ (y >> 1)
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
_mm_store_si128( (__m128i*) (mt + kk ), vr );
}
#endif
for ( ;kk<N-M;kk++) {
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
}
#ifdef __SSE2__
// advance to next aligned location
for (;kk<N-1 && (kk & 3);kk++) {
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
}
// vector loop
for( ; kk + 4 <= N-1; kk += 4 )
{
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) ); // mt[kk+M-N] ^ (y >> 1)
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
_mm_store_si128( (__m128i*) (mt + kk ), vr );
}
#endif
for (;kk<N-1;kk++) {
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
}
y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
#ifdef __SSE2__
// Do the tempering ahead of time in vector code
for( kk = 0; kk + 4 <= N; kk += 4 )
{
__m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) ); // y = mt[k];
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) ); // y ^= (y >> 11);
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) ); // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) ); // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) ); // y ^= (y >> 18);
_mm_store_si128( (__m128i*)(d->cache+kk), vy );
}
#endif
d->mti = 0;
}
#ifdef __SSE2__
y = d->cache[d->mti++];
#else
y = mt[d->mti++];
/* Tempering */
y ^= (y >> 11);
y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
y ^= (y << 15) & (cl_uint) 0xefc60000UL;
y ^= (y >> 18);
#endif
return y;
}
cl_ulong genrand_int64( MTdata d)
{
return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d);
}
/* generates a random number on [0,1]-real-interval */
double genrand_real1(MTdata d)
{
return genrand_int32(d)*(1.0/4294967295.0);
/* divided by 2^32-1 */
}
/* generates a random number on [0,1)-real-interval */
double genrand_real2(MTdata d)
{
return genrand_int32(d)*(1.0/4294967296.0);
/* divided by 2^32 */
}
/* generates a random number on (0,1)-real-interval */
double genrand_real3(MTdata d)
{
return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0);
/* divided by 2^32 */
}
/* generates a random number on [0,1) with 53-bit resolution*/
double genrand_res53(MTdata d)
{
unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6;
return(a*67108864.0+b)*(1.0/9007199254740992.0);
}

View File

@@ -0,0 +1,99 @@
/*
* mt19937.h
*
* Mersenne Twister.
*
A C-program for MT19937, with initialization improved 2002/1/26.
Coded by Takuji Nishimura and Makoto Matsumoto.
Before using, initialize the state by using init_genrand(seed)
or init_by_array(init_key, key_length).
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The names of its contributors may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Any feedback is very welcome.
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
*/
#ifndef MT19937_H
#define MT19937_H 1
#if defined( __APPLE__ )
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl_platform.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
* Interfaces here have been modified from original sources so that they
* are safe to call reentrantly, so long as a different MTdata is used
* on each thread.
*/
typedef struct _MTdata *MTdata;
/* Create the random number generator with seed */
MTdata init_genrand( cl_uint /*seed*/ );
/* release memory used by a MTdata private data */
void free_mtdata( MTdata /*data*/ );
/* generates a random number on [0,0xffffffff]-interval */
cl_uint genrand_int32( MTdata /*data*/);
/* generates a random number on [0,0xffffffffffffffffULL]-interval */
cl_ulong genrand_int64( MTdata /*data*/);
/* generates a random number on [0,1]-real-interval */
double genrand_real1( MTdata /*data*/);
/* generates a random number on [0,1)-real-interval */
double genrand_real2( MTdata /*data*/);
/* generates a random number on (0,1)-real-interval */
double genrand_real3( MTdata /*data*/);
/* generates a random number on [0,1) with 53-bit resolution*/
double genrand_res53( MTdata /*data*/ );
#ifdef __cplusplus
}
#endif
#endif /* MT19937_H */

View File

@@ -0,0 +1,564 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "os_helpers.h"
#include "errorHelpers.h"
// =================================================================================================
// C++ interface.
// =================================================================================================
#include <cerrno> // errno, error constants
#include <climits> // PATH_MAX
#include <cstdlib> // abort, _splitpath, _makepath
#include <cstring> // strdup, strerror_r
#include <sstream>
#include <vector>
#define CHECK_PTR( ptr ) \
if ( (ptr) == NULL ) { \
abort(); \
}
typedef std::vector< char > buffer_t;
#if ! defined( PATH_MAX )
#define PATH_MAX 1000
#endif
int const _size = PATH_MAX + 1; // Initial buffer size for path.
int const _count = 8; // How many times we will try to double buffer size.
// -------------------------------------------------------------------------------------------------
// MacOS X
// -------------------------------------------------------------------------------------------------
#if defined( __APPLE__ )
#include <mach-o/dyld.h> // _NSGetExecutablePath
#include <libgen.h> // dirname
static
std::string
_err_msg(
int err, // Error number (e. g. errno).
int level // Nesting level, for avoiding infinite recursion.
) {
/*
There are 3 incompatible versions of strerror_r:
char * strerror_r( int, char *, size_t ); // GNU version
int strerror_r( int, char *, size_t ); // BSD version
int strerror_r( int, char *, size_t ); // XSI version
BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
*/
// BSD version of strerror_r.
buffer_t buffer( 100 );
int count = _count;
for ( ; ; ) {
int rc = strerror_r( err, & buffer.front(), buffer.size() );
if ( rc == EINVAL ) {
// Error code is not recognized, but anyway we got the message.
return & buffer.front();
} else if ( rc == ERANGE ) {
// Buffer is not enough.
if ( count > 0 ) {
// Enlarge the buffer.
-- count;
buffer.resize( buffer.size() * 2 );
} else {
std::stringstream ostr;
ostr
<< "Error " << err << " "
<< "(Getting error message failed: "
<< "Buffer of " << buffer.size() << " bytes is still too small"
<< ")";
return ostr.str();
}; // if
} else if ( rc == 0 ) {
// We got the message.
return & buffer.front();
} else {
std::stringstream ostr;
ostr
<< "Error " << err << " "
<< "(Getting error message failed: "
<< ( level < 2 ? _err_msg( rc, level + 1 ) : "Oops" )
<< ")";
return ostr.str();
}; // if
}; // forever
} // _err_msg
std::string
dir_sep(
) {
return "/";
} // dir_sep
std::string
exe_path(
) {
buffer_t path( _size );
int count = _count;
for ( ; ; ) {
uint32_t size = path.size();
int rc = _NSGetExecutablePath( & path.front(), & size );
if ( rc == 0 ) {
break;
}; // if
if ( count > 0 ) {
-- count;
path.resize( size );
} else {
log_error(
"ERROR: Getting executable path failed: "
"_NSGetExecutablePath failed: Buffer of %lu bytes is still too small\n",
(unsigned long) path.size()
);
exit( 2 );
}; // if
}; // forever
return & path.front();
} // exe_path
std::string
exe_dir(
) {
std::string path = exe_path();
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
return dirname( & buffer.front() );
} // exe_dir
#endif // __APPLE__
// -------------------------------------------------------------------------------------------------
// Linux
// -------------------------------------------------------------------------------------------------
#if defined( __linux__ )
#include <cerrno> // errno
#include <libgen.h> // dirname
#include <unistd.h> // readlink
static
std::string
_err_msg(
int err,
int level
) {
/*
There are 3 incompatible versions of strerror_r:
char * strerror_r( int, char *, size_t ); // GNU version
int strerror_r( int, char *, size_t ); // BSD version
int strerror_r( int, char *, size_t ); // XSI version
BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
*/
#if defined(__ANDROID__) || ( ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 ) && ! _GNU_SOURCE )
// XSI version of strerror_r.
#warning Not tested!
buffer_t buffer( 200 );
int count = _count;
for ( ; ; ) {
int rc = strerror_r( err, & buffer.front(), buffer.size() );
if ( rc == -1 ) {
int _err = errno;
if ( _err == ERANGE ) {
if ( count > 0 ) {
// Enlarge the buffer.
-- count;
buffer.resize( buffer.size() * 2 );
} else {
std::stringstream ostr;
ostr
<< "Error " << err << " "
<< "(Getting error message failed: "
<< "Buffer of " << buffer.size() << " bytes is still too small"
<< ")";
return ostr.str();
}; // if
} else {
std::stringstream ostr;
ostr
<< "Error " << err << " "
<< "(Getting error message failed: "
<< ( level < 2 ? _err_msg( _err, level + 1 ) : "Oops" )
<< ")";
return ostr.str();
}; // if
} else {
// We got the message.
return & buffer.front();
}; // if
}; // forever
#else
// GNU version of strerror_r.
char buffer[ 2000 ];
return strerror_r( err, buffer, sizeof( buffer ) );
#endif
} // _err_msg
std::string
dir_sep(
) {
return "/";
} // dir_sep
std::string
exe_path(
) {
static std::string const exe = "/proc/self/exe";
buffer_t path( _size );
int count = _count; // Max number of iterations.
for ( ; ; ) {
ssize_t len = readlink( exe.c_str(), & path.front(), path.size() );
if ( len < 0 ) {
// Oops.
int err = errno;
log_error(
"ERROR: Getting executable path failed: "
"Reading symlink `%s' failed: %s\n",
exe.c_str(), err_msg( err ).c_str()
);
exit( 2 );
}; // if
if ( len < path.size() ) {
// We got the path.
path.resize( len );
break;
}; // if
// Oops, buffer is too small.
if ( count > 0 ) {
-- count;
// Enlarge the buffer.
path.resize( path.size() * 2 );
} else {
log_error(
"ERROR: Getting executable path failed: "
"Reading symlink `%s' failed: Buffer of %lu bytes is still too small\n",
exe.c_str(),
(unsigned long) path.size()
);
exit( 2 );
}; // if
}; // forever
return std::string( & path.front(), path.size() );
} // exe_path
std::string
exe_dir(
) {
std::string path = exe_path();
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
return dirname( & buffer.front() );
} // exe_dir
#endif // __linux__
// -------------------------------------------------------------------------------------------------
// MS Windows
// -------------------------------------------------------------------------------------------------
#if defined( _WIN32 )
#include <windows.h>
#if defined( max )
#undef max
#endif
#include <cctype>
#include <algorithm>
static
std::string
_err_msg(
int err,
int level
) {
std::string msg;
LPSTR buffer = NULL;
DWORD flags =
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS;
DWORD len =
FormatMessageA(
flags,
NULL,
err,
LANG_USER_DEFAULT,
reinterpret_cast< LPSTR >( & buffer ),
0,
NULL
);
if ( buffer == NULL || len == 0 ) {
int _err = GetLastError();
char str[1024] = { 0 };
snprintf(str, sizeof(str), "Error 0x%08x (Getting error message failed: %s )", err, ( level < 2 ? _err_msg( _err, level + 1 ).c_str() : "Oops" ));
msg = std::string(str);
} else {
// Trim trailing whitespace (including `\r' and `\n').
while ( len > 0 && isspace( buffer[ len - 1 ] ) ) {
-- len;
}; // while
// Drop trailing full stop.
if ( len > 0 && buffer[ len - 1 ] == '.' ) {
-- len;
}; // if
msg.assign( buffer, len );
}; //if
if ( buffer != NULL ) {
LocalFree( buffer );
}; // if
return msg;
} // _get_err_msg
std::string
dir_sep(
) {
return "\\";
} // dir_sep
std::string
exe_path(
) {
buffer_t path( _size );
int count = _count;
for ( ; ; ) {
DWORD len = GetModuleFileNameA( NULL, & path.front(), path.size() );
if ( len == 0 ) {
int err = GetLastError();
log_error( "ERROR: Getting executable path failed: %s\n", err_msg( err ).c_str() );
exit( 2 );
}; // if
if ( len < path.size() ) {
path.resize( len );
break;
}; // if
// Buffer too small.
if ( count > 0 ) {
-- count;
path.resize( path.size() * 2 );
} else {
log_error(
"ERROR: Getting executable path failed: "
"Buffer of %lu bytes is still too small\n",
(unsigned long) path.size()
);
exit( 2 );
}; // if
}; // forever
return std::string( & path.front(), path.size() );
} // exe_path
std::string
exe_dir(
) {
std::string exe = exe_path();
int count = 0;
// Splitting path into components.
buffer_t drv( _MAX_DRIVE );
buffer_t dir( _MAX_DIR );
count = _count;
#if defined(_MSC_VER)
for ( ; ; ) {
int rc =
_splitpath_s(
exe.c_str(),
& drv.front(), drv.size(),
& dir.front(), dir.size(),
NULL, 0, // We need neither name
NULL, 0 // nor extension
);
if ( rc == 0 ) {
break;
} else if ( rc == ERANGE ) {
if ( count > 0 ) {
-- count;
// Buffer is too small, but it is not clear which one.
// So we have to enlarge all.
drv.resize( drv.size() * 2 );
dir.resize( dir.size() * 2 );
} else {
log_error(
"ERROR: Getting executable path failed: "
"Splitting path `%s' to components failed: "
"Buffers of %lu and %lu bytes are still too small\n",
exe.c_str(),
(unsigned long) drv.size(),
(unsigned long) dir.size()
);
exit( 2 );
}; // if
} else {
log_error(
"ERROR: Getting executable path failed: "
"Splitting path `%s' to components failed: %s\n",
exe.c_str(),
err_msg( rc ).c_str()
);
exit( 2 );
}; // if
}; // forever
#else // __MINGW32__
// MinGW does not have the "secure" _splitpath_s, use the insecure version instead.
_splitpath(
exe.c_str(),
& drv.front(),
& dir.front(),
NULL, // We need neither name
NULL // nor extension
);
#endif // __MINGW32__
// Combining components back to path.
// I failed with "secure" `_makepath_s'. If buffer is too small, instead of returning
// ERANGE, `_makepath_s' pops up dialog box and offers to debug the program. D'oh!
// So let us try to guess the size of result and go with insecure `_makepath'.
buffer_t path( std::max( drv.size() + dir.size(), size_t( _MAX_PATH ) ) + 10 );
_makepath( & path.front(), & drv.front(), & dir.front(), NULL, NULL );
return & path.front();
} // exe_dir
#endif // _WIN32
std::string
err_msg(
int err
) {
return _err_msg( err, 0 );
} // err_msg
// =================================================================================================
// C interface.
// =================================================================================================
char *
get_err_msg(
int err
) {
char * msg = strdup( err_msg( err ).c_str() );
CHECK_PTR( msg );
return msg;
} // get_err_msg
char *
get_dir_sep(
) {
char * sep = strdup( dir_sep().c_str() );
CHECK_PTR( sep );
return sep;
} // get_dir_sep
char *
get_exe_path(
) {
char * path = strdup( exe_path().c_str() );
CHECK_PTR( path );
return path;
} // get_exe_path
char *
get_exe_dir(
) {
char * dir = strdup( exe_dir().c_str() );
CHECK_PTR( dir );
return dir;
} // get_exe_dir
// end of file //

View File

@@ -0,0 +1,53 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef __os_helpers_h__
#define __os_helpers_h__
#include "compat.h"
// -------------------------------------------------------------------------------------------------
// C++ interface.
// -------------------------------------------------------------------------------------------------
#ifdef __cplusplus
#include <string>
std::string err_msg( int err );
std::string dir_sep();
std::string exe_path();
std::string exe_dir();
#endif // __cplusplus
// -------------------------------------------------------------------------------------------------
// C interface.
// -------------------------------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
char * get_err_msg( int err ); // Returns system error message. Subject to free.
char * get_dir_sep(); // Returns dir separator. Subject to free.
char * get_exe_path(); // Returns path of current executable. Subject to free.
char * get_exe_dir(); // Returns dir of current executable. Subject to free.
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus
#endif // __os_helpers_h__

View File

@@ -0,0 +1,130 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "parseParameters.h"
#include "errorHelpers.h"
#include "testHarness.h"
#include "ThreadPool.h"
#include <iostream>
#include <sstream>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
using namespace std;
bool gOfflineCompiler = false;
bool gForceSpirVCache = false;
bool gForceSpirVGenerate = false;
std::string gSpirVPath = ".";
OfflineCompilerOutputType gOfflineCompilerOutputType;
void helpInfo ()
{
log_info(" '-offlineCompiler <output_type:binary|source|spir_v>': use offline compiler\n");
log_info(" ' output_type binary - \"../build_script_binary.py\" is invoked\n");
log_info(" ' output_type source - \"../build_script_source.py\" is invoked\n");
log_info(" ' output_type spir_v <mode:generate|cache> - \"../cl_build_script_spir_v.py\" is invoked\n, optional modes: generate, cache");
log_info(" ' mode generate <path> - force binary generation");
log_info(" ' mode cache <path> - force reading binary files from cache");
log_info("\n");
}
int parseCustomParam (int argc, const char *argv[], const char *ignore)
{
int delArg = 0;
for (int i=1; i<argc; i++)
{
if(ignore != 0)
{
// skip parameters that require special/different treatment in application
// (generic interpretation and parameter removal will not be performed)
const char * ptr = strstr(ignore, argv[i]);
if(ptr != 0 &&
(ptr == ignore || ptr[-1] == ' ') && //first on list or ' ' before
(ptr[strlen(argv[i])] == 0 || ptr[strlen(argv[i])] == ' ')) // last on list or ' ' after
continue;
}
if (i < 0) i = 0;
delArg = 0;
if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
helpInfo ();
else if (!strcmp(argv[i], "-offlineCompiler"))
{
log_info(" Offline Compiler enabled\n");
delArg = 1;
if ((i + 1) < argc)
{
gOfflineCompiler = true;
if (!strcmp(argv[i + 1], "binary"))
{
gOfflineCompilerOutputType = kBinary;
delArg++;
}
else if (!strcmp(argv[i + 1], "source"))
{
gOfflineCompilerOutputType = kSource;
delArg++;
}
else if (!strcmp(argv[i + 1], "spir_v"))
{
gOfflineCompilerOutputType = kSpir_v;
delArg++;
if ((i + 3) < argc)
{
if (!strcmp(argv[i + 2], "cache"))
{
gForceSpirVCache = true;
gSpirVPath = argv[i + 3];
log_info(" SpirV reading from cache enabled.\n");
delArg += 2;
}
else if (!strcmp(argv[i + 2], "generate"))
{
gForceSpirVGenerate = true;
gSpirVPath = argv[i + 3];
log_info(" SpirV force generate binaries enabled.\n");
delArg += 2;
}
}
}
else
{
log_error(" Offline Compiler output type not supported: %s\n", argv[i + 1]);
return -1;
}
}
else
{
log_error(" Offline Compiler parameters are incorrect. Usage:\n");
log_error(" -offlineCompiler <input> <output> <output_type:binary | source | spir_v>\n");
return -1;
}
}
//cleaning parameters from argv tab
for (int j=i; j<argc-delArg; j++)
argv[j] = argv[j+delArg];
argc -= delArg ;
i -= delArg;
}
return argc;
}

View File

@@ -0,0 +1,37 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _parseParameters_h
#define _parseParameters_h
#include <string>
extern bool gOfflineCompiler;
extern bool gForceSpirVCache;
extern bool gForceSpirVGenerate;
extern std::string gSpirVPath;
enum OfflineCompilerOutputType
{
kBinary = 0,
kSource,
kSpir_v
};
extern OfflineCompilerOutputType gOfflineCompilerOutputType;
extern int parseCustomParam (int argc, const char *argv[], const char *ignore = 0 );
#endif // _parseParameters_h

View File

@@ -0,0 +1,49 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _ref_counting_h
#define _ref_counting_h
#define MARK_REF_COUNT_BASE( c, type, bigType ) \
cl_uint c##_refCount; \
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
test_error( error, "Unable to check reference count for " #type );
#define TEST_REF_COUNT_BASE( c, type, bigType ) \
cl_uint c##_refCount_new; \
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
test_error( error, "Unable to check reference count for " #type ); \
if( c##_refCount != c##_refCount_new ) \
{ \
log_error( "ERROR: Reference count for " #type " changed! (was %d, now %d)\n", c##_refCount, c##_refCount_new ); \
return -1; \
}
#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT )
#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT )
#define MARK_REF_COUNT_DEVICE( c ) MARK_REF_COUNT_BASE( c, Device, DEVICE )
#define TEST_REF_COUNT_DEVICE( c ) TEST_REF_COUNT_BASE( c, Device, DEVICE )
#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE )
#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE )
#define MARK_REF_COUNT_PROGRAM( c ) MARK_REF_COUNT_BASE( c, Program, PROGRAM )
#define TEST_REF_COUNT_PROGRAM( c ) TEST_REF_COUNT_BASE( c, Program, PROGRAM )
#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM )
#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM )
#endif // _ref_counting_h

View File

@@ -0,0 +1,241 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "rounding_mode.h"
#if (defined( __arm__ ) || defined(__aarch64__))
#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode:
#define _ARM_FE_FTZ 0x1000000
#define _ARM_FE_NFTZ 0x0
#if defined(__aarch64__)
#define _FPU_GETCW(cw) __asm__ ("MRS %0,FPCR" : "=r" (cw))
#define _FPU_SETCW(cw) __asm__ ("MSR FPCR,%0" : :"ri" (cw))
#else
#define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw))
#define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
#endif
#endif
#if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
#define _ARM_FE_TONEAREST 0x0
#define _ARM_FE_UPWARD 0x400000
#define _ARM_FE_DOWNWARD 0x800000
#define _ARM_FE_TOWARDZERO 0xc00000
RoundingMode set_round( RoundingMode r, Type outType )
{
static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST,
_ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO,
_ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
const int *p = int_rounds;
if( outType == kfloat || outType == kdouble )
p = flt_rounds;
int fpscr = 0;
RoundingMode oldRound = get_round();
_FPU_GETCW(fpscr);
_FPU_SETCW( p[r] | (fpscr & ~FPSCR_ROUND_MASK));
return oldRound;
}
RoundingMode get_round( void )
{
int fpscr;
int oldRound;
_FPU_GETCW(fpscr);
oldRound = (fpscr & FPSCR_ROUND_MASK);
switch( oldRound )
{
case _ARM_FE_TONEAREST:
return kRoundToNearestEven;
case _ARM_FE_UPWARD:
return kRoundUp;
case _ARM_FE_DOWNWARD:
return kRoundDown;
case _ARM_FE_TOWARDZERO:
return kRoundTowardZero;
}
return kDefaultRoundingMode;
}
#elif !(defined(_WIN32) && defined(_MSC_VER))
RoundingMode set_round( RoundingMode r, Type outType )
{
static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
const int *p = int_rounds;
if( outType == kfloat || outType == kdouble )
p = flt_rounds;
int oldRound = fegetround();
fesetround( p[r] );
switch( oldRound )
{
case FE_TONEAREST:
return kRoundToNearestEven;
case FE_UPWARD:
return kRoundUp;
case FE_DOWNWARD:
return kRoundDown;
case FE_TOWARDZERO:
return kRoundTowardZero;
default:
abort(); // ??!
}
return kDefaultRoundingMode; //never happens
}
RoundingMode get_round( void )
{
int oldRound = fegetround();
switch( oldRound )
{
case FE_TONEAREST:
return kRoundToNearestEven;
case FE_UPWARD:
return kRoundUp;
case FE_DOWNWARD:
return kRoundDown;
case FE_TOWARDZERO:
return kRoundTowardZero;
}
return kDefaultRoundingMode;
}
#else
RoundingMode set_round( RoundingMode r, Type outType )
{
static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds;
unsigned int oldRound;
int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
if (err) {
vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
return kDefaultRoundingMode; //what else never happens
}
oldRound &= _MCW_RC;
RoundingMode old =
(oldRound == _RC_NEAR)? kRoundToNearestEven :
(oldRound == _RC_UP)? kRoundUp :
(oldRound == _RC_DOWN)? kRoundDown :
(oldRound == _RC_CHOP)? kRoundTowardZero:
kDefaultRoundingMode;
_controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode
return old; //returning old rounding mode
}
RoundingMode get_round( void )
{
unsigned int oldRound;
int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
oldRound &= _MCW_RC;
return
(oldRound == _RC_NEAR)? kRoundToNearestEven :
(oldRound == _RC_UP)? kRoundUp :
(oldRound == _RC_DOWN)? kRoundDown :
(oldRound == _RC_CHOP)? kRoundTowardZero:
kDefaultRoundingMode;
}
#endif
//
// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in
// basic_test_conversions.c. Some host processors may not support this mode, which case you'll need to do some clamping in
// software by testing against FLT_MIN or DBL_MIN in that file.
//
// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of
// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
// operators do (e.g. add, subtract, multiply, divide, etc.)
//
// Configuring hardware to FTZ mode varies by platform.
// CAUTION: Some C implementations may also fail to behave properly in this mode.
//
// On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
// is used for floating point computation! If your OS uses x87, you'll need to figure out how
// to turn that off for the conversions code in basic_test_conversions.c so that they flush to
// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
// in which case, these function are at liberty to do nothing.
//
#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
#include <xmmintrin.h>
#elif defined( __PPC__ )
#include <fpu_control.h>
#endif
void *FlushToZero( void )
{
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
union{ int i; void *p; }u = { _mm_getcsr() };
_mm_setcsr( u.i | 0x8040 );
return u.p;
#elif defined( __arm__ ) || defined(__aarch64__)
int fpscr;
_FPU_GETCW(fpscr);
_FPU_SETCW(fpscr | FPSCR_FZ);
return NULL;
#elif defined( __PPC__ )
fpu_control_t flags = 0;
_FPU_GETCW(flags);
flags |= _FPU_MASK_NI;
_FPU_SETCW(flags);
return NULL;
#else
#error Unknown arch
#endif
#else
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
void UnFlushToZero( void *p)
{
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
union{ void *p; int i; }u = { p };
_mm_setcsr( u.i );
#elif defined( __arm__ ) || defined(__aarch64__)
int fpscr;
_FPU_GETCW(fpscr);
_FPU_SETCW(fpscr & ~FPSCR_FZ);
#elif defined( __PPC__)
fpu_control_t flags = 0;
_FPU_GETCW(flags);
flags &= ~_FPU_MASK_NI;
_FPU_SETCW(flags);
#else
#error Unknown arch
#endif
#else
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}

View File

@@ -0,0 +1,69 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef __ROUNDING_MODE_H__
#define __ROUNDING_MODE_H__
#include "compat.h"
#if (defined(_WIN32) && defined (_MSC_VER))
#include "errorHelpers.h"
#include "testHarness.h"
#endif
typedef enum
{
kDefaultRoundingMode = 0,
kRoundToNearestEven,
kRoundUp,
kRoundDown,
kRoundTowardZero,
kRoundingModeCount
}RoundingMode;
typedef enum
{
kuchar = 0,
kchar = 1,
kushort = 2,
kshort = 3,
kuint = 4,
kint = 5,
kfloat = 6,
kdouble = 7,
kulong = 8,
klong = 9,
//This goes last
kTypeCount
}Type;
#ifdef __cplusplus
extern "C" {
#endif
extern RoundingMode set_round( RoundingMode r, Type outType );
extern RoundingMode get_round( void );
extern void *FlushToZero( void );
extern void UnFlushToZero( void *p);
#ifdef __cplusplus
}
#endif
#endif /* __ROUNDING_MODE_H__ */

View File

@@ -0,0 +1,851 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testHarness.h"
#include "compat.h"
#include <stdio.h>
#include <string.h>
#include "threadTesting.h"
#include "errorHelpers.h"
#include "kernelHelpers.h"
#include "fpcontrol.h"
#include "typeWrappers.h"
#include "parseParameters.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include <time.h>
#if !defined (__APPLE__)
#include <CL/cl.h>
#endif
int gTestsPassed = 0;
int gTestsFailed = 0;
cl_uint gRandomSeed = 0;
cl_uint gReSeed = 0;
int gFlushDenormsToZero = 0;
int gInfNanSupport = 1;
int gIsEmbedded = 0;
int gIsOpenCL_C_1_0_Device = 0;
int gIsOpenCL_1_0_Device = 0;
int gHasLong = 1;
#define DEFAULT_NUM_ELEMENTS 0x4000
int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
basefn fnList[], const char *fnNames[],
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps )
{
return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames, imageSupportRequired, forceNoContextCreation, queueProps,
( imageSupportRequired ) ? verifyImageSupport : NULL );
}
int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
basefn fnList[], const char *fnNames[],
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps,
DeviceCheckFn deviceCheckFn )
{
test_start();
cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;
cl_uint num_platforms = 0;
cl_platform_id *platforms;
cl_device_id device;
int num_elements = DEFAULT_NUM_ELEMENTS;
cl_uint num_devices = 0;
cl_device_id *devices = NULL;
cl_uint choosen_device_index = 0;
cl_uint choosen_platform_index = 0;
int err, ret;
char *endPtr;
unsigned int i;
int based_on_env_var = 0;
/* Check for environment variable to set device type */
char *env_mode = getenv( "CL_DEVICE_TYPE" );
if( env_mode != NULL )
{
based_on_env_var = 1;
if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
device_type = CL_DEVICE_TYPE_GPU;
else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
device_type = CL_DEVICE_TYPE_CPU;
else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
device_type = CL_DEVICE_TYPE_ACCELERATOR;
else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
device_type = CL_DEVICE_TYPE_DEFAULT;
else
{
log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
abort();
}
}
#if defined( __APPLE__ )
{
// report on any unusual library search path indirection
char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
if( libSearchPath )
log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
// report on any unusual framework search path indirection
char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
if( libSearchPath )
log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
}
#endif
env_mode = getenv( "CL_DEVICE_INDEX" );
if( env_mode != NULL )
{
choosen_device_index = atoi(env_mode);
}
env_mode = getenv( "CL_PLATFORM_INDEX" );
if( env_mode != NULL )
{
choosen_platform_index = atoi(env_mode);
}
/* Process the command line arguments */
argc = parseCustomParam(argc, argv);
if (argc == -1)
{
test_finish();
return 0;
}
/* Special case: just list the tests */
if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" )))
{
log_info( "Usage: %s [<function name>*] [pid<num>] [id<num>] [<device type>]\n", argv[0] );
log_info( "\t<function name>\tOne or more of: (wildcard character '*') (default *)\n");
log_info( "\tpid<num>\t\tIndicates platform at index <num> should be used (default 0).\n" );
log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );
for( i = 0; i < num_fns - 1; i++ )
{
log_info( "\t\t%s\n", fnNames[ i ] );
}
test_finish();
return 0;
}
/* How are we supposed to seed the random # generators? */
if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 )
{
gRandomSeed = (cl_uint) time( NULL );
log_info( "Random seed: %u.\n", gRandomSeed );
gReSeed = 1;
argc--;
}
else
{
log_info(" Initializing random seed to 0.\n");
}
/* Do we have an integer to specify the number of elements to pass to tests? */
if( argc > 1 )
{
ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 );
if( endPtr != argv[ argc - 1 ] && *endPtr == 0 )
{
/* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */
/* (hence why we stored the result in ret first) */
num_elements = ret;
log_info( "Testing with num_elements of %d\n", num_elements );
argc--;
}
}
/* Do we have a CPU/GPU specification? */
if( argc > 1 )
{
if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 )
{
device_type = CL_DEVICE_TYPE_GPU;
argc--;
}
else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 )
{
device_type = CL_DEVICE_TYPE_CPU;
argc--;
}
else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
{
device_type = CL_DEVICE_TYPE_ACCELERATOR;
argc--;
}
else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
{
device_type = CL_DEVICE_TYPE_DEFAULT;
argc--;
}
}
/* Did we choose a specific device index? */
if( argc > 1 )
{
if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' )
{
choosen_device_index = atoi( &(argv[ argc - 1 ][2]) );
argc--;
}
}
/* Did we choose a specific platform index? */
if( argc > 1 )
{
if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd')
{
choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) );
argc--;
}
}
switch( device_type )
{
case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break;
case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break;
case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break;
case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break;
default: log_error( "Requesting unknown device "); return -1;
}
log_info( based_on_env_var ? "based on environment variable " : "based on command line " );
log_info( "for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index);
#if defined( __APPLE__ )
#if defined( __i386__ ) || defined( __x86_64__ )
#define kHasSSE3 0x00000008
#define kHasSupplementalSSE3 0x00000100
#define kHasSSE4_1 0x00000400
#define kHasSSE4_2 0x00000800
/* check our environment for a hint to disable SSE variants */
{
const char *env = getenv( "CL_MAX_SSE" );
if( env )
{
extern int _cpu_capabilities;
int mask = 0;
if( 0 == strcasecmp( env, "SSE4.1" ) )
mask = kHasSSE4_2;
else if( 0 == strcasecmp( env, "SSSE3" ) )
mask = kHasSSE4_2 | kHasSSE4_1;
else if( 0 == strcasecmp( env, "SSE3" ) )
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
else if( 0 == strcasecmp( env, "SSE2" ) )
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
else
{
log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env );
return -2;
}
log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
_cpu_capabilities &= ~mask;
}
}
#endif
#endif
/* Get the platform */
err = clGetPlatformIDs(0, NULL, &num_platforms);
if (err) {
print_error(err, "clGetPlatformIDs failed");
test_finish();
return -1;
}
platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) );
if (!platforms || choosen_platform_index >= num_platforms) {
log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms );
test_finish();
return -1;
}
BufferOwningPtr<cl_platform_id> platformsBuf(platforms);
err = clGetPlatformIDs(num_platforms, platforms, NULL);
if (err) {
print_error(err, "clGetPlatformIDs failed");
test_finish();
return -1;
}
/* Get the number of requested devices */
err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, 0, NULL, &num_devices );
if (err) {
print_error(err, "clGetDeviceIDs failed");
test_finish();
return -1;
}
devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
if (!devices || choosen_device_index >= num_devices) {
log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices );
test_finish();
return -1;
}
BufferOwningPtr<cl_device_id> devicesBuf(devices);
/* Get the requested device */
err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, num_devices, devices, NULL );
if (err) {
print_error(err, "clGetDeviceIDs failed");
test_finish();
return -1;
}
device = devices[choosen_device_index];
if( printDeviceHeader( device ) != CL_SUCCESS )
{
test_finish();
return -1;
}
cl_device_fp_config fpconfig = 0;
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL );
if (err) {
print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
test_finish();
return -1;
}
gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM));
log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? "NO" : "YES" );
log_info( "sizeof( void*) = %d (host)\n", (int) sizeof( void* ) );
//detect whether profile of the device is embedded
char profile[1024] = "";
err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
if (err)
{
print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
test_finish();
return -1;
}
gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
//detect the floating point capabilities
cl_device_fp_config floatCapabilities = 0;
err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL);
if (err)
{
print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
test_finish();
return -1;
}
// Check for problems that only embedded will have
if( gIsEmbedded )
{
//If the device is embedded, we need to detect if the device supports Infinity and NaN
if ((floatCapabilities & CL_FP_INF_NAN) == 0)
gInfNanSupport = 0;
// check the extensions list to see if ulong and long are supported
size_t extensionsStringSize = 0;
if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, 0, NULL, &extensionsStringSize ) ))
{
print_error( err, "Unable to get extensions string size for embedded device" );
test_finish();
return -1;
}
char *extensions_string = (char*) malloc(extensionsStringSize);
if( NULL == extensions_string )
{
print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for extensions string for embedded device" );
test_finish();
return -1;
}
BufferOwningPtr<char> extensions_stringBuf(extensions_string);
if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, extensionsStringSize, extensions_string, NULL ) ))
{
print_error( err, "Unable to get extensions string for embedded device" );
test_finish();
return -1;
}
if( extensions_string[extensionsStringSize-1] != '\0' )
{
log_error( "FAILURE: extensions string for embedded device is not NUL terminated" );
test_finish();
return -1;
}
if( NULL == strstr( extensions_string, "cles_khr_int64" ))
gHasLong = 0;
}
if( getenv( "OPENCL_1_0_DEVICE" ) )
{
char c_version[1024];
gIsOpenCL_1_0_Device = 1;
memset( c_version, 0, sizeof( c_version ) );
if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) )
{
log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err );
test_finish();
return -1;
}
if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) )
{
gIsOpenCL_C_1_0_Device = 1;
log_info( "Device is a OpenCL C 1.0 device\n" );
}
else
log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" );
}
cl_uint device_address_bits = 0;
if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) ))
{
print_error( err, "Unable to obtain device address bits" );
test_finish();
return -1;
}
if( device_address_bits )
log_info( "sizeof( void*) = %d (device)\n", device_address_bits/8 );
else
{
log_error("Invalid device address bit size returned by device.\n");
test_finish();
return -1;
}
/* If we have a device checking function, run it */
if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS )
{
test_finish();
return -1;
}
if (num_elements <= 0)
num_elements = DEFAULT_NUM_ELEMENTS;
// On most platforms which support denorm, default is FTZ off. However,
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
// This creates issues in result verification. Since spec allows the implementation to either flush or
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
// where reference is being computed to make sure we get non-flushed reference result. If implementation
// returns flushed result, we correctly take care of that in verification code.
#if defined(__APPLE__) && defined(__arm__)
FPU_mode_type oldMode;
DisableFTZ( &oldMode );
#endif
int error = parseAndCallCommandLineTests( argc, argv, device, num_fns, fnList, fnNames, forceNoContextCreation, queueProps, num_elements );
#if defined(__APPLE__) && defined(__arm__)
// Restore the old FP mode before leaving.
RestoreFPState( &oldMode );
#endif
return error;
}
static int find_wildcard_matching_functions( const char *fnNames[], unsigned char fnsToCall[], unsigned int num_fns,
const char *wildcard )
{
int found_tests = 0;
size_t wildcard_length = strlen( wildcard ) - 1; /* -1 for the asterisk */
for( unsigned int fnIndex = 0; fnIndex < num_fns; fnIndex++ )
{
if( strncmp( fnNames[ fnIndex ], wildcard, wildcard_length ) == 0 )
{
if( fnsToCall[ fnIndex ] )
{
log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ fnIndex ] );
return EXIT_FAILURE;
}
fnsToCall[ fnIndex ] = 1;
found_tests = 1;
}
}
if( !found_tests )
{
log_error( "ERROR: The wildcard '%s' did not match any test names.\n", wildcard );
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
static int find_argument_matching_function( const char *fnNames[], unsigned char *fnsToCall, unsigned int num_fns,
const char *argument )
{
unsigned int fnIndex;
for( fnIndex = 0; fnIndex < num_fns; fnIndex++ )
{
if( strcmp( argument, fnNames[ fnIndex ] ) == 0 )
{
if( fnsToCall[ fnIndex ] )
{
log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ fnIndex ] );
return EXIT_FAILURE;
}
else
{
fnsToCall[ fnIndex ] = 1;
break;
}
}
}
if( fnIndex == num_fns )
{
log_error( "ERROR: The argument '%s' did not match any test names.\n", argument );
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
basefn fnList[], const char *fnNames[], int forceNoContextCreation,
cl_command_queue_properties queueProps, int num_elements )
{
int ret = EXIT_SUCCESS;
unsigned char *fnsToCall = ( unsigned char* ) calloc( num_fns, 1 );
if( argc == 1 )
{
/* No actual arguments, all tests will be run. */
memset( fnsToCall, 1, num_fns );
}
else
{
for( int argIndex = 1; argIndex < argc; argIndex++ )
{
if( strchr( argv[ argIndex ], '*' ) != NULL )
{
ret = find_wildcard_matching_functions( fnNames, fnsToCall, num_fns, argv[ argIndex ] );
}
else
{
if( strcmp( argv[ argIndex ], "all" ) == 0 )
{
memset( fnsToCall, 1, num_fns );
break;
}
else
{
ret = find_argument_matching_function( fnNames, fnsToCall, num_fns, argv[ argIndex ] );
}
}
if( ret == EXIT_FAILURE )
{
break;
}
}
}
if( ret == EXIT_SUCCESS )
{
ret = callTestFunctions( fnList, fnNames, fnsToCall, num_fns, device, forceNoContextCreation, num_elements, queueProps );
if( gTestsFailed == 0 )
{
if( gTestsPassed > 1 )
{
log_info("PASSED %d of %d tests.\n", gTestsPassed, gTestsPassed);
}
else if( gTestsPassed > 0 )
{
log_info("PASSED test.\n");
}
}
else if( gTestsFailed > 0 )
{
if( gTestsFailed+gTestsPassed > 1 )
{
log_error("FAILED %d of %d tests.\n", gTestsFailed, gTestsFailed+gTestsPassed);
}
else
{
log_error("FAILED test.\n");
}
}
}
test_finish();
free( fnsToCall );
return ret;
}
int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[],
int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation,
int numElementsToUse, cl_command_queue_properties queueProps )
{
int numErrors = 0;
for( int i = 0; i < numFunctions; ++i )
{
if( functionsToCall[ i ] )
{
/* Skip any unimplemented tests. */
if( functionList[ i ] != NULL )
{
numErrors += callSingleTestFunction( functionList[ i ], functionNames[ i ], deviceToUse,
forceNoContextCreation, numElementsToUse, queueProps );
}
else
{
log_info( "%s test currently not implemented\n", functionNames[ i ] );
}
}
}
return numErrors;
}
void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
log_info( "%s\n", errinfo );
}
// Actual function execution
int callSingleTestFunction( basefn functionToCall, const char *functionName,
cl_device_id deviceToUse, int forceNoContextCreation,
int numElementsToUse, const cl_queue_properties queueProps )
{
int numErrors = 0, ret;
cl_int error;
cl_context context = NULL;
cl_command_queue queue = NULL;
const cl_command_queue_properties cmd_queueProps = (queueProps)?CL_QUEUE_PROPERTIES:0;
cl_command_queue_properties queueCreateProps[] = {cmd_queueProps, queueProps, 0};
/* Create a context to work with, unless we're told not to */
if( !forceNoContextCreation )
{
context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error );
if (!context)
{
print_error( error, "Unable to create testing context" );
return 1;
}
queue = clCreateCommandQueueWithProperties( context, deviceToUse, &queueCreateProps[0], &error );
if( queue == NULL )
{
print_error( error, "Unable to create testing command queue" );
return 1;
}
}
/* Run the test and print the result */
log_info( "%s...\n", functionName );
fflush( stdout );
error = check_opencl_version_with_testname(functionName, deviceToUse);
test_missing_feature(error, functionName);
error = check_functions_for_offline_compiler(functionName, deviceToUse);
test_missing_support_offline_cmpiler(error, functionName);
ret = functionToCall( deviceToUse, context, queue, numElementsToUse); //test_threaded_function( ptr_basefn_list[i], group, context, num_elements);
if( ret == TEST_NOT_IMPLEMENTED )
{
/* Tests can also let us know they're not implemented yet */
log_info("%s test currently not implemented\n\n", functionName);
}
else
{
/* Print result */
if( ret == 0 ) {
log_info( "%s passed\n", functionName );
gTestsPassed++;
}
else
{
numErrors++;
log_error( "%s FAILED\n", functionName );
gTestsFailed++;
}
}
/* Release the context */
if( !forceNoContextCreation )
{
int error = clFinish(queue);
if (error) {
log_error("clFinish failed: %d", error);
numErrors++;
}
clReleaseCommandQueue( queue );
clReleaseContext( context );
}
return numErrors;
}
void checkDeviceTypeOverride( cl_device_type *inOutType )
{
/* Check if we are forced to CPU mode */
char *force_cpu = getenv( "CL_DEVICE_TYPE" );
if( force_cpu != NULL )
{
if( strcmp( force_cpu, "gpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_GPU" ) == 0 )
*inOutType = CL_DEVICE_TYPE_GPU;
else if( strcmp( force_cpu, "cpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_CPU" ) == 0 )
*inOutType = CL_DEVICE_TYPE_CPU;
else if( strcmp( force_cpu, "accelerator" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
*inOutType = CL_DEVICE_TYPE_ACCELERATOR;
else if( strcmp( force_cpu, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
*inOutType = CL_DEVICE_TYPE_DEFAULT;
}
switch( *inOutType )
{
case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break;
case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break;
case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break;
case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break;
default: break;
}
log_info( force_cpu != NULL ? "based on environment variable\n" : "based on command line\n" );
#if defined( __APPLE__ )
{
// report on any unusual library search path indirection
char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
if( libSearchPath )
log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
// report on any unusual framework search path indirection
char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
if( libSearchPath )
log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
}
#endif
}
#if ! defined( __APPLE__ )
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
{
uint32_t pat = ((uint32_t*) src_pattern)[0];
size_t count = bytes / 4;
size_t i;
uint32_t *d = (uint32_t*)dest;
for( i = 0; i < count; i++ )
d[i] = pat;
d += i;
bytes &= 3;
if( bytes )
memcpy( d, src_pattern, bytes );
}
#endif
extern cl_device_type GetDeviceType( cl_device_id d )
{
cl_device_type result = -1;
cl_int err = clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( result ), &result, NULL );
if( CL_SUCCESS != err )
log_error( "ERROR: Unable to get device type for device %p\n", d );
return result;
}
cl_device_id GetOpposingDevice( cl_device_id device )
{
cl_int error;
cl_device_id *otherDevices;
cl_uint actualCount;
cl_platform_id plat;
// Get the platform of the device to use for getting a list of devices
error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL );
if( error != CL_SUCCESS )
{
print_error( error, "Unable to get device's platform" );
return NULL;
}
// Get a list of all devices
error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount );
if( error != CL_SUCCESS )
{
print_error( error, "Unable to get list of devices size" );
return NULL;
}
otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id));
if (NULL == otherDevices) {
print_error( error, "Unable to allocate list of other devices." );
return NULL;
}
BufferOwningPtr<cl_device_id> otherDevicesBuf(otherDevices);
error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL );
if( error != CL_SUCCESS )
{
print_error( error, "Unable to get list of devices" );
return NULL;
}
if( actualCount == 1 )
{
return device; // NULL means error, returning self means we couldn't find another one
}
// Loop and just find one that isn't the one we were given
cl_uint i;
for( i = 0; i < actualCount; i++ )
{
if( otherDevices[ i ] != device )
{
cl_device_type newType;
error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), &newType, NULL );
if( error != CL_SUCCESS )
{
print_error( error, "Unable to get device type for other device" );
return NULL;
}
cl_device_id result = otherDevices[ i ];
return result;
}
}
// Should never get here
return NULL;
}

View File

@@ -0,0 +1,102 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _testHarness_h
#define _testHarness_h
#include "threadTesting.h"
#include "clImageHelper.h"
#include <string>
#ifdef __cplusplus
extern "C" {
#endif
extern cl_uint gReSeed;
extern cl_uint gRandomSeed;
// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
// setup work, and then call each function in turn as dictatated by the passed arguments.
extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
basefn fnList[], const char *fnNames[],
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits.
typedef int (*DeviceCheckFn)( cl_device_id device );
// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
basefn fnList[], const char *fnNames[],
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn );
// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
basefn *fnList, const char *fnNames[],
int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements );
// Call this function if you need to do all the setup work yourself, and just need the function list called/
// managed.
// functionList is the actual array of functions
// functionNames is an array of strings representing the name of each function
// functionsToCall is an array of integers (treated as bools) which tell which function is to be called,
// each element at index i, corresponds to the element in functionList at index i
// numFunctions is the number of elements in the arrays
// contextProps are used to create a testing context for each test
// deviceToUse and numElementsToUse are all just passed to each test function
extern int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[],
int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation,
int numElementsToUse, cl_command_queue_properties queueProps );
// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
extern int callSingleTestFunction( basefn functionToCall, const char *functionName,
cl_device_id deviceToUse, int forceNoContextCreation,
int numElementsToUse, cl_command_queue_properties queueProps );
///// Miscellaneous steps
// Given a pre-existing device type choice, check the environment for an override, then print what
// choice was made and how (and return the overridden choice, if there is one)
extern void checkDeviceTypeOverride( cl_device_type *inOutType );
// standard callback function for context pfn_notify
extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data);
extern cl_device_type GetDeviceType( cl_device_id );
// Given a device (most likely passed in by the harness, but not required), will attempt to find
// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against.
// Note that returning NULL means an error was hit, but if no error was hit and the device passed in
// is the only device available, the SAME device is returned, so check!
extern cl_device_id GetOpposingDevice( cl_device_id device );
extern int gFlushDenormsToZero; // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
extern int gInfNanSupport; // This is set to 1 if the device supports infinities and NaNs
extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
extern int gHasLong; // This is set to 1 if the device suppots long and ulong types in OpenCL C.
extern int gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0.
#if ! defined( __APPLE__ )
void memset_pattern4(void *, const void *, size_t);
#endif
#ifdef __cplusplus
}
#endif
#endif // _testHarness_h

View File

@@ -0,0 +1,51 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "mt19937.h"
#include <stdio.h>
int main( void )
{
MTdata d = init_genrand(42);
int i;
const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4,
0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6,
0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 };
int errcount = 0;
for( i = 0; i < 65536; i++ )
{
cl_uint u = genrand_int32( d );
if( 0 == (i & 4095) )
{
if( u != reference[i>>12] )
{
printf("ERROR: expected *0x%8.8x at %d. Got 0x%8.8x\n", reference[i>>12], i, u );
errcount++;
}
}
}
free_mtdata(d);
if( errcount )
printf("mt19937 test failed.\n");
else
printf("mt19937 test passed.\n");
return 0;
}

View File

@@ -0,0 +1,100 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "compat.h"
#include "threadTesting.h"
#include "errorHelpers.h"
#include <stdio.h>
#include <string.h>
#if !defined(_WIN32)
#include <pthread.h>
#endif
#if 0 // Disabed for now
typedef struct
{
basefn mFunction;
cl_device_id mDevice;
cl_context mContext;
int mNumElements;
} TestFnArgs;
////////////////////////////////////////////////////////////////////////////////
// Thread-based testing. Spawns a new thread to run the given test function,
// then waits for it to complete. The entire idea is that, if the thread crashes,
// we can catch it and report it as a failure instead of crashing the entire suite
////////////////////////////////////////////////////////////////////////////////
void *test_thread_wrapper( void *data )
{
TestFnArgs *args;
int retVal;
cl_context context;
args = (TestFnArgs *)data;
/* Create a new context to use (contexts can't cross threads) */
context = clCreateContext(NULL, args->mDeviceGroup);
if( context == NULL )
{
log_error("clCreateContext failed for new thread\n");
return (void *)(-1);
}
/* Call function */
retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
clReleaseContext( context );
return (void *)retVal;
}
int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
int error;
pthread_t threadHdl;
void *retVal;
TestFnArgs args;
args.mFunction = fnToTest;
args.mDeviceGroup = deviceGroup;
args.mDevice = device;
args.mContext = context;
args.mNumElements = numElements;
error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
if( error != 0 )
{
log_error( "ERROR: Unable to create thread for testing!\n" );
return -1;
}
/* Thread has been started, now just wait for it to complete (or crash) */
error = pthread_join( threadHdl, &retVal );
if( error != 0 )
{
log_error( "ERROR: Unable to join testing thread!\n" );
return -1;
}
return (int)((intptr_t)retVal);
}
#endif

View File

@@ -0,0 +1,32 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _threadTesting_h
#define _threadTesting_h
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
#define TEST_NOT_IMPLEMENTED -99
typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
#endif // _threadTesting_h

View File

@@ -0,0 +1,481 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "typeWrappers.h"
#include "kernelHelpers.h"
#include "errorHelpers.h"
#include <stdlib.h>
#include "clImageHelper.h"
#define ROUND_SIZE_UP( _size, _align ) (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align)))
#if defined( __APPLE__ )
#define kPageSize 4096
#include <sys/mman.h>
#include <stdlib.h>
#elif defined(__linux__)
#include <unistd.h>
#define kPageSize (getpagesize())
#endif
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
{
cl_int err = Create( context, mem_flags, fmt, width );
if( errcode_ret != NULL )
*errcode_ret = err;
}
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
{
cl_int error;
#if defined( __APPLE__ )
int protect_pages = 1;
cl_device_id devices[16];
size_t number_of_devices;
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
number_of_devices /= sizeof(cl_device_id);
for (int i=0; i<(int)number_of_devices; i++) {
cl_device_type type;
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
if (type == CL_DEVICE_TYPE_GPU) {
protect_pages = 0;
break;
}
}
if (protect_pages) {
size_t pixelBytes = get_pixel_bytes(fmt);
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
size_t rowStride = rowBytes + kPageSize;
// create backing store
backingStoreSize = rowStride + 8 * rowStride;
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
// add guard pages
size_t row;
char *p = (char*) backingStore;
char *imagePtr = (char*) backingStore + 4 * rowStride;
for( row = 0; row < 4; row++ )
{
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
}
p += rowBytes;
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
p -= rowBytes;
for( row = 0; row < 4; row++ )
{
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
}
if( getenv( "CL_ALIGN_RIGHT" ) )
{
static int spewEnv = 1;
if(spewEnv)
{
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
spewEnv = 0;
}
imagePtr += rowBytes - pixelBytes * width;
}
image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
} else {
backingStore = NULL;
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
}
#else
backingStore = NULL;
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
#endif
return error;
}
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
{
cl_int err = Create( context, mem_flags, fmt, width, height );
if( errcode_ret != NULL )
*errcode_ret = err;
}
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
{
cl_int error;
#if defined( __APPLE__ )
int protect_pages = 1;
cl_device_id devices[16];
size_t number_of_devices;
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
number_of_devices /= sizeof(cl_device_id);
for (int i=0; i<(int)number_of_devices; i++) {
cl_device_type type;
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
if (type == CL_DEVICE_TYPE_GPU) {
protect_pages = 0;
break;
}
}
if (protect_pages) {
size_t pixelBytes = get_pixel_bytes(fmt);
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
size_t rowStride = rowBytes + kPageSize;
// create backing store
backingStoreSize = height * rowStride + 8 * rowStride;
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
// add guard pages
size_t row;
char *p = (char*) backingStore;
char *imagePtr = (char*) backingStore + 4 * rowStride;
for( row = 0; row < 4; row++ )
{
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
}
p += rowBytes;
for( row = 0; row < height; row++ )
{
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
}
p -= rowBytes;
for( row = 0; row < 4; row++ )
{
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
}
if( getenv( "CL_ALIGN_RIGHT" ) )
{
static int spewEnv = 1;
if(spewEnv)
{
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
spewEnv = 0;
}
imagePtr += rowBytes - pixelBytes * width;
}
image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
} else {
backingStore = NULL;
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
}
#else
backingStore = NULL;
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
#endif
return error;
}
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
{
cl_int err = Create( context, mem_flags, fmt, width, height, depth );
if( errcode_ret != NULL )
*errcode_ret = err;
}
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
{
cl_int error;
#if defined( __APPLE__ )
int protect_pages = 1;
cl_device_id devices[16];
size_t number_of_devices;
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
number_of_devices /= sizeof(cl_device_id);
for (int i=0; i<(int)number_of_devices; i++) {
cl_device_type type;
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
if (type == CL_DEVICE_TYPE_GPU) {
protect_pages = 0;
break;
}
}
if (protect_pages) {
size_t pixelBytes = get_pixel_bytes(fmt);
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
size_t rowStride = rowBytes + kPageSize;
// create backing store
backingStoreSize = height * depth * rowStride + 8 * rowStride;
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
// add guard pages
size_t row;
char *p = (char*) backingStore;
char *imagePtr = (char*) backingStore + 4 * rowStride;
for( row = 0; row < 4; row++ )
{
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
}
p += rowBytes;
for( row = 0; row < height*depth; row++ )
{
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
}
p -= rowBytes;
for( row = 0; row < 4; row++ )
{
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
}
if( getenv( "CL_ALIGN_RIGHT" ) )
{
static int spewEnv = 1;
if(spewEnv)
{
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
spewEnv = 0;
}
imagePtr += rowBytes - pixelBytes * width;
}
image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
} else {
backingStore = NULL;
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
}
#else
backingStore = NULL;
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
#endif
return error;
}
clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
{
cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
if( errcode_ret != NULL )
*errcode_ret = err;
}
cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
{
cl_int error;
#if defined( __APPLE__ )
int protect_pages = 1;
cl_device_id devices[16];
size_t number_of_devices;
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
number_of_devices /= sizeof(cl_device_id);
for (int i=0; i<(int)number_of_devices; i++) {
cl_device_type type;
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
if (type == CL_DEVICE_TYPE_GPU) {
protect_pages = 0;
break;
}
}
if (protect_pages) {
size_t pixelBytes = get_pixel_bytes(fmt);
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
size_t rowStride = rowBytes + kPageSize;
// create backing store
switch (imageType)
{
case CL_MEM_OBJECT_IMAGE1D:
backingStoreSize = rowStride + 8 * rowStride;
break;
case CL_MEM_OBJECT_IMAGE2D:
backingStoreSize = height * rowStride + 8 * rowStride;
break;
case CL_MEM_OBJECT_IMAGE3D:
backingStoreSize = height * depth * rowStride + 8 * rowStride;
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
backingStoreSize = arraySize * rowStride + 8 * rowStride;
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
backingStoreSize = height * arraySize * rowStride + 8 * rowStride;
break;
}
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
// add guard pages
size_t row;
char *p = (char*) backingStore;
char *imagePtr = (char*) backingStore + 4 * rowStride;
for( row = 0; row < 4; row++ )
{
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
}
p += rowBytes;
size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1);
for( row = 0; row < sz; row++ )
{
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
}
p -= rowBytes;
for( row = 0; row < 4; row++ )
{
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
}
if( getenv( "CL_ALIGN_RIGHT" ) )
{
static int spewEnv = 1;
if(spewEnv)
{
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
spewEnv = 0;
}
imagePtr += rowBytes - pixelBytes * width;
}
switch (imageType)
{
case CL_MEM_OBJECT_IMAGE1D:
image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE2D:
image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
break;
case CL_MEM_OBJECT_IMAGE3D:
image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error );
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error );
break;
}
} else {
backingStore = NULL;
switch (imageType)
{
case CL_MEM_OBJECT_IMAGE1D:
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE2D:
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE3D:
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
break;
}
}
#else
backingStore = NULL;
switch (imageType)
{
case CL_MEM_OBJECT_IMAGE1D:
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE2D:
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE3D:
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
break;
}
#endif
return error;
}
/*******
* clProtectedArray implementation
*******/
clProtectedArray::clProtectedArray()
{
mBuffer = mValidBuffer = NULL;
}
clProtectedArray::clProtectedArray( size_t sizeInBytes )
{
mBuffer = mValidBuffer = NULL;
Allocate( sizeInBytes );
}
clProtectedArray::~clProtectedArray()
{
if( mBuffer != NULL ) {
#if defined( __APPLE__ )
int error = munmap( mBuffer, mRealSize );
if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
#else
free( mBuffer );
#endif
}
}
void clProtectedArray::Allocate( size_t sizeInBytes )
{
#if defined( __APPLE__ )
// Allocate enough space to: round up our actual allocation to an even number of pages
// and allocate two pages on either side
mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
mRealSize = mRoundedSize + kPageSize * 2;
// Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
mValidBuffer = mBuffer + kPageSize;
// Protect guard area from access
mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
#else
mRoundedSize = mRealSize = sizeInBytes;
mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
#endif
}

View File

@@ -0,0 +1,332 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _typeWrappers_h
#define _typeWrappers_h
#include <stdio.h>
#include <stdlib.h>
#if !defined(_WIN32)
#include <sys/mman.h>
#endif
#include "compat.h"
#include <stdio.h>
#include "mt19937.h"
#include "errorHelpers.h"
#include "kernelHelpers.h"
extern "C" cl_uint gReSeed;
extern "C" cl_uint gRandomSeed;
/* cl_context wrapper */
class clContextWrapper
{
public:
clContextWrapper() { mContext = NULL; }
clContextWrapper( cl_context program ) { mContext = program; }
~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
operator cl_context() const { return mContext; }
cl_context * operator&() { return &mContext; }
bool operator==( const cl_context &rhs ) { return mContext == rhs; }
protected:
cl_context mContext;
};
/* cl_program wrapper */
class clProgramWrapper
{
public:
clProgramWrapper() { mProgram = NULL; }
clProgramWrapper( cl_program program ) { mProgram = program; }
~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
operator cl_program() const { return mProgram; }
cl_program * operator&() { return &mProgram; }
bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
protected:
cl_program mProgram;
};
/* cl_kernel wrapper */
class clKernelWrapper
{
public:
clKernelWrapper() { mKernel = NULL; }
clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
operator cl_kernel() const { return mKernel; }
cl_kernel * operator&() { return &mKernel; }
bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
protected:
cl_kernel mKernel;
};
/* cl_mem (stream) wrapper */
class clMemWrapper
{
public:
clMemWrapper() { mMem = NULL; }
clMemWrapper( cl_mem mem ) { mMem = mem; }
~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
operator cl_mem() const { return mMem; }
cl_mem * operator&() { return &mMem; }
bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
protected:
cl_mem mMem;
};
class clProtectedImage
{
public:
clProtectedImage() { image = NULL; backingStore = NULL; }
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
~clProtectedImage()
{
if( image != NULL )
clReleaseMemObject( image );
#if defined( __APPLE__ )
if(backingStore)
munmap(backingStore, backingStoreSize);
#endif
}
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
operator cl_mem() { return image; }
cl_mem * operator&() { return &image; }
bool operator==( const cl_mem &rhs ) { return image == rhs; }
protected:
void *backingStore;
size_t backingStoreSize;
cl_mem image;
};
/* cl_command_queue wrapper */
class clCommandQueueWrapper
{
public:
clCommandQueueWrapper() { mMem = NULL; }
clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
~clCommandQueueWrapper() { if( mMem != NULL ) { clReleaseCommandQueue( mMem ); } }
clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
operator cl_command_queue() const { return mMem; }
cl_command_queue * operator&() { return &mMem; }
bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
protected:
cl_command_queue mMem;
};
/* cl_sampler wrapper */
class clSamplerWrapper
{
public:
clSamplerWrapper() { mMem = NULL; }
clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
operator cl_sampler() const { return mMem; }
cl_sampler * operator&() { return &mMem; }
bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
protected:
cl_sampler mMem;
};
/* cl_event wrapper */
class clEventWrapper
{
public:
clEventWrapper() { mMem = NULL; }
clEventWrapper( cl_event mem ) { mMem = mem; }
~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
operator cl_event() const { return mMem; }
cl_event * operator&() { return &mMem; }
bool operator==( const cl_event &rhs ) { return mMem == rhs; }
protected:
cl_event mMem;
};
/* Generic protected memory buffer, for verifying access within bounds */
class clProtectedArray
{
public:
clProtectedArray();
clProtectedArray( size_t sizeInBytes );
virtual ~clProtectedArray();
void Allocate( size_t sizeInBytes );
operator void *() { return (void *)mValidBuffer; }
operator const void *() const { return (const void *)mValidBuffer; }
protected:
char * mBuffer;
char * mValidBuffer;
size_t mRealSize, mRoundedSize;
};
class RandomSeed
{
public:
RandomSeed( cl_uint seed ){ if(seed) log_info( "(seed = %10.10u) ", seed ); mtData = init_genrand(seed); }
~RandomSeed()
{
if( gReSeed )
gRandomSeed = genrand_int32( mtData );
free_mtdata(mtData);
}
operator MTdata () {return mtData;}
protected:
MTdata mtData;
};
template <typename T> class BufferOwningPtr
{
BufferOwningPtr(BufferOwningPtr const &); // do not implement
void operator=(BufferOwningPtr const &); // do not implement
void *ptr;
void *map;
size_t mapsize; // Bytes allocated total, pointed to by map.
size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr.
bool aligned;
public:
explicit BufferOwningPtr(void *p = 0) : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false) {}
explicit BufferOwningPtr(void *p, void *m, size_t s)
: ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
{
#if ! defined( __APPLE__ )
if(m)
{
log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
abort();
}
#endif
}
~BufferOwningPtr() {
if (map) {
#if defined( __APPLE__ )
int error = munmap(map, mapsize);
if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
#endif
} else {
if ( aligned )
{
align_free(ptr);
}
else
{
free(ptr);
}
}
}
void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false) {
if (map){
#if defined( __APPLE__ )
int error = munmap(map, mapsize);
if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
#else
log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" );
abort();
#endif
} else {
if ( aligned )
{
align_free(ptr);
}
else
{
free(ptr);
}
}
ptr = p;
map = m;
mapsize = mapsize_;
allocsize = (ptr != NULL) ? allocsize_ : 0; // Force allocsize to zero if ptr is NULL.
aligned = aligned_;
#if ! defined( __APPLE__ )
if(m)
{
log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
abort();
}
#endif
}
operator T*() { return (T*)ptr; }
size_t getSize() const { return allocsize; };
};
#endif // _typeWrappers_h