diff --git a/test_common/harness/ThreadPool.c b/test_common/harness/ThreadPool.c index 16f31e74..c329452d 100644 --- a/test_common/harness/ThreadPool.c +++ b/test_common/harness/ThreadPool.c @@ -446,6 +446,7 @@ void ThreadPool_Init(void) // Check for manual override of multithreading code. We add this for better debuggability. if( getenv( "CL_TEST_SINGLE_THREADED" ) ) { + log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n"); gThreadCount = 1; return; } diff --git a/test_conformance/compatibility/test_common/harness/ThreadPool.c b/test_conformance/compatibility/test_common/harness/ThreadPool.c deleted file mode 100644 index a4598f2e..00000000 --- a/test_conformance/compatibility/test_common/harness/ThreadPool.c +++ /dev/null @@ -1,899 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "ThreadPool.h" -#include "errorHelpers.h" -#include "fpcontrol.h" -#include -#include - -#if defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 ) // or any other POSIX system - -#if defined( _WIN32 ) -#include -#if defined(_MSC_VER) -#include -#endif -#include "mingw_compat.h" -#include -#else // !_WIN32 -#include -#include -#include -#endif // !_WIN32 - -// declarations -#ifdef _WIN32 -void ThreadPool_WorkerFunc( void *p ); -#else -void *ThreadPool_WorkerFunc( void *p ); -#endif -void ThreadPool_Init(void); -void ThreadPool_Exit(void); - -#if defined (__MINGW32__) - // Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC - CRITICAL_SECTION gAtomicLock; -#elif defined( __GNUC__ ) || defined( _MSC_VER) -#else - pthread_mutex_t gAtomicLock; -#endif - -// Atomic add operator with mem barrier. Mem barrier needed to protect state modified by the worker functions. -cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ) -{ -#if defined (__MINGW32__) - // No atomics on Mingw32 - EnterCriticalSection(&gAtomicLock); - cl_int old = *a; - *a = old + b; - LeaveCriticalSection(&gAtomicLock); - return old; -#elif defined( __GNUC__ ) - // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins - return __sync_fetch_and_add( a, b ); - // do we need __sync_synchronize() here, too? GCC docs are unclear whether __sync_fetch_and_add does a synchronize -#elif defined( _MSC_VER ) - return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b ); -#else - #warning Please add a atomic add implementation here, with memory barrier. Fallback code is slow. - if( pthread_mutex_lock(&gAtomicLock) ) - log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n"); - cl_int old = *a; - *a = old + b; - if( pthread_mutex_unlock(&gAtomicLock) ) - log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n"); - return old; -#endif -} - -#if defined( _WIN32 ) -// Uncomment the following line if Windows XP support is not required. -// #define HAS_INIT_ONCE_EXECUTE_ONCE 1 - -#if defined(HAS_INIT_ONCE_EXECUTE_ONCE) -#define _INIT_ONCE INIT_ONCE -#define _PINIT_ONCE PINIT_ONCE -#define _InitOnceExecuteOnce InitOnceExecuteOnce -#else // !HAS_INIT_ONCE_EXECUTE_ONCE - -typedef volatile LONG _INIT_ONCE; -typedef _INIT_ONCE *_PINIT_ONCE; -typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *); - -#define _INIT_ONCE_UNINITIALIZED 0 -#define _INIT_ONCE_IN_PROGRESS 1 -#define _INIT_ONCE_DONE 2 - -static BOOL _InitOnceExecuteOnce( - _PINIT_ONCE InitOnce, - _PINIT_ONCE_FN InitFn, - PVOID Parameter, - LPVOID *Context -) -{ - while ( *InitOnce != _INIT_ONCE_DONE ) - { - if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED ) - { - InitFn( InitOnce, Parameter, Context ); - *InitOnce = _INIT_ONCE_DONE; - return TRUE; - } - Sleep( 1 ); - } - return TRUE; -} -#endif // !HAS_INIT_ONCE_EXECUTE_ONCE - -// Uncomment the following line if Windows XP support is not required. -// #define HAS_CONDITION_VARIABLE 1 - -#if defined(HAS_CONDITION_VARIABLE) -#define _CONDITION_VARIABLE CONDITION_VARIABLE -#define _InitializeConditionVariable InitializeConditionVariable -#define _SleepConditionVariableCS SleepConditionVariableCS -#define _WakeAllConditionVariable WakeAllConditionVariable -#else // !HAS_CONDITION_VARIABLE -typedef struct -{ - HANDLE mEvent; // Used to park the thread. - CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount. - volatile cl_int mWaiters; // Number of threads waiting on this cond var. - volatile cl_int mGeneration; // Wait generation count. - volatile cl_int mReleaseCount; // Number of releases to execute before reseting the event. -} _CONDITION_VARIABLE; - -typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE; - -static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var ) -{ - cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL ); - InitializeCriticalSection( cond_var->mLock ); - cond_var->mWaiters = 0; - cond_var->mGeneration = 0; -#if !defined ( NDEBUG ) - cond_var->mReleaseCount = 0; -#endif // !NDEBUG -} - -static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored) -{ - EnterCriticalSection( cond_var->mLock ); - cl_int generation = cond_var->mGeneration; - ++cond_var->mWaiters; - LeaveCriticalSection( cond_var->mLock ); - LeaveCriticalSection( cond_lock ); - - while ( TRUE ) - { - WaitForSingleObject( cond_var->mEvent, INFINITE ); - EnterCriticalSection( cond_var->mLock ); - BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation; - LeaveCriticalSection( cond_var->mLock ); - if ( done ) - { - break; - } - } - - EnterCriticalSection( cond_lock ); - EnterCriticalSection( cond_var->mLock ); - if ( --cond_var->mReleaseCount == 0 ) - { - ResetEvent( cond_var->mEvent ); - } - --cond_var->mWaiters; - LeaveCriticalSection( cond_var->mLock ); -} - -static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var ) -{ - EnterCriticalSection( cond_var->mLock ); - if (cond_var->mWaiters > 0 ) - { - ++cond_var->mGeneration; - cond_var->mReleaseCount = cond_var->mWaiters; - SetEvent( cond_var->mEvent ); - } - LeaveCriticalSection( cond_var->mLock ); -} -#endif // !HAS_CONDITION_VARIABLE -#endif // _WIN32 - -#define MAX_COUNT (1<<29) - -// Global state to coordinate whether the threads have been launched successfully or not -#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600) -static _INIT_ONCE threadpool_init_control; -#elif defined (_WIN32) // MingW of XP -static int threadpool_init_control; -#else // Posix platforms -pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT; -#endif -cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch - -// critical region lock around ThreadPool_Do. We can only run one ThreadPool_Do at a time, -// because we are too lazy to set up a queue here, and don't expect to need one. -#if defined( _WIN32 ) -CRITICAL_SECTION gThreadPoolLock[1]; -#else // !_WIN32 -pthread_mutex_t gThreadPoolLock; -#endif // !_WIN32 - -// Condition variable to park ThreadPool threads when not working -#if defined( _WIN32 ) -CRITICAL_SECTION cond_lock[1]; -_CONDITION_VARIABLE cond_var[1]; -#else // !_WIN32 -pthread_mutex_t cond_lock; -pthread_cond_t cond_var; -#endif // !_WIN32 -volatile cl_int gRunCount = 0; // Condition variable state. How many iterations on the function left to run. - // set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative. - -// State that only changes when the threadpool is not working. -volatile TPFuncPtr gFunc_ptr = NULL; -volatile void *gUserInfo = NULL; -volatile cl_int gJobCount = 0; - -// State that may change while the thread pool is working -volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole - -// Condition variable to park caller while waiting -#if defined( _WIN32 ) -HANDLE caller_event; -#else // !_WIN32 -pthread_mutex_t caller_cond_lock; -pthread_cond_t caller_cond_var; -#endif // !_WIN32 -volatile cl_int gRunning = 0; // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do. - -// The total number of threads launched. -volatile cl_int gThreadCount = 0; -#ifdef _WIN32 -void ThreadPool_WorkerFunc( void *p ) -#else -void *ThreadPool_WorkerFunc( void *p ) -#endif -{ - cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 ); - cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 ); -// log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning ); - - while( MAX_COUNT > item ) - { - cl_int err; - - // check for more work to do - if( 0 >= item ) - { -// log_info( "Thread %d has run out of work.\n", threadID ); - - // No work to do. Attempt to block waiting for work -#if defined( _WIN32 ) - EnterCriticalSection( cond_lock ); -#else // !_WIN32 - if((err = pthread_mutex_lock( &cond_lock) )) - { - log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID ); - goto exit; - } -#endif // !_WIN32 - - cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 ); -// log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 ); - if( 1 == remaining ) - { // last thread out signal the main thread to wake up -#if defined( _WIN32 ) - SetEvent( caller_event ); -#else // !_WIN32 - if((err = pthread_mutex_lock( &caller_cond_lock) )) - { - log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err ); - goto exit; - } - if( (err = pthread_cond_broadcast( &caller_cond_var ))) - { - log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err ); - goto exit; - } - if((err = pthread_mutex_unlock( &caller_cond_lock) )) - { - log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err ); - goto exit; - } -#endif // !_WIN32 - } - - // loop in case we are woken only to discover that some other thread already did all the work - while( 0 >= item ) - { -#if defined( _WIN32 ) - _SleepConditionVariableCS( cond_var, cond_lock, INFINITE ); -#else // !_WIN32 - if((err = pthread_cond_wait( &cond_var, &cond_lock) )) - { - log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err ); - pthread_mutex_unlock( &cond_lock); - goto exit; - } -#endif // !_WIN32 - - // try again to get a valid item id - item = ThreadPool_AtomicAdd( &gRunCount, -1 ); - if( MAX_COUNT <= item ) // exit if we are done - { -#if defined( _WIN32 ) - LeaveCriticalSection( cond_lock ); -#else // !_WIN32 - pthread_mutex_unlock( &cond_lock); -#endif // !_WIN32 - goto exit; - } - } - - ThreadPool_AtomicAdd( &gRunning, 1 ); -// log_info( "Thread %d has found work.\n", threadID); - -#if defined( _WIN32 ) - LeaveCriticalSection( cond_lock ); -#else // !_WIN32 - if((err = pthread_mutex_unlock( &cond_lock) )) - { - log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err ); - goto exit; - } -#endif // !_WIN32 - - } - - // we have a valid item, so do the work - if( CL_SUCCESS == jobError ) // but only if we haven't already encountered an error - { -// log_info( "Thread %d doing job %d\n", threadID, item - 1); - -#if defined(__APPLE__) && defined(__arm__) - // On most platforms which support denorm, default is FTZ off. However, - // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm. - // This creates issues in result verification. Since spec allows the implementation to either flush or - // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas - // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side - // where reference is being computed to make sure we get non-flushed reference result. If implementation - // returns flushed result, we correctly take care of that in verification code. - FPU_mode_type oldMode; - DisableFTZ( &oldMode ); -#endif - - // Call the user's function with this item ID - err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo ); -#if defined(__APPLE__) && defined(__arm__) - // Restore FP state - RestoreFPState( &oldMode ); -#endif - - if( err ) - { -#if (__MINGW32__) - EnterCriticalSection(&gAtomicLock); - if( jobError == CL_SUCCESS ); - jobError = err; - gRunCount = 0; - LeaveCriticalSection(&gAtomicLock); -#elif defined( __GNUC__ ) - // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins - // set the new error if we are the first one there. - __sync_val_compare_and_swap( &jobError, CL_SUCCESS, err ); - - // drop run count to 0 - gRunCount = 0; - __sync_synchronize(); -#elif defined( _MSC_VER ) - // set the new error if we are the first one there. - _InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS ); - - // drop run count to 0 - gRunCount = 0; - _mm_mfence(); -#else - if( pthread_mutex_lock(&gAtomicLock) ) - log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n"); - if( jobError == CL_SUCCESS ); - jobError = err; - gRunCount = 0; - if( pthread_mutex_unlock(&gAtomicLock) ) - log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n"); -#endif - } - } - - // get the next item - item = ThreadPool_AtomicAdd( &gRunCount, -1 ); - } - -exit: - log_info( "ThreadPool: thread %d exiting.\n", threadID ); - ThreadPool_AtomicAdd( &gThreadCount, -1 ); -#if !defined(_WIN32) - return NULL; -#endif -} - -// SetThreadCount() may be used to artifically set the number of worker threads -// If the value is 0 (the default) the number of threads will be determined based on -// the number of CPU cores. If it is a unicore machine, then 2 will be used, so -// that we still get some testing for thread safety. -// -// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the -// code will run single threaded, but will report an error to indicate that the test -// is invalid. This option is intended for debugging purposes only. It is suggested -// as a convention that test apps set the thread count to 1 in response to the -m flag. -// -// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(), -// otherwise the behavior is indefined. -void SetThreadCount( int count ) -{ - if( threadPoolInitErr == CL_SUCCESS ) - { - log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" ); - abort(); - } - - gThreadCount = count; -} - -void ThreadPool_Init(void) -{ - cl_int i; - int err; - volatile cl_uint threadID = 0; - - // Check for manual override of multithreading code. We add this for better debuggability. - if( getenv( "CL_TEST_SINGLE_THREADED" ) ) - { - log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n"); - gThreadCount = 1; - return; - } - - // Figure out how many threads to run -- check first for non-zero to give the implementation the chance - if( 0 == gThreadCount ) - { -#if defined(_MSC_VER) || defined (__MINGW64__) - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL; - DWORD length = 0; - - GetLogicalProcessorInformation( NULL, &length ); - buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length ); - if( buffer != NULL && GetLogicalProcessorInformation( buffer, &length ) == TRUE ) - { - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer; - while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] ) - { - if( ptr->Relationship == RelationProcessorCore ) - { - // Count the number of bits in ProcessorMask (number of logical cores) - ULONG mask = ptr->ProcessorMask; - while( mask ) - { - ++gThreadCount; - mask &= mask - 1; // Remove 1 bit at a time - } - } - ++ptr; - } - free(buffer); - } -#elif defined (__MINGW32__) - { - #warning How about this, instead of hard coding it to 2? - SYSTEM_INFO sysinfo; - GetSystemInfo( &sysinfo ); - gThreadCount = sysinfo.dwNumberOfProcessors; - } -#else // !_WIN32 - gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X -#endif // !_WIN32 - - // Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety - if( 1 == gThreadCount ) - gThreadCount = 2; - } - - //Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded. - if( gThreadCount < 2 ) - { - log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n"); - gThreadCount = 1; - return; - } - -#if defined( _WIN32 ) - InitializeCriticalSection( gThreadPoolLock ); - InitializeCriticalSection( cond_lock ); - _InitializeConditionVariable( cond_var ); - caller_event = CreateEvent( NULL, FALSE, FALSE, NULL ); -#elif defined (__GNUC__) - // Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since it might cause problem - // with some flavors of gcc compilers. - pthread_cond_init(&cond_var, NULL); - pthread_mutex_init(&cond_lock ,NULL); - pthread_cond_init(&caller_cond_var, NULL); - pthread_mutex_init(&caller_cond_lock, NULL); - pthread_mutex_init(&gThreadPoolLock, NULL); -#endif - -#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__)) - pthread_mutex_initialize(gAtomicLock); -#elif defined (__MINGW32__) - InitializeCriticalSection(&gAtomicLock); -#endif - // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait - // That would cause a deadlock. -#if !defined( _WIN32 ) - if((err = pthread_mutex_lock( &caller_cond_lock) )) - { - log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err ); - gThreadCount = 1; - return; - } -#endif // !_WIN32 - - gRunning = gThreadCount; - // init threads - for( i = 0; i < gThreadCount; i++ ) - { -#if defined( _WIN32 ) - uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID); - err = ( handle == 0 ); -#else // !_WIN32 - pthread_t tid = 0; - err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID ); -#endif // !_WIN32 - if( err ) - { - log_error( "Error %d launching thread %d\n", err, i ); - threadPoolInitErr = err; - gThreadCount = i; - break; - } - } - - atexit( ThreadPool_Exit ); - -// block until they are done launching. - do - { -#if defined( _WIN32 ) - WaitForSingleObject( caller_event, INFINITE ); -#else // !_WIN32 - if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) )) - { - log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err ); - pthread_mutex_unlock( &caller_cond_lock); - return; - } -#endif // !_WIN32 - } - while( gRunCount != -gThreadCount ); -#if !defined( _WIN32 ) - if((err = pthread_mutex_unlock( &caller_cond_lock) )) - { - log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err ); - return; - } -#endif // !_WIN32 - - threadPoolInitErr = CL_SUCCESS; -} - -#if defined(_MSC_VER) -static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex) -{ - ThreadPool_Init(); - return TRUE; -} -#endif - -void ThreadPool_Exit(void) -{ - int err, count; - gRunCount = CL_INT_MAX; - -#if defined( __GNUC__ ) - // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins - __sync_synchronize(); -#elif defined( _MSC_VER ) - _mm_mfence(); -#else - #warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed -#endif - - // spin waiting for threads to die - for (count = 0; 0 != gThreadCount && count < 1000; count++) - { -#if defined( _WIN32 ) - _WakeAllConditionVariable( cond_var ); - Sleep(1); -#else // !_WIN32 - if( (err = pthread_cond_broadcast( &cond_var ))) - { - log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err ); - break; - } - usleep(1000); -#endif // !_WIN32 - } - - if( gThreadCount ) - log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount ); - else - log_info( "Thread pool exited in a orderly fashion.\n" ); -} - - -// Blocking API that farms out count jobs to a thread pool. -// It may return with some work undone if func_ptr() returns a non-zero -// result. -// -// This function obviously has its shortcommings. Only one call to ThreadPool_Do -// can be running at a time. It is not intended for general purpose use. -// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were -// all available then it would make more sense to use those features. -cl_int ThreadPool_Do( TPFuncPtr func_ptr, - cl_uint count, - void *userInfo ) -{ - cl_int newErr; - cl_int err = 0; - // Lazily set up our threads -#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600) - err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL ); -#elif defined (_WIN32) - if (threadpool_init_control == 0) { - #warning This is buggy and race prone. Find a better way. - ThreadPool_Init(); - threadpool_init_control = 1; - } -#else //posix platform - err = pthread_once( &threadpool_init_control, ThreadPool_Init ); - if( err ) - { - log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err ); - return err; - } -#endif - // Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable - if( threadPoolInitErr ) - { - cl_uint currentJob = 0; - cl_int result = CL_SUCCESS; - -#if defined(__APPLE__) && defined(__arm__) - // On most platforms which support denorm, default is FTZ off. However, - // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm. - // This creates issues in result verification. Since spec allows the implementation to either flush or - // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas - // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side - // where reference is being computed to make sure we get non-flushed reference result. If implementation - // returns flushed result, we correctly take care of that in verification code. - FPU_mode_type oldMode; - DisableFTZ( &oldMode ); -#endif - for( currentJob = 0; currentJob < count; currentJob++ ) - if((result = func_ptr( currentJob, 0, userInfo ))) - { -#if defined(__APPLE__) && defined(__arm__) - // Restore FP state before leaving - RestoreFPState( &oldMode ); -#endif - return result; - } - -#if defined(__APPLE__) && defined(__arm__) - // Restore FP state before leaving - RestoreFPState( &oldMode ); -#endif - - return CL_SUCCESS; - } - - if( count >= MAX_COUNT ) - { - log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT ); - return -1; - } - - // Enter critical region -#if defined( _WIN32 ) - EnterCriticalSection( gThreadPoolLock ); -#else // !_WIN32 - if( (err = pthread_mutex_lock( &gThreadPoolLock ))) - { - switch (err) - { - case EDEADLK: - log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" ); - break; - case EINVAL: - log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" ); - break; - default: - break; - } - return err; - } -#endif // !_WIN32 - - // Start modifying the job state observable by worker threads -#if defined( _WIN32 ) - EnterCriticalSection( cond_lock ); -#else // !_WIN32 - if((err = pthread_mutex_lock( &cond_lock) )) - { - log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err ); - goto exit; - } -#endif // !_WIN32 - - // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait - // That would cause a deadlock. -#if !defined( _WIN32 ) - if((err = pthread_mutex_lock( &caller_cond_lock) )) - { - log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err ); - goto exit; - } -#endif // !_WIN32 - - // Prime the worker threads to get going - jobError = CL_SUCCESS; - gRunCount = gJobCount = count; - gFunc_ptr = func_ptr; - gUserInfo = userInfo; - -#if defined( _WIN32 ) - _WakeAllConditionVariable( cond_var ); - LeaveCriticalSection( cond_lock ); -#else // !_WIN32 - if( (err = pthread_cond_broadcast( &cond_var ))) - { - log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err ); - goto exit; - } - if((err = pthread_mutex_unlock( &cond_lock) )) - { - log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err ); - goto exit; - } -#endif // !_WIN32 - -// block until they are done. It would be slightly more efficient to do some of the work here though. - do - { -#if defined( _WIN32 ) - WaitForSingleObject( caller_event, INFINITE ); -#else // !_WIN32 - if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) )) - { - log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err ); - pthread_mutex_unlock( &caller_cond_lock); - goto exit; - } -#endif // !_WIN32 - } - while( gRunning ); -#if !defined(_WIN32) - if((err = pthread_mutex_unlock( &caller_cond_lock) )) - { - log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err ); - goto exit; - } -#endif // !_WIN32 - - err = jobError; - -exit: - // exit critical region -#if defined( _WIN32 ) - LeaveCriticalSection( gThreadPoolLock ); -#else // !_WIN32 - newErr = pthread_mutex_unlock( &gThreadPoolLock ); - if( newErr) - { - log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr ); - return err; - } -#endif // !_WIN32 - - return err; -} - -cl_uint GetThreadCount( void ) -{ - // Lazily set up our threads -#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600) - cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL ); -#elif defined (_WIN32) - if (threadpool_init_control == 0) { - #warning This is buggy and race prone. Find a better way. - ThreadPool_Init(); - threadpool_init_control = 1; - } -#else - cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init ); - if( err ) - { - log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err ); - return err; - } -#endif // !_WIN32 - - if( gThreadCount < 1 ) - return 1; - - return gThreadCount; -} - -#else - -#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS - #error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section. -#endif -// -// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements -// of OpenCL API, while also checking -// -// A sample single threaded implementation follows, for documentation / bootstrapping purposes. -// It is not okay to use this for conformance testing!!! -// -// Exception: If your operating system does not support multithreaded execution of any kind, then you may use this code. -// - -cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ) -{ - cl_uint r = *a; - - // since this fallback code path is not multithreaded, we just do a regular add here - // If your operating system supports memory-barrier-atomics, use those here - *a = r + b; - - return r; -} - -// Blocking API that farms out count jobs to a thread pool. -// It may return with some work undone if func_ptr() returns a non-zero -// result. -cl_int ThreadPool_Do( TPFuncPtr func_ptr, - cl_uint count, - void *userInfo ) -{ - cl_uint currentJob = 0; - cl_int result = CL_SUCCESS; - -#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS - // THIS FUNCTION IS NOT INTENDED FOR USE!! - log_error( "ERROR: Test must be multithreaded!\n" ); - exit(-1); -#else - static int spewCount = 0; - - if( 0 == spewCount ) - { - log_info( "\nWARNING: The operating system is claimed not to support threads of any sort. Running single threaded.\n" ); - spewCount = 1; - } -#endif - -// The multithreaded code should mimic this behavior: - for( currentJob = 0; currentJob < count; currentJob++ ) - if((result = func_ptr( currentJob, 0, userInfo ))) - return result; - - return CL_SUCCESS; -} - -cl_uint GetThreadCount( void ) -{ - return 1; -} - -void SetThreadCount( int count ) -{ - if( count > 1 ) - log_info( "WARNING: SetThreadCount(%d) ignored\n", count ); -} - -#endif diff --git a/test_conformance/compatibility/test_common/harness/compat.h b/test_conformance/compatibility/test_common/harness/compat.h deleted file mode 100644 index 608dc95e..00000000 --- a/test_conformance/compatibility/test_common/harness/compat.h +++ /dev/null @@ -1,393 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -/* - Header compat.h should be used instead of stdlib.h, stdbool.h, stdint.h, float.h, fenv.h, - math.h. It provides workarounds if these headers are not available or not complete. - - Important: It should be included before math.h, directly or indirectly, because Intel mathimf.h - is not compatible with Microsoft math.h. Including math.h before mathimf.h causes compile-time - error. -*/ -#ifndef _COMPAT_H_ -#define _COMPAT_H_ - -#if defined(_WIN32) && defined (_MSC_VER) -#include -#endif - -#ifdef __cplusplus - #define EXTERN_C extern "C" -#else - #define EXTERN_C -#endif - - -// -// stdlib.h -// - -#include // On Windows, _MAX_PATH defined there. - -// llabs appeared in MS C v16 (VS 10/2010). -#if defined( _MSC_VER ) && _MSC_VER <= 1500 - EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; } -#endif - - -// -// stdbool.h -// - -// stdbool.h appeared in MS C v18 (VS 12/2013). -#if defined( _MSC_VER ) && MSC_VER <= 1700 -#if !defined(__cplusplus) -typedef char bool; - #define true 1 - #define false 0 - #endif -#else - #include -#endif - - - -// -// stdint.h -// - -// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12. -#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 ) -typedef unsigned char uint8_t; -typedef char int8_t; -typedef unsigned short uint16_t; -typedef short int16_t; -typedef unsigned int uint32_t; -typedef int int32_t; -typedef unsigned long long uint64_t; -typedef long long int64_t; -#else -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS -#endif - #include -#endif - - - -// -// float.h -// - -#include - - - -// -// fenv.h -// - -// fenv.h appeared in MS C v18 (VS 12/2013). -#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER ) - // reimplement fenv.h because windows doesn't have it - #define FE_INEXACT 0x0020 - #define FE_UNDERFLOW 0x0010 - #define FE_OVERFLOW 0x0008 - #define FE_DIVBYZERO 0x0004 - #define FE_INVALID 0x0001 - #define FE_ALL_EXCEPT 0x003D - int fetestexcept(int excepts); - int feclearexcept(int excepts); -#else - #include -#endif - - -// -// math.h -// - -#if defined( __INTEL_COMPILER ) - #include -#else - #include -#endif - -#if defined( _MSC_VER ) - - #ifdef __cplusplus - extern "C" { - #endif - -#ifndef M_PI - #define M_PI 3.14159265358979323846264338327950288 -#endif - - #if ! defined( __INTEL_COMPILER ) - - #ifndef NAN - #define NAN (INFINITY - INFINITY) - #endif - #ifndef HUGE_VALF - #define HUGE_VALF (float)HUGE_VAL - #endif - #ifndef INFINITY - #define INFINITY (FLT_MAX + FLT_MAX) - #endif - #ifndef isfinite - #define isfinite(x) _finite(x) - #endif - #ifndef isnan -#define isnan( x ) ((x) != (x)) - #endif - #ifndef isinf -#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY) - #endif - -double rint( double x); -float rintf( float x); -long double rintl( long double x); - -float cbrtf( float ); -double cbrt( double ); - -int ilogb( double x); -int ilogbf (float x); -int ilogbl(long double x); - -double fmax(double x, double y); -double fmin(double x, double y); -float fmaxf( float x, float y ); -float fminf(float x, float y); - -double log2(double x); -long double log2l(long double x); - -double exp2(double x); -long double exp2l(long double x); - -double fdim(double x, double y); -float fdimf(float x, float y); -long double fdiml(long double x, long double y); - -double remquo( double x, double y, int *quo); -float remquof( float x, float y, int *quo); -long double remquol( long double x, long double y, int *quo); - -long double scalblnl(long double x, long n); - -// snprintf added in _MSC_VER == 1900 (Visual Studio 2015) -#if defined( _MSC_VER ) && _MSC_VER < 1900 - #define snprintf sprintf_s -#endif -float hypotf(float x, float y); -long double hypotl(long double x, long double y) ; -double lgamma(double x); -float lgammaf(float x); - -double trunc(double x); -float truncf(float x); - -double log1p(double x); -float log1pf(float x); -long double log1pl(long double x); - -double copysign(double x, double y); -float copysignf(float x, float y); -long double copysignl(long double x, long double y); - -long lround(double x); -long lroundf(float x); -//long lroundl(long double x) - -double round(double x); -float roundf(float x); -long double roundl(long double x); - - int cf_signbit(double x); - int cf_signbitf(float x); - -// Added in _MSC_VER == 1800 (Visual Studio 2013) -#if defined( _MSC_VER ) && _MSC_VER < 1800 - static int signbit(double x) { return cf_signbit(x); } -#endif - static int signbitf(float x) { return cf_signbitf(x); } - -long int lrint (double flt); -long int lrintf (float flt); - -float int2float (int32_t ix); -int32_t float2int (float fx); - - #endif - - #if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 - // These functions appeared in Intel C v13. - float nanf( const char* str); - double nan( const char* str); - long double nanl( const char* str); - #endif - - #ifdef __cplusplus - } - #endif - -#endif - -#if defined( __ANDROID__ ) - #define log2(X) (log(X)/log(2)) -#endif - - - -// -// stdio.h -// - - - -// -// unistd.h -// - -#if defined( _MSC_VER ) - EXTERN_C unsigned int sleep( unsigned int sec ); - EXTERN_C int usleep( int usec ); -#endif - - - -// -// syscall.h -// - -#if defined( __ANDROID__ ) - // Android bionic's isn't providing SYS_sysctl wrappers. - #define SYS__sysctl __NR__sysctl -#endif - - - -// Some tests use _malloca which defined in malloc.h. -#if !defined (__APPLE__) -#include -#endif - - -// -// ??? -// - -#if defined( _MSC_VER ) - - #define MAXPATHLEN _MAX_PATH - - EXTERN_C uint64_t ReadTime( void ); - EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime ); - -/** Returns the number of leading 0-bits in x, - starting at the most significant bit position. - If x is 0, the result is undefined. -*/ - EXTERN_C int __builtin_clz(unsigned int pattern); - -#endif - -#ifndef MIN - #define MIN(x,y) (((x)<(y))?(x):(y)) -#endif -#ifndef MAX - #define MAX(x,y) (((x)>(y))?(x):(y)) -#endif - - -/* - ------------------------------------------------------------------------------------------------ - WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG. - - This is a typical usage of the macros: - - double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2); - - (taken from math_brute_force/reference_math.c). There are two problems: - - 1. There is an error here. On Windows in will produce incorrect result - `0x1.5555555555555p+50'. To have a correct result it should be written as - `MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the - third argument is not obvious -- sometimes it should be the same as exponent of the - first argument, but sometimes not. - - 2. Information is duplicated. It is easy to make a mistake. - - Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file). - ------------------------------------------------------------------------------------------------ -*/ -#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER ) - - #define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z)) - #define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z) - #define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z)) - -#else - -// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead. -#define MAKE_HEX_FLOAT(x,y,z) x -#define MAKE_HEX_DOUBLE(x,y,z) x -#define MAKE_HEX_LONG(x,y,z) x - -#endif - - -/* - ------------------------------------------------------------------------------------------------ - HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long - double respectively. Arguments: - - sm -- sign of number, - int -- integer part of mantissa (without `0x' prefix), - fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes), - se -- sign of exponent, - exp -- absolute value of (binary) exponent. - - Example: - - double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2 - - Note: - - We have to pass signs as separate arguments because gcc pass negative integer values - (e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result - `0x1.0p- 2' (note a space between minus and two) which is not a correct floating point - literal. - ------------------------------------------------------------------------------------------------ -*/ -#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER ) - // If compiler does not support hex floating point literals: - #define HEX_FLT( sm, int, fract, se, exp ) sm ldexpf( (float)( 0x ## int ## fract ## UL ), se exp + ilogbf( (float) 0x ## int ) - ilogbf( ( float )( 0x ## int ## fract ## UL ) ) ) - #define HEX_DBL( sm, int, fract, se, exp ) sm ldexp( (double)( 0x ## int ## fract ## ULL ), se exp + ilogb( (double) 0x ## int ) - ilogb( ( double )( 0x ## int ## fract ## ULL ) ) ) - #define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) ) -#else - // If compiler supports hex floating point literals: just concatenate all the parts into a literal. - #define HEX_FLT( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F - #define HEX_DBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp - #define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L -#endif - -#if defined(__MINGW32__) - #include - #define sleep(sec) Sleep((sec) * 1000) -#endif - -#endif // _COMPAT_H_ diff --git a/test_conformance/compatibility/test_common/harness/errorHelpers.h b/test_conformance/compatibility/test_common/harness/errorHelpers.h deleted file mode 100644 index 54e73b07..00000000 --- a/test_conformance/compatibility/test_common/harness/errorHelpers.h +++ /dev/null @@ -1,164 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef _errorHelpers_h -#define _errorHelpers_h - -#include - -#ifdef __APPLE__ -#include -#else -#include -#endif -#include -#ifdef __cplusplus -extern "C" { -#endif - -#define LOWER_IS_BETTER 0 -#define HIGHER_IS_BETTER 1 - -// If USE_ATF is defined, all log_error and log_info calls can be routed to test library -// functions as described below. This is helpful for integration into an automated testing -// system. -#if USE_ATF -// export BUILD_WITH_ATF=1 - #include - #define test_start() ATFTestStart() - #define log_info ATFLogInfo - #define log_error ATFLogError - #define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__) - #define test_finish() ATFTestFinish() - #define vlog_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__) - #define vlog ATFLogInfo - #define vlog_error ATFLogError -#else - #define test_start() - #define log_info printf - #define log_error printf - #define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \ - _higherBetter?"higher is better":"lower is better", _number ) - #define test_finish() - #define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \ - _higherBetter?"higher is better":"lower is better" , _number) - #ifdef _WIN32 - #ifdef __MINGW32__ - // Use __mingw_printf since it supports "%a" format specifier - #define vlog __mingw_printf - #define vlog_error __mingw_printf - #else - // Use home-baked function that treats "%a" as "%f" - static int vlog_win32(const char *format, ...); - #define vlog vlog_win32 - #define vlog_error vlog_win32 - #endif - #else - #define vlog_error printf - #define vlog printf - #endif -#endif - -#define ct_assert(b) ct_assert_i(b, __LINE__) -#define ct_assert_i(b, line) ct_assert_ii(b, line) -#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1]; - -#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode) -#define test_error_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } } -#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ ); - -// expected error code vs. what we got -#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode) -#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } } -#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ ); -#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode) -#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } } -#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ ); - -#define ASSERT_SUCCESS(expr, msg) \ - do \ - { \ - cl_int _temp_retval = (expr); \ - if (_temp_retval != CL_SUCCESS) \ - { \ - std::stringstream ss; \ - ss << "ERROR: " << msg << "=" << IGetErrorString(_temp_retval) \ - << " at " << __FILE__ << ":" << __LINE__ << "\n"; \ - throw std::runtime_error(ss.str()); \ - } \ - } while (0) - -extern const char *IGetErrorString( int clErrorCode ); - -extern float Ulp_Error_Half( cl_ushort test, float reference ); -extern float Ulp_Error( float test, double reference ); -extern float Ulp_Error_Double( double test, long double reference ); - -extern const char *GetChannelTypeName( cl_channel_type type ); -extern int IsChannelTypeSupported( cl_channel_type type ); -extern const char *GetChannelOrderName( cl_channel_order order ); -extern int IsChannelOrderSupported( cl_channel_order order ); -extern const char *GetAddressModeName( cl_addressing_mode mode ); - -extern const char *GetDeviceTypeName( cl_device_type type ); - -// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!) -extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer ); - -#if defined (_WIN32) && !defined(__MINGW32__) -#include -#include -#include -static int vlog_win32(const char *format, ...) -{ - const char *new_format = format; - - if (strstr(format, "%a")) { - char *temp; - if ((temp = strdup(format)) == NULL) { - printf("vlog_win32: Failed to allocate memory for strdup\n"); - return -1; - } - new_format = temp; - while (*temp) { - // replace %a with %f - if ((*temp == '%') && (*(temp+1) == 'a')) { - *(temp+1) = 'f'; - } - temp++; - } - } - - va_list args; - va_start(args, format); - vprintf(new_format, args); - va_end(args); - - if (new_format != format) { - free((void*)new_format); - } - - return 0; -} -#endif - - -#ifdef __cplusplus -} -#endif - -#endif // _errorHelpers_h - - diff --git a/test_conformance/compatibility/test_common/harness/fpcontrol.h b/test_conformance/compatibility/test_common/harness/fpcontrol.h deleted file mode 100644 index d6de0048..00000000 --- a/test_conformance/compatibility/test_common/harness/fpcontrol.h +++ /dev/null @@ -1,104 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef _fpcontrol_h -#define _fpcontrol_h - -// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware -// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations -// in integer code, and have found this is the only way to correctly verify operation. -// -// Non-Apple implementations will need to provide their own implentation for these features. If the reference hardware and device are both -// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default -// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode. -#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__) - typedef int FPU_mode_type; -#if defined( __i386__ ) || defined( __x86_64__ ) - #include -#elif defined( __PPC__ ) - #include - extern __thread fpu_control_t fpu_control; -#endif - // Set the reference hardware floating point unit to FTZ mode - static inline void ForceFTZ( FPU_mode_type *mode ) - { -#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) - *mode = _mm_getcsr(); - _mm_setcsr( *mode | 0x8040); -#elif defined( __PPC__ ) - *mode = fpu_control; - fpu_control |= _FPU_MASK_NI; -#elif defined ( __arm__ ) - unsigned fpscr; - __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); - *mode = fpscr; - __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24))); - // Add 64 bit support -#elif defined (__aarch64__) - unsigned fpscr; - __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr)); - *mode = fpscr; - __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24))); -#else - #error ForceFTZ needs an implentation -#endif - } - - // Disable the denorm flush to zero - static inline void DisableFTZ( FPU_mode_type *mode ) - { -#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) - *mode = _mm_getcsr(); - _mm_setcsr( *mode & ~0x8040); -#elif defined( __PPC__ ) - *mode = fpu_control; - fpu_control &= ~_FPU_MASK_NI; -#elif defined ( __arm__ ) - unsigned fpscr; - __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); - *mode = fpscr; - __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24))); - // Add 64 bit support -#elif defined (__aarch64__) - unsigned fpscr; - __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr)); - *mode = fpscr; - __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24))); -#else -#error DisableFTZ needs an implentation -#endif - } - - // Restore the reference hardware to floating point state indicated by *mode - static inline void RestoreFPState( FPU_mode_type *mode ) - { -#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) - _mm_setcsr( *mode ); -#elif defined( __PPC__) - fpu_control = *mode; -#elif defined (__arm__) - __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode)); - // Add 64 bit support -#elif defined (__aarch64__) - __asm__ volatile ("msr fpcr, %0" :: "r"(*mode)); -#else - #error RestoreFPState needs an implementation -#endif - } -#else - #error ForceFTZ and RestoreFPState need implentations -#endif - -#endif diff --git a/test_conformance/compatibility/test_common/harness/msvc9.c b/test_conformance/compatibility/test_common/harness/msvc9.c deleted file mode 100644 index cf32b673..00000000 --- a/test_conformance/compatibility/test_common/harness/msvc9.c +++ /dev/null @@ -1,773 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "compat.h" - -#if defined ( _MSC_VER ) - -#include -#include - -#include - -#include - -#if ! defined( __INTEL_COMPILER ) - -/////////////////////////////////////////////////////////////////// -// -// rint, rintf -// -/////////////////////////////////////////////////////////////////// - -float copysignf( float x, float y ) -{ - union{ cl_uint u; float f; }ux, uy; - - ux.f = x; - uy.f = y; - - ux.u = (ux.u & 0x7fffffffU) | (uy.u & 0x80000000U); - - return ux.f; -} - -double copysign( double x, double y ) -{ - union{ cl_ulong u; double f; }ux, uy; - - ux.f = x; - uy.f = y; - - ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL); - - return ux.f; -} - -long double copysignl( long double x, long double y ) -{ - union - { - long double f; - struct{ cl_ulong m; cl_ushort sexp; }u; - }ux, uy; - - ux.f = x; - uy.f = y; - - ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000); - - return ux.f; -} - -float rintf(float x) -{ - float absx = fabsf(x); - - if( absx < 8388608.0f /* 0x1.0p23f */ ) - { - float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x ); - float rounded = x + magic; - rounded -= magic; - x = copysignf( rounded, x ); - } - - return x; -} - -double rint(double x) -{ - double absx = fabs(x); - - if( absx < 4503599627370496.0 /* 0x1.0p52f */ ) - { - double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x ); - double rounded = x + magic; - rounded -= magic; - x = copysign( rounded, x ); - } - - return x; -} - -long double rintl(long double x) -{ - double absx = fabs(x); - - if( absx < 9223372036854775808.0L /* 0x1.0p64f */ ) - { - long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x ); - long double rounded = x + magic; - rounded -= magic; - x = copysignl( rounded, x ); - } - - return x; -} - - -/////////////////////////////////////////////////////////////////// -// -// ilogb, ilogbf, ilogbl -// -/////////////////////////////////////////////////////////////////// -#ifndef FP_ILOGB0 - #define FP_ILOGB0 INT_MIN -#endif - -#ifndef FP_ILOGBNAN - #define FP_ILOGBNAN INT_MIN -#endif - -int ilogb (double x) -{ - union{ double f; cl_ulong u;} u; - u.f = x; - - cl_ulong absx = u.u & CL_LONG_MAX; - if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL) - { - switch( absx ) - { - case 0: - return FP_ILOGB0; - case 0x7ff0000000000000ULL: - return INT_MAX; - default: - if( absx > 0x7ff0000000000000ULL ) - return FP_ILOGBNAN; - - // subnormal - u.u = absx | 0x3ff0000000000000ULL; - u.f -= 1.0; - return (u.u >> 52) - (1023 + 1022); - } - } - - return (absx >> 52) - 1023; -} - - -int ilogbf (float x) -{ - union{ float f; cl_uint u;} u; - u.f = x; - - cl_uint absx = u.u & 0x7fffffff; - if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U) - { - switch( absx ) - { - case 0: - return FP_ILOGB0; - case 0x7f800000U: - return INT_MAX; - default: - if( absx > 0x7f800000 ) - return FP_ILOGBNAN; - - // subnormal - u.u = absx | 0x3f800000U; - u.f -= 1.0f; - return (u.u >> 23) - (127 + 126); - } - } - - return (absx >> 23) - 127; -} - -int ilogbl (long double x) -{ - union - { - long double f; - struct{ cl_ulong m; cl_ushort sexp; }u; - } u; - u.f = x; - - int exp = u.u.sexp & 0x7fff; - if( 0 == exp ) - { - if( 0 == u.u.m ) - return FP_ILOGB0; - - //subnormal - u.u.sexp = 0x3fff; - u.f -= 1.0f; - exp = u.u.sexp & 0x7fff; - - return exp - (0x3fff + 0x3ffe); - } - else if( 0x7fff == exp ) - { - if( u.u.m & CL_LONG_MAX ) - return FP_ILOGBNAN; - - return INT_MAX; - } - - return exp - 0x3fff; -} - - - -/////////////////////////////////////////////////////////////////// -// -// fmax, fmin, fmaxf, fminf -// -/////////////////////////////////////////////////////////////////// - -static void GET_BITS_SP32(float fx, unsigned int* ux) -{ - volatile union {float f; unsigned int u;} _bitsy; - _bitsy.f = (fx); - *ux = _bitsy.u; -} -/* static void GET_BITS_SP32(float fx, unsigned int* ux) */ -/* { */ -/* volatile union {float f; unsigned int i;} _bitsy; */ -/* _bitsy.f = (fx); */ -/* *ux = _bitsy.i; */ -/* } */ -static void PUT_BITS_SP32(unsigned int ux, float* fx) -{ - volatile union {float f; unsigned int u;} _bitsy; - _bitsy.u = (ux); - *fx = _bitsy.f; -} -/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */ -/* { */ -/* volatile union {float f; unsigned int i;} _bitsy; */ -/* _bitsy.i = (ux); */ -/* *fx = _bitsy.f; */ -/* } */ -static void GET_BITS_DP64(double dx, unsigned __int64* lx) -{ - volatile union {double d; unsigned __int64 l;} _bitsy; - _bitsy.d = (dx); - *lx = _bitsy.l; -} -static void PUT_BITS_DP64(unsigned __int64 lx, double* dx) -{ - volatile union {double d; unsigned __int64 l;} _bitsy; - _bitsy.l = (lx); - *dx = _bitsy.d; -} - -#if 0 -int SIGNBIT_DP64(double x ) -{ - int hx; - _GET_HIGH_WORD(hx,x); - return((hx>>31)); -} -#endif - -/* fmax(x, y) returns the larger (more positive) of x and y. - NaNs are treated as missing values: if one argument is NaN, - the other argument is returned. If both arguments are NaN, - the first argument is returned. */ - -/* This works so long as the compiler knows that (x != x) means - that x is NaN; gcc does. */ -double fmax(double x, double y) -{ - if( isnan(y) ) - return x; - - return x >= y ? x : y; -} - - -/* fmin(x, y) returns the smaller (more negative) of x and y. - NaNs are treated as missing values: if one argument is NaN, - the other argument is returned. If both arguments are NaN, - the first argument is returned. */ - -double fmin(double x, double y) -{ - if( isnan(y) ) - return x; - - return x <= y ? x : y; -} - - -float fmaxf( float x, float y ) -{ - if( isnan(y) ) - return x; - - return x >= y ? x : y; -} - -/* fminf(x, y) returns the smaller (more negative) of x and y. - NaNs are treated as missing values: if one argument is NaN, - the other argument is returned. If both arguments are NaN, - the first argument is returned. */ - -float fminf(float x, float y) -{ - if( isnan(y) ) - return x; - - return x <= y ? x : y; -} - -long double scalblnl(long double x, long n) -{ - union - { - long double d; - struct{ cl_ulong m; cl_ushort sexp;}u; - }u; - u.u.m = CL_LONG_MIN; - - if( x == 0.0L || n < -2200) - return copysignl( 0.0L, x ); - - if( n > 2200 ) - return INFINITY; - - if( n < 0 ) - { - u.u.sexp = 0x3fff - 1022; - while( n <= -1022 ) - { - x *= u.d; - n += 1022; - } - u.u.sexp = 0x3fff + n; - x *= u.d; - return x; - } - - if( n > 0 ) - { - u.u.sexp = 0x3fff + 1023; - while( n >= 1023 ) - { - x *= u.d; - n -= 1023; - } - u.u.sexp = 0x3fff + n; - x *= u.d; - return x; - } - - return x; -} - -/////////////////////////////////////////////////////////////////// -// -// log2 -// -/////////////////////////////////////////////////////////////////// -const static cl_double log_e_base2 = 1.4426950408889634074; -const static cl_double log_10_base2 = 3.3219280948873623478; - -//double log10(double x); - -double log2(double x) -{ - return 1.44269504088896340735992468100189214 * log(x); -} - -long double log2l(long double x) -{ - return 1.44269504088896340735992468100189214L * log(x); -} - -double trunc(double x) -{ - double absx = fabs(x); - - if( absx < 4503599627370496.0 /* 0x1.0p52f */ ) - { - cl_long rounded = x; - x = copysign( (double) rounded, x ); - } - - return x; -} - -float truncf(float x) -{ - float absx = fabsf(x); - - if( absx < 8388608.0f /* 0x1.0p23f */ ) - { - cl_int rounded = x; - x = copysignf( (float) rounded, x ); - } - - return x; -} - -long lround(double x) -{ - double absx = fabs(x); - - if( absx < 0.5 ) - return 0; - - if( absx < 4503599627370496.0 /* 0x1.0p52 */) - { - absx += 0.5; - cl_long rounded = absx; - absx = rounded; - x = copysign( absx, x ); - } - - if( x >= (double) LONG_MAX ) - return LONG_MAX; - - return (long) x; -} - -long lroundf(float x) -{ - float absx = fabsf(x); - - if( absx < 0.5f ) - return 0; - - if( absx < 8388608.0f ) - { - absx += 0.5f; - cl_int rounded = absx; - absx = rounded; - x = copysignf( absx, x ); - } - - if( x >= (float) LONG_MAX ) - return LONG_MAX; - - return (long) x; -} - -double round(double x) -{ - double absx = fabs(x); - - if( absx < 0.5 ) - return copysign( 0.0, x); - - if( absx < 4503599627370496.0 /* 0x1.0p52 */) - { - absx += 0.5; - cl_long rounded = absx; - absx = rounded; - x = copysign( absx, x ); - } - - return x; -} - -float roundf(float x) -{ - float absx = fabsf(x); - - if( absx < 0.5f ) - return copysignf( 0.0f, x); - - if( absx < 8388608.0f ) - { - absx += 0.5f; - cl_int rounded = absx; - absx = rounded; - x = copysignf( absx, x ); - } - - return x; -} - -long double roundl(long double x) -{ - long double absx = fabsl(x); - - if( absx < 0.5L ) - return copysignl( 0.0L, x); - - if( absx < 9223372036854775808.0L /*0x1.0p63L*/ ) - { - absx += 0.5L; - cl_ulong rounded = absx; - absx = rounded; - x = copysignl( absx, x ); - } - - return x; -} - -float cbrtf( float x ) -{ - float z = pow( fabs((double) x), 1.0 / 3.0 ); - return copysignf( z, x ); -} - -double cbrt( double x ) -{ - return copysign( pow( fabs( x ), 1.0 / 3.0 ), x ); -} - -long int lrint (double x) -{ - double absx = fabs(x); - - if( x >= (double) LONG_MAX ) - return LONG_MAX; - - if( absx < 4503599627370496.0 /* 0x1.0p52 */ ) - { - double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x ); - double rounded = x + magic; - rounded -= magic; - return (long int) rounded; - } - - return (long int) x; -} - -long int lrintf (float x) -{ - float absx = fabsf(x); - - if( x >= (float) LONG_MAX ) - return LONG_MAX; - - if( absx < 8388608.0f /* 0x1.0p23f */ ) - { - float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x ); - float rounded = x + magic; - rounded -= magic; - return (long int) rounded; - } - - return (long int) x; -} - - -/////////////////////////////////////////////////////////////////// -// -// fenv functions -// -/////////////////////////////////////////////////////////////////// -#if _MSC_VER < 1900 -int fetestexcept(int excepts) -{ - unsigned int status = _statusfp(); - return excepts & ( - ((status & _SW_INEXACT) ? FE_INEXACT : 0) | - ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) | - ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) | - ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) | - ((status & _SW_INVALID) ? FE_INVALID : 0) - ); -} - -int feclearexcept(int excepts) -{ - _clearfp(); - return 0; -} -#endif - -#endif // __INTEL_COMPILER - -#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 - -float make_nan() -{ -/* This is the IEEE 754 single-precision format: - unsigned int mantissa: 22; - unsigned int quiet_nan: 1; - unsigned int exponent: 8; - unsigned int negative: 1; -*/ - //const static unsigned - static const int32_t _nan = 0x7fc00000; - return *(const float*)(&_nan); -} - -float nanf( const char* str) -{ - cl_uint u = atoi( str ); - u |= 0x7fc00000U; - return *( float*)(&u); -} - - -double nan( const char* str) -{ - cl_ulong u = atoi( str ); - u |= 0x7ff8000000000000ULL; - return *( double*)(&u); -} - -// double check this implementatation -long double nanl( const char* str) -{ - union - { - long double f; - struct { cl_ulong m; cl_ushort sexp; }u; - }u; - u.u.sexp = 0x7fff; - u.u.m = 0x8000000000000000ULL | atoi( str ); - - return u.f; -} - -#endif - -/////////////////////////////////////////////////////////////////// -// -// misc functions -// -/////////////////////////////////////////////////////////////////// - -/* -// This function is commented out because the Windows implementation should never call munmap. -// If it is calling it, we have a bug. Please file a bugzilla. -int munmap(void *addr, size_t len) -{ -// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html - - return (int)VirtualAlloc( (LPVOID)addr, len, - MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS ); -} -*/ - -uint64_t ReadTime( void ) -{ - LARGE_INTEGER current; - QueryPerformanceCounter(¤t); - return (uint64_t)current.QuadPart; -} - -double SubtractTime( uint64_t endTime, uint64_t startTime ) -{ - static double PerformanceFrequency = 0.0; - - if (PerformanceFrequency == 0.0) { - LARGE_INTEGER frequency; - QueryPerformanceFrequency(&frequency); - PerformanceFrequency = (double) frequency.QuadPart; - } - - return (double)(endTime - startTime) / PerformanceFrequency * 1e9; -} - -int cf_signbit(double x) -{ - union - { - double f; - cl_ulong u; - }u; - u.f = x; - return u.u >> 63; -} - -int cf_signbitf(float x) -{ - union - { - float f; - cl_uint u; - }u; - u.f = x; - return u.u >> 31; -} - -float int2float (int32_t ix) -{ - union { - float f; - int32_t i; - } u; - u.i = ix; - return u.f; -} - -int32_t float2int (float fx) -{ - union { - float f; - int32_t i; - } u; - u.f = fx; - return u.i; -} - -#if !defined(_WIN64) -/** Returns the number of leading 0-bits in x, - starting at the most significant bit position. - If x is 0, the result is undefined. -*/ -int __builtin_clz(unsigned int pattern) -{ -#if 0 - int res; - __asm { - mov eax, pattern - bsr eax, eax - mov res, eax - } - return 31 - res; -#endif - unsigned long index; - unsigned char res = _BitScanReverse( &index, pattern); - if (res) { - return 8*sizeof(int) - 1 - index; - } else { - return 8*sizeof(int); - } -} -#else -int __builtin_clz(unsigned int pattern) -{ - int count; - if (pattern == 0u) { - return 32; - } - count = 31; - if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; } - if (pattern >= 1u<<8) { pattern >>= 8; count -= 8; } - if (pattern >= 1u<<4) { pattern >>= 4; count -= 4; } - if (pattern >= 1u<<2) { pattern >>= 2; count -= 2; } - if (pattern >= 1u<<1) { count -= 1; } - return count; -} - -#endif // !defined(_WIN64) - -#include -#include - -int usleep(int usec) -{ - Sleep((usec + 999) / 1000); - return 0; -} - -unsigned int sleep( unsigned int sec ) -{ - Sleep( sec * 1000 ); - return 0; -} - -#endif // defined( _MSC_VER ) diff --git a/test_conformance/compatibility/test_common/harness/mt19937.c b/test_conformance/compatibility/test_common/harness/mt19937.c deleted file mode 100644 index 75b76a74..00000000 --- a/test_conformance/compatibility/test_common/harness/mt19937.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - A C-program for MT19937, with initialization improved 2002/1/26. - Coded by Takuji Nishimura and Makoto Matsumoto. - - Before using, initialize the state by using init_genrand(seed) - or init_by_array(init_key, key_length). - - Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. The names of its contributors may not be used to endorse or promote - products derived from this software without specific prior written - permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - - Any feedback is very welcome. - http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html - email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) - - Modifications for use in OpenCL by Ian Ollmann, Apple Inc. - -*/ - -#include -#include -#include "mt19937.h" -#include "mingw_compat.h" - -#ifdef __SSE2__ - #include -#endif - -static void * align_malloc(size_t size, size_t alignment) -{ -#if defined(_WIN32) && defined(_MSC_VER) - return _aligned_malloc(size, alignment); -#elif defined(__linux__) || defined (linux) || defined(__APPLE__) - void * ptr = NULL; - if (0 == posix_memalign(&ptr, alignment, size)) - return ptr; - return NULL; -#elif defined(__MINGW32__) - return __mingw_aligned_malloc(size, alignment); -#else - #error "Please add support OS for aligned malloc" -#endif -} - -static void align_free(void * ptr) -{ -#if defined(_WIN32) && defined(_MSC_VER) - _aligned_free(ptr); -#elif defined(__linux__) || defined (linux) || defined(__APPLE__) - return free(ptr); -#elif defined(__MINGW32__) - return __mingw_aligned_free(ptr); -#else - #error "Please add support OS for aligned free" -#endif -} - - -/* Period parameters */ -#define N 624 /* vector code requires multiple of 4 here */ -#define M 397 -#define MATRIX_A (cl_uint) 0x9908b0dfUL /* constant vector a */ -#define UPPER_MASK (cl_uint) 0x80000000UL /* most significant w-r bits */ -#define LOWER_MASK (cl_uint) 0x7fffffffUL /* least significant r bits */ - -typedef struct _MTdata -{ - cl_uint mt[N]; -#ifdef __SSE2__ - cl_uint cache[N]; -#endif - cl_int mti; -}_MTdata; - -/* initializes mt[N] with a seed */ -MTdata init_genrand(cl_uint s) -{ - MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 ); - if( NULL != r ) - { - cl_uint *mt = r->mt; - int mti = 0; - mt[0]= s; // & 0xffffffffUL; - for (mti=1; mti> 30)) + mti); - /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ - /* In the previous versions, MSBs of the seed affect */ - /* only MSBs of the array mt[]. */ - /* 2002/01/09 modified by Makoto Matsumoto */ - // mt[mti] &= 0xffffffffUL; - /* for >32 bit machines */ - } - r->mti = mti; - } - - return r; -} - -void free_mtdata( MTdata d ) -{ - if(d) - align_free(d); -} - -/* generates a random number on [0,0xffffffff]-interval */ -cl_uint genrand_int32( MTdata d) -{ - /* mag01[x] = x * MATRIX_A for x=0,1 */ - static const cl_uint mag01[2]={0x0UL, MATRIX_A}; -#ifdef __SSE2__ - static volatile int init = 0; - static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1; -#endif - - - cl_uint *mt = d->mt; - cl_uint y; - - if (d->mti == N) - { /* generate N words at one time */ - int kk; - -#ifdef __SSE2__ - if( 0 == init ) - { - upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK; - lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK; - one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1; - matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A; - c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL; - c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL; - init = 1; - } -#endif - - kk = 0; -#ifdef __SSE2__ - // vector loop - for( ; kk + 4 <= N-M; kk += 4 ) - { - __m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ), - _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK)) - - __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0 - __m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL] - __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) ); // mt[kk+M] ^ (y >> 1) - vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL] - _mm_store_si128( (__m128i*) (mt + kk ), vr ); - } -#endif - for ( ;kk> 1) ^ mag01[y & (cl_uint) 0x1UL]; - } - -#ifdef __SSE2__ - // advance to next aligned location - for (;kk> 1) ^ mag01[y & (cl_uint) 0x1UL]; - } - - // vector loop - for( ; kk + 4 <= N-1; kk += 4 ) - { - __m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ), - _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK)) - - __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0 - __m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL] - __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) ); // mt[kk+M-N] ^ (y >> 1) - vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL] - _mm_store_si128( (__m128i*) (mt + kk ), vr ); - } -#endif - - for (;kk> 1) ^ mag01[y & (cl_uint) 0x1UL]; - } - y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK)); - mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]; - -#ifdef __SSE2__ - // Do the tempering ahead of time in vector code - for( kk = 0; kk + 4 <= N; kk += 4 ) - { - __m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) ); // y = mt[k]; - vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) ); // y ^= (y >> 11); - vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) ); // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL; - vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) ); // y ^= (y << 15) & (cl_uint) 0xefc60000UL; - vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) ); // y ^= (y >> 18); - _mm_store_si128( (__m128i*)(d->cache+kk), vy ); - } -#endif - - d->mti = 0; - } -#ifdef __SSE2__ - y = d->cache[d->mti++]; -#else - y = mt[d->mti++]; - - /* Tempering */ - y ^= (y >> 11); - y ^= (y << 7) & (cl_uint) 0x9d2c5680UL; - y ^= (y << 15) & (cl_uint) 0xefc60000UL; - y ^= (y >> 18); -#endif - - - return y; -} - -cl_ulong genrand_int64( MTdata d) -{ - return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d); -} - -/* generates a random number on [0,1]-real-interval */ -double genrand_real1(MTdata d) -{ - return genrand_int32(d)*(1.0/4294967295.0); - /* divided by 2^32-1 */ -} - -/* generates a random number on [0,1)-real-interval */ -double genrand_real2(MTdata d) -{ - return genrand_int32(d)*(1.0/4294967296.0); - /* divided by 2^32 */ -} - -/* generates a random number on (0,1)-real-interval */ -double genrand_real3(MTdata d) -{ - return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0); - /* divided by 2^32 */ -} - -/* generates a random number on [0,1) with 53-bit resolution*/ -double genrand_res53(MTdata d) -{ - unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6; - return(a*67108864.0+b)*(1.0/9007199254740992.0); -} diff --git a/test_conformance/compatibility/test_common/harness/rounding_mode.c b/test_conformance/compatibility/test_common/harness/rounding_mode.c deleted file mode 100644 index f77da958..00000000 --- a/test_conformance/compatibility/test_common/harness/rounding_mode.c +++ /dev/null @@ -1,175 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "rounding_mode.h" - -#if !(defined(_WIN32) && defined(_MSC_VER)) -RoundingMode set_round( RoundingMode r, Type outType ) -{ - static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO }; - static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO }; - const int *p = int_rounds; - if( outType == kfloat || outType == kdouble ) - p = flt_rounds; - int oldRound = fegetround(); - fesetround( p[r] ); - - switch( oldRound ) - { - case FE_TONEAREST: - return kRoundToNearestEven; - case FE_UPWARD: - return kRoundUp; - case FE_DOWNWARD: - return kRoundDown; - case FE_TOWARDZERO: - return kRoundTowardZero; - default: - abort(); // ??! - } - return kDefaultRoundingMode; //never happens -} - -RoundingMode get_round( void ) -{ - int oldRound = fegetround(); - - switch( oldRound ) - { - case FE_TONEAREST: - return kRoundToNearestEven; - case FE_UPWARD: - return kRoundUp; - case FE_DOWNWARD: - return kRoundDown; - case FE_TOWARDZERO: - return kRoundTowardZero; - } - - return kDefaultRoundingMode; -} - -#else -RoundingMode set_round( RoundingMode r, Type outType ) -{ - static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP }; - static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP }; - const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds; - unsigned int oldRound; - - int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound - if (err) { - vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__); - return kDefaultRoundingMode; //what else never happens - } - - oldRound &= _MCW_RC; - - RoundingMode old = - (oldRound == _RC_NEAR)? kRoundToNearestEven : - (oldRound == _RC_UP)? kRoundUp : - (oldRound == _RC_DOWN)? kRoundDown : - (oldRound == _RC_CHOP)? kRoundTowardZero: - kDefaultRoundingMode; - - _controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode - return old; //returning old rounding mode -} - -RoundingMode get_round( void ) -{ - unsigned int oldRound; - - int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound - oldRound &= _MCW_RC; - return - (oldRound == _RC_NEAR)? kRoundToNearestEven : - (oldRound == _RC_UP)? kRoundUp : - (oldRound == _RC_DOWN)? kRoundDown : - (oldRound == _RC_CHOP)? kRoundTowardZero: - kDefaultRoundingMode; -} - -#endif - -// -// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in -// basic_test_conversions.c. Some host processors may not support this mode, which case you'll need to do some clamping in -// software by testing against FLT_MIN or DBL_MIN in that file. -// -// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of -// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic -// operators do (e.g. add, subtract, multiply, divide, etc.) -// -// Configuring hardware to FTZ mode varies by platform. -// CAUTION: Some C implementations may also fail to behave properly in this mode. -// -// On PowerPC, it is done by setting the FPSCR into non-IEEE mode. -// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2 -// is used for floating point computation! If your OS uses x87, you'll need to figure out how -// to turn that off for the conversions code in basic_test_conversions.c so that they flush to -// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c -// in which case, these function are at liberty to do nothing. -// -#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32) - #include -#elif defined( __PPC__ ) - #include -#endif -void *FlushToZero( void ) -{ -#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) - #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER) - union{ int i; void *p; }u = { _mm_getcsr() }; - _mm_setcsr( u.i | 0x8040 ); - return u.p; - #elif defined( __arm__ ) || defined(__aarch64__) - // processor is already in FTZ mode -- do nothing - return NULL; - #elif defined( __PPC__ ) - fpu_control_t flags = 0; - _FPU_GETCW(flags); - flags |= _FPU_MASK_NI; - _FPU_SETCW(flags); - return NULL; - #else - #error Unknown arch - #endif -#else - #error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. -#endif -} - -// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p. -void UnFlushToZero( void *p) -{ -#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) - #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER) - union{ void *p; int i; }u = { p }; - _mm_setcsr( u.i ); - #elif defined( __arm__ ) || defined(__aarch64__) - // processor is already in FTZ mode -- do nothing - #elif defined( __PPC__) - fpu_control_t flags = 0; - _FPU_GETCW(flags); - flags &= ~_FPU_MASK_NI; - _FPU_SETCW(flags); - #else - #error Unknown arch - #endif -#else - #error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. -#endif -} diff --git a/test_conformance/compatibility/test_common/harness/rounding_mode.h b/test_conformance/compatibility/test_common/harness/rounding_mode.h deleted file mode 100644 index 837ec687..00000000 --- a/test_conformance/compatibility/test_common/harness/rounding_mode.h +++ /dev/null @@ -1,71 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef __ROUNDING_MODE_H__ -#define __ROUNDING_MODE_H__ - -#include "compat.h" - -#include - -#if (defined(_WIN32) && defined (_MSC_VER)) -#include "errorHelpers.h" -#include "testHarness.h" -#endif - -typedef enum -{ - kDefaultRoundingMode = 0, - kRoundToNearestEven, - kRoundUp, - kRoundDown, - kRoundTowardZero, - - kRoundingModeCount -}RoundingMode; - -typedef enum -{ - kuchar = 0, - kchar = 1, - kushort = 2, - kshort = 3, - kuint = 4, - kint = 5, - kfloat = 6, - kdouble = 7, - kulong = 8, - klong = 9, - - //This goes last - kTypeCount -}Type; - -#ifdef __cplusplus -extern "C" { -#endif - -extern RoundingMode set_round( RoundingMode r, Type outType ); -extern RoundingMode get_round( void ); -extern void *FlushToZero( void ); -extern void UnFlushToZero( void *p); - -#ifdef __cplusplus -} -#endif - - - -#endif /* __ROUNDING_MODE_H__ */ diff --git a/test_conformance/compatibility/test_common/harness/threadTesting.c b/test_conformance/compatibility/test_common/harness/threadTesting.c deleted file mode 100644 index 2f16dcca..00000000 --- a/test_conformance/compatibility/test_common/harness/threadTesting.c +++ /dev/null @@ -1,106 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "threadTesting.h" -#include "errorHelpers.h" -#include -#include - -#if !defined(_WIN32) -#include -#endif - -#include -#include - -#if !defined(_WIN32) -#include -#endif - -#if 0 // Disabed for now - -typedef struct -{ - basefn mFunction; - cl_device_id mDevice; - cl_context mContext; - int mNumElements; -} TestFnArgs; - -//////////////////////////////////////////////////////////////////////////////// -// Thread-based testing. Spawns a new thread to run the given test function, -// then waits for it to complete. The entire idea is that, if the thread crashes, -// we can catch it and report it as a failure instead of crashing the entire suite -//////////////////////////////////////////////////////////////////////////////// - -void *test_thread_wrapper( void *data ) -{ - TestFnArgs *args; - int retVal; - cl_context context; - - args = (TestFnArgs *)data; - - /* Create a new context to use (contexts can't cross threads) */ - context = clCreateContext(NULL, args->mDeviceGroup); - if( context == NULL ) - { - log_error("clCreateContext failed for new thread\n"); - return (void *)(-1); - } - - /* Call function */ - retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements ); - - clReleaseContext( context ); - - return (void *)retVal; -} - -int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) -{ - int error; - pthread_t threadHdl; - void *retVal; - TestFnArgs args; - - - args.mFunction = fnToTest; - args.mDeviceGroup = deviceGroup; - args.mDevice = device; - args.mContext = context; - args.mNumElements = numElements; - - - error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args ); - if( error != 0 ) - { - log_error( "ERROR: Unable to create thread for testing!\n" ); - return -1; - } - - /* Thread has been started, now just wait for it to complete (or crash) */ - error = pthread_join( threadHdl, &retVal ); - if( error != 0 ) - { - log_error( "ERROR: Unable to join testing thread!\n" ); - return -1; - } - - return (int)((intptr_t)retVal); -} -#endif - - diff --git a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt index 8dfa2f2e..76e43689 100644 --- a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt +++ b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt @@ -22,13 +22,13 @@ set(${MODULE_NAME}_SOURCES test_kernel_arg_info.c test_queue_properties.cpp ../../test_common/harness/errorHelpers.c - ../../test_common/harness/threadTesting.c + ../../../../test_common/harness/threadTesting.c ../../test_common/harness/testHarness.c ../../test_common/harness/kernelHelpers.c ../../../../test_common/harness/typeWrappers.cpp ../../../../test_common/harness/conversions.c - ../../test_common/harness/mt19937.c - ../../test_common/harness/msvc9.c + ../../../../test_common/harness/mt19937.c + ../../../../test_common/harness/msvc9.c ../../test_common/harness/imageHelpers.cpp ) diff --git a/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt b/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt index 0893596e..3cbe0941 100644 --- a/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt @@ -51,15 +51,15 @@ set(${MODULE_NAME}_SOURCES test_kernel_call_kernel_function.cpp test_local_kernel_scope.cpp ../../test_common/harness/errorHelpers.c - ../../test_common/harness/threadTesting.c + ../../../../test_common/harness/threadTesting.c ../../test_common/harness/testHarness.c ../../test_common/harness/kernelHelpers.c ../../../../test_common/harness/typeWrappers.cpp ../../test_common/harness/imageHelpers.cpp - ../../test_common/harness/mt19937.c + ../../../../test_common/harness/mt19937.c ../../../../test_common/harness/conversions.c - ../../test_common/harness/rounding_mode.c - ../../test_common/harness/msvc9.c + ../../../../test_common/harness/rounding_mode.c + ../../../../test_common/harness/msvc9.c ) include(../../../CMakeCommon.txt)