Remove almost duplicate compatibility common code

Use the non-compatibility version. In each case the diff was minimal and contained no modifications that would invalidate compatibility testing, and it was clear that the "latest/best" version was not the one in the compatibility copy.

Signed-off-by: Kevin Petit <kevin.petit@arm.com>
2026-03-19 06:09:01 +00:00 · 2019-08-07 11:24:11 +01:00
parent fba5b654e8
commit 4cb8fc49f8
12 changed files with 8 additions and 2966 deletions
--- a/test_common/harness/ThreadPool.c
+++ b/test_common/harness/ThreadPool.c
@@ -446,6 +446,7 @@ void ThreadPool_Init(void)
    // Check for manual override of multithreading code. We add this for better debuggability.
    if( getenv( "CL_TEST_SINGLE_THREADED" ) )
    {
+        log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
        gThreadCount = 1;
        return;
    }
--- a/test_conformance/compatibility/test_common/harness/ThreadPool.c
+++ b/test_conformance/compatibility/test_common/harness/ThreadPool.c
@@ -1,899 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "ThreadPool.h"
-#include "errorHelpers.h"
-#include "fpcontrol.h"
-#include <stdio.h>
-#include <stdlib.h>
-
-#if  defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 )  // or any other POSIX system
-
-#if defined( _WIN32 )
-#include <windows.h>
-#if defined(_MSC_VER)
-#include <intrin.h>
-#endif
-#include "mingw_compat.h"
-#include <process.h>
-#else // !_WIN32
-#include <pthread.h>
-#include <unistd.h>
-#include <sys/errno.h>
-#endif // !_WIN32
-
-// declarations
-#ifdef  _WIN32
-void ThreadPool_WorkerFunc( void *p );
-#else
-void *ThreadPool_WorkerFunc( void *p );
-#endif
-void ThreadPool_Init(void);
-void ThreadPool_Exit(void);
-
-#if defined (__MINGW32__)
-    // Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC
-    CRITICAL_SECTION     gAtomicLock;
-#elif defined( __GNUC__ ) || defined( _MSC_VER)
-#else
-    pthread_mutex_t     gAtomicLock;
-#endif
-
-// Atomic add operator with mem barrier.  Mem barrier needed to protect state modified by the worker functions.
-cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
-{
-#if defined (__MINGW32__)
-    // No atomics on Mingw32
-    EnterCriticalSection(&gAtomicLock);
-    cl_int old = *a;
-    *a = old + b;
-    LeaveCriticalSection(&gAtomicLock);
-    return old;
-#elif defined( __GNUC__ )
-    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
-    return __sync_fetch_and_add( a, b );
-    // do we need __sync_synchronize() here, too?  GCC docs are unclear whether __sync_fetch_and_add does a synchronize
-#elif defined( _MSC_VER )
-    return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b );
-#else
-    #warning  Please add a atomic add implementation here, with memory barrier.  Fallback code is slow.
-    if( pthread_mutex_lock(&gAtomicLock) )
-        log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
-    cl_int old = *a;
-    *a = old + b;
-    if( pthread_mutex_unlock(&gAtomicLock) )
-        log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n");
-    return old;
-#endif
-}
-
-#if defined( _WIN32 )
-// Uncomment the following line if Windows XP support is not required.
-// #define HAS_INIT_ONCE_EXECUTE_ONCE 1
-
-#if defined(HAS_INIT_ONCE_EXECUTE_ONCE)
-#define _INIT_ONCE           INIT_ONCE
-#define _PINIT_ONCE          PINIT_ONCE
-#define _InitOnceExecuteOnce InitOnceExecuteOnce
-#else // !HAS_INIT_ONCE_EXECUTE_ONCE
-
-typedef volatile LONG _INIT_ONCE;
-typedef _INIT_ONCE *_PINIT_ONCE;
-typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
-
-#define _INIT_ONCE_UNINITIALIZED 0
-#define _INIT_ONCE_IN_PROGRESS   1
-#define _INIT_ONCE_DONE          2
-
-static BOOL _InitOnceExecuteOnce(
-  _PINIT_ONCE InitOnce,
-  _PINIT_ONCE_FN InitFn,
-  PVOID Parameter,
-  LPVOID *Context
-)
-{
-    while ( *InitOnce != _INIT_ONCE_DONE )
-    {
-        if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED )
-        {
-            InitFn( InitOnce, Parameter, Context );
-            *InitOnce = _INIT_ONCE_DONE;
-            return TRUE;
-        }
-        Sleep( 1 );
-    }
-    return TRUE;
-}
-#endif // !HAS_INIT_ONCE_EXECUTE_ONCE
-
-// Uncomment the following line if Windows XP support is not required.
-// #define HAS_CONDITION_VARIABLE 1
-
-#if defined(HAS_CONDITION_VARIABLE)
-#define _CONDITION_VARIABLE          CONDITION_VARIABLE
-#define _InitializeConditionVariable InitializeConditionVariable
-#define _SleepConditionVariableCS    SleepConditionVariableCS
-#define _WakeAllConditionVariable    WakeAllConditionVariable
-#else // !HAS_CONDITION_VARIABLE
-typedef struct
-{
-    HANDLE           mEvent; // Used to park the thread.
-    CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount.
-    volatile cl_int  mWaiters; // Number of threads waiting on this cond var.
-    volatile cl_int  mGeneration; // Wait generation count.
-    volatile cl_int  mReleaseCount; // Number of releases to execute before reseting the event.
-} _CONDITION_VARIABLE;
-
-typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE;
-
-static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var )
-{
-    cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
-    InitializeCriticalSection( cond_var->mLock );
-    cond_var->mWaiters = 0;
-    cond_var->mGeneration = 0;
-#if !defined ( NDEBUG )
-    cond_var->mReleaseCount = 0;
-#endif // !NDEBUG
-}
-
-static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored)
-{
-    EnterCriticalSection( cond_var->mLock );
-    cl_int generation = cond_var->mGeneration;
-    ++cond_var->mWaiters;
-    LeaveCriticalSection( cond_var->mLock );
-    LeaveCriticalSection( cond_lock );
-
-    while ( TRUE )
-    {
-        WaitForSingleObject( cond_var->mEvent, INFINITE );
-        EnterCriticalSection( cond_var->mLock );
-        BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;
-        LeaveCriticalSection( cond_var->mLock );
-        if ( done )
-        {
-            break;
-        }
-    }
-
-    EnterCriticalSection( cond_lock );
-    EnterCriticalSection( cond_var->mLock );
-    if ( --cond_var->mReleaseCount == 0 )
-    {
-        ResetEvent( cond_var->mEvent );
-    }
-    --cond_var->mWaiters;
-    LeaveCriticalSection( cond_var->mLock );
-}
-
-static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var )
-{
-    EnterCriticalSection( cond_var->mLock );
-    if (cond_var->mWaiters > 0 )
-    {
-        ++cond_var->mGeneration;
-        cond_var->mReleaseCount = cond_var->mWaiters;
-        SetEvent( cond_var->mEvent );
-    }
-    LeaveCriticalSection( cond_var->mLock );
-}
-#endif // !HAS_CONDITION_VARIABLE
-#endif // _WIN32
-
-#define MAX_COUNT   (1<<29)
-
-// Global state to coordinate whether the threads have been launched successfully or not
-#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600)
-static _INIT_ONCE threadpool_init_control;
-#elif defined (_WIN32)  // MingW of XP
-static int threadpool_init_control;
-#else // Posix platforms
-pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
-#endif
-cl_int threadPoolInitErr = -1;          // set to CL_SUCCESS on successful thread launch
-
-// critical region lock around ThreadPool_Do.  We can only run one ThreadPool_Do at a time,
-// because we are too lazy to set up a queue here, and don't expect to need one.
-#if defined( _WIN32 )
-CRITICAL_SECTION    gThreadPoolLock[1];
-#else // !_WIN32
-pthread_mutex_t     gThreadPoolLock;
-#endif // !_WIN32
-
-// Condition variable to park ThreadPool threads when not working
-#if defined( _WIN32 )
-CRITICAL_SECTION    cond_lock[1];
-_CONDITION_VARIABLE cond_var[1];
-#else // !_WIN32
-pthread_mutex_t     cond_lock;
-pthread_cond_t      cond_var;
-#endif // !_WIN32
-volatile cl_int     gRunCount = 0;              // Condition variable state. How many iterations on the function left to run.
-                                                // set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative.
-
-// State that only changes when the threadpool is not working.
-volatile TPFuncPtr  gFunc_ptr = NULL;
-volatile void       *gUserInfo = NULL;
-volatile cl_int     gJobCount = 0;
-
-// State that may change while the thread pool is working
-volatile cl_int     jobError = CL_SUCCESS;      // err code return for the job as a whole
-
-// Condition variable to park caller while waiting
-#if defined( _WIN32 )
-HANDLE              caller_event;
-#else // !_WIN32
-pthread_mutex_t     caller_cond_lock;
-pthread_cond_t      caller_cond_var;
-#endif // !_WIN32
-volatile cl_int     gRunning = 0;       // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do.
-
-// The total number of threads launched.
-volatile cl_int     gThreadCount = 0;
-#ifdef _WIN32
-void ThreadPool_WorkerFunc( void *p )
-#else
-void *ThreadPool_WorkerFunc( void *p )
-#endif
-{
-    cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
-    cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
-//    log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
-
-    while( MAX_COUNT > item )
-    {
-        cl_int err;
-
-        // check for more work to do
-        if( 0 >= item )
-        {
-//            log_info( "Thread %d has run out of work.\n", threadID );
-
-            // No work to do. Attempt to block waiting for work
-#if defined( _WIN32 )
-            EnterCriticalSection( cond_lock );
-#else // !_WIN32
-            if((err = pthread_mutex_lock( &cond_lock) ))
-            {
-                log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID );
-                goto exit;
-            }
-#endif // !_WIN32
-
-            cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 );
-//            log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 );
-            if( 1 == remaining )
-            { // last thread out signal the main thread to wake up
-#if defined( _WIN32 )
-                SetEvent( caller_event );
-#else // !_WIN32
-                if((err = pthread_mutex_lock( &caller_cond_lock) ))
-                {
-                    log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
-                    goto exit;
-                }
-                if( (err = pthread_cond_broadcast( &caller_cond_var )))
-                {
-                    log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err );
-                    goto exit;
-                }
-                if((err = pthread_mutex_unlock( &caller_cond_lock) ))
-                {
-                    log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
-                    goto exit;
-                }
-#endif // !_WIN32
-            }
-
-            // loop in case we are woken only to discover that some other thread already did all the work
-            while( 0 >= item )
-            {
-#if defined( _WIN32 )
-                _SleepConditionVariableCS( cond_var, cond_lock, INFINITE );
-#else // !_WIN32
-                if((err = pthread_cond_wait( &cond_var, &cond_lock) ))
-                {
-                    log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
-                    pthread_mutex_unlock( &cond_lock);
-                    goto exit;
-                }
-#endif // !_WIN32
-
-                // try again to get a valid item id
-                item = ThreadPool_AtomicAdd( &gRunCount, -1 );
-                if( MAX_COUNT <= item )  // exit if we are done
-                {
-#if defined( _WIN32 )
-                    LeaveCriticalSection( cond_lock );
-#else // !_WIN32
-                    pthread_mutex_unlock( &cond_lock);
-#endif // !_WIN32
-                    goto exit;
-                }
-            }
-
-            ThreadPool_AtomicAdd( &gRunning, 1 );
-//            log_info( "Thread %d has found work.\n", threadID);
-
-#if defined( _WIN32 )
-            LeaveCriticalSection( cond_lock );
-#else // !_WIN32
-            if((err = pthread_mutex_unlock( &cond_lock) ))
-            {
-                log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
-                goto exit;
-            }
-#endif // !_WIN32
-
-        }
-
-        // we have a valid item, so do the work
-        if( CL_SUCCESS == jobError )  // but only if we haven't already encountered an error
-        {
-//            log_info( "Thread %d doing job %d\n", threadID, item - 1);
-
-#if defined(__APPLE__) && defined(__arm__)
-            // On most platforms which support denorm, default is FTZ off. However,
-            // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
-            // This creates issues in result verification. Since spec allows the implementation to either flush or
-            // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
-            // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
-            // where reference is being computed to make sure we get non-flushed reference result. If implementation
-            // returns flushed result, we correctly take care of that in verification code.
-            FPU_mode_type oldMode;
-            DisableFTZ( &oldMode );
-#endif
-
-            // Call the user's function with this item ID
-            err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo );
-#if defined(__APPLE__) && defined(__arm__)
-            // Restore FP state
-            RestoreFPState( &oldMode );
-#endif
-
-            if( err )
-            {
-#if (__MINGW32__)
-                EnterCriticalSection(&gAtomicLock);
-                if( jobError == CL_SUCCESS );
-                    jobError = err;
-                gRunCount = 0;
-                LeaveCriticalSection(&gAtomicLock);
-#elif defined( __GNUC__ )
-                // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
-                // set the new error if we are the first one there.
-                __sync_val_compare_and_swap( &jobError, CL_SUCCESS, err );
-
-                // drop run count to 0
-                gRunCount = 0;
-                __sync_synchronize();
-#elif defined( _MSC_VER )
-                // set the new error if we are the first one there.
-                _InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS );
-
-                // drop run count to 0
-                gRunCount = 0;
-                _mm_mfence();
-#else
-                if( pthread_mutex_lock(&gAtomicLock) )
-                    log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
-                if( jobError == CL_SUCCESS );
-                    jobError = err;
-                gRunCount = 0;
-                if( pthread_mutex_unlock(&gAtomicLock) )
-                    log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n");
-#endif
-            }
-        }
-
-        // get the next item
-        item = ThreadPool_AtomicAdd( &gRunCount, -1 );
-    }
-
-exit:
-    log_info( "ThreadPool: thread %d exiting.\n", threadID );
-    ThreadPool_AtomicAdd( &gThreadCount, -1 );
-#if !defined(_WIN32)
-    return NULL;
-#endif
-}
-
-// SetThreadCount() may be used to artifically set the number of worker threads
-// If the value is 0 (the default) the number of threads will be determined based on
-// the number of CPU cores.  If it is a unicore machine, then 2 will be used, so
-// that we still get some testing for thread safety.
-//
-// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
-// code will run single threaded, but will report an error to indicate that the test
-// is invalid.  This option is intended for debugging purposes only. It is suggested
-// as a convention that test apps set the thread count to 1 in response to the -m flag.
-//
-// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
-// otherwise the behavior is indefined.
-void        SetThreadCount( int count )
-{
-    if( threadPoolInitErr == CL_SUCCESS )
-    {
-        log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" );
-        abort();
-    }
-
-    gThreadCount = count;
-}
-
-void ThreadPool_Init(void)
-{
-    cl_int i;
-    int err;
-    volatile cl_uint threadID = 0;
-
-    // Check for manual override of multithreading code. We add this for better debuggability.
-    if( getenv( "CL_TEST_SINGLE_THREADED" ) )
-    {
-        log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
-        gThreadCount = 1;
-        return;
-    }
-
-    // Figure out how many threads to run -- check first for non-zero to give the implementation the chance
-    if( 0 == gThreadCount )
-    {
-#if defined(_MSC_VER) || defined (__MINGW64__)
-        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
-        DWORD length = 0;
-
-        GetLogicalProcessorInformation( NULL, &length );
-        buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
-        if( buffer != NULL && GetLogicalProcessorInformation( buffer, &length ) == TRUE )
-        {
-            PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
-            while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
-            {
-                if( ptr->Relationship == RelationProcessorCore )
-                {
-                    // Count the number of bits in ProcessorMask (number of logical cores)
-                    ULONG mask = ptr->ProcessorMask;
-                    while( mask )
-                    {
-                        ++gThreadCount;
-                        mask &= mask - 1; // Remove 1 bit at a time
-                    }
-                }
-                ++ptr;
-            }
-            free(buffer);
-        }
-#elif defined (__MINGW32__)
-        {
-            #warning  How about this, instead of hard coding it to 2?
-            SYSTEM_INFO sysinfo;
-            GetSystemInfo( &sysinfo );
-            gThreadCount = sysinfo.dwNumberOfProcessors;
-        }
-#else // !_WIN32
-        gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF);       // Hopefully your system returns logical cpus here, as does MacOS X
-#endif // !_WIN32
-
-        // Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety
-        if( 1 == gThreadCount )
-            gThreadCount = 2;
-    }
-
-    //Allow the app to set thread count to <0 for debugging purposes.  This will cause the test to run single threaded.
-    if( gThreadCount < 2 )
-    {
-        log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n");
-        gThreadCount = 1;
-        return;
-    }
-
-#if defined( _WIN32 )
-    InitializeCriticalSection( gThreadPoolLock );
-    InitializeCriticalSection( cond_lock );
-    _InitializeConditionVariable( cond_var );
-    caller_event = CreateEvent( NULL, FALSE, FALSE, NULL );
-#elif defined (__GNUC__)
-    // Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since it might cause problem
-    // with some flavors of gcc compilers.
-    pthread_cond_init(&cond_var, NULL);
-    pthread_mutex_init(&cond_lock ,NULL);
-    pthread_cond_init(&caller_cond_var, NULL);
-    pthread_mutex_init(&caller_cond_lock, NULL);
-    pthread_mutex_init(&gThreadPoolLock, NULL);
-#endif
-
-#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
-    pthread_mutex_initialize(gAtomicLock);
-#elif defined (__MINGW32__)
-    InitializeCriticalSection(&gAtomicLock);
-#endif
-    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
-    //  That would cause a deadlock.
-#if !defined( _WIN32 )
-    if((err = pthread_mutex_lock( &caller_cond_lock) ))
-    {
-        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
-        gThreadCount = 1;
-        return;
-    }
-#endif // !_WIN32
-
-    gRunning = gThreadCount;
-    // init threads
-    for( i = 0; i < gThreadCount; i++ )
-    {
-#if defined( _WIN32 )
-        uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID);
-        err = ( handle == 0 );
-#else // !_WIN32
-        pthread_t tid = 0;
-        err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID );
-#endif // !_WIN32
-        if( err )
-        {
-            log_error( "Error %d launching thread %d\n", err, i );
-            threadPoolInitErr = err;
-            gThreadCount = i;
-            break;
-        }
-    }
-
-    atexit( ThreadPool_Exit );
-
-// block until they are done launching.
-    do
-    {
-#if defined( _WIN32 )
-        WaitForSingleObject( caller_event, INFINITE );
-#else // !_WIN32
-        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
-        {
-            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
-            pthread_mutex_unlock( &caller_cond_lock);
-            return;
-        }
-#endif // !_WIN32
-    }
-    while( gRunCount != -gThreadCount );
-#if !defined( _WIN32 )
-    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
-    {
-        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
-        return;
-    }
-#endif // !_WIN32
-
-    threadPoolInitErr = CL_SUCCESS;
-}
-
-#if defined(_MSC_VER)
-static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex)
-{
-    ThreadPool_Init();
-    return TRUE;
-}
-#endif
-
-void ThreadPool_Exit(void)
-{
-    int err, count;
-    gRunCount = CL_INT_MAX;
-
-#if defined( __GNUC__ )
-    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
-    __sync_synchronize();
-#elif defined( _MSC_VER )
-    _mm_mfence();
-#else
-    #warning   If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
-#endif
-
-    // spin waiting for threads to die
-    for (count = 0; 0 != gThreadCount && count < 1000; count++)
-    {
-#if defined( _WIN32 )
-        _WakeAllConditionVariable( cond_var );
-        Sleep(1);
-#else // !_WIN32
-        if( (err = pthread_cond_broadcast( &cond_var )))
-        {
-            log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err );
-            break;
-        }
-        usleep(1000);
-#endif // !_WIN32
-    }
-
-    if( gThreadCount )
-        log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount );
-    else
-        log_info( "Thread pool exited in a orderly fashion.\n" );
-}
-
-
-// Blocking API that farms out count jobs to a thread pool.
-// It may return with some work undone if func_ptr() returns a non-zero
-// result.
-//
-// This function obviously has its shortcommings. Only one call to ThreadPool_Do
-// can be running at a time. It is not intended for general purpose use.
-// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
-// all available then it would make more sense to use those features.
-cl_int ThreadPool_Do( TPFuncPtr func_ptr,
-                      cl_uint count,
-                      void *userInfo )
-{
-    cl_int newErr;
-    cl_int err = 0;
-    // Lazily set up our threads
-#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
-    err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
-#elif defined (_WIN32)
-    if (threadpool_init_control == 0) {
-    #warning  This is buggy and race prone.  Find a better way.
-        ThreadPool_Init();
-        threadpool_init_control = 1;
-    }
-#else //posix platform
-    err = pthread_once( &threadpool_init_control, ThreadPool_Init );
-    if( err )
-    {
-        log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
-        return err;
-    }
-#endif
-    // Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable
-    if( threadPoolInitErr )
-    {
-        cl_uint currentJob = 0;
-        cl_int  result = CL_SUCCESS;
-
-#if defined(__APPLE__) && defined(__arm__)
-        // On most platforms which support denorm, default is FTZ off. However,
-        // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
-        // This creates issues in result verification. Since spec allows the implementation to either flush or
-        // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
-        // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
-        // where reference is being computed to make sure we get non-flushed reference result. If implementation
-        // returns flushed result, we correctly take care of that in verification code.
-        FPU_mode_type oldMode;
-        DisableFTZ( &oldMode );
-#endif
-        for( currentJob = 0; currentJob < count; currentJob++ )
-            if((result = func_ptr( currentJob, 0, userInfo )))
-            {
-#if defined(__APPLE__) && defined(__arm__)
-                // Restore FP state before leaving
-                RestoreFPState( &oldMode );
-#endif
-                return result;
-            }
-
-#if defined(__APPLE__) && defined(__arm__)
-        // Restore FP state before leaving
-        RestoreFPState( &oldMode );
-#endif
-
-        return CL_SUCCESS;
-    }
-
-    if( count >= MAX_COUNT )
-    {
-        log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT );
-        return -1;
-    }
-
-    // Enter critical region
-#if defined( _WIN32 )
-    EnterCriticalSection( gThreadPoolLock );
-#else // !_WIN32
-    if( (err = pthread_mutex_lock( &gThreadPoolLock )))
-    {
-        switch (err)
-        {
-            case EDEADLK:
-                log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" );
-                break;
-            case EINVAL:
-                log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" );
-                break;
-            default:
-                break;
-        }
-        return err;
-    }
-#endif // !_WIN32
-
-    // Start modifying the job state observable by worker threads
-#if defined( _WIN32 )
-    EnterCriticalSection( cond_lock );
-#else // !_WIN32
-    if((err = pthread_mutex_lock( &cond_lock) ))
-    {
-        log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
-        goto exit;
-    }
-#endif // !_WIN32
-
-    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
-    //  That would cause a deadlock.
-#if !defined( _WIN32 )
-    if((err = pthread_mutex_lock( &caller_cond_lock) ))
-    {
-        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
-        goto exit;
-    }
-#endif // !_WIN32
-
-    // Prime the worker threads to get going
-    jobError = CL_SUCCESS;
-    gRunCount = gJobCount = count;
-    gFunc_ptr = func_ptr;
-    gUserInfo = userInfo;
-
-#if defined( _WIN32 )
-    _WakeAllConditionVariable( cond_var );
-    LeaveCriticalSection( cond_lock );
-#else // !_WIN32
-    if( (err = pthread_cond_broadcast( &cond_var )))
-    {
-        log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
-        goto exit;
-    }
-    if((err = pthread_mutex_unlock( &cond_lock) ))
-    {
-        log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
-        goto exit;
-    }
-#endif // !_WIN32
-
-// block until they are done.  It would be slightly more efficient to do some of the work here though.
-    do
-    {
-#if defined( _WIN32 )
-        WaitForSingleObject( caller_event, INFINITE );
-#else // !_WIN32
-        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
-        {
-            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
-            pthread_mutex_unlock( &caller_cond_lock);
-            goto exit;
-        }
-#endif // !_WIN32
-    }
-    while( gRunning );
-#if !defined(_WIN32)
-    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
-    {
-        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
-        goto exit;
-    }
-#endif // !_WIN32
-
-    err = jobError;
-
-exit:
-    // exit critical region
-#if defined( _WIN32 )
-    LeaveCriticalSection( gThreadPoolLock );
-#else // !_WIN32
-    newErr = pthread_mutex_unlock( &gThreadPoolLock );
-    if( newErr)
-    {
-        log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr );
-        return err;
-    }
-#endif // !_WIN32
-
-    return err;
-}
-
-cl_uint GetThreadCount( void )
-{
-    // Lazily set up our threads
-#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
-    cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
-#elif defined (_WIN32)
-    if (threadpool_init_control == 0) {
-    #warning  This is buggy and race prone.  Find a better way.
-        ThreadPool_Init();
-        threadpool_init_control = 1;
-    }
-#else
-    cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init );
-    if( err )
-    {
-        log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
-        return err;
-    }
-#endif // !_WIN32
-
-    if( gThreadCount < 1 )
-        return 1;
-
-    return gThreadCount;
-}
-
-#else
-
-#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
-    #error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
-#endif
-//
-// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements
-// of OpenCL API, while also checking
-//
-// A sample single threaded implementation follows, for documentation / bootstrapping purposes.
-// It is not okay to use this for conformance testing!!!
-//
-// Exception:  If your operating system does not support multithreaded execution of any kind, then you may use this code.
-//
-
-cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
-{
-    cl_uint r = *a;
-
-    // since this fallback code path is not multithreaded, we just do a regular add here
-    // If your operating system supports memory-barrier-atomics, use those here
-    *a = r + b;
-
-    return r;
-}
-
-// Blocking API that farms out count jobs to a thread pool.
-// It may return with some work undone if func_ptr() returns a non-zero
-// result.
-cl_int ThreadPool_Do(   TPFuncPtr func_ptr,
-                        cl_uint count,
-                        void *userInfo )
-{
-    cl_uint currentJob = 0;
-    cl_int  result = CL_SUCCESS;
-
-#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
-    // THIS FUNCTION IS NOT INTENDED FOR USE!!
-    log_error( "ERROR:  Test must be multithreaded!\n" );
-    exit(-1);
-#else
-    static int spewCount = 0;
-
-    if( 0 == spewCount )
-    {
-        log_info( "\nWARNING:  The operating system is claimed not to support threads of any sort. Running single threaded.\n" );
-        spewCount = 1;
-    }
-#endif
-
-// The multithreaded code should mimic this behavior:
-    for( currentJob = 0; currentJob < count; currentJob++ )
-        if((result = func_ptr( currentJob, 0, userInfo )))
-            return result;
-
-    return CL_SUCCESS;
-}
-
-cl_uint GetThreadCount( void )
-{
-    return 1;
-}
-
-void SetThreadCount( int count )
-{
-    if( count > 1 )
-        log_info( "WARNING: SetThreadCount(%d) ignored\n", count );
-}
-
-#endif
--- a/test_conformance/compatibility/test_common/harness/compat.h
+++ b/test_conformance/compatibility/test_common/harness/compat.h
@@ -1,393 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-/*
-    Header compat.h should be used instead of stdlib.h, stdbool.h, stdint.h, float.h, fenv.h,
-    math.h. It provides workarounds if these headers are not available or not complete.
-
-    Important: It should be included before math.h, directly or indirectly, because Intel mathimf.h
-    is not compatible with Microsoft math.h. Including math.h before mathimf.h causes compile-time
-    error.
-*/
-#ifndef _COMPAT_H_
-#define _COMPAT_H_
-
-#if defined(_WIN32) && defined (_MSC_VER)
-#include <Windows.h>
-#endif
-
-#ifdef __cplusplus
-    #define EXTERN_C extern "C"
-#else
-    #define EXTERN_C
-#endif
-
-
-//
-// stdlib.h
-//
-
-#include <stdlib.h>     // On Windows, _MAX_PATH defined there.
-
-// llabs appeared in MS C v16 (VS 10/2010).
-#if defined( _MSC_VER ) && _MSC_VER <= 1500
-    EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
-#endif
-
-
-//
-// stdbool.h
-//
-
-// stdbool.h appeared in MS C v18 (VS 12/2013).
-#if defined( _MSC_VER ) && MSC_VER <= 1700
-#if !defined(__cplusplus)
-typedef char bool;
-        #define true  1
-        #define false 0
-    #endif
-#else
-    #include <stdbool.h>
-#endif
-
-
-
-//
-// stdint.h
-//
-
-// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
-#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
-typedef unsigned char       uint8_t;
-typedef char                int8_t;
-typedef unsigned short      uint16_t;
-typedef short               int16_t;
-typedef unsigned int        uint32_t;
-typedef int                 int32_t;
-typedef unsigned long long  uint64_t;
-typedef long long           int64_t;
-#else
-#ifndef __STDC_LIMIT_MACROS
-#define __STDC_LIMIT_MACROS
-#endif
-    #include <stdint.h>
-#endif
-
-
-
-//
-// float.h
-//
-
-#include <float.h>
-
-
-
-//
-// fenv.h
-//
-
-// fenv.h appeared in MS C v18 (VS 12/2013).
-#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
-    // reimplement fenv.h because windows doesn't have it
-    #define FE_INEXACT          0x0020
-    #define FE_UNDERFLOW        0x0010
-    #define FE_OVERFLOW         0x0008
-    #define FE_DIVBYZERO        0x0004
-    #define FE_INVALID          0x0001
-    #define FE_ALL_EXCEPT       0x003D
-    int fetestexcept(int excepts);
-    int feclearexcept(int excepts);
-#else
-    #include <fenv.h>
-#endif
-
-
-//
-// math.h
-//
-
-#if defined( __INTEL_COMPILER )
-    #include <mathimf.h>
-#else
-    #include <math.h>
-#endif
-
-#if defined( _MSC_VER )
-
-    #ifdef __cplusplus
-        extern "C" {
-    #endif
-
-#ifndef M_PI
-    #define M_PI    3.14159265358979323846264338327950288
-#endif
-
-    #if ! defined( __INTEL_COMPILER )
-
-        #ifndef NAN
-            #define NAN  (INFINITY - INFINITY)
-        #endif
-        #ifndef HUGE_VALF
-            #define HUGE_VALF (float)HUGE_VAL
-        #endif
-        #ifndef INFINITY
-            #define INFINITY    (FLT_MAX + FLT_MAX)
-        #endif
-        #ifndef isfinite
-            #define isfinite(x) _finite(x)
-        #endif
-        #ifndef isnan
-#define    isnan( x )       ((x) != (x))
-        #endif
-        #ifndef isinf
-#define     isinf( _x)      ((_x) == INFINITY || (_x) == -INFINITY)
-        #endif
-
-double rint( double x);
-float  rintf( float x);
-long double rintl( long double x);
-
-float cbrtf( float );
-double cbrt( double );
-
-int    ilogb( double x);
-int    ilogbf (float x);
-int    ilogbl(long double x);
-
-double fmax(double x, double y);
-double fmin(double x, double y);
-float  fmaxf( float x, float y );
-float  fminf(float x, float y);
-
-double      log2(double x);
-long double log2l(long double x);
-
-double      exp2(double x);
-long double exp2l(long double x);
-
-double      fdim(double x, double y);
-float       fdimf(float x, float y);
-long double fdiml(long double x, long double y);
-
-double      remquo( double x, double y, int *quo);
-float       remquof( float x, float y, int *quo);
-long double remquol( long double x, long double y, int *quo);
-
-long double scalblnl(long double x, long n);
-
-// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
-#if defined( _MSC_VER ) && _MSC_VER < 1900
-	#define snprintf   sprintf_s
-#endif
-float hypotf(float x, float y);
-long double hypotl(long double x, long double y) ;
-double lgamma(double x);
-float  lgammaf(float x);
-
-double trunc(double x);
-float  truncf(float x);
-
-double log1p(double x);
-float  log1pf(float x);
-long double log1pl(long double x);
-
-double copysign(double x, double y);
-float  copysignf(float x, float y);
-long double copysignl(long double x, long double y);
-
-long lround(double x);
-long lroundf(float x);
-//long lroundl(long double x)
-
-double round(double x);
-float  roundf(float x);
-long double roundl(long double x);
-
-        int cf_signbit(double x);
-        int cf_signbitf(float x);
-
-// Added in _MSC_VER == 1800 (Visual Studio 2013)
-#if defined( _MSC_VER ) && _MSC_VER < 1800
-        static int signbit(double x) { return  cf_signbit(x); }
-#endif
-        static int signbitf(float x) { return cf_signbitf(x); }
-
-long int lrint (double flt);
-long int lrintf (float flt);
-
-float   int2float (int32_t ix);
-int32_t float2int (float   fx);
-
-    #endif
-
-    #if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
-        // These functions appeared in Intel C v13.
-        float  nanf( const char* str);
-        double nan( const char* str);
-        long double nanl( const char* str);
-    #endif
-
-    #ifdef __cplusplus
-        }
-    #endif
-
-#endif
-
-#if defined( __ANDROID__ )
-    #define log2(X)  (log(X)/log(2))
-#endif
-
-
-
-//
-// stdio.h
-//
-
-
-
-//
-// unistd.h
-//
-
-#if defined( _MSC_VER )
-    EXTERN_C unsigned int sleep( unsigned int sec );
-    EXTERN_C int usleep( int usec );
-#endif
-
-
-
-//
-// syscall.h
-//
-
-#if defined( __ANDROID__ )
-    // Android bionic's isn't providing SYS_sysctl wrappers.
-    #define SYS__sysctl  __NR__sysctl
-#endif
-
-
-
-// Some tests use _malloca which defined in malloc.h.
-#if !defined (__APPLE__)
-#include <malloc.h>
-#endif
-
-
-//
-// ???
-//
-
-#if defined( _MSC_VER )
-
-    #define MAXPATHLEN _MAX_PATH
-
-    EXTERN_C uint64_t ReadTime( void );
-    EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
-
-/** Returns the number of leading 0-bits in x,
-    starting at the most significant bit position.
-    If x is 0, the result is undefined.
-*/
-    EXTERN_C int __builtin_clz(unsigned int pattern);
-
-#endif
-
-#ifndef MIN
-    #define MIN(x,y) (((x)<(y))?(x):(y))
-#endif
-#ifndef MAX
-    #define MAX(x,y) (((x)>(y))?(x):(y))
-#endif
-
-
-/*
-    ------------------------------------------------------------------------------------------------
-    WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
-
-    This is a typical usage of the macros:
-
-        double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
-
-     (taken from math_brute_force/reference_math.c). There are two problems:
-
-        1.  There is an error here. On Windows in will produce incorrect result
-            `0x1.5555555555555p+50'. To have a correct result it should be written as
-            `MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
-            third argument is not obvious -- sometimes it should be the same as exponent of the
-            first argument, but sometimes not.
-
-        2.  Information is duplicated. It is easy to make a mistake.
-
-    Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
-    ------------------------------------------------------------------------------------------------
-*/
-#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
-
-    #define MAKE_HEX_FLOAT(x,y,z)  ((float)ldexp( (float)(y), z))
-    #define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
-    #define MAKE_HEX_LONG(x,y,z)   ((long double) ldexp( (long double)(y), z))
-
-#else
-
-// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
-#define MAKE_HEX_FLOAT(x,y,z) x
-#define MAKE_HEX_DOUBLE(x,y,z) x
-#define MAKE_HEX_LONG(x,y,z) x
-
-#endif
-
-
-/*
-    ------------------------------------------------------------------------------------------------
-    HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
-    double respectively. Arguments:
-
-        sm    -- sign of number,
-        int   -- integer part of mantissa (without `0x' prefix),
-        fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
-        se    -- sign of exponent,
-        exp   -- absolute value of (binary) exponent.
-
-    Example:
-
-        double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
-
-    Note:
-
-        We have to pass signs as separate arguments because gcc pass negative integer values
-        (e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
-        `0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
-        literal.
-    ------------------------------------------------------------------------------------------------
-*/
-#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
-    // If compiler does not support hex floating point literals:
-    #define HEX_FLT(  sm, int, fract, se, exp ) sm ldexpf(       (float)( 0x ## int ## fract ## UL  ), se exp + ilogbf(       (float) 0x ## int ) - ilogbf(       ( float )( 0x ## int ## fract ## UL  ) ) )
-    #define HEX_DBL(  sm, int, fract, se, exp ) sm ldexp(       (double)( 0x ## int ## fract ## ULL ), se exp + ilogb(       (double) 0x ## int ) - ilogb(       ( double )( 0x ## int ## fract ## ULL ) ) )
-    #define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
-#else
-    // If compiler supports hex floating point literals: just concatenate all the parts into a literal.
-    #define HEX_FLT(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
-    #define HEX_DBL(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
-    #define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
-#endif
-
-#if defined(__MINGW32__)
-    #include <Windows.h>
-    #define sleep(sec) Sleep((sec) * 1000)
-#endif
-
-#endif // _COMPAT_H_
--- a/test_conformance/compatibility/test_common/harness/errorHelpers.h
+++ b/test_conformance/compatibility/test_common/harness/errorHelpers.h
@@ -1,164 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef _errorHelpers_h
-#define _errorHelpers_h
-
-#include <sstream>
-
-#ifdef __APPLE__
-#include <OpenCL/opencl.h>
-#else
-#include <CL/opencl.h>
-#endif
-#include <stdlib.h>
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define LOWER_IS_BETTER     0
-#define HIGHER_IS_BETTER    1
-
-// If USE_ATF is defined, all log_error and log_info calls can be routed to test library
-// functions as described below. This is helpful for integration into an automated testing
-// system.
-#if USE_ATF
-// export BUILD_WITH_ATF=1
-    #include <ATF/ATF.h>
-    #define test_start() ATFTestStart()
-    #define log_info ATFLogInfo
-    #define log_error ATFLogError
-    #define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__)
-    #define test_finish() ATFTestFinish()
-    #define vlog_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__)
-    #define vlog ATFLogInfo
-    #define vlog_error ATFLogError
-#else
-    #define test_start()
-    #define log_info printf
-    #define log_error printf
-    #define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType,        \
-                        _higherBetter?"higher is better":"lower is better", _number )
-    #define test_finish()
-    #define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType,    \
-                        _higherBetter?"higher is better":"lower is better" , _number)
-    #ifdef _WIN32
-        #ifdef __MINGW32__
-            // Use __mingw_printf since it supports "%a" format specifier
-            #define vlog __mingw_printf
-            #define vlog_error __mingw_printf
-        #else
-            // Use home-baked function that treats "%a" as "%f"
-        static int vlog_win32(const char *format, ...);
-        #define vlog vlog_win32
-        #define vlog_error vlog_win32
-        #endif
-    #else
-        #define vlog_error printf
-        #define vlog printf
-    #endif
-#endif
-
-#define ct_assert(b)          ct_assert_i(b, __LINE__)
-#define ct_assert_i(b, line)  ct_assert_ii(b, line)
-#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
-
-#define test_error(errCode,msg)    test_error_ret(errCode,msg,errCode)
-#define test_error_ret(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
-#define print_error(errCode,msg)    log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
-
-// expected error code vs. what we got
-#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
-#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
-#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
-#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
-#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
-#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
-
-#define ASSERT_SUCCESS(expr, msg)                                                                  \
-    do                                                                                             \
-    {                                                                                              \
-        cl_int _temp_retval = (expr);                                                              \
-        if (_temp_retval != CL_SUCCESS)                                                            \
-        {                                                                                          \
-            std::stringstream ss;                                                                  \
-            ss << "ERROR: " << msg << "=" << IGetErrorString(_temp_retval)                         \
-               << " at " << __FILE__ << ":" << __LINE__ << "\n";                                   \
-            throw std::runtime_error(ss.str());                                                    \
-        }                                                                                          \
-    } while (0)
-
-extern const char    *IGetErrorString( int clErrorCode );
-
-extern float Ulp_Error_Half( cl_ushort test, float reference );
-extern float Ulp_Error( float test, double reference );
-extern float Ulp_Error_Double( double test, long double reference );
-
-extern const char *GetChannelTypeName( cl_channel_type type );
-extern int IsChannelTypeSupported( cl_channel_type type );
-extern const char *GetChannelOrderName( cl_channel_order order );
-extern int IsChannelOrderSupported( cl_channel_order order );
-extern const char *GetAddressModeName( cl_addressing_mode mode );
-
-extern const char *GetDeviceTypeName( cl_device_type type );
-
-// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
-extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
-
-#if defined (_WIN32) && !defined(__MINGW32__)
-#include <stdarg.h>
-#include <stdio.h>
-#include <string.h>
-static int vlog_win32(const char *format, ...)
-{
-    const char *new_format = format;
-
-    if (strstr(format, "%a")) {
-        char *temp;
-        if ((temp = strdup(format)) == NULL) {
-            printf("vlog_win32: Failed to allocate memory for strdup\n");
-            return -1;
-        }
-        new_format = temp;
-        while (*temp) {
-            // replace %a with %f
-            if ((*temp == '%') && (*(temp+1) == 'a')) {
-                *(temp+1) = 'f';
-            }
-            temp++;
-        }
-    }
-
-    va_list args;
-    va_start(args, format);
-    vprintf(new_format, args);
-    va_end(args);
-
-    if (new_format != format) {
-        free((void*)new_format);
-    }
-
-    return 0;
-}
-#endif
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // _errorHelpers_h
-
-
--- a/test_conformance/compatibility/test_common/harness/fpcontrol.h
+++ b/test_conformance/compatibility/test_common/harness/fpcontrol.h
@@ -1,104 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef _fpcontrol_h
-#define _fpcontrol_h
-
-// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
-// to FTZ mode if the device hardware is running in that mode.  We have explored all other options short of writing correctly rounded operations
-// in integer code, and have found this is the only way to correctly verify operation.
-//
-// Non-Apple implementations will need to provide their own implentation for these features.  If the reference hardware and device are both
-// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty.  If the device is running in non-default
-// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
-#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
-    typedef int     FPU_mode_type;
-#if defined( __i386__ ) || defined( __x86_64__ )
-    #include <xmmintrin.h>
-#elif defined( __PPC__ )
-    #include <fpu_control.h>
-    extern __thread fpu_control_t fpu_control;
-#endif
-    // Set the reference hardware floating point unit to FTZ mode
-    static inline void ForceFTZ( FPU_mode_type *mode )
-    {
-#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
-        *mode = _mm_getcsr();
-        _mm_setcsr( *mode | 0x8040);
-#elif defined( __PPC__ )
-        *mode = fpu_control;
-        fpu_control |= _FPU_MASK_NI;
-#elif defined ( __arm__ )
-        unsigned fpscr;
-        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
-        *mode = fpscr;
-        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
-        // Add 64 bit support
-#elif defined (__aarch64__)
-        unsigned fpscr;
-        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
-        *mode = fpscr;
-        __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24)));
-#else
-        #error ForceFTZ needs an implentation
-#endif
-    }
-
-    // Disable the denorm flush to zero
-    static inline void DisableFTZ( FPU_mode_type *mode )
-    {
-#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
-        *mode = _mm_getcsr();
-        _mm_setcsr( *mode & ~0x8040);
-#elif defined( __PPC__ )
-        *mode = fpu_control;
-        fpu_control &= ~_FPU_MASK_NI;
-#elif defined ( __arm__ )
-        unsigned fpscr;
-        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
-        *mode = fpscr;
-        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
-        // Add 64 bit support
-#elif defined (__aarch64__)
-        unsigned fpscr;
-        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
-        *mode = fpscr;
-        __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24)));
-#else
-#error DisableFTZ needs an implentation
-#endif
-    }
-
-    // Restore the reference hardware to floating point state indicated by *mode
-    static inline void RestoreFPState( FPU_mode_type *mode )
-    {
-#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
-        _mm_setcsr( *mode );
-#elif defined( __PPC__)
-        fpu_control = *mode;
-#elif defined (__arm__)
-        __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
-        // Add 64 bit support
-#elif defined (__aarch64__)
-        __asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
-#else
-        #error RestoreFPState needs an implementation
-#endif
-    }
-#else
-        #error ForceFTZ and RestoreFPState need implentations
-#endif
-
-#endif
--- a/test_conformance/compatibility/test_common/harness/msvc9.c
+++ b/test_conformance/compatibility/test_common/harness/msvc9.c
@@ -1,773 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "compat.h"
-
-#if defined ( _MSC_VER )
-
-#include <limits.h>
-#include <stdlib.h>
-
-#include <CL/cl.h>
-
-#include <windows.h>
-
-#if ! defined( __INTEL_COMPILER )
-
-///////////////////////////////////////////////////////////////////
-//
-//                   rint, rintf
-//
-///////////////////////////////////////////////////////////////////
-
-float copysignf( float x, float y )
-{
-    union{ cl_uint u; float f; }ux, uy;
-
-    ux.f = x;
-    uy.f = y;
-
-    ux.u = (ux.u & 0x7fffffffU) | (uy.u & 0x80000000U);
-
-    return ux.f;
-}
-
-double copysign( double x, double y )
-{
-    union{ cl_ulong u; double f; }ux, uy;
-
-    ux.f = x;
-    uy.f = y;
-
-    ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL);
-
-    return ux.f;
-}
-
-long double copysignl( long double x, long double y )
-{
-    union
-    {
-        long double f;
-        struct{ cl_ulong m; cl_ushort sexp; }u;
-    }ux, uy;
-
-    ux.f = x;
-    uy.f = y;
-
-    ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000);
-
-    return ux.f;
-}
-
-float rintf(float x)
-{
-    float absx = fabsf(x);
-
-    if( absx < 8388608.0f /* 0x1.0p23f */ )
-    {
-        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
-        float rounded = x + magic;
-        rounded -= magic;
-        x = copysignf( rounded, x );
-    }
-
-    return x;
-}
-
-double rint(double x)
-{
-    double absx = fabs(x);
-
-    if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
-    {
-        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
-        double rounded = x + magic;
-        rounded -= magic;
-        x = copysign( rounded, x );
-    }
-
-    return x;
-}
-
-long double rintl(long double x)
-{
-    double absx = fabs(x);
-
-    if( absx < 9223372036854775808.0L /* 0x1.0p64f */ )
-    {
-        long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
-        long double rounded = x + magic;
-        rounded -= magic;
-        x = copysignl( rounded, x );
-    }
-
-    return x;
-}
-
-
-///////////////////////////////////////////////////////////////////
-//
-//                   ilogb, ilogbf, ilogbl
-//
-///////////////////////////////////////////////////////////////////
-#ifndef FP_ILOGB0
-    #define FP_ILOGB0   INT_MIN
-#endif
-
-#ifndef FP_ILOGBNAN
-    #define FP_ILOGBNAN INT_MIN
-#endif
-
-int ilogb (double x)
-{
-    union{ double f; cl_ulong u;} u;
-    u.f = x;
-
-    cl_ulong absx = u.u & CL_LONG_MAX;
-    if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
-    {
-        switch( absx )
-        {
-            case 0:
-                return FP_ILOGB0;
-            case 0x7ff0000000000000ULL:
-                return INT_MAX;
-            default:
-                if( absx > 0x7ff0000000000000ULL )
-                    return FP_ILOGBNAN;
-
-                // subnormal
-                u.u = absx | 0x3ff0000000000000ULL;
-                u.f -= 1.0;
-                return (u.u >> 52) - (1023 + 1022);
-        }
-    }
-
-    return (absx >> 52) - 1023;
-}
-
-
-int ilogbf (float x)
-{
-    union{ float f; cl_uint u;} u;
-    u.f = x;
-
-    cl_uint absx = u.u & 0x7fffffff;
-    if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
-    {
-        switch( absx )
-        {
-            case 0:
-                return FP_ILOGB0;
-            case 0x7f800000U:
-                return INT_MAX;
-            default:
-                if( absx > 0x7f800000 )
-                    return FP_ILOGBNAN;
-
-                // subnormal
-                u.u = absx | 0x3f800000U;
-                u.f -= 1.0f;
-                return (u.u >> 23) - (127 + 126);
-        }
-    }
-
-    return (absx >> 23) - 127;
-}
-
-int ilogbl (long double x)
-{
-    union
-    {
-        long double f;
-        struct{ cl_ulong m; cl_ushort sexp; }u;
-    } u;
-    u.f = x;
-
-    int exp = u.u.sexp & 0x7fff;
-    if( 0 == exp )
-    {
-        if( 0 == u.u.m )
-            return FP_ILOGB0;
-
-        //subnormal
-        u.u.sexp = 0x3fff;
-        u.f -= 1.0f;
-        exp = u.u.sexp & 0x7fff;
-
-        return exp - (0x3fff + 0x3ffe);
-    }
-    else if( 0x7fff == exp )
-    {
-        if( u.u.m & CL_LONG_MAX )
-            return FP_ILOGBNAN;
-
-        return INT_MAX;
-    }
-
-    return exp - 0x3fff;
-}
-
-
-
-///////////////////////////////////////////////////////////////////
-//
-//                 fmax, fmin, fmaxf, fminf
-//
-///////////////////////////////////////////////////////////////////
-
-static void GET_BITS_SP32(float fx, unsigned int* ux)
-{
-    volatile union {float f; unsigned int u;} _bitsy;
-    _bitsy.f = (fx);
-    *ux = _bitsy.u;
-}
-/* static void GET_BITS_SP32(float fx, unsigned int* ux) */
-/* { */
-/*     volatile union {float f; unsigned int i;} _bitsy; */
-/*     _bitsy.f = (fx); */
-/*     *ux = _bitsy.i; */
-/* } */
-static void PUT_BITS_SP32(unsigned int ux, float* fx)
-{
-    volatile union {float f; unsigned int u;} _bitsy;
-    _bitsy.u = (ux);
-    *fx = _bitsy.f;
-}
-/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
-/* { */
-/*     volatile union {float f; unsigned int i;} _bitsy; */
-/*     _bitsy.i = (ux); */
-/*     *fx = _bitsy.f; */
-/* } */
-static void GET_BITS_DP64(double dx, unsigned __int64* lx)
-{
-    volatile union {double d; unsigned __int64 l;} _bitsy;
-    _bitsy.d = (dx);
-    *lx = _bitsy.l;
-}
-static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
-{
-    volatile union {double d; unsigned __int64 l;} _bitsy;
-    _bitsy.l = (lx);
-    *dx = _bitsy.d;
-}
-
-#if 0
-int SIGNBIT_DP64(double x )
-{
-    int hx;
-    _GET_HIGH_WORD(hx,x);
-    return((hx>>31));
-}
-#endif
-
-/* fmax(x, y) returns the larger (more positive) of x and y.
-   NaNs are treated as missing values: if one argument is NaN,
-   the other argument is returned. If both arguments are NaN,
-   the first argument is returned. */
-
-/* This works so long as the compiler knows that (x != x) means
-   that x is NaN; gcc does. */
-double fmax(double x, double y)
-{
-    if( isnan(y) )
-        return x;
-
-    return x >= y ? x : y;
-}
-
-
-/* fmin(x, y) returns the smaller (more negative) of x and y.
-   NaNs are treated as missing values: if one argument is NaN,
-   the other argument is returned. If both arguments are NaN,
-   the first argument is returned. */
-
-double fmin(double x, double y)
-{
-    if( isnan(y) )
-        return x;
-
-    return x <= y ? x : y;
-}
-
-
-float fmaxf( float x, float y )
-{
-    if( isnan(y) )
-        return x;
-
-    return x >= y ? x : y;
-}
-
-/* fminf(x, y) returns the smaller (more negative) of x and y.
-   NaNs are treated as missing values: if one argument is NaN,
-   the other argument is returned. If both arguments are NaN,
-   the first argument is returned. */
-
-float fminf(float x, float y)
-{
-    if( isnan(y) )
-        return x;
-
-    return x <= y ? x : y;
-}
-
-long double scalblnl(long double x, long n)
-{
-    union
-    {
-        long double d;
-        struct{ cl_ulong m; cl_ushort sexp;}u;
-    }u;
-    u.u.m = CL_LONG_MIN;
-
-    if( x == 0.0L || n < -2200)
-        return copysignl( 0.0L, x );
-
-    if( n > 2200 )
-        return INFINITY;
-
-    if( n < 0 )
-    {
-        u.u.sexp = 0x3fff - 1022;
-        while( n <= -1022 )
-        {
-            x *= u.d;
-            n += 1022;
-        }
-        u.u.sexp = 0x3fff + n;
-        x *= u.d;
-        return x;
-    }
-
-    if( n > 0 )
-    {
-        u.u.sexp = 0x3fff + 1023;
-        while( n >= 1023 )
-        {
-            x *= u.d;
-            n -= 1023;
-        }
-        u.u.sexp = 0x3fff + n;
-        x *= u.d;
-        return x;
-    }
-
-    return x;
-}
-
-///////////////////////////////////////////////////////////////////
-//
-//                          log2
-//
-///////////////////////////////////////////////////////////////////
-const static cl_double log_e_base2   = 1.4426950408889634074;
-const static cl_double log_10_base2  = 3.3219280948873623478;
-
-//double log10(double x);
-
-double log2(double x)
-{
-    return 1.44269504088896340735992468100189214 * log(x);
-}
-
-long double log2l(long double x)
-{
-    return 1.44269504088896340735992468100189214L * log(x);
-}
-
-double trunc(double x)
-{
-    double absx = fabs(x);
-
-    if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
-    {
-        cl_long rounded = x;
-        x = copysign( (double) rounded, x );
-    }
-
-    return x;
-}
-
-float  truncf(float x)
-{
-    float absx = fabsf(x);
-
-    if( absx < 8388608.0f /* 0x1.0p23f */ )
-    {
-        cl_int rounded = x;
-        x = copysignf( (float) rounded, x );
-    }
-
-    return x;
-}
-
-long lround(double x)
-{
-    double absx = fabs(x);
-
-    if( absx < 0.5 )
-        return 0;
-
-    if( absx < 4503599627370496.0 /* 0x1.0p52 */)
-    {
-        absx += 0.5;
-        cl_long rounded = absx;
-        absx = rounded;
-        x = copysign( absx, x );
-    }
-
-    if( x >= (double) LONG_MAX )
-        return LONG_MAX;
-
-    return (long) x;
-}
-
-long lroundf(float x)
-{
-    float absx = fabsf(x);
-
-    if( absx < 0.5f )
-        return 0;
-
-    if( absx < 8388608.0f )
-    {
-        absx += 0.5f;
-        cl_int rounded = absx;
-        absx = rounded;
-        x = copysignf(  absx, x );
-    }
-
-    if( x >= (float) LONG_MAX )
-        return LONG_MAX;
-
-    return (long) x;
-}
-
-double round(double x)
-{
-    double absx = fabs(x);
-
-    if( absx < 0.5 )
-        return copysign( 0.0, x);
-
-    if( absx < 4503599627370496.0 /* 0x1.0p52 */)
-    {
-        absx += 0.5;
-        cl_long rounded = absx;
-        absx = rounded;
-        x = copysign( absx, x );
-    }
-
-    return x;
-}
-
-float  roundf(float x)
-{
-    float absx = fabsf(x);
-
-    if( absx < 0.5f )
-        return copysignf( 0.0f, x);
-
-    if( absx < 8388608.0f )
-    {
-        absx += 0.5f;
-        cl_int rounded = absx;
-        absx = rounded;
-        x = copysignf( absx, x );
-    }
-
-    return x;
-}
-
-long double roundl(long double x)
-{
-    long double absx = fabsl(x);
-
-    if( absx < 0.5L )
-        return copysignl( 0.0L, x);
-
-    if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
-    {
-        absx += 0.5L;
-        cl_ulong rounded = absx;
-        absx = rounded;
-        x = copysignl( absx, x );
-    }
-
-    return x;
-}
-
-float cbrtf( float x )
-{
-    float z = pow( fabs((double) x), 1.0 / 3.0 );
-    return copysignf( z, x );
-}
-
-double cbrt( double x )
-{
-    return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
-}
-
-long int lrint (double x)
-{
-    double absx = fabs(x);
-
-    if( x >= (double) LONG_MAX )
-        return LONG_MAX;
-
-    if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
-    {
-        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
-        double rounded = x + magic;
-        rounded -= magic;
-        return (long int) rounded;
-    }
-
-    return (long int) x;
-}
-
-long int lrintf (float x)
-{
-    float absx = fabsf(x);
-
-    if( x >= (float) LONG_MAX )
-        return LONG_MAX;
-
-    if( absx < 8388608.0f /* 0x1.0p23f */ )
-    {
-        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
-        float rounded = x + magic;
-        rounded -= magic;
-        return (long int) rounded;
-    }
-
-    return (long int) x;
-}
-
-
-///////////////////////////////////////////////////////////////////
-//
-//                  fenv functions
-//
-///////////////////////////////////////////////////////////////////
-#if _MSC_VER < 1900
-int fetestexcept(int excepts)
-{
-    unsigned int status = _statusfp();
-    return excepts & (
-        ((status & _SW_INEXACT) ? FE_INEXACT : 0)      |
-        ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)  |
-        ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)    |
-        ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
-        ((status & _SW_INVALID) ? FE_INVALID : 0)
-    );
-}
-
-int feclearexcept(int excepts)
-{
-    _clearfp();
-    return 0;
-}
-#endif
-
-#endif // __INTEL_COMPILER
-
-#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
-
-float make_nan()
-{
-/* This is the IEEE 754 single-precision format:
-    unsigned int mantissa:  22;
-    unsigned int quiet_nan:  1;
-    unsigned int exponent:   8;
-    unsigned int negative:   1;
-*/
-     //const static unsigned
-     static const int32_t _nan = 0x7fc00000;
-     return *(const float*)(&_nan);
-}
-
-float nanf( const char* str)
-{
-    cl_uint u = atoi( str );
-    u |= 0x7fc00000U;
-    return *( float*)(&u);
-}
-
-
-double nan( const char* str)
-{
-    cl_ulong u = atoi( str );
-    u |= 0x7ff8000000000000ULL;
-    return *( double*)(&u);
-}
-
-// double check this implementatation
-long double nanl( const char* str)
-{
-    union
-    {
-        long double f;
-        struct { cl_ulong m; cl_ushort sexp; }u;
-    }u;
-    u.u.sexp = 0x7fff;
-    u.u.m = 0x8000000000000000ULL | atoi( str );
-
-    return u.f;
-}
-
-#endif
-
-///////////////////////////////////////////////////////////////////
-//
-//                  misc functions
-//
-///////////////////////////////////////////////////////////////////
-
-/*
-// This function is commented out because the Windows implementation should never call munmap.
-// If it is calling it, we have a bug. Please file a bugzilla.
-int munmap(void *addr, size_t len)
-{
-// FIXME: this is not correct.  munmap is like free()    http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
-
-    return (int)VirtualAlloc( (LPVOID)addr, len,
-                  MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
-}
-*/
-
-uint64_t ReadTime( void )
-{
-    LARGE_INTEGER current;
-    QueryPerformanceCounter(&current);
-    return (uint64_t)current.QuadPart;
-}
-
-double SubtractTime( uint64_t endTime, uint64_t startTime )
-{
-    static double PerformanceFrequency = 0.0;
-
-    if (PerformanceFrequency == 0.0) {
-        LARGE_INTEGER frequency;
-        QueryPerformanceFrequency(&frequency);
-        PerformanceFrequency = (double) frequency.QuadPart;
-    }
-
-    return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
-}
-
-int cf_signbit(double x)
-{
-    union
-    {
-        double f;
-        cl_ulong u;
-    }u;
-    u.f = x;
-    return u.u >> 63;
-}
-
-int cf_signbitf(float x)
-{
-    union
-    {
-        float f;
-        cl_uint u;
-    }u;
-    u.f = x;
-    return u.u >> 31;
-}
-
-float int2float (int32_t ix)
-{
-    union {
-        float   f;
-        int32_t i;
-    } u;
-    u.i = ix;
-    return u.f;
-}
-
-int32_t float2int (float   fx)
-{
-    union {
-        float   f;
-        int32_t i;
-    } u;
-    u.f = fx;
-    return u.i;
-}
-
-#if !defined(_WIN64)
-/** Returns the number of leading 0-bits in x,
-    starting at the most significant bit position.
-    If x is 0, the result is undefined.
-*/
-int __builtin_clz(unsigned int pattern)
-{
-#if 0
-    int res;
-    __asm {
-        mov eax, pattern
-        bsr eax, eax
-        mov res, eax
-    }
-    return 31 - res;
-#endif
-    unsigned long index;
-    unsigned char res = _BitScanReverse( &index, pattern);
-    if (res) {
-        return 8*sizeof(int) - 1 - index;
-    } else {
-        return 8*sizeof(int);
-    }
-}
-#else
-int __builtin_clz(unsigned int pattern)
-{
-   int count;
-   if (pattern == 0u) {
-       return 32;
-   }
-   count = 31;
-   if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; }
-   if (pattern >=  1u<<8) { pattern >>=  8; count -=  8; }
-   if (pattern >=  1u<<4) { pattern >>=  4; count -=  4; }
-   if (pattern >=  1u<<2) { pattern >>=  2; count -=  2; }
-   if (pattern >=  1u<<1) {                 count -=  1; }
-   return count;
-}
-
-#endif // !defined(_WIN64)
-
-#include <intrin.h>
-#include <emmintrin.h>
-
-int usleep(int usec)
-{
-    Sleep((usec + 999) / 1000);
-    return 0;
-}
-
-unsigned int sleep( unsigned int sec )
-{
-    Sleep( sec * 1000 );
-    return 0;
-}
-
-#endif // defined( _MSC_VER )
--- a/test_conformance/compatibility/test_common/harness/mt19937.c
+++ b/test_conformance/compatibility/test_common/harness/mt19937.c
@@ -1,274 +0,0 @@
-/*
-   A C-program for MT19937, with initialization improved 2002/1/26.
-   Coded by Takuji Nishimura and Makoto Matsumoto.
-
-   Before using, initialize the state by using init_genrand(seed)
-   or init_by_array(init_key, key_length).
-
-   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions
-   are met:
-
-     1. Redistributions of source code must retain the above copyright
-        notice, this list of conditions and the following disclaimer.
-
-     2. Redistributions in binary form must reproduce the above copyright
-        notice, this list of conditions and the following disclaimer in the
-        documentation and/or other materials provided with the distribution.
-
-     3. The names of its contributors may not be used to endorse or promote
-        products derived from this software without specific prior written
-        permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-   Any feedback is very welcome.
-   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
-   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
-
-   Modifications for use in OpenCL by Ian Ollmann, Apple Inc.
-
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "mt19937.h"
-#include "mingw_compat.h"
-
-#ifdef __SSE2__
-    #include <emmintrin.h>
-#endif
-
-static void * align_malloc(size_t size, size_t alignment)
-{
-#if defined(_WIN32) && defined(_MSC_VER)
-    return _aligned_malloc(size, alignment);
-#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
-    void * ptr = NULL;
-    if (0 == posix_memalign(&ptr, alignment, size))
-        return ptr;
-    return NULL;
-#elif defined(__MINGW32__)
-    return __mingw_aligned_malloc(size, alignment);
-#else
-    #error "Please add support OS for aligned malloc"
-#endif
-}
-
-static void   align_free(void * ptr)
-{
-#if defined(_WIN32) && defined(_MSC_VER)
-    _aligned_free(ptr);
-#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
-    return  free(ptr);
-#elif defined(__MINGW32__)
-    return __mingw_aligned_free(ptr);
-#else
-    #error "Please add support OS for aligned free"
-#endif
-}
-
-
-/* Period parameters */
-#define N 624   /* vector code requires multiple of 4 here */
-#define M 397
-#define MATRIX_A    (cl_uint) 0x9908b0dfUL   /* constant vector a */
-#define UPPER_MASK  (cl_uint) 0x80000000UL /* most significant w-r bits */
-#define LOWER_MASK  (cl_uint) 0x7fffffffUL /* least significant r bits */
-
-typedef struct _MTdata
-{
-    cl_uint mt[N];
-#ifdef __SSE2__
-    cl_uint cache[N];
-#endif
-    cl_int  mti;
-}_MTdata;
-
-/* initializes mt[N] with a seed */
-MTdata init_genrand(cl_uint s)
-{
-    MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
-    if( NULL != r )
-    {
-        cl_uint *mt = r->mt;
-        int mti = 0;
-        mt[0]= s; // & 0xffffffffUL;
-        for (mti=1; mti<N; mti++) {
-            mt[mti] = (cl_uint)
-            (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
-            /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
-            /* In the previous versions, MSBs of the seed affect   */
-            /* only MSBs of the array mt[].                        */
-            /* 2002/01/09 modified by Makoto Matsumoto             */
-    //        mt[mti] &= 0xffffffffUL;
-            /* for >32 bit machines */
-        }
-        r->mti = mti;
-    }
-
-    return r;
-}
-
-void    free_mtdata( MTdata d )
-{
-    if(d)
-        align_free(d);
-}
-
-/* generates a random number on [0,0xffffffff]-interval */
-cl_uint genrand_int32( MTdata d)
-{
-    /* mag01[x] = x * MATRIX_A  for x=0,1 */
-    static const cl_uint mag01[2]={0x0UL, MATRIX_A};
-#ifdef __SSE2__
-    static volatile int init = 0;
-    static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
-#endif
-
-
-    cl_uint *mt = d->mt;
-    cl_uint y;
-
-    if (d->mti == N)
-    { /* generate N words at one time */
-        int kk;
-
-#ifdef __SSE2__
-        if( 0 == init )
-        {
-            upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
-            lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
-            one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
-            matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
-            c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
-            c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
-            init = 1;
-        }
-#endif
-
-        kk = 0;
-#ifdef __SSE2__
-        // vector loop
-        for( ; kk + 4 <= N-M; kk += 4 )
-        {
-            __m128i vy = _mm_or_si128(  _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
-                                        _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v ));        //  ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
-
-            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v );                                         // y & 1 ? -1 : 0
-            __m128i vmag01 = _mm_and_si128( mask, matrix_a.v );                                                         // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
-            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) );    // mt[kk+M] ^ (y >> 1)
-            vr = _mm_xor_si128( vr, vmag01 );                                                                           // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
-            _mm_store_si128( (__m128i*) (mt + kk ), vr );
-        }
-#endif
-        for ( ;kk<N-M;kk++) {
-            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
-            mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
-        }
-
-#ifdef __SSE2__
-        // advance to next aligned location
-        for (;kk<N-1 && (kk & 3);kk++) {
-            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
-            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
-        }
-
-        // vector loop
-        for( ; kk + 4 <= N-1; kk += 4 )
-        {
-            __m128i vy = _mm_or_si128(  _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
-                                        _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v ));        //  ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
-
-            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v );                                         // y & 1 ? -1 : 0
-            __m128i vmag01 = _mm_and_si128( mask, matrix_a.v );                                                         // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
-            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) );          // mt[kk+M-N] ^ (y >> 1)
-            vr = _mm_xor_si128( vr, vmag01 );                                                                           // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
-            _mm_store_si128( (__m128i*) (mt + kk ), vr );
-        }
-#endif
-
-        for (;kk<N-1;kk++) {
-            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
-            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
-        }
-        y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
-        mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
-
-#ifdef __SSE2__
-        // Do the tempering ahead of time in vector code
-        for( kk = 0; kk + 4 <= N; kk += 4 )
-        {
-            __m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) );                            // y = mt[k];
-            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) );                             // y ^= (y >> 11);
-            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) );        // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
-            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) );       // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
-            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) );                             // y ^= (y >> 18);
-            _mm_store_si128( (__m128i*)(d->cache+kk), vy );
-        }
-#endif
-
-        d->mti = 0;
-    }
-#ifdef __SSE2__
-    y = d->cache[d->mti++];
-#else
-    y = mt[d->mti++];
-
-    /* Tempering */
-    y ^= (y >> 11);
-    y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
-    y ^= (y << 15) & (cl_uint) 0xefc60000UL;
-    y ^= (y >> 18);
-#endif
-
-
-    return y;
-}
-
-cl_ulong genrand_int64( MTdata d)
-{
-    return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d);
-}
-
-/* generates a random number on [0,1]-real-interval */
-double genrand_real1(MTdata d)
-{
-    return genrand_int32(d)*(1.0/4294967295.0);
-    /* divided by 2^32-1 */
-}
-
-/* generates a random number on [0,1)-real-interval */
-double genrand_real2(MTdata d)
-{
-    return genrand_int32(d)*(1.0/4294967296.0);
-    /* divided by 2^32 */
-}
-
-/* generates a random number on (0,1)-real-interval */
-double genrand_real3(MTdata d)
-{
-    return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0);
-    /* divided by 2^32 */
-}
-
-/* generates a random number on [0,1) with 53-bit resolution*/
-double genrand_res53(MTdata d)
-{
-    unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6;
-    return(a*67108864.0+b)*(1.0/9007199254740992.0);
-}
--- a/test_conformance/compatibility/test_common/harness/rounding_mode.c
+++ b/test_conformance/compatibility/test_common/harness/rounding_mode.c
@@ -1,175 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "rounding_mode.h"
-
-#if !(defined(_WIN32) && defined(_MSC_VER))
-RoundingMode set_round( RoundingMode r, Type outType )
-{
-    static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
-    static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
-    const int *p = int_rounds;
-    if( outType == kfloat || outType == kdouble )
-        p = flt_rounds;
-    int oldRound = fegetround();
-    fesetround( p[r] );
-
-    switch( oldRound )
-    {
-        case FE_TONEAREST:
-            return kRoundToNearestEven;
-        case FE_UPWARD:
-            return kRoundUp;
-        case FE_DOWNWARD:
-            return kRoundDown;
-        case FE_TOWARDZERO:
-            return kRoundTowardZero;
-        default:
-            abort();    // ??!
-    }
-    return kDefaultRoundingMode;    //never happens
-}
-
-RoundingMode get_round( void )
-{
-    int oldRound = fegetround();
-
-    switch( oldRound )
-    {
-        case FE_TONEAREST:
-            return kRoundToNearestEven;
-        case FE_UPWARD:
-            return kRoundUp;
-        case FE_DOWNWARD:
-            return kRoundDown;
-        case FE_TOWARDZERO:
-            return kRoundTowardZero;
-    }
-
-    return kDefaultRoundingMode;
-}
-
-#else
-RoundingMode set_round( RoundingMode r, Type outType )
-{
-    static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
-    static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
-    const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds;
-    unsigned int oldRound;
-
-    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
-    if (err) {
-        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
-        return kDefaultRoundingMode;    //what else never happens
-    }
-
-    oldRound &= _MCW_RC;
-
-    RoundingMode old =
-        (oldRound == _RC_NEAR)? kRoundToNearestEven :
-        (oldRound == _RC_UP)?   kRoundUp :
-        (oldRound == _RC_DOWN)? kRoundDown :
-        (oldRound == _RC_CHOP)? kRoundTowardZero:
-        kDefaultRoundingMode;
-
-    _controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode
-    return old;    //returning old rounding mode
-}
-
-RoundingMode get_round( void )
-{
-    unsigned int oldRound;
-
-    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
-    oldRound &= _MCW_RC;
-    return
-        (oldRound == _RC_NEAR)? kRoundToNearestEven :
-        (oldRound == _RC_UP)?   kRoundUp :
-        (oldRound == _RC_DOWN)? kRoundDown :
-        (oldRound == _RC_CHOP)? kRoundTowardZero:
-        kDefaultRoundingMode;
-}
-
-#endif
-
-//
-// FlushToZero() sets the host processor into ftz mode.  It is intended to have a remote effect on the behavior of the code in
-// basic_test_conversions.c. Some host processors may not support this mode, which case you'll need to do some clamping in
-// software by testing against FLT_MIN or DBL_MIN in that file.
-//
-// Note: IEEE-754 says conversions are basic operations.  As such they do *NOT* have the behavior in section 7.5.3 of
-// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
-// operators do (e.g. add, subtract, multiply, divide, etc.)
-//
-// Configuring hardware to FTZ mode varies by platform.
-// CAUTION: Some C implementations may also fail to behave properly in this mode.
-//
-//  On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
-//  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
-//          is used for floating point computation! If your OS uses x87, you'll need to figure out how
-//          to turn that off for the conversions code in basic_test_conversions.c so that they flush to
-//          zero properly.  Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
-//          in which case, these function are at liberty to do nothing.
-//
-#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
-    #include <xmmintrin.h>
-#elif defined( __PPC__ )
-    #include <fpu_control.h>
-#endif
-void *FlushToZero( void )
-{
-#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
-    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
-        union{ int i;  void *p; }u = { _mm_getcsr() };
-        _mm_setcsr( u.i | 0x8040 );
-        return u.p;
-    #elif defined( __arm__ ) || defined(__aarch64__)
-        // processor is already in FTZ mode -- do nothing
-        return NULL;
-    #elif defined( __PPC__ )
-        fpu_control_t flags = 0;
-        _FPU_GETCW(flags);
-        flags |= _FPU_MASK_NI;
-        _FPU_SETCW(flags);
-        return NULL;
-        #else
-        #error Unknown arch
-    #endif
-#else
-    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
-#endif
-}
-
-// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
-void UnFlushToZero( void *p)
-{
-#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
-    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
-        union{ void *p; int i;  }u = { p };
-        _mm_setcsr( u.i );
-    #elif defined( __arm__ ) || defined(__aarch64__)
-        // processor is already in FTZ mode -- do nothing
-    #elif defined( __PPC__)
-        fpu_control_t flags = 0;
-        _FPU_GETCW(flags);
-        flags &= ~_FPU_MASK_NI;
-        _FPU_SETCW(flags);
-        #else
-        #error Unknown arch
-    #endif
-#else
-    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
-#endif
-}
--- a/test_conformance/compatibility/test_common/harness/rounding_mode.h
+++ b/test_conformance/compatibility/test_common/harness/rounding_mode.h
@@ -1,71 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef __ROUNDING_MODE_H__
-#define __ROUNDING_MODE_H__
-
-#include "compat.h"
-
-#include <stdlib.h>
-
-#if (defined(_WIN32) && defined (_MSC_VER))
-#include "errorHelpers.h"
-#include "testHarness.h"
-#endif
-
-typedef enum
-{
-    kDefaultRoundingMode = 0,
-    kRoundToNearestEven,
-    kRoundUp,
-    kRoundDown,
-    kRoundTowardZero,
-
-    kRoundingModeCount
-}RoundingMode;
-
-typedef enum
-{
-    kuchar = 0,
-    kchar = 1,
-    kushort = 2,
-    kshort = 3,
-    kuint = 4,
-    kint = 5,
-    kfloat = 6,
-    kdouble = 7,
-    kulong = 8,
-    klong = 9,
-
-    //This goes last
-    kTypeCount
-}Type;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern RoundingMode set_round( RoundingMode r, Type outType );
-extern RoundingMode get_round( void );
-extern void *FlushToZero( void );
-extern void UnFlushToZero( void *p);
-
-#ifdef __cplusplus
-}
-#endif
-
-
-
-#endif /* __ROUNDING_MODE_H__ */
--- a/test_conformance/compatibility/test_common/harness/threadTesting.c
+++ b/test_conformance/compatibility/test_common/harness/threadTesting.c
@@ -1,106 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "threadTesting.h"
-#include "errorHelpers.h"
-#include <stdio.h>
-#include <stdlib.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <pthread.h>
-#endif
-
-#if 0 // Disabed for now
-
-typedef struct
-{
-    basefn            mFunction;
-    cl_device_id    mDevice;
-    cl_context        mContext;
-    int                mNumElements;
-} TestFnArgs;
-
-////////////////////////////////////////////////////////////////////////////////
-// Thread-based testing. Spawns a new thread to run the given test function,
-// then waits for it to complete. The entire idea is that, if the thread crashes,
-// we can catch it and report it as a failure instead of crashing the entire suite
-////////////////////////////////////////////////////////////////////////////////
-
-void *test_thread_wrapper( void *data )
-{
-    TestFnArgs *args;
-    int retVal;
-    cl_context context;
-
-    args = (TestFnArgs *)data;
-
-    /* Create a new context to use (contexts can't cross threads) */
-    context = clCreateContext(NULL, args->mDeviceGroup);
-    if( context == NULL )
-    {
-        log_error("clCreateContext failed for new thread\n");
-        return (void *)(-1);
-    }
-
-    /* Call function */
-    retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
-
-    clReleaseContext( context );
-
-    return (void *)retVal;
-}
-
-int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int error;
-    pthread_t threadHdl;
-    void *retVal;
-    TestFnArgs args;
-
-
-    args.mFunction = fnToTest;
-    args.mDeviceGroup = deviceGroup;
-    args.mDevice = device;
-    args.mContext = context;
-    args.mNumElements = numElements;
-
-
-    error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
-    if( error != 0 )
-    {
-        log_error( "ERROR: Unable to create thread for testing!\n" );
-        return -1;
-    }
-
-    /* Thread has been started, now just wait for it to complete (or crash) */
-    error = pthread_join( threadHdl, &retVal );
-    if( error != 0 )
-    {
-        log_error( "ERROR: Unable to join testing thread!\n" );
-        return -1;
-    }
-
-    return (int)((intptr_t)retVal);
-}
-#endif
-
-
--- a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt
@@ -22,13 +22,13 @@ set(${MODULE_NAME}_SOURCES
         test_kernel_arg_info.c
         test_queue_properties.cpp
         ../../test_common/harness/errorHelpers.c
-         ../../test_common/harness/threadTesting.c
+         ../../../../test_common/harness/threadTesting.c
         ../../test_common/harness/testHarness.c
         ../../test_common/harness/kernelHelpers.c
         ../../../../test_common/harness/typeWrappers.cpp
         ../../../../test_common/harness/conversions.c
-         ../../test_common/harness/mt19937.c
-         ../../test_common/harness/msvc9.c
+         ../../../../test_common/harness/mt19937.c
+         ../../../../test_common/harness/msvc9.c
         ../../test_common/harness/imageHelpers.cpp
 )

--- a/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt
+++ b/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt
@@ -51,15 +51,15 @@ set(${MODULE_NAME}_SOURCES
    test_kernel_call_kernel_function.cpp
    test_local_kernel_scope.cpp
    ../../test_common/harness/errorHelpers.c
-    ../../test_common/harness/threadTesting.c
+    ../../../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/kernelHelpers.c
    ../../../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/imageHelpers.cpp
-    ../../test_common/harness/mt19937.c
+    ../../../../test_common/harness/mt19937.c
    ../../../../test_common/harness/conversions.c
-    ../../test_common/harness/rounding_mode.c
-    ../../test_common/harness/msvc9.c
+    ../../../../test_common/harness/rounding_mode.c
+    ../../../../test_common/harness/msvc9.c
 )

 include(../../../CMakeCommon.txt)