Initial open source release of OpenCL 2.1 CTS.

2026-03-24 15:59:03 +00:00 · 2017-05-16 18:48:39 +05:30
parent 6911ba5116
commit c3a61c6bdc
902 changed files with 319106 additions and 0 deletions
--- a/test_common/harness/Jamfile
+++ b/test_common/harness/Jamfile
@@ -0,0 +1,18 @@
+project
+    : requirements <include>.
+      <toolset>gcc:<cflags>"-xc++"
+      <toolset>msvc:<cflags>"/TP"
+      <warnings-as-errors>off
+    : usage-requirements <include>.
+    ;
+
+local harness.objs ;
+for source in [ glob *.c *.cpp ]
+{
+    harness.objs += [ obj $(source:B).obj : $(source) ] ;
+}
+
+alias harness : $(harness.objs)
+    : <use>/Runtime//OpenCL.lib : 
+    : <library>/Runtime//OpenCL.lib
+    ;
--- a/test_common/harness/Makefile
+++ b/test_common/harness/Makefile
@@ -0,0 +1,41 @@
+ifdef BUILD_WITH_ATF
+ATF = -framework ATF
+USE_ATF = -DUSE_ATF
+endif
+
+SRCS = conversions.c \
+	errorHelpers.c \
+	genericThread.cpp \
+	imageHelpers.cpp \
+	kernelHelpers.c \
+	mt19937.c \
+	rounding_mode.c \
+	testHarness.c \
+	testHarness.cpp \
+	ThreadPool.c \
+	threadTesting.c \
+	typeWrappers.cpp
+		  
+DEFINES = DONT_TEST_GARBAGE_POINTERS
+
+SOURCES = $(abspath $(SRCS))
+LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
+LIBPATH += -L.
+HEADERS = 
+INCLUDE = 
+COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
+CC = c++
+CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
+CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
+LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
+
+OBJECTS := ${SOURCES:.c=.o}
+OBJECTS := ${OBJECTS:.cpp=.o}
+
+all: $(OBJECTS)
+
+clean:
+	rm -f $(OBJECTS)
+
+.DEFAULT:
+	@echo The target \"$@\" does not exist in Makefile.
--- a/test_common/harness/ThreadPool.c
+++ b/test_common/harness/ThreadPool.c
@@ -0,0 +1,931 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "ThreadPool.h"
+#include "errorHelpers.h"
+#include "fpcontrol.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#if  defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 )  // or any other POSIX system
+
+#if defined( _WIN32 )
+#include <windows.h>
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+#include "mingw_compat.h"
+#include <process.h>
+#else // !_WIN32
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/errno.h>
+#ifdef __linux__
+#include <sched.h>
+#endif
+#endif // !_WIN32
+
+// declarations
+#ifdef  _WIN32
+void ThreadPool_WorkerFunc( void *p );
+#else
+void *ThreadPool_WorkerFunc( void *p );
+#endif
+void ThreadPool_Init(void);
+void ThreadPool_Exit(void);
+
+#if defined (__MINGW32__)
+    // Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC
+    CRITICAL_SECTION     gAtomicLock;
+#elif defined( __GNUC__ ) || defined( _MSC_VER)
+#else
+    pthread_mutex_t     gAtomicLock;
+#endif
+
+// Atomic add operator with mem barrier.  Mem barrier needed to protect state modified by the worker functions.
+cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
+{
+#if defined (__MINGW32__)
+    // No atomics on Mingw32
+    EnterCriticalSection(&gAtomicLock);
+    cl_int old = *a;
+    *a = old + b;
+    LeaveCriticalSection(&gAtomicLock);
+    return old;
+#elif defined( __GNUC__ )
+    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
+    return __sync_fetch_and_add( a, b );
+    // do we need __sync_synchronize() here, too?  GCC docs are unclear whether __sync_fetch_and_add does a synchronize
+#elif defined( _MSC_VER )
+    return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b );
+#else
+    #warning  Please add a atomic add implementation here, with memory barrier.  Fallback code is slow.
+    if( pthread_mutex_lock(&gAtomicLock) )
+        log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
+    cl_int old = *a;
+    *a = old + b;
+    if( pthread_mutex_unlock(&gAtomicLock) )
+        log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n");
+    return old;
+#endif
+}
+
+#if defined( _WIN32 )
+// Uncomment the following line if Windows XP support is not required.
+// #define HAS_INIT_ONCE_EXECUTE_ONCE 1
+
+#if defined(HAS_INIT_ONCE_EXECUTE_ONCE)
+#define _INIT_ONCE           INIT_ONCE
+#define _PINIT_ONCE          PINIT_ONCE
+#define _InitOnceExecuteOnce InitOnceExecuteOnce
+#else // !HAS_INIT_ONCE_EXECUTE_ONCE
+
+typedef volatile LONG _INIT_ONCE;
+typedef _INIT_ONCE *_PINIT_ONCE;
+typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
+
+#define _INIT_ONCE_UNINITIALIZED 0
+#define _INIT_ONCE_IN_PROGRESS   1
+#define _INIT_ONCE_DONE          2
+
+static BOOL _InitOnceExecuteOnce(
+  _PINIT_ONCE InitOnce,
+  _PINIT_ONCE_FN InitFn,
+  PVOID Parameter,
+  LPVOID *Context
+)
+{
+    while ( *InitOnce != _INIT_ONCE_DONE )
+    {
+        if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED )
+        {
+            InitFn( InitOnce, Parameter, Context );
+            *InitOnce = _INIT_ONCE_DONE;
+            return TRUE;
+        }
+        Sleep( 1 );
+    }
+    return TRUE;
+}
+#endif // !HAS_INIT_ONCE_EXECUTE_ONCE
+
+// Uncomment the following line if Windows XP support is not required.
+// #define HAS_CONDITION_VARIABLE 1
+
+#if defined(HAS_CONDITION_VARIABLE)
+#define _CONDITION_VARIABLE          CONDITION_VARIABLE
+#define _InitializeConditionVariable InitializeConditionVariable
+#define _SleepConditionVariableCS    SleepConditionVariableCS
+#define _WakeAllConditionVariable    WakeAllConditionVariable
+#else // !HAS_CONDITION_VARIABLE
+typedef struct
+{
+    HANDLE           mEvent; // Used to park the thread.
+    CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount.
+    volatile cl_int  mWaiters; // Number of threads waiting on this cond var.
+    volatile cl_int  mGeneration; // Wait generation count.
+    volatile cl_int  mReleaseCount; // Number of releases to execute before reseting the event.
+} _CONDITION_VARIABLE;
+
+typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE;
+
+static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var )
+{
+    cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
+    InitializeCriticalSection( cond_var->mLock );
+    cond_var->mWaiters = 0;
+    cond_var->mGeneration = 0;
+#if !defined ( NDEBUG )
+    cond_var->mReleaseCount = 0;
+#endif // !NDEBUG
+}
+
+static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored)
+{
+    EnterCriticalSection( cond_var->mLock );
+    cl_int generation = cond_var->mGeneration;
+    ++cond_var->mWaiters;
+    LeaveCriticalSection( cond_var->mLock );
+    LeaveCriticalSection( cond_lock );
+
+    while ( TRUE )
+    {
+        WaitForSingleObject( cond_var->mEvent, INFINITE );
+        EnterCriticalSection( cond_var->mLock );
+        BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;
+        LeaveCriticalSection( cond_var->mLock );
+        if ( done )
+        {
+            break;
+        }
+    }
+
+    EnterCriticalSection( cond_lock );
+    EnterCriticalSection( cond_var->mLock );
+    if ( --cond_var->mReleaseCount == 0 )
+    {
+        ResetEvent( cond_var->mEvent );
+    }
+    --cond_var->mWaiters;
+    LeaveCriticalSection( cond_var->mLock );
+}
+
+static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var )
+{
+    EnterCriticalSection( cond_var->mLock );
+    if (cond_var->mWaiters > 0 )
+    {
+        ++cond_var->mGeneration;
+        cond_var->mReleaseCount = cond_var->mWaiters;
+        SetEvent( cond_var->mEvent );
+    }
+    LeaveCriticalSection( cond_var->mLock );
+}
+#endif // !HAS_CONDITION_VARIABLE
+#endif // _WIN32
+
+#define MAX_COUNT   (1<<29)
+
+// Global state to coordinate whether the threads have been launched successfully or not
+#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600)
+static _INIT_ONCE threadpool_init_control;
+#elif defined (_WIN32)  // MingW of XP
+static int threadpool_init_control;
+#else // Posix platforms
+pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
+#endif
+cl_int threadPoolInitErr = -1;          // set to CL_SUCCESS on successful thread launch
+
+// critical region lock around ThreadPool_Do.  We can only run one ThreadPool_Do at a time,
+// because we are too lazy to set up a queue here, and don't expect to need one.
+#if defined( _WIN32 )
+CRITICAL_SECTION    gThreadPoolLock[1];
+#else // !_WIN32
+pthread_mutex_t     gThreadPoolLock;
+#endif // !_WIN32
+
+// Condition variable to park ThreadPool threads when not working
+#if defined( _WIN32 )
+CRITICAL_SECTION    cond_lock[1];
+_CONDITION_VARIABLE cond_var[1];
+#else // !_WIN32
+pthread_mutex_t     cond_lock;
+pthread_cond_t      cond_var;
+#endif // !_WIN32
+volatile cl_int     gRunCount = 0;              // Condition variable state. How many iterations on the function left to run.
+                                                // set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative.
+
+// State that only changes when the threadpool is not working.
+volatile TPFuncPtr  gFunc_ptr = NULL;
+volatile void       *gUserInfo = NULL;
+volatile cl_int     gJobCount = 0;
+
+// State that may change while the thread pool is working
+volatile cl_int     jobError = CL_SUCCESS;      // err code return for the job as a whole
+
+// Condition variable to park caller while waiting
+#if defined( _WIN32 )
+HANDLE              caller_event;
+#else // !_WIN32
+pthread_mutex_t     caller_cond_lock;
+pthread_cond_t      caller_cond_var;
+#endif // !_WIN32
+volatile cl_int     gRunning = 0;       // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do.
+
+// The total number of threads launched.
+volatile cl_int     gThreadCount = 0;
+#ifdef _WIN32
+void ThreadPool_WorkerFunc( void *p )
+#else
+void *ThreadPool_WorkerFunc( void *p )
+#endif
+{
+    cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
+    cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
+//    log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
+
+    while( MAX_COUNT > item )
+    {
+        cl_int err;
+
+        // check for more work to do
+        if( 0 >= item )
+        {
+//            log_info( "Thread %d has run out of work.\n", threadID );
+
+            // No work to do. Attempt to block waiting for work
+#if defined( _WIN32 )
+            EnterCriticalSection( cond_lock );
+#else // !_WIN32
+            if((err = pthread_mutex_lock( &cond_lock) ))
+            {
+                log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID );
+                goto exit;
+            }
+#endif // !_WIN32
+
+            cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 );
+//            log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 );
+            if( 1 == remaining )
+            { // last thread out signal the main thread to wake up
+#if defined( _WIN32 )
+                SetEvent( caller_event );
+#else // !_WIN32
+                if((err = pthread_mutex_lock( &caller_cond_lock) ))
+                {
+                    log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
+                    goto exit;
+                }
+                if( (err = pthread_cond_broadcast( &caller_cond_var )))
+                {
+                    log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err );
+                    goto exit;
+                }
+                if((err = pthread_mutex_unlock( &caller_cond_lock) ))
+                {
+                    log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
+                    goto exit;
+                }
+#endif // !_WIN32
+            }
+
+            // loop in case we are woken only to discover that some other thread already did all the work
+            while( 0 >= item )
+            {
+#if defined( _WIN32 )
+                _SleepConditionVariableCS( cond_var, cond_lock, INFINITE );
+#else // !_WIN32
+                if((err = pthread_cond_wait( &cond_var, &cond_lock) ))
+                {
+                    log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
+                    pthread_mutex_unlock( &cond_lock);
+                    goto exit;
+                }
+#endif // !_WIN32
+
+                // try again to get a valid item id
+                item = ThreadPool_AtomicAdd( &gRunCount, -1 );
+                if( MAX_COUNT <= item )  // exit if we are done
+                {
+#if defined( _WIN32 )
+                    LeaveCriticalSection( cond_lock );
+#else // !_WIN32
+                    pthread_mutex_unlock( &cond_lock);
+#endif // !_WIN32
+                    goto exit;
+                }
+            }
+
+            ThreadPool_AtomicAdd( &gRunning, 1 );
+//            log_info( "Thread %d has found work.\n", threadID);
+
+#if defined( _WIN32 )
+            LeaveCriticalSection( cond_lock );
+#else // !_WIN32
+            if((err = pthread_mutex_unlock( &cond_lock) ))
+            {
+                log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
+                goto exit;
+            }
+#endif // !_WIN32
+
+        }
+
+        // we have a valid item, so do the work
+        if( CL_SUCCESS == jobError )  // but only if we haven't already encountered an error
+        {
+//            log_info( "Thread %d doing job %d\n", threadID, item - 1);
+
+#if defined(__APPLE__) && defined(__arm__)
+            // On most platforms which support denorm, default is FTZ off. However,
+            // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+            // This creates issues in result verification. Since spec allows the implementation to either flush or
+            // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
+            // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+            // where reference is being computed to make sure we get non-flushed reference result. If implementation
+            // returns flushed result, we correctly take care of that in verification code.
+            FPU_mode_type oldMode;
+            DisableFTZ( &oldMode );
+#endif
+
+            // Call the user's function with this item ID
+            err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo );
+#if defined(__APPLE__) && defined(__arm__)
+            // Restore FP state
+            RestoreFPState( &oldMode );
+#endif
+
+            if( err )
+            {
+#if (__MINGW32__)
+                EnterCriticalSection(&gAtomicLock);
+                if( jobError == CL_SUCCESS );
+                    jobError = err;
+                gRunCount = 0;
+                LeaveCriticalSection(&gAtomicLock);
+#elif defined( __GNUC__ )
+                // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
+                // set the new error if we are the first one there.
+                __sync_val_compare_and_swap( &jobError, CL_SUCCESS, err );
+
+                // drop run count to 0
+                gRunCount = 0;
+                __sync_synchronize();
+#elif defined( _MSC_VER )
+                // set the new error if we are the first one there.
+                _InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS );
+
+                // drop run count to 0
+                gRunCount = 0;
+                _mm_mfence();
+#else
+                if( pthread_mutex_lock(&gAtomicLock) )
+                    log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
+                if( jobError == CL_SUCCESS );
+                    jobError = err;
+                gRunCount = 0;
+                if( pthread_mutex_unlock(&gAtomicLock) )
+                    log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n");
+#endif
+            }
+        }
+
+        // get the next item
+        item = ThreadPool_AtomicAdd( &gRunCount, -1 );
+    }
+
+exit:
+    log_info( "ThreadPool: thread %d exiting.\n", threadID );
+    ThreadPool_AtomicAdd( &gThreadCount, -1 );
+#if !defined(_WIN32)
+    return NULL;
+#endif
+}
+
+// SetThreadCount() may be used to artifically set the number of worker threads
+// If the value is 0 (the default) the number of threads will be determined based on
+// the number of CPU cores.  If it is a unicore machine, then 2 will be used, so
+// that we still get some testing for thread safety.
+//
+// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
+// code will run single threaded, but will report an error to indicate that the test
+// is invalid.  This option is intended for debugging purposes only. It is suggested
+// as a convention that test apps set the thread count to 1 in response to the -m flag.
+//
+// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
+// otherwise the behavior is indefined.
+void        SetThreadCount( int count )
+{
+    if( threadPoolInitErr == CL_SUCCESS )
+    {
+        log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" );
+        abort();
+    }
+
+    gThreadCount = count;
+}
+
+void ThreadPool_Init(void)
+{
+    cl_int i;
+    int err;
+    volatile cl_uint threadID = 0;
+
+    // Check for manual override of multithreading code. We add this for better debuggability.
+    if( getenv( "CL_TEST_SINGLE_THREADED" ) )
+    {
+        gThreadCount = 1;
+        return;
+    }
+
+    // Figure out how many threads to run -- check first for non-zero to give the implementation the chance
+    if( 0 == gThreadCount )
+    {
+#if defined(_MSC_VER) || defined (__MINGW64__)
+        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
+        DWORD length = 0;
+
+        GetLogicalProcessorInformation( NULL, &length );
+        buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
+        if( buffer != NULL )
+        {
+            if ( GetLogicalProcessorInformation( buffer, &length ) == TRUE )
+            {
+                PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
+                while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
+                {
+                    if( ptr->Relationship == RelationProcessorCore )
+                    {
+                        // Count the number of bits in ProcessorMask (number of logical cores)
+                        ULONG mask = ptr->ProcessorMask;
+                        while( mask )
+                        {
+                            ++gThreadCount;
+                            mask &= mask - 1; // Remove 1 bit at a time
+                        }
+                    }
+                    ++ptr;
+                }
+            }
+            free(buffer);
+        }
+#elif defined (__MINGW32__)
+        {
+            #warning  How about this, instead of hard coding it to 2?
+            SYSTEM_INFO sysinfo;
+            GetSystemInfo( &sysinfo );
+            gThreadCount = sysinfo.dwNumberOfProcessors;
+        }
+#elif defined (__linux__) && !defined(__ANDROID__)
+        cpu_set_t    affinity;
+        if ( 0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity) )
+        {
+#if !(defined(CPU_COUNT))
+        gThreadCount = 1;
+#else
+            gThreadCount = CPU_COUNT(&affinity);
+#endif
+        }
+        else
+        {
+            gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF);       // Hopefully your system returns logical cpus here, as does MacOS X
+        }
+#else // !_WIN32
+        gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF);       // Hopefully your system returns logical cpus here, as does MacOS X
+#endif // !_WIN32
+
+        // Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety
+        if( 1 == gThreadCount )
+            gThreadCount = 2;
+    }
+
+    // When working in 32 bit limit the thread number to 12
+    // This fix was made due to memory issues in integer_ops test
+    // When running integer_ops, the test opens as many threads as the
+    // machine has and each thread allocates a fixed amount of memory
+    // When running this test on dual socket machine in 32-bit, the
+    // process memory is not sufficient and the test fails
+    #if defined(_WIN32) && !defined(_M_X64)
+        if (gThreadCount > 12) {
+            gThreadCount = 12;
+        }
+    #endif
+
+    //Allow the app to set thread count to <0 for debugging purposes.  This will cause the test to run single threaded.
+    if( gThreadCount < 2 )
+    {
+        log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n");
+        gThreadCount = 1;
+        return;
+    }
+
+#if defined( _WIN32 )
+    InitializeCriticalSection( gThreadPoolLock );
+    InitializeCriticalSection( cond_lock );
+    _InitializeConditionVariable( cond_var );
+    caller_event = CreateEvent( NULL, FALSE, FALSE, NULL );
+#elif defined (__GNUC__)
+    // Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since it might cause problem
+    // with some flavors of gcc compilers.
+    pthread_cond_init(&cond_var, NULL);
+    pthread_mutex_init(&cond_lock ,NULL);
+    pthread_cond_init(&caller_cond_var, NULL);
+    pthread_mutex_init(&caller_cond_lock, NULL);
+    pthread_mutex_init(&gThreadPoolLock, NULL);
+#endif
+
+#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
+    pthread_mutex_initialize(gAtomicLock);
+#elif defined (__MINGW32__)
+    InitializeCriticalSection(&gAtomicLock);
+#endif
+    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
+    //  That would cause a deadlock.
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_lock( &caller_cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
+        gThreadCount = 1;
+        return;
+    }
+#endif // !_WIN32
+
+    gRunning = gThreadCount;
+    // init threads
+    for( i = 0; i < gThreadCount; i++ )
+    {
+#if defined( _WIN32 )
+        uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID);
+        err = ( handle == 0 );
+#else // !_WIN32
+        pthread_t tid = 0;
+        err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID );
+#endif // !_WIN32
+        if( err )
+        {
+            log_error( "Error %d launching thread %d\n", err, i );
+            threadPoolInitErr = err;
+            gThreadCount = i;
+            break;
+        }
+    }
+
+    atexit( ThreadPool_Exit );
+
+// block until they are done launching.
+    do
+    {
+#if defined( _WIN32 )
+        WaitForSingleObject( caller_event, INFINITE );
+#else // !_WIN32
+        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
+        {
+            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
+            pthread_mutex_unlock( &caller_cond_lock);
+            return;
+        }
+#endif // !_WIN32
+    }
+    while( gRunCount != -gThreadCount );
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
+        return;
+    }
+#endif // !_WIN32
+
+    threadPoolInitErr = CL_SUCCESS;
+}
+
+#if defined(_MSC_VER)
+static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex)
+{
+    ThreadPool_Init();
+    return TRUE;
+}
+#endif
+
+void ThreadPool_Exit(void)
+{
+    int err, count;
+    gRunCount = CL_INT_MAX;
+
+#if defined( __GNUC__ )
+    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
+    __sync_synchronize();
+#elif defined( _MSC_VER )
+    _mm_mfence();
+#else
+    #warning   If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
+#endif
+
+    // spin waiting for threads to die
+    for (count = 0; 0 != gThreadCount && count < 1000; count++)
+    {
+#if defined( _WIN32 )
+        _WakeAllConditionVariable( cond_var );
+        Sleep(1);
+#else // !_WIN32
+        if( (err = pthread_cond_broadcast( &cond_var )))
+        {
+            log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err );
+            break;
+        }
+        usleep(1000);
+#endif // !_WIN32
+    }
+
+    if( gThreadCount )
+        log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount );
+    else
+        log_info( "Thread pool exited in a orderly fashion.\n" );
+}
+
+
+// Blocking API that farms out count jobs to a thread pool.
+// It may return with some work undone if func_ptr() returns a non-zero
+// result.
+//
+// This function obviously has its shortcommings. Only one call to ThreadPool_Do
+// can be running at a time. It is not intended for general purpose use.
+// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
+// all available then it would make more sense to use those features.
+cl_int ThreadPool_Do( TPFuncPtr func_ptr,
+                      cl_uint count,
+                      void *userInfo )
+{
+    cl_int newErr;
+    cl_int err = 0;
+    // Lazily set up our threads
+#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
+    err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
+#elif defined (_WIN32)
+    if (threadpool_init_control == 0) {
+    #warning  This is buggy and race prone.  Find a better way.
+        ThreadPool_Init();
+        threadpool_init_control = 1;
+    }
+#else //posix platform
+    err = pthread_once( &threadpool_init_control, ThreadPool_Init );
+    if( err )
+    {
+        log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
+        return err;
+    }
+#endif
+    // Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable
+    if( threadPoolInitErr )
+    {
+        cl_uint currentJob = 0;
+        cl_int  result = CL_SUCCESS;
+
+#if defined(__APPLE__) && defined(__arm__)
+        // On most platforms which support denorm, default is FTZ off. However,
+        // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+        // This creates issues in result verification. Since spec allows the implementation to either flush or
+        // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
+        // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+        // where reference is being computed to make sure we get non-flushed reference result. If implementation
+        // returns flushed result, we correctly take care of that in verification code.
+        FPU_mode_type oldMode;
+        DisableFTZ( &oldMode );
+#endif
+        for( currentJob = 0; currentJob < count; currentJob++ )
+            if((result = func_ptr( currentJob, 0, userInfo )))
+            {
+#if defined(__APPLE__) && defined(__arm__)
+                // Restore FP state before leaving
+                RestoreFPState( &oldMode );
+#endif
+                return result;
+            }
+
+#if defined(__APPLE__) && defined(__arm__)
+        // Restore FP state before leaving
+        RestoreFPState( &oldMode );
+#endif
+
+        return CL_SUCCESS;
+    }
+
+    if( count >= MAX_COUNT )
+    {
+        log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT );
+        return -1;
+    }
+
+    // Enter critical region
+#if defined( _WIN32 )
+    EnterCriticalSection( gThreadPoolLock );
+#else // !_WIN32
+    if( (err = pthread_mutex_lock( &gThreadPoolLock )))
+    {
+        switch (err)
+        {
+            case EDEADLK:
+                log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" );
+                break;
+            case EINVAL:
+                log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" );
+                break;
+            default:
+                break;
+        }
+        return err;
+    }
+#endif // !_WIN32
+
+    // Start modifying the job state observable by worker threads
+#if defined( _WIN32 )
+    EnterCriticalSection( cond_lock );
+#else // !_WIN32
+    if((err = pthread_mutex_lock( &cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
+        goto exit;
+    }
+#endif // !_WIN32
+
+    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
+    //  That would cause a deadlock.
+#if !defined( _WIN32 )
+    if((err = pthread_mutex_lock( &caller_cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
+        goto exit;
+    }
+#endif // !_WIN32
+
+    // Prime the worker threads to get going
+    jobError = CL_SUCCESS;
+    gRunCount = gJobCount = count;
+    gFunc_ptr = func_ptr;
+    gUserInfo = userInfo;
+
+#if defined( _WIN32 )
+    ResetEvent(caller_event);
+    _WakeAllConditionVariable( cond_var );
+    LeaveCriticalSection( cond_lock );
+#else // !_WIN32
+    if( (err = pthread_cond_broadcast( &cond_var )))
+    {
+        log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
+        goto exit;
+    }
+    if((err = pthread_mutex_unlock( &cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
+        goto exit;
+    }
+#endif // !_WIN32
+
+// block until they are done.  It would be slightly more efficient to do some of the work here though.
+    do
+    {
+#if defined( _WIN32 )
+        WaitForSingleObject( caller_event, INFINITE );
+#else // !_WIN32
+        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
+        {
+            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
+            pthread_mutex_unlock( &caller_cond_lock);
+            goto exit;
+        }
+#endif // !_WIN32
+    }
+    while( gRunning );
+#if !defined(_WIN32)
+    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
+    {
+        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
+        goto exit;
+    }
+#endif // !_WIN32
+
+    err = jobError;
+
+exit:
+    // exit critical region
+#if defined( _WIN32 )
+    LeaveCriticalSection( gThreadPoolLock );
+#else // !_WIN32
+    newErr = pthread_mutex_unlock( &gThreadPoolLock );
+    if( newErr)
+    {
+        log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr );
+        return err;
+    }
+#endif // !_WIN32
+
+    return err;
+}
+
+cl_uint GetThreadCount( void )
+{
+    // Lazily set up our threads
+#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
+    cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
+#elif defined (_WIN32)
+    if (threadpool_init_control == 0) {
+    #warning  This is buggy and race prone.  Find a better way.
+        ThreadPool_Init();
+        threadpool_init_control = 1;
+    }
+#else
+    cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init );
+    if( err )
+    {
+        log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
+        return err;
+    }
+#endif // !_WIN32
+
+    if( gThreadCount < 1 )
+        return 1;
+
+    return gThreadCount;
+}
+
+#else
+
+#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
+    #error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
+#endif
+//
+// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements
+// of OpenCL API, while also checking
+//
+// A sample single threaded implementation follows, for documentation / bootstrapping purposes.
+// It is not okay to use this for conformance testing!!!
+//
+// Exception:  If your operating system does not support multithreaded execution of any kind, then you may use this code.
+//
+
+cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
+{
+    cl_uint r = *a;
+
+    // since this fallback code path is not multithreaded, we just do a regular add here
+    // If your operating system supports memory-barrier-atomics, use those here
+    *a = r + b;
+
+    return r;
+}
+
+// Blocking API that farms out count jobs to a thread pool.
+// It may return with some work undone if func_ptr() returns a non-zero
+// result.
+cl_int ThreadPool_Do(   TPFuncPtr func_ptr,
+                        cl_uint count,
+                        void *userInfo )
+{
+    cl_uint currentJob = 0;
+    cl_int  result = CL_SUCCESS;
+
+#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
+    // THIS FUNCTION IS NOT INTENDED FOR USE!!
+    log_error( "ERROR:  Test must be multithreaded!\n" );
+    exit(-1);
+#else
+    static int spewCount = 0;
+
+    if( 0 == spewCount )
+    {
+        log_info( "\nWARNING:  The operating system is claimed not to support threads of any sort. Running single threaded.\n" );
+        spewCount = 1;
+    }
+#endif
+
+// The multithreaded code should mimic this behavior:
+    for( currentJob = 0; currentJob < count; currentJob++ )
+        if((result = func_ptr( currentJob, 0, userInfo )))
+            return result;
+
+    return CL_SUCCESS;
+}
+
+cl_uint GetThreadCount( void )
+{
+    return 1;
+}
+
+void SetThreadCount( int count )
+{
+    if( count > 1 )
+        log_info( "WARNING: SetThreadCount(%d) ignored\n", count );
+}
+
+#endif
--- a/test_common/harness/ThreadPool.h
+++ b/test_common/harness/ThreadPool.h
@@ -0,0 +1,76 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef THREAD_POOL_H
+#define THREAD_POOL_H
+
+#if defined( __APPLE__ )
+    #include <OpenCL/opencl.h>
+#else
+    #include <CL/cl.h>
+#endif
+
+#if defined(__cplusplus)
+    extern "C" {
+#endif
+
+//
+// An atomic add operator
+cl_int     ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b );    // returns old value
+
+// Your function prototype
+//
+// A function pointer to the function you want to execute in a multithreaded context.  No
+// synchronization primitives are provided, other than the atomic add above. You may not
+// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should
+// work, however.
+//
+// job ids and thread ids are 0 based.  If number of jobs or threads was 8, they will numbered be 0 through 7.
+// Note that while every job will be run, it is not guaranteed that every thread will wake up before
+// the work is done.
+typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo );
+
+// returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
+// Some workitems may not run if a non-zero result is returned from func_ptr().
+// This function may not be called from a TPFuncPtr.
+cl_int      ThreadPool_Do(  TPFuncPtr func_ptr,
+                            cl_uint count,
+                            void *userInfo );
+
+// Returns the number of worker threads that underlie the threadpool.  The value passed
+// as the TPFuncPtrs thread_id will be between 0 and this value less one, inclusive.
+// This is safe to call from a TPFuncPtr.
+cl_uint     GetThreadCount( void );
+
+// SetThreadCount() may be used to artifically set the number of worker threads
+// If the value is 0 (the default) the number of threads will be determined based on
+// the number of CPU cores.  If it is a unicore machine, then 2 will be used, so
+// that we still get some testing for thread safety.
+//
+// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
+// code will run single threaded, but will report an error to indicate that the test
+// is invalid.  This option is intended for debugging purposes only. It is suggested
+// as a convention that test apps set the thread count to 1 in response to the -m flag.
+//
+// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
+// otherwise the behavior is indefined. It may not be called from a TPFuncPtr.
+void        SetThreadCount( int count );
+
+#ifdef __cplusplus
+    }   /* extern "C" */
+#endif
+
+
+#endif  /* THREAD_POOL_H  */
--- a/test_common/harness/clImageHelper.h
+++ b/test_common/harness/clImageHelper.h
@@ -0,0 +1,290 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef test_conformance_clImageHelper_h
+#define test_conformance_clImageHelper_h
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#include <stdio.h>
+#include "errorHelpers.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+  // helper function to replace clCreateImage2D , to make the existing code use
+  // the functions of version 1.2 and veriosn 1.1  respectively
+
+  static inline cl_mem create_image_2d  (cl_context context,
+                           cl_mem_flags flags,
+                           const cl_image_format *image_format,
+                           size_t image_width,
+                           size_t image_height,
+                           size_t image_row_pitch,
+                           void *host_ptr,
+                           cl_int *errcode_ret)
+  {
+    cl_mem mImage = NULL;
+
+#ifdef CL_VERSION_1_2
+    cl_image_desc image_desc_dest;
+    image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
+    image_desc_dest.image_width = image_width;
+    image_desc_dest.image_height = image_height;
+    image_desc_dest.image_depth= 0;// not usedfor 2d
+    image_desc_dest.image_array_size = 0;// not used for 2d
+    image_desc_dest.image_row_pitch = image_row_pitch;
+    image_desc_dest.image_slice_pitch = 0;
+    image_desc_dest.num_mip_levels = 0;
+    image_desc_dest.num_samples = 0;
+    image_desc_dest.mem_object = NULL;// no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
+    mImage = clCreateImage( context, flags, image_format, &image_desc_dest, host_ptr, errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage failed (%d)\n", *errcode_ret);
+    }
+
+#else
+    mImage = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
+    }
+#endif
+
+    return mImage;
+  }
+
+    // helper function to replace clCreateImage2D , to make the existing code use
+    // the functions of version 1.2 and veriosn 1.1  respectively
+
+    static inline cl_mem create_image_2d_buffer  (cl_context context,
+                                    cl_mem_flags flags,
+                                    const cl_image_format *image_format,
+                                    size_t image_width,
+                                    size_t image_height,
+                                    size_t image_row_pitch,
+                                    cl_mem buffer,
+                                    cl_int *errcode_ret)
+    {
+        cl_mem mImage = NULL;
+
+        cl_image_desc image_desc_dest;
+        image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
+        image_desc_dest.image_width = image_width;
+        image_desc_dest.image_height = image_height;
+        image_desc_dest.image_depth= 0;// not usedfor 2d
+        image_desc_dest.image_array_size = 0;// not used for 2d
+        image_desc_dest.image_row_pitch = image_row_pitch;
+        image_desc_dest.image_slice_pitch = 0;
+        image_desc_dest.num_mip_levels = 0;
+        image_desc_dest.num_samples = 0;
+        image_desc_dest.mem_object = buffer;
+        mImage = clCreateImage( context, flags, image_format, &image_desc_dest, NULL, errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+
+        return mImage;
+    }
+
+
+
+  static inline cl_mem create_image_3d (cl_context context,
+                          cl_mem_flags flags,
+                          const cl_image_format *image_format,
+                          size_t image_width,
+                          size_t image_height,
+                          size_t image_depth,
+                          size_t image_row_pitch,
+                          size_t image_slice_pitch,
+                          void *host_ptr,
+                          cl_int *errcode_ret)
+  {
+    cl_mem mImage;
+
+#ifdef CL_VERSION_1_2
+    cl_image_desc image_desc;
+    image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+    image_desc.image_width = image_width;
+    image_desc.image_height = image_height;
+    image_desc.image_depth = image_depth;
+    image_desc.image_array_size = 0;// not used for one image
+    image_desc.image_row_pitch = image_row_pitch;
+    image_desc.image_slice_pitch = image_slice_pitch;
+    image_desc.num_mip_levels = 0;
+    image_desc.num_samples = 0;
+    image_desc.mem_object = NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
+    mImage = clCreateImage( context,
+                           flags,
+                           image_format,
+                           &image_desc,
+                           host_ptr,
+                           errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage failed (%d)\n", *errcode_ret);
+    }
+
+#else
+    mImage = clCreateImage3D( context,
+                             flags, image_format,
+                             image_width,
+                             image_height,
+                             image_depth,
+                             image_row_pitch,
+                             image_slice_pitch,
+                             host_ptr,
+                             errcode_ret );
+    if (errcode_ret && (*errcode_ret)) {
+      // Log an info message and rely on the calling function to produce an error
+      // if necessary.
+      log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
+    }
+#endif
+
+    return mImage;
+  }
+
+    static inline cl_mem create_image_2d_array (cl_context context,
+                                   cl_mem_flags flags,
+                                   const cl_image_format *image_format,
+                                   size_t image_width,
+                                   size_t image_height,
+                                   size_t image_array_size,
+                                   size_t image_row_pitch,
+                                   size_t image_slice_pitch,
+                                   void *host_ptr,
+                                   cl_int *errcode_ret)
+    {
+        cl_mem mImage;
+
+        cl_image_desc image_desc;
+        image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+        image_desc.image_width = image_width;
+        image_desc.image_height = image_height;
+        image_desc.image_depth = 1;
+        image_desc.image_array_size = image_array_size;
+        image_desc.image_row_pitch = image_row_pitch;
+        image_desc.image_slice_pitch = image_slice_pitch;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+        mImage = clCreateImage( context,
+                               flags,
+                               image_format,
+                               &image_desc,
+                               host_ptr,
+                               errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+
+        return mImage;
+    }
+
+    static inline cl_mem create_image_1d_array (cl_context context,
+                                         cl_mem_flags flags,
+                                         const cl_image_format *image_format,
+                                         size_t image_width,
+                                         size_t image_array_size,
+                                         size_t image_row_pitch,
+                                         size_t image_slice_pitch,
+                                         void *host_ptr,
+                                         cl_int *errcode_ret)
+    {
+        cl_mem mImage;
+
+        cl_image_desc image_desc;
+        image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+        image_desc.image_width = image_width;
+        image_desc.image_height = 1;
+        image_desc.image_depth = 1;
+        image_desc.image_array_size = image_array_size;
+        image_desc.image_row_pitch = image_row_pitch;
+        image_desc.image_slice_pitch = image_slice_pitch;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = NULL;
+        mImage = clCreateImage( context,
+                               flags,
+                               image_format,
+                               &image_desc,
+                               host_ptr,
+                               errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+
+        return mImage;
+    }
+
+    static inline cl_mem create_image_1d (cl_context context,
+                                   cl_mem_flags flags,
+                                   const cl_image_format *image_format,
+                                   size_t image_width,
+                                   size_t image_row_pitch,
+                                   void *host_ptr,
+                                   cl_mem buffer,
+                                   cl_int *errcode_ret)
+    {
+        cl_mem mImage;
+
+        cl_image_desc image_desc;
+        image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D;
+        image_desc.image_width = image_width;
+        image_desc.image_height = 1;
+        image_desc.image_depth = 1;
+        image_desc.image_row_pitch = image_row_pitch;
+        image_desc.image_slice_pitch = 0;
+        image_desc.num_mip_levels = 0;
+        image_desc.num_samples = 0;
+        image_desc.mem_object = buffer;
+        mImage = clCreateImage( context,
+                               flags,
+                               image_format,
+                               &image_desc,
+                               host_ptr,
+                               errcode_ret );
+        if (errcode_ret && (*errcode_ret)) {
+            // Log an info message and rely on the calling function to produce an error
+            // if necessary.
+            log_info("clCreateImage failed (%d)\n", *errcode_ret);
+        }
+
+        return mImage;
+    }
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/test_common/harness/compat.h
+++ b/test_common/harness/compat.h
@@ -0,0 +1,382 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _COMPAT_H_
+#define _COMPAT_H_
+
+#if defined(_WIN32) && defined (_MSC_VER)
+#include <Windows.h>
+#endif
+
+#ifdef __cplusplus
+    #define EXTERN_C extern "C"
+#else
+    #define EXTERN_C
+#endif
+
+
+//
+// stdlib.h
+//
+
+#include <stdlib.h>     // On Windows, _MAX_PATH defined there.
+
+// llabs appeared in MS C v16 (VS 10/2010).
+#if defined( _MSC_VER ) && _MSC_VER <= 1500
+    EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
+#endif
+
+
+//
+// stdbool.h
+//
+
+// stdbool.h appeared in MS C v18 (VS 12/2013).
+#if defined( _MSC_VER ) && MSC_VER <= 1700
+#if !defined(__cplusplus)
+typedef char bool;
+        #define true  1
+        #define false 0
+    #endif
+#else
+    #include <stdbool.h>
+#endif
+
+
+
+//
+// stdint.h
+//
+
+// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
+#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
+typedef unsigned char       uint8_t;
+typedef char                int8_t;
+typedef unsigned short      uint16_t;
+typedef short               int16_t;
+typedef unsigned int        uint32_t;
+typedef int                 int32_t;
+typedef unsigned long long  uint64_t;
+typedef long long           int64_t;
+#else
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+    #include <stdint.h>
+#endif
+
+
+
+//
+// float.h
+//
+
+#include <float.h>
+
+
+
+//
+// fenv.h
+//
+
+// fenv.h appeared in MS C v18 (VS 12/2013).
+#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
+    // reimplement fenv.h because windows doesn't have it
+    #define FE_INEXACT          0x0020
+    #define FE_UNDERFLOW        0x0010
+    #define FE_OVERFLOW         0x0008
+    #define FE_DIVBYZERO        0x0004
+    #define FE_INVALID          0x0001
+    #define FE_ALL_EXCEPT       0x003D
+    int fetestexcept(int excepts);
+    int feclearexcept(int excepts);
+#else
+    #include <fenv.h>
+#endif
+
+
+//
+// math.h
+//
+
+#if defined( __INTEL_COMPILER )
+    #include <mathimf.h>
+#else
+    #include <math.h>
+#endif
+
+#if defined( _MSC_VER )
+
+    #ifdef __cplusplus
+        extern "C" {
+    #endif
+
+#ifndef M_PI
+    #define M_PI    3.14159265358979323846264338327950288
+#endif
+
+    #if ! defined( __INTEL_COMPILER )
+
+        #ifndef NAN
+            #define NAN  (INFINITY - INFINITY)
+        #endif
+        #ifndef HUGE_VALF
+            #define HUGE_VALF (float)HUGE_VAL
+        #endif
+        #ifndef INFINITY
+            #define INFINITY    (FLT_MAX + FLT_MAX)
+        #endif
+        #ifndef isfinite
+            #define isfinite(x) _finite(x)
+        #endif
+        #ifndef isnan
+#define    isnan( x )       ((x) != (x))
+        #endif
+        #ifndef isinf
+#define     isinf( _x)      ((_x) == INFINITY || (_x) == -INFINITY)
+        #endif
+
+double rint( double x);
+float  rintf( float x);
+long double rintl( long double x);
+
+float cbrtf( float );
+double cbrt( double );
+
+int    ilogb( double x);
+int    ilogbf (float x);
+int    ilogbl(long double x);
+
+double fmax(double x, double y);
+double fmin(double x, double y);
+float  fmaxf( float x, float y );
+float  fminf(float x, float y);
+
+double      log2(double x);
+long double log2l(long double x);
+
+double      exp2(double x);
+long double exp2l(long double x);
+
+double      fdim(double x, double y);
+float       fdimf(float x, float y);
+long double fdiml(long double x, long double y);
+
+double      remquo( double x, double y, int *quo);
+float       remquof( float x, float y, int *quo);
+long double remquol( long double x, long double y, int *quo);
+
+long double scalblnl(long double x, long n);
+
+float hypotf(float x, float y);
+long double hypotl(long double x, long double y) ;
+double lgamma(double x);
+float  lgammaf(float x);
+
+double trunc(double x);
+float  truncf(float x);
+
+double log1p(double x);
+float  log1pf(float x);
+long double log1pl(long double x);
+
+double copysign(double x, double y);
+float  copysignf(float x, float y);
+long double copysignl(long double x, long double y);
+
+long lround(double x);
+long lroundf(float x);
+//long lroundl(long double x)
+
+double round(double x);
+float  roundf(float x);
+long double roundl(long double x);
+
+        int cf_signbit(double x);
+        int cf_signbitf(float x);
+
+        static int signbit(double x) { return  cf_signbit(x); }
+        static int signbitf(float x) { return cf_signbitf(x); }
+
+long int lrint (double flt);
+long int lrintf (float flt);
+
+float   int2float (int32_t ix);
+int32_t float2int (float   fx);
+
+    #endif
+
+    #if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
+        // These functions appeared in Intel C v13.
+        float  nanf( const char* str);
+        double nan( const char* str);
+        long double nanl( const char* str);
+    #endif
+
+    #ifdef __cplusplus
+        }
+    #endif
+
+#endif
+
+#if defined( __ANDROID__ )
+    #define log2(X)  (log(X)/log(2))
+#endif
+
+
+
+//
+// stdio.h
+//
+
+#if defined( _MSC_VER )
+    #define snprintf   sprintf_s
+#endif
+
+
+
+//
+// unistd.h
+//
+
+#if defined( _MSC_VER )
+    EXTERN_C unsigned int sleep( unsigned int sec );
+    EXTERN_C int usleep( int usec );
+#endif
+
+
+
+//
+// syscall.h
+//
+
+#if defined( __ANDROID__ )
+    // Android bionic's isn't providing SYS_sysctl wrappers.
+    #define SYS__sysctl  __NR__sysctl
+#endif
+
+
+
+// Some tests use _malloca which defined in malloc.h.
+#if !defined (__APPLE__)
+#include <malloc.h>
+#endif
+
+
+//
+// ???
+//
+
+#if defined( _MSC_VER )
+
+    #define MAXPATHLEN _MAX_PATH
+
+    EXTERN_C uint64_t ReadTime( void );
+    EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
+
+/** Returns the number of leading 0-bits in x,
+    starting at the most significant bit position.
+    If x is 0, the result is undefined.
+*/
+    EXTERN_C int __builtin_clz(unsigned int pattern);
+
+#endif
+
+#ifndef MIN
+    #define MIN(x,y) (((x)<(y))?(x):(y))
+#endif
+#ifndef MAX
+    #define MAX(x,y) (((x)>(y))?(x):(y))
+#endif
+
+
+/*
+    ------------------------------------------------------------------------------------------------
+    WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
+
+    This is a typical usage of the macros:
+
+        double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
+
+     (taken from math_brute_force/reference_math.c). There are two problems:
+
+        1.  There is an error here. On Windows in will produce incorrect result
+            `0x1.5555555555555p+50'. To have a correct result it should be written as
+            `MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
+            third argument is not obvious -- sometimes it should be the same as exponent of the
+            first argument, but sometimes not.
+
+        2.  Information is duplicated. It is easy to make a mistake.
+
+    Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
+    ------------------------------------------------------------------------------------------------
+*/
+#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
+
+    #define MAKE_HEX_FLOAT(x,y,z)  ((float)ldexp( (float)(y), z))
+    #define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
+    #define MAKE_HEX_LONG(x,y,z)   ((long double) ldexp( (long double)(y), z))
+
+#else
+
+// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
+#define MAKE_HEX_FLOAT(x,y,z) x
+#define MAKE_HEX_DOUBLE(x,y,z) x
+#define MAKE_HEX_LONG(x,y,z) x
+
+#endif
+
+
+/*
+    ------------------------------------------------------------------------------------------------
+    HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
+    double respectively. Arguments:
+
+        sm    -- sign of number,
+        int   -- integer part of mantissa (without `0x' prefix),
+        fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
+        se    -- sign of exponent,
+        exp   -- absolute value of (binary) exponent.
+
+    Example:
+
+        double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
+
+    Note:
+
+        We have to pass signs as separate arguments because gcc pass negative integer values
+        (e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
+        `0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
+        literal.
+    ------------------------------------------------------------------------------------------------
+*/
+#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
+    // If compiler does not support hex floating point literals:
+    #define HEX_FLT(  sm, int, fract, se, exp ) sm ldexpf(       (float)( 0x ## int ## fract ## UL  ), se exp + ilogbf(       (float) 0x ## int ) - ilogbf(       ( float )( 0x ## int ## fract ## UL  ) ) )
+    #define HEX_DBL(  sm, int, fract, se, exp ) sm ldexp(       (double)( 0x ## int ## fract ## ULL ), se exp + ilogb(       (double) 0x ## int ) - ilogb(       ( double )( 0x ## int ## fract ## ULL ) ) )
+    #define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
+#else
+    // If compiler supports hex floating point literals: just concatenate all the parts into a literal.
+    #define HEX_FLT(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
+    #define HEX_DBL(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
+    #define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
+#endif
+
+#if defined(__MINGW32__)
+    #include <Windows.h>
+    #define sleep(sec) Sleep((sec) * 1000)
+#endif
+
+#endif // _COMPAT_H_
--- a/test_common/harness/conversions.c
+++ b/test_common/harness/conversions.c
--- a/test_common/harness/conversions.h
+++ b/test_common/harness/conversions.h
@@ -0,0 +1,126 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _conversions_h
+#define _conversions_h
+
+#include "compat.h"
+
+#include "errorHelpers.h"
+#include "mt19937.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Note: the next three all have to match in size and order!! */
+
+enum ExplicitTypes
+{
+    kBool        = 0,
+    kChar,
+    kUChar,
+    kUnsignedChar,
+    kShort,
+    kUShort,
+    kUnsignedShort,
+    kInt,
+    kUInt,
+    kUnsignedInt,
+    kLong,
+    kULong,
+    kUnsignedLong,
+    kFloat,
+    kHalf,
+    kDouble,
+    kNumExplicitTypes
+};
+
+typedef enum ExplicitTypes    ExplicitType;
+
+enum RoundingTypes
+{
+    kRoundToEven = 0,
+    kRoundToZero,
+    kRoundToPosInf,
+    kRoundToNegInf,
+    kRoundToNearest,
+
+    kNumRoundingTypes,
+
+    kDefaultRoundingType = kRoundToNearest
+};
+
+typedef enum RoundingTypes    RoundingType;
+
+extern void             print_type_to_string(ExplicitType type, void *data, char* string);
+extern size_t           get_explicit_type_size( ExplicitType type );
+extern const char *     get_explicit_type_name( ExplicitType type );
+extern void             convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
+
+extern void             generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
+extern void    *         create_random_data( ExplicitType type, MTdata d, size_t count );
+
+extern cl_long          read_upscale_signed( void *inRaw, ExplicitType inType );
+extern cl_ulong         read_upscale_unsigned( void *inRaw, ExplicitType inType );
+extern float            read_as_float( void *inRaw, ExplicitType inType );
+
+extern float            get_random_float(float low, float high, MTdata d);
+extern double           get_random_double(double low, double high, MTdata d);
+extern float            any_float( MTdata d );
+extern double           any_double( MTdata d );
+
+extern int              random_in_range( int minV, int maxV, MTdata d );
+
+size_t get_random_size_t(size_t low, size_t high, MTdata d);
+
+// Note: though this takes a double, this is for use with single precision tests
+static inline int IsFloatSubnormal( float x )
+{
+#if 2 == FLT_RADIX
+    // Do this in integer to avoid problems with FTZ behavior
+    union{ float d; uint32_t u;}u;
+    u.d = fabsf(x);
+    return (u.u-1) < 0x007fffffU;
+#else
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) FLT_MIN && x != 0.0;
+#endif
+}
+
+static inline int IsDoubleSubnormal( double x )
+{
+#if 2 == FLT_RADIX
+    // Do this in integer to avoid problems with FTZ behavior
+    union{ double d; uint64_t u;}u;
+    u.d = fabs( x);
+    return (u.u-1) < 0x000fffffffffffffULL;
+#else
+    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
+    return fabs(x) < (double) DBL_MIN && x != 0.0;
+#endif
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif // _conversions_h
+
+
--- a/test_common/harness/errorHelpers.c
+++ b/test_common/harness/errorHelpers.c
@@ -0,0 +1,813 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "compat.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "errorHelpers.h"
+
+extern bool gOfflineCompiler;
+
+const char    *IGetErrorString( int clErrorCode )
+{
+    switch( clErrorCode )
+    {
+        case CL_SUCCESS:                return "CL_SUCCESS";
+        case CL_DEVICE_NOT_FOUND:        return "CL_DEVICE_NOT_FOUND";
+        case CL_DEVICE_NOT_AVAILABLE:    return "CL_DEVICE_NOT_AVAILABLE";
+        case CL_COMPILER_NOT_AVAILABLE:    return "CL_COMPILER_NOT_AVAILABLE";
+        case CL_MEM_OBJECT_ALLOCATION_FAILURE:    return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
+        case CL_OUT_OF_RESOURCES:        return "CL_OUT_OF_RESOURCES";
+        case CL_OUT_OF_HOST_MEMORY:        return "CL_OUT_OF_HOST_MEMORY";
+        case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
+        case CL_MEM_COPY_OVERLAP:        return "CL_MEM_COPY_OVERLAP";
+        case CL_IMAGE_FORMAT_MISMATCH:    return "CL_IMAGE_FORMAT_MISMATCH";
+        case CL_IMAGE_FORMAT_NOT_SUPPORTED:    return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
+        case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
+        case CL_MAP_FAILURE:            return "CL_MAP_FAILURE";
+        case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
+        case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
+        case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
+        case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
+        case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
+        case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
+        case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
+        case CL_INVALID_VALUE:            return "CL_INVALID_VALUE";
+        case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
+        case CL_INVALID_DEVICE:            return "CL_INVALID_DEVICE";
+        case CL_INVALID_CONTEXT:        return "CL_INVALID_CONTEXT";
+        case CL_INVALID_QUEUE_PROPERTIES:    return "CL_INVALID_QUEUE_PROPERTIES";
+        case CL_INVALID_COMMAND_QUEUE:    return "CL_INVALID_COMMAND_QUEUE";
+        case CL_INVALID_HOST_PTR:    return "CL_INVALID_HOST_PTR";
+        case CL_INVALID_MEM_OBJECT:        return "CL_INVALID_MEM_OBJECT";
+        case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:        return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
+        case CL_INVALID_IMAGE_SIZE:        return "CL_INVALID_IMAGE_SIZE";
+        case CL_INVALID_SAMPLER:        return "CL_INVALID_SAMPLER";
+        case CL_INVALID_BINARY:        return "CL_INVALID_BINARY";
+        case CL_INVALID_BUILD_OPTIONS:        return "CL_INVALID_BUILD_OPTIONS";
+        case CL_INVALID_PROGRAM:        return "CL_INVALID_PROGRAM";
+        case CL_INVALID_PROGRAM_EXECUTABLE:        return "CL_INVALID_PROGRAM_EXECUTABLE";
+        case CL_INVALID_KERNEL_NAME:    return "CL_INVALID_KERNEL_NAME";
+        case CL_INVALID_KERNEL_DEFINITION:    return "CL_INVALID_KERNEL_DEFINITION";
+        case CL_INVALID_KERNEL:            return "CL_INVALID_KERNEL";
+        case CL_INVALID_ARG_INDEX:        return "CL_INVALID_ARG_INDEX";
+        case CL_INVALID_ARG_VALUE:        return "CL_INVALID_ARG_VALUE";
+        case CL_INVALID_ARG_SIZE:        return "CL_INVALID_ARG_SIZE";
+        case CL_INVALID_KERNEL_ARGS:    return "CL_INVALID_KERNEL_ARGS";
+        case CL_INVALID_WORK_DIMENSION:        return "CL_INVALID_WORK_DIMENSION";
+        case CL_INVALID_WORK_GROUP_SIZE:    return "CL_INVALID_WORK_GROUP_SIZE";
+        case CL_INVALID_WORK_ITEM_SIZE:    return "CL_INVALID_WORK_ITEM_SIZE";
+        case CL_INVALID_GLOBAL_OFFSET:        return "CL_INVALID_GLOBAL_OFFSET";
+        case CL_INVALID_EVENT_WAIT_LIST:    return "CL_INVALID_EVENT_WAIT_LIST";
+        case CL_INVALID_EVENT:            return "CL_INVALID_EVENT";
+        case CL_INVALID_OPERATION:        return "CL_INVALID_OPERATION";
+        case CL_INVALID_GL_OBJECT:        return "CL_INVALID_GL_OBJECT";
+        case CL_INVALID_BUFFER_SIZE:    return "CL_INVALID_BUFFER_SIZE";
+        case CL_INVALID_MIP_LEVEL:      return "CL_INVALID_MIP_LEVEL";
+        case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE";
+        case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
+        case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR";
+        case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS";
+        case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS";
+        case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT";
+        default: return "(unknown)";
+    }
+}
+
+const char *GetChannelOrderName( cl_channel_order order )
+{
+    switch( order )
+    {
+        case CL_R:      return "CL_R";
+        case CL_A:      return "CL_A";
+        case CL_Rx:     return "CL_Rx";
+        case CL_RG:     return "CL_RG";
+        case CL_RA:     return "CL_RA";
+        case CL_RGx:    return "CL_RGx";
+        case CL_RGB:    return "CL_RGB";
+        case CL_RGBx:   return "CL_RGBx";
+        case CL_RGBA:      return "CL_RGBA";
+        case CL_ARGB:      return "CL_ARGB";
+        case CL_BGRA:      return "CL_BGRA";
+        case CL_INTENSITY: return "CL_INTENSITY";
+        case CL_LUMINANCE: return "CL_LUMINANCE";
+#if defined CL_1RGB_APPLE
+        case CL_1RGB_APPLE: return "CL_1RGB_APPLE";
+#endif
+#if defined CL_BGR1_APPLE
+        case CL_BGR1_APPLE: return "CL_BGR1_APPLE";
+#endif
+#if defined CL_ABGR_APPLE
+        case CL_ABGR_APPLE: return "CL_ABGR_APPLE";
+#endif
+        case CL_DEPTH: return "CL_DEPTH";
+        case CL_DEPTH_STENCIL: return "CL_DEPTH_STENCIL";
+        case CL_sRGB: return "CL_sRGB";
+        case CL_sRGBA: return "CL_sRGBA";
+        case CL_sRGBx: return "CL_sRGBx";
+        case CL_sBGRA: return "CL_sBGRA";
+        case CL_ABGR: return "CL_ABGR";
+        default: return NULL;
+    }
+}
+
+int IsChannelOrderSupported( cl_channel_order order )
+{
+    switch( order )
+    {
+        case CL_R:
+        case CL_A:
+        case CL_Rx:
+        case CL_RG:
+        case CL_RA:
+        case CL_RGx:
+        case CL_RGB:
+        case CL_RGBx:
+        case CL_RGBA:
+        case CL_ARGB:
+        case CL_BGRA:
+        case CL_INTENSITY:
+        case CL_LUMINANCE:
+        case CL_ABGR:
+        case CL_sRGB:
+        case CL_sRGBx:
+        case CL_sBGRA:
+        case CL_sRGBA:
+        case CL_DEPTH:
+            return 1;
+#if defined CL_1RGB_APPLE
+        case CL_1RGB_APPLE:
+            return 1;
+#endif
+#if defined CL_BGR1_APPLE
+        case CL_BGR1_APPLE:
+            return 1;
+#endif
+        default:
+            return 0;
+    }
+}
+
+const char *GetChannelTypeName( cl_channel_type type )
+{
+    switch( type )
+    {
+        case CL_SNORM_INT8:         return "CL_SNORM_INT8";
+        case CL_SNORM_INT16:        return "CL_SNORM_INT16";
+        case CL_UNORM_INT8:         return "CL_UNORM_INT8";
+        case CL_UNORM_INT16:        return "CL_UNORM_INT16";
+        case CL_UNORM_SHORT_565:    return "CL_UNORM_SHORT_565";
+        case CL_UNORM_SHORT_555:    return "CL_UNORM_SHORT_555";
+        case CL_UNORM_INT_101010:   return "CL_UNORM_INT_101010";
+        case CL_SIGNED_INT8:        return "CL_SIGNED_INT8";
+        case CL_SIGNED_INT16:       return "CL_SIGNED_INT16";
+        case CL_SIGNED_INT32:       return "CL_SIGNED_INT32";
+        case CL_UNSIGNED_INT8:      return "CL_UNSIGNED_INT8";
+        case CL_UNSIGNED_INT16:     return "CL_UNSIGNED_INT16";
+        case CL_UNSIGNED_INT32:     return "CL_UNSIGNED_INT32";
+        case CL_HALF_FLOAT:         return "CL_HALF_FLOAT";
+        case CL_FLOAT:              return "CL_FLOAT";
+#ifdef CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:     return "CL_SFIXED14_APPLE";
+#endif
+        case CL_UNORM_INT24:        return "CL_UNORM_INT24";
+        default:                    return NULL;
+    }
+}
+
+int IsChannelTypeSupported( cl_channel_type type )
+{
+    switch( type )
+    {
+        case CL_SNORM_INT8:
+        case CL_SNORM_INT16:
+        case CL_UNORM_INT8:
+        case CL_UNORM_INT16:
+        case CL_UNORM_INT24:
+        case CL_UNORM_SHORT_565:
+        case CL_UNORM_SHORT_555:
+        case CL_UNORM_INT_101010:
+        case CL_SIGNED_INT8:
+        case CL_SIGNED_INT16:
+        case CL_SIGNED_INT32:
+        case CL_UNSIGNED_INT8:
+        case CL_UNSIGNED_INT16:
+        case CL_UNSIGNED_INT32:
+        case CL_HALF_FLOAT:
+        case CL_FLOAT:
+            return 1;
+#ifdef CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+            return 1;
+#endif
+        default:
+            return 0;
+    }
+}
+
+const char *GetAddressModeName( cl_addressing_mode mode )
+{
+    switch( mode )
+    {
+        case CL_ADDRESS_NONE:                return "CL_ADDRESS_NONE";
+        case CL_ADDRESS_CLAMP_TO_EDGE:        return "CL_ADDRESS_CLAMP_TO_EDGE";
+        case CL_ADDRESS_CLAMP:                return "CL_ADDRESS_CLAMP";
+        case CL_ADDRESS_REPEAT:                return "CL_ADDRESS_REPEAT";
+        case CL_ADDRESS_MIRRORED_REPEAT:    return "CL_ADDRESS_MIRRORED_REPEAT";
+        default:                            return NULL;
+    }
+}
+
+const char *GetDeviceTypeName( cl_device_type type )
+{
+    switch( type )
+    {
+        case CL_DEVICE_TYPE_GPU:    return "CL_DEVICE_TYPE_GPU";
+        case CL_DEVICE_TYPE_CPU:    return "CL_DEVICE_TYPE_CPU";
+        case CL_DEVICE_TYPE_ACCELERATOR:    return "CL_DEVICE_TYPE_ACCELERATOR";
+        case CL_DEVICE_TYPE_ALL:    return "CL_DEVICE_TYPE_ALL";
+        default:                    return NULL;
+    }
+}
+
+const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer )
+{
+    static char scratch[ 1024 ];
+    size_t i, j;
+
+    if( buffer == NULL )
+        buffer = scratch;
+
+    unsigned char *p = (unsigned char *)dataBuffer;
+    char *bPtr;
+
+    buffer[ 0 ] = 0;
+    bPtr = buffer;
+    for( i = 0; i < vecSize; i++ )
+    {
+        if( i > 0 )
+        {
+            bPtr[ 0 ] = ' ';
+            bPtr++;
+        }
+        for( j = 0; j < typeSize; j++ )
+        {
+            sprintf( bPtr, "%02x", (unsigned int)p[ typeSize - j - 1 ] );
+            bPtr += 2;
+        }
+        p += typeSize;
+    }
+    bPtr[ 0 ] = 0;
+
+    return buffer;
+}
+
+#ifndef MAX
+#define MAX( _a, _b )       ((_a) > (_b) ? (_a) : (_b))
+#endif
+
+#if defined( _MSC_VER )
+#define scalbnf(_a, _i )    ldexpf( _a, _i )
+#define scalbn(_a, _i )     ldexp( _a, _i )
+#define scalbnl(_a, _i )    ldexpl( _a, _i )
+#endif
+
+static float Ulp_Error_Half_Float( float test, double reference );
+static inline float  half2float( cl_ushort half );
+
+// taken from math tests
+#define HALF_MIN_EXP    -13
+#define HALF_MANT_DIG    11
+static float Ulp_Error_Half_Float( float test, double reference )
+{
+    union{ double d; uint64_t u; }u;     u.d = reference;
+
+    // Note: This function presumes that someone has already tested whether the result is correctly,
+    // rounded before calling this function.  That test:
+    //
+    //    if( (float) reference == test )
+    //        return 0.0f;
+    //
+    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
+
+    double testVal = test;
+    if( u.u & 0x000fffffffffffffULL )
+    { // Non-power of two and NaN
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
+
+        // The unbiased exponent of the ulp unit place
+        int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
+
+        // Scale the exponent of the error
+        return (float) scalbn( testVal - reference, ulp_exp );
+    }
+
+    if( isinf( reference ) )
+    {
+        if( (double) test == reference )
+            return 0.0f;
+
+        return (float) (testVal - reference );
+    }
+
+    // reference is a normal power of two or a zero
+    int ulp_exp =  HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
+
+    // Scale the exponent of the error
+    return (float) scalbn( testVal - reference, ulp_exp );
+}
+
+// Taken from vLoadHalf test
+static inline float half2float( cl_ushort us )
+{
+    uint32_t u = us;
+    uint32_t sign = (u << 16) & 0x80000000;
+    int32_t exponent = (u & 0x7c00) >> 10;
+    uint32_t mantissa = (u & 0x03ff) << 13;
+    union{ unsigned int u; float f;}uu;
+
+    if( exponent == 0 )
+    {
+        if( mantissa == 0 )
+            return sign ? -0.0f : 0.0f;
+
+        int shift = __builtin_clz( mantissa ) - 8;
+        exponent -= shift-1;
+        mantissa <<= shift;
+        mantissa &= 0x007fffff;
+    }
+    else
+        if( exponent == 31)
+        {
+            uu.u = mantissa | sign;
+            if( mantissa )
+                uu.u |= 0x7fc00000;
+            else
+                uu.u |= 0x7f800000;
+
+            return uu.f;
+        }
+
+    exponent += 127 - 15;
+    exponent <<= 23;
+
+    exponent |= mantissa;
+    uu.u = exponent | sign;
+
+    return uu.f;
+}
+
+float Ulp_Error_Half( cl_ushort test, float reference )
+{
+    return Ulp_Error_Half_Float( half2float(test), reference );
+}
+
+
+float Ulp_Error( float test, double reference )
+{
+    union{ double d; uint64_t u; }u;     u.d = reference;
+    double testVal = test;
+
+    // Note: This function presumes that someone has already tested whether the result is correctly,
+    // rounded before calling this function.  That test:
+    //
+    //    if( (float) reference == test )
+    //        return 0.0f;
+    //
+    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
+
+
+    if( isinf( reference ) )
+    {
+        if( testVal == reference )
+            return 0.0f;
+
+        return (float) (testVal - reference );
+    }
+
+    if( isinf( testVal) )
+    { // infinite test value, but finite (but possibly overflowing in float) reference.
+      //
+      // The function probably overflowed prematurely here. Formally, the spec says this is
+      // an infinite ulp error and should not be tolerated. Unfortunately, this would mean
+      // that the internal precision of some half_pow implementations would have to be 29+ bits
+      // at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
+      // is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which
+      // after rounding to single is 4*32 = 128, which will ultimately result in premature
+      // overflow, even though a good faith representation would be correct to within 2**-29
+      // interally.
+
+        // In the interest of not requiring the implementation go to extraordinary lengths to
+        // deliver a half precision function, we allow premature overflow within the limit
+        // of the allowed ulp error. Towards, that end, we "pretend" the test value is actually
+        // 2**128, the next value that would appear in the number line if float had sufficient range.
+        testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal );
+
+        // Note that the same hack may not work in long double, which is not guaranteed to have
+        // more range than double.  It is not clear that premature overflow should be tolerated for
+        // double.
+    }
+
+    if( u.u & 0x000fffffffffffffULL )
+    { // Non-power of two and NaN
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
+
+        // The unbiased exponent of the ulp unit place
+        int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 );
+
+        // Scale the exponent of the error
+        return (float) scalbn( testVal - reference, ulp_exp );
+    }
+
+    // reference is a normal power of two or a zero
+    // The unbiased exponent of the ulp unit place
+    int ulp_exp =  FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 );
+
+    // Scale the exponent of the error
+    return (float) scalbn( testVal - reference, ulp_exp );
+}
+
+float Ulp_Error_Double( double test, long double reference )
+{
+  // Deal with long double = double
+  // On most systems long double is a higher precision type than double. They provide either
+  // a 80-bit or greater floating point type, or they provide a head-tail double double format.
+  // That is sufficient to represent the accuracy of a floating point result to many more bits
+  // than double and we can calculate sub-ulp errors. This is the standard system for which this
+  // test suite is designed.
+  //
+  // On some systems double and long double are the same thing. Then we run into a problem,
+  // because our representation of the infinitely precise result (passed in as reference above)
+  // can be off by as much as a half double precision ulp itself.  In this case, we inflate the
+  // reported error by half an ulp to take this into account.  A more correct and permanent fix
+  // would be to undertake refactoring the reference code to return results in this format:
+  //
+  //    typedef struct DoubleReference
+  //    { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult)        (infinitely precise)
+  //        double  correctlyRoundedResult;     // as best we can
+  //        double  ulps;                       // plus a fractional amount to account for the difference
+  //    }DoubleReference;                       //     between infinitely precise result and correctlyRoundedResult, in units of ulps.
+  //
+  // This would provide a useful higher-than-double precision format for everyone that we can use,
+  // and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
+  // that use a head to tail double double for long double.
+
+    // Note: This function presumes that someone has already tested whether the result is correctly,
+    // rounded before calling this function.  That test:
+    //
+    //    if( (float) reference == test )
+    //        return 0.0f;
+    //
+    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
+    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
+    // results.
+
+
+    int x;
+    long double testVal = test;
+    if( 0.5L != frexpl( reference, &x) )
+    { // Non-power of two and NaN
+        if( isinf( reference ) )
+        {
+            if( testVal == reference )
+                return 0.0f;
+
+            return (float) ( testVal - reference );
+        }
+
+        if( isnan( reference ) && isnan( test ) )
+            return 0.0f;    // if we are expecting a NaN, any NaN is fine
+
+        // The unbiased exponent of the ulp unit place
+        int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
+
+        // Scale the exponent of the error
+        float result = (float) scalbnl( testVal - reference, ulp_exp );
+
+        // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+        if( sizeof(long double) == sizeof( double ) )
+            result += copysignf( 0.5f, result);
+
+        return result;
+
+    }
+
+    // reference is a normal power of two or a zero
+    // The unbiased exponent of the ulp unit place
+    int ulp_exp =  DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
+
+    // Scale the exponent of the error
+    float result = (float) scalbnl( testVal - reference, ulp_exp );
+
+    // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
+    if( sizeof(long double) == sizeof( double ) )
+        result += copysignf( 0.5f, result);
+
+    return result;
+}
+
+cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
+{
+  int error;
+  size_t size_ret;
+
+  // Does the program object exist?
+  if (program != NULL) {
+
+    // Was the number of devices given
+    if (num_devices == 0) {
+
+      // If zero devices were specified then allocate and query the device list from the context
+      cl_context context;
+      error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL);
+      test_error( error, "Unable to query program's context" );
+      error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret);
+      test_error( error, "Unable to query context's device size" );
+      num_devices = size_ret / sizeof(cl_device_id);
+      device_list = (cl_device_id *) malloc(size_ret);
+      if (device_list == NULL) {
+          print_error( error, "malloc failed" );
+          return CL_OUT_OF_HOST_MEMORY;
+      }
+      error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, device_list, NULL);
+      test_error( error, "Unable to query context's devices" );
+
+    }
+
+    // For each device in the device_list
+    unsigned int i;
+    for (i = 0; i < num_devices; i++) {
+
+      // Get the build status
+      cl_build_status build_status;
+      error = clGetProgramBuildInfo(program,
+                                    device_list[i],
+                                    CL_PROGRAM_BUILD_STATUS,
+                                    sizeof(build_status),
+                                    &build_status,
+                                    &size_ret);
+      test_error( error, "Unable to query build status" );
+
+      // If the build failed then log the status, and allocate the build log, log it and free it
+      if (build_status != CL_BUILD_SUCCESS) {
+
+        log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);
+        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
+        test_error( error, "Unable to query build log size" );
+        char *build_log = (char *) malloc(size_ret);
+        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret);
+        test_error( error, "Unable to query build log" );
+        log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
+        free(build_log);
+
+      }
+
+    }
+
+    // Was the number of devices given
+    if (num_devices == 0) {
+
+      // If zero devices were specified then free the device list
+      free(device_list);
+
+    }
+
+  }
+
+  return CL_SUCCESS;
+}
+
+const char * subtests_requiring_opencl_1_2[] = {
+            "device_partition_equally",
+            "device_partition_by_counts",
+            "device_partition_by_affinity_domain_numa",
+            "device_partition_by_affinity_domain_l4_cache",
+            "device_partition_by_affinity_domain_l3_cache",
+            "device_partition_by_affinity_domain_l2_cache",
+            "device_partition_by_affinity_domain_l1_cache",
+            "device_partition_by_affinity_domain_next_partitionable",
+            "device_partition_all",
+    "buffer_fill_int",
+    "buffer_fill_uint",
+    "buffer_fill_short",
+    "buffer_fill_ushort",
+    "buffer_fill_char",
+    "buffer_fill_uchar",
+    "buffer_fill_long",
+    "buffer_fill_ulong",
+    "buffer_fill_float",
+    "buffer_fill_struct",
+  "test_mem_host_write_only_buffer",
+  "test_mem_host_write_only_subbuffer",
+  "test_mem_host_no_access_buffer",
+  "test_mem_host_no_access_subbuffer",
+  "test_mem_host_read_only_image",
+  "test_mem_host_write_only_image",
+  "test_mem_host_no_access_image",
+  // CL_MEM_HOST_{READ|WRITE}_ONLY api/
+    "get_buffer_info",
+    "get_image1d_info",
+    "get_image1d_array_info",
+    "get_image2d_array_info",
+  // gl/
+  "images_read_1D",
+  "images_write_1D",
+  "images_1D_getinfo",
+  "images_read_1Darray",
+  "images_write_1Darray",
+  "images_1Darray_getinfo",
+  "images_read_2Darray",
+  "images_write_2Darray",
+  "images_2Darray_getinfo",
+    "buffer_migrate",
+    "image_migrate",
+   // compiler/
+    "load_program_source",
+    "load_multistring_source",
+    "load_two_kernel_source",
+    "load_null_terminated_source",
+    "load_null_terminated_multi_line_source",
+    "load_null_terminated_partial_multi_line_source",
+    "load_discreet_length_source",
+    "get_program_source",
+    "get_program_build_info",
+    "get_program_info",
+    "large_compile",
+    "async_build",
+    "options_build_optimizations",
+    "options_build_macro",
+    "options_build_macro_existence",
+    "options_include_directory",
+    "options_denorm_cache",
+    "preprocessor_define_udef",
+    "preprocessor_include",
+    "preprocessor_line_error",
+    "preprocessor_pragma",
+    "compiler_defines_for_extensions",
+    "image_macro",
+    "simple_compile_only",
+    "simple_static_compile_only",
+    "simple_extern_compile_only",
+    "simple_compile_with_callback",
+    "simple_embedded_header_compile",
+    "simple_link_only",
+    "two_file_regular_variable_access",
+    "two_file_regular_struct_access",
+    "two_file_regular_function_access",
+    "simple_link_with_callback",
+    "simple_embedded_header_link",
+    "execute_after_simple_compile_and_link",
+    "execute_after_simple_compile_and_link_no_device_info",
+    "execute_after_simple_compile_and_link_with_defines",
+    "execute_after_simple_compile_and_link_with_callbacks",
+    "execute_after_simple_library_with_link",
+    "execute_after_two_file_link",
+    "execute_after_two_file_link",
+    "execute_after_embedded_header_link",
+    "execute_after_included_header_link",
+    "execute_after_serialize_reload_object",
+    "execute_after_serialize_reload_library",
+    "simple_library_only",
+    "simple_library_with_callback",
+    "simple_library_with_link",
+    "two_file_link",
+    "multi_file_libraries",
+    "multiple_files",
+    "multiple_libraries",
+    "multiple_files_multiple_libraries",
+    "multiple_embedded_headers",
+    "program_binary_type",
+    "compile_and_link_status_options_log",
+    // CL_PROGRAM_NUM_KERNELS, in api/
+    "get_kernel_arg_info",
+    "create_kernels_in_program",
+    // clEnqueue..WithWaitList, in events/
+    "event_enqueue_marker_with_event_list",
+    "event_enqueue_barrier_with_event_list",
+    "popcount"
+};
+
+const char * subtests_to_skip_with_offline_compiler[] = {
+            "get_kernel_arg_info",
+            "binary_create",
+            "load_program_source",
+            "load_multistring_source",
+            "load_two_kernel_source",
+            "load_null_terminated_source",
+            "load_null_terminated_multi_line_source",
+            "load_null_terminated_partial_multi_line_source",
+            "load_discreet_length_source",
+            "get_program_source",
+            "get_program_build_info",
+            "options_build_optimizations",
+            "options_build_macro",
+            "options_build_macro_existence",
+            "options_include_directory",
+            "options_denorm_cache",
+            "preprocessor_define_udef",
+            "preprocessor_include",
+            "preprocessor_line_error",
+            "preprocessor_pragma",
+            "compiler_defines_for_extensions",
+            "image_macro",
+            "simple_extern_compile_only",
+            "simple_embedded_header_compile",
+            "two_file_regular_variable_access",
+            "two_file_regular_struct_access",
+            "two_file_regular_function_access",
+            "simple_embedded_header_link",
+            "execute_after_simple_compile_and_link_with_defines",
+            "execute_after_simple_compile_and_link_with_callbacks",
+            "execute_after_embedded_header_link",
+            "execute_after_included_header_link",
+            "multi_file_libraries",
+            "multiple_files",
+            "multiple_libraries",
+            "multiple_files_multiple_libraries",
+            "multiple_embedded_headers",
+            "program_binary_type",
+            "compile_and_link_status_options_log",
+};
+
+int check_opencl_version_with_testname(const char *subtestname, cl_device_id device)
+{
+    int nRequiring12 = sizeof(subtests_requiring_opencl_1_2)/sizeof(char *);
+    size_t i;
+    for(i=0; i < nRequiring12; ++i) {
+        if(!strcmp(subtestname, subtests_requiring_opencl_1_2[i])) {
+            return check_opencl_version(device, 1, 2);
+        }
+    }
+    return 0;
+}
+
+int check_opencl_version(cl_device_id device, cl_uint requestedMajorVersion, cl_uint requestedMinorVersion) {
+    int error;
+    char device_version[1024];
+    cl_uint majorVersion = 0, minorVersion = 0;
+    const char * required_version_ocl_12="OpenCL 1.2 ";
+
+    memset( device_version, 0, sizeof( device_version ) );
+    error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL );
+    test_error(error, "unable to get CL_DEVICE_VERSION");
+
+    if ( strncmp( device_version, "OpenCL 1.2", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) {
+        majorVersion = 1;
+        minorVersion = 2;
+    } else if ( strncmp( device_version, "OpenCL 1.1", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) {
+        majorVersion = 1;
+        minorVersion = 1;
+    } else if ( strncmp( device_version, "OpenCL 2.0", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) {
+        majorVersion = 2;
+        minorVersion = 0;
+    } else if ( strncmp( device_version, "OpenCL 2.1", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) {
+        majorVersion = 2;
+        minorVersion = 1;
+    } else {
+        log_error( "ERROR: Unexpected version string: `%s'.\n", device_version );
+        return 1;
+    };
+
+    if (majorVersion >= requestedMajorVersion)
+        return 0;
+
+    if (minorVersion >= requestedMinorVersion)
+        return 0;
+
+    return 1;
+}
+
+int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device)
+{
+    if(gOfflineCompiler)
+    {
+        int nNotRequiredWithOfflineCompiler = sizeof(subtests_to_skip_with_offline_compiler)/sizeof(char *);
+        size_t i;
+        for(i=0; i < nNotRequiredWithOfflineCompiler; ++i) {
+            if(!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i])) {
+                return 1;
+            }
+        }
+    }
+    return 0;
+}
--- a/test_common/harness/errorHelpers.h
+++ b/test_common/harness/errorHelpers.h
@@ -0,0 +1,164 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _errorHelpers_h
+#define _errorHelpers_h
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/opencl.h>
+#endif
+#include <stdlib.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LOWER_IS_BETTER     0
+#define HIGHER_IS_BETTER    1
+
+// If USE_ATF is defined, all log_error and log_info calls can be routed to test library
+// functions as described below. This is helpful for integration into an automated testing
+// system.
+#if USE_ATF
+// export BUILD_WITH_ATF=1
+    #include <ATF/ATF.h>
+    #define test_start() ATFTestStart()
+    #define log_info ATFLogInfo
+    #define log_error ATFLogError
+    #define log_missing_feature ATFLogMissingFeature
+    #define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__)
+    #define test_finish() ATFTestFinish()
+    #define vlog_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__)
+    #define vlog ATFLogInfo
+    #define vlog_error ATFLogError
+#else
+    #include <stdio.h>
+    #define test_start()
+    #define log_info printf
+    #define log_error printf
+    #define log_missing_feature printf
+    #define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType,        \
+                        _higherBetter?"higher is better":"lower is better", _number )
+    #define test_finish()
+    #define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType,    \
+                        _higherBetter?"higher is better":"lower is better" , _number)
+    #ifdef _WIN32
+        #ifdef __MINGW32__
+            // Use __mingw_printf since it supports "%a" format specifier
+            #define vlog __mingw_printf
+            #define vlog_error __mingw_printf
+        #else
+            // Use home-baked function that treats "%a" as "%f"
+        static int vlog_win32(const char *format, ...);
+        #define vlog vlog_win32
+        #define vlog_error vlog_win32
+        #endif
+    #else
+        #define vlog_error printf
+        #define vlog printf
+    #endif
+#endif
+
+#define ct_assert(b)          ct_assert_i(b, __LINE__)
+#define ct_assert_i(b, line)  ct_assert_ii(b, line)
+#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
+
+#define test_error(errCode,msg)    test_error_ret(errCode,msg,errCode)
+#define test_error_ret(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
+#define print_error(errCode,msg)    log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
+
+#define test_missing_feature(errCode, msg) test_missing_feature_ret(errCode, msg, errCode)
+// this macro should always return CL_SUCCESS, but print the missing feature message
+#define test_missing_feature_ret(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { print_missing_feature( errCode, msg ); return CL_SUCCESS ; } }
+#define print_missing_feature(errCode, msg) log_missing_feature("ERROR: Subtest %s tests a feature not supported by the device version! (from %s:%d)\n", msg, __FILE__, __LINE__ );
+
+#define test_missing_support_offline_cmpiler(errCode, msg) test_missing_support_offline_cmpiler_ret(errCode, msg, errCode)
+// this macro should always return CL_SUCCESS, but print the skip message on test not supported with offline compiler
+#define test_missing_support_offline_cmpiler_ret(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { log_info( "INFO: Subtest %s tests is not supported in offline compiler execution path! (from %s:%d)\n", msg, __FILE__, __LINE__ ); return CL_SUCCESS ; } }
+
+// expected error code vs. what we got
+#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
+#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
+#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
+#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
+#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
+#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
+
+extern const char    *IGetErrorString( int clErrorCode );
+
+extern float Ulp_Error_Half( cl_ushort test, float reference );
+extern float Ulp_Error( float test, double reference );
+extern float Ulp_Error_Double( double test, long double reference );
+
+extern const char *GetChannelTypeName( cl_channel_type type );
+extern int IsChannelTypeSupported( cl_channel_type type );
+extern const char *GetChannelOrderName( cl_channel_order order );
+extern int IsChannelOrderSupported( cl_channel_order order );
+extern const char *GetAddressModeName( cl_addressing_mode mode );
+
+extern const char *GetDeviceTypeName( cl_device_type type );
+int check_opencl_version_with_testname(const char *subtestname, cl_device_id device);
+int check_opencl_version(cl_device_id device, cl_uint requestedMajorVersion, cl_uint requestedMinorVersion);
+int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device);
+
+// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
+extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
+
+#if defined (_WIN32) && !defined(__MINGW32__)
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+static int vlog_win32(const char *format, ...)
+{
+    const char *new_format = format;
+
+    if (strstr(format, "%a")) {
+        char *temp;
+        if ((temp = strdup(format)) == NULL) {
+            printf("vlog_win32: Failed to allocate memory for strdup\n");
+            return -1;
+        }
+        new_format = temp;
+        while (*temp) {
+            // replace %a with %f
+            if ((*temp == '%') && (*(temp+1) == 'a')) {
+                *(temp+1) = 'f';
+            }
+            temp++;
+        }
+    }
+
+    va_list args;
+    va_start(args, format);
+    vprintf(new_format, args);
+    va_end(args);
+
+    if (new_format != format) {
+        free((void*)new_format);
+    }
+
+    return 0;
+}
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _errorHelpers_h
+
+
--- a/test_common/harness/fpcontrol.h
+++ b/test_common/harness/fpcontrol.h
@@ -0,0 +1,104 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _fpcontrol_h
+#define _fpcontrol_h
+
+// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
+// to FTZ mode if the device hardware is running in that mode.  We have explored all other options short of writing correctly rounded operations
+// in integer code, and have found this is the only way to correctly verify operation.
+//
+// Non-Apple implementations will need to provide their own implentation for these features.  If the reference hardware and device are both
+// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty.  If the device is running in non-default
+// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
+#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
+    typedef int     FPU_mode_type;
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
+    #include <xmmintrin.h>
+#elif defined( __PPC__ )
+    #include <fpu_control.h>
+    extern __thread fpu_control_t fpu_control;
+#endif
+    // Set the reference hardware floating point unit to FTZ mode
+    static inline void ForceFTZ( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        *mode = _mm_getcsr();
+        _mm_setcsr( *mode | 0x8040);
+#elif defined( __PPC__ )
+        *mode = fpu_control;
+        fpu_control |= _FPU_MASK_NI;
+#elif defined ( __arm__ )
+        unsigned fpscr;
+        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        unsigned fpscr;
+        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24)));
+#else
+        #error ForceFTZ needs an implentation
+#endif
+    }
+
+    // Disable the denorm flush to zero
+    static inline void DisableFTZ( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        *mode = _mm_getcsr();
+        _mm_setcsr( *mode & ~0x8040);
+#elif defined( __PPC__ )
+        *mode = fpu_control;
+        fpu_control &= ~_FPU_MASK_NI;
+#elif defined ( __arm__ )
+        unsigned fpscr;
+        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        unsigned fpscr;
+        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
+        *mode = fpscr;
+        __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24)));
+#else
+    #error DisableFTZ needs an implentation
+#endif
+    }
+
+    // Restore the reference hardware to floating point state indicated by *mode
+    static inline void RestoreFPState( FPU_mode_type *mode )
+    {
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
+        _mm_setcsr( *mode );
+#elif defined( __PPC__)
+        fpu_control = *mode;
+#elif defined (__arm__)
+        __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
+        // Add 64 bit support
+#elif defined (__aarch64__)
+        __asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
+#else
+        #error RestoreFPState needs an implementation
+#endif
+    }
+#else
+        #error ForceFTZ and RestoreFPState need implentations
+#endif
+
+#endif
--- a/test_common/harness/genericThread.cpp
+++ b/test_common/harness/genericThread.cpp
@@ -0,0 +1,53 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "genericThread.h"
+
+#if defined(_WIN32)
+#include <windows.h>
+#else // !_WIN32
+#include <pthread.h>
+#endif
+
+void * genericThread::IStaticReflector( void * data )
+{
+    genericThread *t = (genericThread *)data;
+    return t->IRun();
+}
+
+bool genericThread::Start( void )
+{
+#if defined(_WIN32)
+    mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL );
+    return ( mHandle != NULL );
+#else // !_WIN32
+    int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this );
+    return ( error == 0 );
+#endif // !_WIN32
+}
+
+void * genericThread::Join( void )
+{
+#if defined(_WIN32)
+    WaitForSingleObject( (HANDLE)mHandle, INFINITE );
+    return NULL;
+#else // !_WIN32
+    void * retVal;
+    int error = pthread_join( (pthread_t)mHandle, &retVal );
+    if( error != 0 )
+        retVal = NULL;
+    return retVal;
+#endif // !_WIN32
+}
--- a/test_common/harness/genericThread.h
+++ b/test_common/harness/genericThread.h
@@ -0,0 +1,42 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _genericThread_h
+#define _genericThread_h
+
+#include <stdio.h>
+
+class genericThread
+{
+    public:
+
+        virtual ~genericThread() {}
+
+        bool    Start( void );
+        void *    Join( void );
+
+    protected:
+
+        virtual void *    IRun( void ) = 0;
+
+    private:
+
+        void* mHandle;
+
+        static void * IStaticReflector( void * data );
+};
+
+#endif // _genericThread_h
+
--- a/test_common/harness/imageHelpers.cpp
+++ b/test_common/harness/imageHelpers.cpp
--- a/test_common/harness/imageHelpers.h
+++ b/test_common/harness/imageHelpers.h
@@ -0,0 +1,646 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _imageHelpers_h
+#define _imageHelpers_h
+
+#include "compat.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif
+
+#include <time.h>
+
+#include "errorHelpers.h"
+
+#include "conversions.h"
+#include "typeWrappers.h"
+#include "kernelHelpers.h"
+#include "errorHelpers.h"
+#include "mt19937.h"
+#include "rounding_mode.h"
+#include "clImageHelper.h"
+
+extern int gTestCount;
+extern int gTestFailure;
+extern cl_device_type gDeviceType;
+
+// Number of iterations per image format to test if not testing max images, rounding, or small images
+#define NUM_IMAGE_ITERATIONS 3
+
+
+// Definition for our own sampler type, to mirror the cl_sampler internals
+#define MAX_sRGB_TO_lRGB_CONVERSION_ERROR 0.5
+#define MAX_lRGB_TO_sRGB_CONVERSION_ERROR 0.6
+
+// Definition for our own sampler type, to mirror the cl_sampler internals
+typedef struct {
+ cl_addressing_mode addressing_mode;
+ cl_filter_mode     filter_mode;
+ bool               normalized_coords;
+} image_sampler_data;
+
+int round_to_even( float v );
+
+#define NORMALIZE( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max : round_to_even( v * max ) ) )
+#define NORMALIZE_UNROUNDED( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max :  v * max ) )
+#define NORMALIZE_SIGNED( v, min, max ) ( v  < -1.0f ? min : ( v > 1.f ? max : round_to_even( v * max ) ) )
+#define NORMALIZE_SIGNED_UNROUNDED( v, min, max ) ( v  < -1.0f ? min : ( v > 1.f ? max : v * max ) )
+#define CONVERT_INT( v, min, max, max_val)  ( v < min ? min : ( v > max ? max_val : round_to_even( v ) ) )
+#define CONVERT_UINT( v, max, max_val)  ( v < 0 ? 0 : ( v > max ? max_val : round_to_even( v ) ) )
+
+extern void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err = false, int t = 0 );
+extern void print_write_header( cl_image_format *format, bool err);
+extern void print_header( cl_image_format *format, bool err );
+extern bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind );
+extern bool check_minimum_supported( cl_image_format *formatList, unsigned int numFormats, cl_mem_flags flags );
+
+extern size_t get_format_type_size( const cl_image_format *format );
+extern size_t get_channel_data_type_size( cl_channel_type channelType );
+extern size_t get_format_channel_count( const cl_image_format *format );
+extern size_t get_channel_order_channel_count( cl_channel_order order );
+cl_channel_type  get_channel_type_from_name( const char *name );
+cl_channel_order  get_channel_order_from_name( const char *name );
+extern int    is_format_signed( const cl_image_format *format );
+extern size_t get_pixel_size( cl_image_format *format );
+
+/* Helper to get any ol image format as long as it is 8-bits-per-channel */
+extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
+
+/* Helper to get any ol image format as long as it is 32-bits-per-channel */
+extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
+
+int random_in_range( int minV, int maxV, MTdata d );
+int random_log_in_range( int minV, int maxV, MTdata d );
+
+typedef struct
+{
+    size_t width;
+    size_t height;
+    size_t depth;
+    size_t rowPitch;
+    size_t slicePitch;
+    size_t arraySize;
+    cl_image_format *format;
+    cl_mem buffer;
+    cl_mem_object_type type;
+    cl_uint num_mip_levels;
+} image_descriptor;
+
+typedef struct
+{
+    float p[4];
+}FloatPixel;
+
+void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
+                   size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
+                   const cl_ulong maxIndividualAllocSize, const cl_ulong maxTotalAllocSize, cl_mem_object_type image_type, cl_image_format *format, int usingMaxPixelSize=0);
+extern size_t get_format_max_int( cl_image_format *format );
+
+extern cl_ulong get_image_size( image_descriptor const *imageInfo );
+extern cl_ulong get_image_size_mb( image_descriptor const *imageInfo );
+
+extern char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr<char> &Owner, MTdata d );
+
+extern int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                      void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ, size_t lod = 0 );
+
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     unsigned int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     float *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
+
+extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues,
+                            const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] );
+
+int has_alpha(cl_image_format *format);
+
+extern bool alpha_is_x(cl_image_format *format);
+
+extern bool is_sRGBA_order(cl_channel_order image_channel_order);
+
+inline float calculate_array_index( float coord, float extent );
+
+cl_uint compute_max_mip_levels( size_t width, size_t height, size_t depth);
+cl_ulong compute_mipmapped_image_size( image_descriptor imageInfo);
+size_t compute_mip_level_offset( image_descriptor * imageInfo , size_t lod);
+
+template <class T> void read_image_pixel( void *imageData, image_descriptor *imageInfo,
+                                         int x, int y, int z, T *outData, int lod )
+{
+    float convert_half_to_float( unsigned short halfValue );
+    size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth, slice_pitch_lod = 0/*imageInfo->slicePitch*/ , row_pitch_lod = 0/*imageInfo->rowPitch*/;
+    width_lod = ( imageInfo->width >> lod) ?( imageInfo->width >> lod):1;
+
+    if ( imageInfo->type  != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
+        height_lod = ( imageInfo->height >> lod) ?( imageInfo->height >> lod):1;
+
+    if(imageInfo->type == CL_MEM_OBJECT_IMAGE3D)
+       depth_lod = ( imageInfo->depth >> lod) ? ( imageInfo->depth >> lod) : 1;
+    row_pitch_lod = (imageInfo->num_mip_levels > 0)? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch;
+    slice_pitch_lod = (imageInfo->num_mip_levels > 0)? (row_pitch_lod * height_lod): imageInfo->slicePitch;
+
+    // correct depth_lod and height_lod for array image types in order to avoid
+    // return
+    if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY && height_lod == 1 && depth_lod == 1) {
+    depth_lod = 0;
+    height_lod = 0;
+
+    }
+
+    if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && depth_lod == 1) {
+      depth_lod = 0;
+    }
+
+    if ( x < 0 || x >= (int)width_lod
+               || ( height_lod != 0 && ( y < 0 || y >= (int)height_lod ) )
+               || ( depth_lod != 0 && ( z < 0 || z >= (int)depth_lod ) )
+               || ( imageInfo->arraySize != 0 && ( z < 0 || z >= (int)imageInfo->arraySize ) ) )
+    {
+        // Border color
+        if (imageInfo->format->image_channel_order == CL_DEPTH)
+        {
+            outData[ 0 ] = 1;
+        }
+        else {
+            outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
+            if (!has_alpha(imageInfo->format))
+                outData[3] = 1;
+        }
+        return;
+    }
+
+    cl_image_format *format = imageInfo->format;
+
+    unsigned int i;
+    T tempData[ 4 ];
+
+    // Advance to the right spot
+    char *ptr = (char *)imageData;
+    size_t pixelSize = get_pixel_size( format );
+
+    ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
+
+    // OpenCL only supports reading floats from certain formats
+    switch( format->image_channel_data_type )
+    {
+        case CL_SNORM_INT8:
+        {
+            cl_char *dPtr = (cl_char *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_UNORM_INT8:
+        {
+            cl_uchar *dPtr = (cl_uchar *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_SIGNED_INT8:
+        {
+            cl_char *dPtr = (cl_char *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_UNSIGNED_INT8:
+        {
+            cl_uchar *dPtr = (cl_uchar*)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_SNORM_INT16:
+        {
+            cl_short *dPtr = (cl_short *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_UNORM_INT16:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_SIGNED_INT16:
+        {
+            cl_short *dPtr = (cl_short *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_UNSIGNED_INT16:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_HALF_FLOAT:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)convert_half_to_float( dPtr[ i ] );
+            break;
+        }
+
+        case CL_SIGNED_INT32:
+        {
+            cl_int *dPtr = (cl_int *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_UNSIGNED_INT32:
+        {
+            cl_uint *dPtr = (cl_uint *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_UNORM_SHORT_565:
+        {
+            cl_ushort *dPtr = (cl_ushort*)ptr;
+            tempData[ 0 ] = (T)( dPtr[ 0 ] >> 11 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
+            break;
+        }
+
+#ifdef OBSOLETE_FORMAT
+        case CL_UNORM_SHORT_565_REV:
+        {
+            unsigned short *dPtr = (unsigned short *)ptr;
+            tempData[ 2 ] = (T)( dPtr[ 0 ] >> 11 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
+            break;
+        }
+
+        case CL_UNORM_SHORT_555_REV:
+        {
+            unsigned short *dPtr = (unsigned short *)ptr;
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
+            break;
+        }
+
+        case CL_UNORM_INT_8888:
+        {
+            unsigned int *dPtr = (unsigned int *)ptr;
+            tempData[ 3 ] = (T)( dPtr[ 0 ] >> 24 );
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 0xff );
+            break;
+        }
+        case CL_UNORM_INT_8888_REV:
+        {
+            unsigned int *dPtr = (unsigned int *)ptr;
+            tempData[ 0 ] = (T)( dPtr[ 0 ] >> 24 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
+            tempData[ 3 ] = (T)( dPtr[ 0 ] & 0xff );
+            break;
+        }
+
+        case CL_UNORM_INT_101010_REV:
+        {
+            unsigned int *dPtr = (unsigned int *)ptr;
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 0x3ff );
+            break;
+        }
+#endif
+        case CL_UNORM_SHORT_555:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
+            break;
+        }
+
+        case CL_UNORM_INT_101010:
+        {
+            cl_uint *dPtr = (cl_uint *)ptr;
+            tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 0x3ff );
+            break;
+        }
+
+        case CL_FLOAT:
+        {
+            cl_float *dPtr = (cl_float *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+#ifdef CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+        {
+            cl_float *dPtr = (cl_float *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ] + 0x4000;
+            break;
+        }
+#endif
+    }
+
+
+    outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
+    outData[ 3 ] = 1;
+
+    if( format->image_channel_order == CL_A )
+    {
+        outData[ 3 ] = tempData[ 0 ];
+    }
+    else if( format->image_channel_order == CL_R   )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+    }
+    else if( format->image_channel_order == CL_Rx   )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+    }
+    else if( format->image_channel_order == CL_RA )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 1 ];
+    }
+    else if( format->image_channel_order == CL_RG  )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+    }
+    else if( format->image_channel_order == CL_RGx  )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+    }
+    else if(( format->image_channel_order == CL_RGB  ) || ( format->image_channel_order == CL_sRGB  ))
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
+    }
+    else if(( format->image_channel_order == CL_RGBx  ) || ( format->image_channel_order == CL_sRGBx  ))
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
+        outData[ 3 ] = 0;
+    }
+    else if(( format->image_channel_order == CL_RGBA ) || ( format->image_channel_order == CL_sRGBA ))
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
+        outData[ 3 ] = tempData[ 3 ];
+    }
+    else if( format->image_channel_order == CL_ARGB )
+    {
+        outData[ 0 ] = tempData[ 1 ];
+        outData[ 1 ] = tempData[ 2 ];
+        outData[ 2 ] = tempData[ 3 ];
+        outData[ 3 ] = tempData[ 0 ];
+    }
+    else if(( format->image_channel_order == CL_BGRA ) || ( format->image_channel_order == CL_sBGRA ))
+    {
+        outData[ 0 ] = tempData[ 2 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 3 ];
+    }
+    else if( format->image_channel_order == CL_INTENSITY )
+    {
+        outData[ 1 ] = tempData[ 0 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 0 ];
+    }
+    else if( format->image_channel_order == CL_LUMINANCE )
+    {
+        outData[ 1 ] = tempData[ 0 ];
+        outData[ 2 ] = tempData[ 0 ];
+    }
+    else if( format->image_channel_order == CL_DEPTH  )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+    }
+#ifdef CL_1RGB_APPLE
+    else if( format->image_channel_order == CL_1RGB_APPLE )
+    {
+        outData[ 0 ] = tempData[ 1 ];
+        outData[ 1 ] = tempData[ 2 ];
+        outData[ 2 ] = tempData[ 3 ];
+        outData[ 3 ] = 0xff;
+    }
+#endif
+#ifdef CL_BGR1_APPLE
+    else if( format->image_channel_order == CL_BGR1_APPLE )
+    {
+        outData[ 0 ] = tempData[ 2 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = 0xff;
+    }
+#endif
+    else
+    {
+        log_error("Invalid format:");
+        print_header(format, true);
+    }
+}
+
+template <class T> void read_image_pixel( void *imageData, image_descriptor *imageInfo,
+                                         int x, int y, int z, T *outData )
+{
+  read_image_pixel<T>( imageData, imageInfo, x, y, z, outData, 0);
+}
+
+// Stupid template rules
+bool get_integer_coords( float x, float y, float z,
+                        size_t width, size_t height, size_t depth,
+                        image_sampler_data *imageSampler, image_descriptor *imageInfo,
+                        int &outX, int &outY, int &outZ );
+bool get_integer_coords_offset( float x, float y, float z,
+                               float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                               size_t width, size_t height, size_t depth,
+                               image_sampler_data *imageSampler, image_descriptor *imageInfo,
+                               int &outX, int &outY, int &outZ );
+
+
+template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
+                                                  float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                                  image_sampler_data *imageSampler, T *outData, int lod )
+{
+    int iX = 0, iY = 0, iZ = 0;
+
+    float max_w = imageInfo->width;
+    float max_h;
+    float max_d;
+
+    switch (imageInfo->type) {
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+            max_h = imageInfo->arraySize;
+            max_d = 0;
+            break;
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+            max_h = imageInfo->height;
+            max_d = imageInfo->arraySize;
+            break;
+        default:
+            max_h = imageInfo->height;
+            max_d = imageInfo->depth;
+            break;
+    }
+
+    if( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
+    {
+        switch (imageInfo->type) {
+            case CL_MEM_OBJECT_IMAGE3D:
+                max_d = (float)((imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1);
+            case CL_MEM_OBJECT_IMAGE2D:
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                max_h = (float)((imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1);
+                break;
+            default:
+                ;
+
+        }
+        max_w = (float)((imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1);
+    }
+    get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, max_w, max_h, max_d, imageSampler, imageInfo, iX, iY, iZ );
+
+    read_image_pixel<T>( imageData, imageInfo, iX, iY, iZ, outData, lod );
+}
+
+template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
+                                                  float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                                  image_sampler_data *imageSampler, T *outData)
+{
+  sample_image_pixel_offset<T>( imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset, zAddressOffset,
+    imageSampler,  outData, 0);
+}
+
+template <class T> void sample_image_pixel( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, image_sampler_data *imageSampler, T *outData )
+{
+    return sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData);
+}
+
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms );
+
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
+
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms );
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
+
+
+extern void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results,  float *errors );
+
+extern char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d, bool image2DFromBuffer = false );
+
+// deprecated
+//extern bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue );
+
+extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
+extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
+extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
+extern int issubnormal(float);
+
+
+#define errMax( _x , _y )       ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )
+
+static inline cl_uint abs_diff_uint( cl_uint x, cl_uint y )
+{
+    return y > x ? y - x : x - y;
+}
+
+static inline cl_uint abs_diff_int( cl_int x, cl_int y )
+{
+    return (cl_uint) (y > x ? y - x : x - y);
+}
+
+static inline cl_float relative_error( float test, float expected )
+{
+    // 0-0/0 is 0 in this case, not NaN
+    if( test == 0.0f && expected == 0.0f )
+        return 0.0f;
+
+    return (test - expected) / expected;
+}
+
+extern float random_float(float low, float high);
+
+class CoordWalker
+{
+public:
+    CoordWalker( void * coords, bool useFloats, size_t vecSize );
+    ~CoordWalker();
+
+    cl_float    Get( size_t idx, size_t el );
+
+protected:
+    cl_float * mFloatCoords;
+    cl_int * mIntCoords;
+    size_t    mVecSize;
+};
+
+extern int  DetectFloatToHalfRoundingMode( cl_command_queue );  // Returns CL_SUCCESS on success
+
+int inline is_half_nan( cl_ushort half ){ return (half & 0x7fff) > 0x7c00; }
+
+cl_ushort convert_float_to_half( cl_float f );
+cl_float  convert_half_to_float( cl_ushort h );
+
+extern double sRGBmap(float fc);
+
+#endif // _imageHelpers_h
--- a/test_common/harness/kernelHelpers.c
+++ b/test_common/harness/kernelHelpers.c
--- a/test_common/harness/kernelHelpers.h
+++ b/test_common/harness/kernelHelpers.h
@@ -0,0 +1,133 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _kernelHelpers_h
+#define _kernelHelpers_h
+
+#include "compat.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined (__MINGW32__)
+#include <malloc.h>
+#endif
+
+#include <string.h>
+
+#ifdef __APPLE__
+    #include <OpenCL/opencl.h>
+#else
+    #include <CL/opencl.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+/*
+ *  The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel:
+ *
+ *  const char *source = {
+ *      INIT_OPENCL_DEBUG_INFO
+ *      "__kernel void foo( int x )\n"
+ *      "{\n"
+ *      "   ...\n"
+ *      "}\n"
+ *  };
+ */
+#define INIT_OPENCL_DEBUG_INFO                      SET_OPENCL_LINE_INFO( __LINE__, __FILE__ )
+#define SET_OPENCL_LINE_INFO(_line, _file)          "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
+#ifndef STRINGIFY_VALUE
+    #define STRINGIFY_VALUE(_x)                     STRINGIFY(_x)
+#endif
+#ifndef STRINGIFY
+    #define STRINGIFY(_x)                           #_x
+#endif
+
+const int MAX_LEN_FOR_KERNEL_LIST = 20;
+
+/* Helper that creates a single program and kernel from a single-kernel program source */
+extern int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, const char *buildOptions=NULL );
+extern int create_single_kernel_helper_with_build_options( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines,
+                                                          const char **kernelProgram, const char *kernelName, const char *buildOptions );
+extern int create_single_kernel_helper_create_program(cl_context context, cl_program *outProgram, unsigned int numKernelLines, const char **kernelProgram, const char *buildOptions = NULL);
+
+/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
+extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize );
+
+/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
+extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
+
+/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
+extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
+
+/* Helper to get major/minor number for a device */
+extern int get_device_version( cl_device_id id, size_t* major, size_t* minor);
+
+/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
+extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
+
+/* Helper to determine if an extension is supported by a device */
+extern int is_extension_available( cl_device_id device, const char *extensionName );
+
+/* Helper to determine if a device supports an image format */
+extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt );
+
+/* Helper to get pixel size for a pixel format */
+size_t get_pixel_bytes( const cl_image_format *fmt );
+
+/* Verify the given device supports images. 0 means you're good to go, otherwise an error */
+extern int verifyImageSupport( cl_device_id device );
+
+/* Checks that the given device supports images. Same as verify, but doesn't print an error */
+extern int checkForImageSupport( cl_device_id device );
+extern int checkFor3DImageSupport( cl_device_id device );
+
+/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */
+extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop );
+
+/* Helper for aligned memory allocation */
+void * align_malloc(size_t size, size_t alignment);
+void   align_free(void *);
+
+/* Helper to obtain the min alignment for a given context, i.e the max of all min alignments for devices attached to the context*/
+size_t get_min_alignment(cl_context context);
+
+/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
+cl_device_fp_config get_default_rounding_mode( cl_device_id device );
+
+#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device )    \
+    if( checkForImageSupport( device ) )    \
+    {    \
+        log_info( "\n\tNote: device does not support images. Skipping test...\n" );    \
+        return 0;    \
+    }
+
+#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )    \
+    if( checkFor3DImageSupport( device ) )    \
+    {    \
+        log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" );    \
+        return 0;    \
+    }
+
+/* Prints out the standard device header for all tests given the device to print for */
+extern int printDeviceHeader( cl_device_id device );
+
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#endif // _kernelHelpers_h
--- a/test_common/harness/mingw_compat.c
+++ b/test_common/harness/mingw_compat.c
@@ -0,0 +1,59 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#if defined(__MINGW32__)
+
+#include "mingw_compat.h"
+#include <stdio.h>
+#include <string.h>
+
+//This function is unavailable on various mingw compilers,
+//especially 64 bit so implementing it here
+const char *basename_dot=".";
+char*
+basename(char *path)
+{
+    char *p = path, *b = NULL;
+    int len = strlen(path);
+
+    if (path == NULL) {
+        return (char*)basename_dot;
+    }
+
+    // Not absolute path on windows
+    if (path[1] != ':') {
+        return path;
+    }
+
+    // Trim trailing path seperators
+    if (path[len - 1]  == '\\' ||
+        path[len - 1]  == '/' ) {
+        len--;
+        path[len] = '\0';
+    }
+
+    while (len) {
+        while((*p != '\\' || *p != '/')  && len) {
+            p++;
+            len--;
+        }
+        p++;
+        b = p;
+     }
+
+     return b;
+}
+
+#endif
--- a/test_common/harness/mingw_compat.h
+++ b/test_common/harness/mingw_compat.h
@@ -0,0 +1,31 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef MINGW_COMPAT_H
+#define MINGW_COMPAT_H
+
+#if defined(__MINGW32__)
+char *basename(char *path);
+#include <malloc.h>
+
+#if defined(__MINGW64__)
+//mingw-w64 doesnot have __mingw_aligned_malloc, instead it has _aligned_malloc
+#define __mingw_aligned_malloc _aligned_malloc
+#define __mingw_aligned_free _aligned_free
+#include <stddef.h>
+#endif //(__MINGW64__)
+
+#endif //(__MINGW32__)
+#endif // MINGW_COMPAT_H
--- a/test_common/harness/msvc9.c
+++ b/test_common/harness/msvc9.c
@@ -0,0 +1,772 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "compat.h"
+
+#if defined ( _MSC_VER )
+
+#include <limits.h>
+#include <stdlib.h>
+
+#include <CL/cl.h>
+
+#include <windows.h>
+
+#if ! defined( __INTEL_COMPILER )
+
+///////////////////////////////////////////////////////////////////
+//
+//                   rint, rintf
+//
+///////////////////////////////////////////////////////////////////
+
+float copysignf( float x, float y )
+{
+    union{ cl_uint u; float f; }ux, uy;
+
+    ux.f = x;
+    uy.f = y;
+
+    ux.u = (ux.u & 0x7fffffffU) | (uy.u & 0x80000000U);
+
+    return ux.f;
+}
+
+double copysign( double x, double y )
+{
+    union{ cl_ulong u; double f; }ux, uy;
+
+    ux.f = x;
+    uy.f = y;
+
+    ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL);
+
+    return ux.f;
+}
+
+long double copysignl( long double x, long double y )
+{
+    union
+    {
+        long double f;
+        struct{ cl_ulong m; cl_ushort sexp; }u;
+    }ux, uy;
+
+    ux.f = x;
+    uy.f = y;
+
+    ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000);
+
+    return ux.f;
+}
+
+float rintf(float x)
+{
+    float absx = fabsf(x);
+
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
+    {
+        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
+        float rounded = x + magic;
+        rounded -= magic;
+        x = copysignf( rounded, x );
+    }
+
+    return x;
+}
+
+double rint(double x)
+{
+    double absx = fabs(x);
+
+    if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
+    {
+        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
+        double rounded = x + magic;
+        rounded -= magic;
+        x = copysign( rounded, x );
+    }
+
+    return x;
+}
+
+long double rintl(long double x)
+{
+    double absx = fabs(x);
+
+    if( absx < 9223372036854775808.0L /* 0x1.0p64f */ )
+    {
+        long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
+        long double rounded = x + magic;
+        rounded -= magic;
+        x = copysignl( rounded, x );
+    }
+
+    return x;
+}
+
+
+///////////////////////////////////////////////////////////////////
+//
+//                   ilogb, ilogbf, ilogbl
+//
+///////////////////////////////////////////////////////////////////
+#ifndef FP_ILOGB0
+    #define FP_ILOGB0   INT_MIN
+#endif
+
+#ifndef FP_ILOGBNAN
+    #define FP_ILOGBNAN INT_MIN
+#endif
+
+int ilogb (double x)
+{
+    union{ double f; cl_ulong u;} u;
+    u.f = x;
+
+    cl_ulong absx = u.u & CL_LONG_MAX;
+    if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
+    {
+        switch( absx )
+        {
+            case 0:
+                return FP_ILOGB0;
+            case 0x7ff0000000000000ULL:
+                return INT_MAX;
+            default:
+                if( absx > 0x7ff0000000000000ULL )
+                    return FP_ILOGBNAN;
+
+                // subnormal
+                u.u = absx | 0x3ff0000000000000ULL;
+                u.f -= 1.0;
+                return (u.u >> 52) - (1023 + 1022);
+        }
+    }
+
+    return (absx >> 52) - 1023;
+}
+
+
+int ilogbf (float x)
+{
+    union{ float f; cl_uint u;} u;
+    u.f = x;
+
+    cl_uint absx = u.u & 0x7fffffff;
+    if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
+    {
+        switch( absx )
+        {
+            case 0:
+                return FP_ILOGB0;
+            case 0x7f800000U:
+                return INT_MAX;
+            default:
+                if( absx > 0x7f800000 )
+                    return FP_ILOGBNAN;
+
+                // subnormal
+                u.u = absx | 0x3f800000U;
+                u.f -= 1.0f;
+                return (u.u >> 23) - (127 + 126);
+        }
+    }
+
+    return (absx >> 23) - 127;
+}
+
+int ilogbl (long double x)
+{
+    union
+    {
+        long double f;
+        struct{ cl_ulong m; cl_ushort sexp; }u;
+    } u;
+    u.f = x;
+
+    int exp = u.u.sexp & 0x7fff;
+    if( 0 == exp )
+    {
+        if( 0 == u.u.m )
+            return FP_ILOGB0;
+
+        //subnormal
+        u.u.sexp = 0x3fff;
+        u.f -= 1.0f;
+        exp = u.u.sexp & 0x7fff;
+
+        return exp - (0x3fff + 0x3ffe);
+    }
+    else if( 0x7fff == exp )
+    {
+        if( u.u.m & CL_LONG_MAX )
+            return FP_ILOGBNAN;
+
+        return INT_MAX;
+    }
+
+    return exp - 0x3fff;
+}
+
+
+
+///////////////////////////////////////////////////////////////////
+//
+//                 fmax, fmin, fmaxf, fminf
+//
+///////////////////////////////////////////////////////////////////
+
+static void GET_BITS_SP32(float fx, unsigned int* ux)
+{
+    volatile union {float f; unsigned int u;} _bitsy;
+    _bitsy.f = (fx);
+    *ux = _bitsy.u;
+}
+/* static void GET_BITS_SP32(float fx, unsigned int* ux) */
+/* { */
+/*     volatile union {float f; unsigned int i;} _bitsy; */
+/*     _bitsy.f = (fx); */
+/*     *ux = _bitsy.i; */
+/* } */
+static void PUT_BITS_SP32(unsigned int ux, float* fx)
+{
+    volatile union {float f; unsigned int u;} _bitsy;
+    _bitsy.u = (ux);
+    *fx = _bitsy.f;
+}
+/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
+/* { */
+/*     volatile union {float f; unsigned int i;} _bitsy; */
+/*     _bitsy.i = (ux); */
+/*     *fx = _bitsy.f; */
+/* } */
+static void GET_BITS_DP64(double dx, unsigned __int64* lx)
+{
+    volatile union {double d; unsigned __int64 l;} _bitsy;
+    _bitsy.d = (dx);
+    *lx = _bitsy.l;
+}
+static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
+{
+    volatile union {double d; unsigned __int64 l;} _bitsy;
+    _bitsy.l = (lx);
+    *dx = _bitsy.d;
+}
+
+#if 0
+int SIGNBIT_DP64(double x )
+{
+    int hx;
+    _GET_HIGH_WORD(hx,x);
+    return((hx>>31));
+}
+#endif
+
+/* fmax(x, y) returns the larger (more positive) of x and y.
+   NaNs are treated as missing values: if one argument is NaN,
+   the other argument is returned. If both arguments are NaN,
+   the first argument is returned. */
+
+/* This works so long as the compiler knows that (x != x) means
+   that x is NaN; gcc does. */
+double fmax(double x, double y)
+{
+    if( isnan(y) )
+        return x;
+
+    return x >= y ? x : y;
+}
+
+
+/* fmin(x, y) returns the smaller (more negative) of x and y.
+   NaNs are treated as missing values: if one argument is NaN,
+   the other argument is returned. If both arguments are NaN,
+   the first argument is returned. */
+
+double fmin(double x, double y)
+{
+    if( isnan(y) )
+        return x;
+
+    return x <= y ? x : y;
+}
+
+
+float fmaxf( float x, float y )
+{
+    if( isnan(y) )
+        return x;
+
+    return x >= y ? x : y;
+}
+
+/* fminf(x, y) returns the smaller (more negative) of x and y.
+   NaNs are treated as missing values: if one argument is NaN,
+   the other argument is returned. If both arguments are NaN,
+   the first argument is returned. */
+
+float fminf(float x, float y)
+{
+    if( isnan(y) )
+        return x;
+
+    return x <= y ? x : y;
+}
+
+long double scalblnl(long double x, long n)
+{
+    union
+    {
+        long double d;
+        struct{ cl_ulong m; cl_ushort sexp;}u;
+    }u;
+    u.u.m = CL_LONG_MIN;
+
+    if( x == 0.0L || n < -2200)
+        return copysignl( 0.0L, x );
+
+    if( n > 2200 )
+        return INFINITY;
+
+    if( n < 0 )
+    {
+        u.u.sexp = 0x3fff - 1022;
+        while( n <= -1022 )
+        {
+            x *= u.d;
+            n += 1022;
+        }
+        u.u.sexp = 0x3fff + n;
+        x *= u.d;
+        return x;
+    }
+
+    if( n > 0 )
+    {
+        u.u.sexp = 0x3fff + 1023;
+        while( n >= 1023 )
+        {
+            x *= u.d;
+            n -= 1023;
+        }
+        u.u.sexp = 0x3fff + n;
+        x *= u.d;
+        return x;
+    }
+
+    return x;
+}
+
+///////////////////////////////////////////////////////////////////
+//
+//                          log2
+//
+///////////////////////////////////////////////////////////////////
+const static cl_double log_e_base2   = 1.4426950408889634074;
+const static cl_double log_10_base2  = 3.3219280948873623478;
+
+//double log10(double x);
+
+double log2(double x)
+{
+    return 1.44269504088896340735992468100189214 * log(x);
+}
+
+long double log2l(long double x)
+{
+    return 1.44269504088896340735992468100189214L * log(x);
+}
+
+double trunc(double x)
+{
+    double absx = fabs(x);
+
+    if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
+    {
+        cl_long rounded = x;
+        x = copysign( (double) rounded, x );
+    }
+
+    return x;
+}
+
+float  truncf(float x)
+{
+    float absx = fabsf(x);
+
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
+    {
+        cl_int rounded = x;
+        x = copysignf( (float) rounded, x );
+    }
+
+    return x;
+}
+
+long lround(double x)
+{
+    double absx = fabs(x);
+
+    if( absx < 0.5 )
+        return 0;
+
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */)
+    {
+        absx += 0.5;
+        cl_long rounded = absx;
+        absx = rounded;
+        x = copysign( absx, x );
+    }
+
+    if( x >= (double) LONG_MAX )
+        return LONG_MAX;
+
+    return (long) x;
+}
+
+long lroundf(float x)
+{
+    float absx = fabsf(x);
+
+    if( absx < 0.5f )
+        return 0;
+
+    if( absx < 8388608.0f )
+    {
+        absx += 0.5f;
+        cl_int rounded = absx;
+        absx = rounded;
+        x = copysignf(  absx, x );
+    }
+
+    if( x >= (float) LONG_MAX )
+        return LONG_MAX;
+
+    return (long) x;
+}
+
+double round(double x)
+{
+    double absx = fabs(x);
+
+    if( absx < 0.5 )
+        return copysign( 0.0, x);
+
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */)
+    {
+        absx += 0.5;
+        cl_long rounded = absx;
+        absx = rounded;
+        x = copysign( absx, x );
+    }
+
+    return x;
+}
+
+float  roundf(float x)
+{
+    float absx = fabsf(x);
+
+    if( absx < 0.5f )
+        return copysignf( 0.0f, x);
+
+    if( absx < 8388608.0f )
+    {
+        absx += 0.5f;
+        cl_int rounded = absx;
+        absx = rounded;
+        x = copysignf( absx, x );
+    }
+
+    return x;
+}
+
+long double roundl(long double x)
+{
+    long double absx = fabsl(x);
+
+    if( absx < 0.5L )
+        return copysignl( 0.0L, x);
+
+    if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
+    {
+        absx += 0.5L;
+        cl_ulong rounded = absx;
+        absx = rounded;
+        x = copysignl( absx, x );
+    }
+
+    return x;
+}
+
+float cbrtf( float x )
+{
+    float z = pow( fabs((double) x), 1.0 / 3.0 );
+    return copysignf( z, x );
+}
+
+double cbrt( double x )
+{
+    return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
+}
+
+long int lrint (double x)
+{
+    double absx = fabs(x);
+
+    if( x >= (double) LONG_MAX )
+        return LONG_MAX;
+
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
+    {
+        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
+        double rounded = x + magic;
+        rounded -= magic;
+        return (long int) rounded;
+    }
+
+    return (long int) x;
+}
+
+long int lrintf (float x)
+{
+    float absx = fabsf(x);
+
+    if( x >= (float) LONG_MAX )
+        return LONG_MAX;
+
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
+    {
+        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
+        float rounded = x + magic;
+        rounded -= magic;
+        return (long int) rounded;
+    }
+
+    return (long int) x;
+}
+
+
+///////////////////////////////////////////////////////////////////
+//
+//                  fenv functions
+//
+///////////////////////////////////////////////////////////////////
+
+int fetestexcept(int excepts)
+{
+    unsigned int status = _statusfp();
+    return excepts & (
+        ((status & _SW_INEXACT) ? FE_INEXACT : 0)      |
+        ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)  |
+        ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)    |
+        ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
+        ((status & _SW_INVALID) ? FE_INVALID : 0)
+    );
+}
+
+int feclearexcept(int excepts)
+{
+    _clearfp();
+    return 0;
+}
+
+#endif // __INTEL_COMPILER
+
+#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
+
+float make_nan()
+{
+/* This is the IEEE 754 single-precision format:
+    unsigned int mantissa:  22;
+    unsigned int quiet_nan:  1;
+    unsigned int exponent:   8;
+    unsigned int negative:   1;
+*/
+     //const static unsigned
+     static const int32_t _nan = 0x7fc00000;
+     return *(const float*)(&_nan);
+}
+
+float nanf( const char* str)
+{
+    cl_uint u = atoi( str );
+    u |= 0x7fc00000U;
+    return *( float*)(&u);
+}
+
+
+double nan( const char* str)
+{
+    cl_ulong u = atoi( str );
+    u |= 0x7ff8000000000000ULL;
+    return *( double*)(&u);
+}
+
+// double check this implementatation
+long double nanl( const char* str)
+{
+    union
+    {
+        long double f;
+        struct { cl_ulong m; cl_ushort sexp; }u;
+    }u;
+    u.u.sexp = 0x7fff;
+    u.u.m = 0x8000000000000000ULL | atoi( str );
+
+    return u.f;
+}
+
+#endif
+
+///////////////////////////////////////////////////////////////////
+//
+//                  misc functions
+//
+///////////////////////////////////////////////////////////////////
+
+/*
+// This function is commented out because the Windows implementation should never call munmap.
+// If it is calling it, we have a bug. Please file a bugzilla.
+int munmap(void *addr, size_t len)
+{
+// FIXME: this is not correct.  munmap is like free()    http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
+
+    return (int)VirtualAlloc( (LPVOID)addr, len,
+                  MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
+}
+*/
+
+uint64_t ReadTime( void )
+{
+    LARGE_INTEGER current;
+    QueryPerformanceCounter(&current);
+    return (uint64_t)current.QuadPart;
+}
+
+double SubtractTime( uint64_t endTime, uint64_t startTime )
+{
+    static double PerformanceFrequency = 0.0;
+
+    if (PerformanceFrequency == 0.0) {
+        LARGE_INTEGER frequency;
+        QueryPerformanceFrequency(&frequency);
+        PerformanceFrequency = (double) frequency.QuadPart;
+    }
+
+    return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
+}
+
+int cf_signbit(double x)
+{
+    union
+    {
+        double f;
+        cl_ulong u;
+    }u;
+    u.f = x;
+    return u.u >> 63;
+}
+
+int cf_signbitf(float x)
+{
+    union
+    {
+        float f;
+        cl_uint u;
+    }u;
+    u.f = x;
+    return u.u >> 31;
+}
+
+float int2float (int32_t ix)
+{
+    union {
+        float   f;
+        int32_t i;
+    } u;
+    u.i = ix;
+    return u.f;
+}
+
+int32_t float2int (float   fx)
+{
+    union {
+        float   f;
+        int32_t i;
+    } u;
+    u.f = fx;
+    return u.i;
+}
+
+#if !defined(_WIN64)
+/** Returns the number of leading 0-bits in x,
+    starting at the most significant bit position.
+    If x is 0, the result is undefined.
+*/
+int __builtin_clz(unsigned int pattern)
+{
+#if 0
+    int res;
+    __asm {
+        mov eax, pattern
+        bsr eax, eax
+        mov res, eax
+    }
+    return 31 - res;
+#endif
+    unsigned long index;
+    unsigned char res = _BitScanReverse( &index, pattern);
+    if (res) {
+        return 8*sizeof(int) - 1 - index;
+    } else {
+        return 8*sizeof(int);
+    }
+}
+#else
+int __builtin_clz(unsigned int pattern)
+{
+   int count;
+   if (pattern == 0u) {
+       return 32;
+   }
+   count = 31;
+   if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; }
+   if (pattern >=  1u<<8) { pattern >>=  8; count -=  8; }
+   if (pattern >=  1u<<4) { pattern >>=  4; count -=  4; }
+   if (pattern >=  1u<<2) { pattern >>=  2; count -=  2; }
+   if (pattern >=  1u<<1) {                 count -=  1; }
+   return count;
+}
+
+#endif // !defined(_WIN64)
+
+#include <intrin.h>
+#include <emmintrin.h>
+
+int usleep(int usec)
+{
+    Sleep((usec + 999) / 1000);
+    return 0;
+}
+
+unsigned int sleep( unsigned int sec )
+{
+    Sleep( sec * 1000 );
+    return 0;
+}
+
+#endif // defined( _MSC_VER )
--- a/test_common/harness/mt19937.c
+++ b/test_common/harness/mt19937.c
@@ -0,0 +1,280 @@
+/*
+   A C-program for MT19937, with initialization improved 2002/1/26.
+   Coded by Takuji Nishimura and Makoto Matsumoto.
+
+   Before using, initialize the state by using init_genrand(seed)
+   or init_by_array(init_key, key_length).
+
+   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+     1. Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+
+     2. Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+
+     3. The names of its contributors may not be used to endorse or promote
+        products derived from this software without specific prior written
+        permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+   Any feedback is very welcome.
+   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
+
+   Modifications for use in OpenCL by Ian Ollmann, Apple Inc.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mt19937.h"
+#include "mingw_compat.h"
+
+#ifdef __SSE2__
+    #include <emmintrin.h>
+#endif
+
+static void * align_malloc(size_t size, size_t alignment)
+{
+#if defined(_WIN32) && defined(_MSC_VER)
+    return _aligned_malloc(size, alignment);
+#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
+    void * ptr = NULL;
+#if defined(__ANDROID__)
+    ptr = memalign(alignment, size);
+    if ( ptr )
+        return ptr;
+#else
+    if (0 == posix_memalign(&ptr, alignment, size))
+        return ptr;
+#endif
+    return NULL;
+#elif defined(__MINGW32__)
+    return __mingw_aligned_malloc(size, alignment);
+#else
+    #error "Please add support OS for aligned malloc"
+#endif
+}
+
+static void   align_free(void * ptr)
+{
+#if defined(_WIN32) && defined(_MSC_VER)
+    _aligned_free(ptr);
+#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
+    return  free(ptr);
+#elif defined(__MINGW32__)
+    return __mingw_aligned_free(ptr);
+#else
+    #error "Please add support OS for aligned free"
+#endif
+}
+
+
+/* Period parameters */
+#define N 624   /* vector code requires multiple of 4 here */
+#define M 397
+#define MATRIX_A    (cl_uint) 0x9908b0dfUL   /* constant vector a */
+#define UPPER_MASK  (cl_uint) 0x80000000UL /* most significant w-r bits */
+#define LOWER_MASK  (cl_uint) 0x7fffffffUL /* least significant r bits */
+
+typedef struct _MTdata
+{
+    cl_uint mt[N];
+#ifdef __SSE2__
+    cl_uint cache[N];
+#endif
+    cl_int  mti;
+}_MTdata;
+
+/* initializes mt[N] with a seed */
+MTdata init_genrand(cl_uint s)
+{
+    MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
+    if( NULL != r )
+    {
+        cl_uint *mt = r->mt;
+        int mti = 0;
+        mt[0]= s; // & 0xffffffffUL;
+        for (mti=1; mti<N; mti++) {
+            mt[mti] = (cl_uint)
+            (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
+            /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+            /* In the previous versions, MSBs of the seed affect   */
+            /* only MSBs of the array mt[].                        */
+            /* 2002/01/09 modified by Makoto Matsumoto             */
+    //        mt[mti] &= 0xffffffffUL;
+            /* for >32 bit machines */
+        }
+        r->mti = mti;
+    }
+
+    return r;
+}
+
+void    free_mtdata( MTdata d )
+{
+    if(d)
+        align_free(d);
+}
+
+/* generates a random number on [0,0xffffffff]-interval */
+cl_uint genrand_int32( MTdata d)
+{
+    /* mag01[x] = x * MATRIX_A  for x=0,1 */
+    static const cl_uint mag01[2]={0x0UL, MATRIX_A};
+#ifdef __SSE2__
+    static volatile int init = 0;
+    static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
+#endif
+
+
+    cl_uint *mt = d->mt;
+    cl_uint y;
+
+    if (d->mti == N)
+    { /* generate N words at one time */
+        int kk;
+
+#ifdef __SSE2__
+        if( 0 == init )
+        {
+            upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
+            lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
+            one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
+            matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
+            c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
+            c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
+            init = 1;
+        }
+#endif
+
+        kk = 0;
+#ifdef __SSE2__
+        // vector loop
+        for( ; kk + 4 <= N-M; kk += 4 )
+        {
+            __m128i vy = _mm_or_si128(  _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
+                                        _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v ));        //  ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
+
+            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v );                                         // y & 1 ? -1 : 0
+            __m128i vmag01 = _mm_and_si128( mask, matrix_a.v );                                                         // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
+            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) );    // mt[kk+M] ^ (y >> 1)
+            vr = _mm_xor_si128( vr, vmag01 );                                                                           // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
+            _mm_store_si128( (__m128i*) (mt + kk ), vr );
+        }
+#endif
+        for ( ;kk<N-M;kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
+        }
+
+#ifdef __SSE2__
+        // advance to next aligned location
+        for (;kk<N-1 && (kk & 3);kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
+        }
+
+        // vector loop
+        for( ; kk + 4 <= N-1; kk += 4 )
+        {
+            __m128i vy = _mm_or_si128(  _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
+                                        _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v ));        //  ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
+
+            __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v );                                         // y & 1 ? -1 : 0
+            __m128i vmag01 = _mm_and_si128( mask, matrix_a.v );                                                         // y & 1 ? MATRIX_A, 0    =  mag01[y & (cl_uint) 0x1UL]
+            __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) );          // mt[kk+M-N] ^ (y >> 1)
+            vr = _mm_xor_si128( vr, vmag01 );                                                                           // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
+            _mm_store_si128( (__m128i*) (mt + kk ), vr );
+        }
+#endif
+
+        for (;kk<N-1;kk++) {
+            y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
+            mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
+        }
+        y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
+        mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
+
+#ifdef __SSE2__
+        // Do the tempering ahead of time in vector code
+        for( kk = 0; kk + 4 <= N; kk += 4 )
+        {
+            __m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) );                            // y = mt[k];
+            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) );                             // y ^= (y >> 11);
+            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) );        // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
+            vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) );       // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
+            vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) );                             // y ^= (y >> 18);
+            _mm_store_si128( (__m128i*)(d->cache+kk), vy );
+        }
+#endif
+
+        d->mti = 0;
+    }
+#ifdef __SSE2__
+    y = d->cache[d->mti++];
+#else
+    y = mt[d->mti++];
+
+    /* Tempering */
+    y ^= (y >> 11);
+    y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
+    y ^= (y << 15) & (cl_uint) 0xefc60000UL;
+    y ^= (y >> 18);
+#endif
+
+
+    return y;
+}
+
+cl_ulong genrand_int64( MTdata d)
+{
+    return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d);
+}
+
+/* generates a random number on [0,1]-real-interval */
+double genrand_real1(MTdata d)
+{
+    return genrand_int32(d)*(1.0/4294967295.0);
+    /* divided by 2^32-1 */
+}
+
+/* generates a random number on [0,1)-real-interval */
+double genrand_real2(MTdata d)
+{
+    return genrand_int32(d)*(1.0/4294967296.0);
+    /* divided by 2^32 */
+}
+
+/* generates a random number on (0,1)-real-interval */
+double genrand_real3(MTdata d)
+{
+    return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0);
+    /* divided by 2^32 */
+}
+
+/* generates a random number on [0,1) with 53-bit resolution*/
+double genrand_res53(MTdata d)
+{
+    unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6;
+    return(a*67108864.0+b)*(1.0/9007199254740992.0);
+}
--- a/test_common/harness/mt19937.h
+++ b/test_common/harness/mt19937.h
@@ -0,0 +1,99 @@
+
+/*
+ *  mt19937.h
+ *
+ *  Mersenne Twister.
+ *
+   A C-program for MT19937, with initialization improved 2002/1/26.
+   Coded by Takuji Nishimura and Makoto Matsumoto.
+
+   Before using, initialize the state by using init_genrand(seed)
+   or init_by_array(init_key, key_length).
+
+   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+     1. Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+
+     2. Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+
+     3. The names of its contributors may not be used to endorse or promote
+        products derived from this software without specific prior written
+        permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+   Any feedback is very welcome.
+   http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+   email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
+ */
+
+#ifndef MT19937_H
+#define MT19937_H   1
+
+#if defined( __APPLE__ )
+    #include <OpenCL/cl_platform.h>
+#else
+    #include <CL/cl_platform.h>
+#endif
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+/*
+ *      Interfaces here have been modified from original sources so that they
+ *      are safe to call reentrantly, so long as a different MTdata is used
+ *      on each thread.
+ */
+
+typedef struct _MTdata  *MTdata;
+
+/* Create the random number generator with seed */
+MTdata init_genrand( cl_uint /*seed*/ );
+
+/* release memory used by a MTdata private data */
+void   free_mtdata( MTdata /*data*/ );
+
+/* generates a random number on [0,0xffffffff]-interval */
+cl_uint genrand_int32( MTdata /*data*/);
+
+/* generates a random number on [0,0xffffffffffffffffULL]-interval */
+cl_ulong genrand_int64( MTdata /*data*/);
+
+/* generates a random number on [0,1]-real-interval */
+double genrand_real1( MTdata /*data*/);
+
+/* generates a random number on [0,1)-real-interval */
+double genrand_real2( MTdata /*data*/);
+
+/* generates a random number on (0,1)-real-interval */
+double genrand_real3( MTdata /*data*/);
+
+/* generates a random number on [0,1) with 53-bit resolution*/
+double genrand_res53( MTdata /*data*/ );
+
+
+#ifdef __cplusplus
+    }
+#endif
+
+#endif  /* MT19937_H */
--- a/test_common/harness/os_helpers.cpp
+++ b/test_common/harness/os_helpers.cpp
@@ -0,0 +1,564 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "os_helpers.h"
+#include "errorHelpers.h"
+
+// =================================================================================================
+// C++ interface.
+// =================================================================================================
+
+#include <cerrno>     // errno, error constants
+#include <climits>    // PATH_MAX
+#include <cstdlib>    // abort, _splitpath, _makepath
+#include <cstring>    // strdup, strerror_r
+#include <sstream>
+
+#include <vector>
+
+#define CHECK_PTR( ptr )    \
+    if ( (ptr) == NULL ) {  \
+        abort();            \
+    }
+
+typedef std::vector< char > buffer_t;
+
+#if ! defined( PATH_MAX )
+    #define PATH_MAX 1000
+#endif
+
+int const _size  = PATH_MAX + 1;    // Initial buffer size for path.
+int const _count = 8;               // How many times we will try to double buffer size.
+
+// -------------------------------------------------------------------------------------------------
+// MacOS X
+// -------------------------------------------------------------------------------------------------
+
+#if defined( __APPLE__ )
+
+
+    #include <mach-o/dyld.h>    // _NSGetExecutablePath
+    #include <libgen.h>         // dirname
+
+
+    static
+    std::string
+    _err_msg(
+        int err,     // Error number (e. g. errno).
+        int level    // Nesting level, for avoiding infinite recursion.
+    ) {
+
+        /*
+            There are 3 incompatible versions of strerror_r:
+
+                char * strerror_r( int, char *, size_t );  // GNU version
+                int    strerror_r( int, char *, size_t );  // BSD version
+                int    strerror_r( int, char *, size_t );  // XSI version
+
+            BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
+
+        */
+
+        // BSD version of strerror_r.
+        buffer_t buffer( 100 );
+        int      count = _count;
+        for ( ; ; ) {
+            int rc = strerror_r( err, & buffer.front(), buffer.size() );
+            if ( rc == EINVAL ) {
+                // Error code is not recognized, but anyway we got the message.
+                return & buffer.front();
+            } else if ( rc == ERANGE ) {
+                // Buffer is not enough.
+                if ( count > 0 ) {
+                    // Enlarge the buffer.
+                    -- count;
+                    buffer.resize( buffer.size() * 2 );
+                } else {
+                    std::stringstream ostr;
+                    ostr
+                        << "Error " << err << " "
+                        << "(Getting error message failed: "
+                        << "Buffer of " << buffer.size() << " bytes is still too small"
+                        << ")";
+                    return ostr.str();
+                }; // if
+            } else if ( rc == 0 ) {
+                // We got the message.
+                return & buffer.front();
+            } else {
+                std::stringstream ostr;
+                ostr
+                    << "Error " << err << " "
+                    << "(Getting error message failed: "
+                    << ( level < 2 ? _err_msg( rc, level + 1 ) : "Oops" )
+                    << ")";
+                return ostr.str();
+            }; // if
+        }; // forever
+
+    } // _err_msg
+
+
+    std::string
+    dir_sep(
+    ) {
+        return "/";
+    } // dir_sep
+
+
+    std::string
+    exe_path(
+    ) {
+        buffer_t path( _size );
+        int      count = _count;
+        for ( ; ; ) {
+            uint32_t size = path.size();
+            int rc = _NSGetExecutablePath( & path.front(), & size );
+            if ( rc == 0 ) {
+                break;
+            }; // if
+            if ( count > 0 ) {
+                -- count;
+                path.resize( size );
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "_NSGetExecutablePath failed: Buffer of %lu bytes is still too small\n",
+                    (unsigned long) path.size()
+                );
+                exit( 2 );
+            }; // if
+        }; // forever
+        return & path.front();
+    } // exe_path
+
+
+    std::string
+    exe_dir(
+    ) {
+        std::string path = exe_path();
+        // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
+        buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
+        return dirname( & buffer.front() );
+    } // exe_dir
+
+
+#endif // __APPLE__
+
+// -------------------------------------------------------------------------------------------------
+// Linux
+// -------------------------------------------------------------------------------------------------
+
+#if defined( __linux__ )
+
+
+    #include <cerrno>      // errno
+    #include <libgen.h>    // dirname
+    #include <unistd.h>    // readlink
+
+
+    static
+    std::string
+    _err_msg(
+        int err,
+        int level
+    ) {
+
+        /*
+            There are 3 incompatible versions of strerror_r:
+
+                char * strerror_r( int, char *, size_t );  // GNU version
+                int    strerror_r( int, char *, size_t );  // BSD version
+                int    strerror_r( int, char *, size_t );  // XSI version
+
+            BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
+
+        */
+
+        #if defined(__ANDROID__) || ( ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 ) && ! _GNU_SOURCE )
+
+            // XSI version of strerror_r.
+            #warning Not tested!
+            buffer_t buffer( 200 );
+            int      count = _count;
+            for ( ; ; ) {
+                int rc = strerror_r( err, & buffer.front(), buffer.size() );
+                if ( rc == -1 ) {
+                    int _err = errno;
+                    if ( _err == ERANGE ) {
+                        if ( count > 0 ) {
+                            // Enlarge the buffer.
+                            -- count;
+                            buffer.resize( buffer.size() * 2 );
+                        } else {
+                            std::stringstream ostr;
+                            ostr
+                                << "Error " << err << " "
+                                << "(Getting error message failed: "
+                                << "Buffer of " << buffer.size() << " bytes is still too small"
+                                << ")";
+                            return ostr.str();
+                        }; // if
+                    } else {
+                        std::stringstream ostr;
+                        ostr
+                            << "Error " << err << " "
+                            << "(Getting error message failed: "
+                            << ( level < 2 ? _err_msg( _err, level + 1 ) : "Oops" )
+                            << ")";
+                        return ostr.str();
+                    }; // if
+                } else {
+                    // We got the message.
+                    return & buffer.front();
+                }; // if
+            }; // forever
+
+        #else
+
+            // GNU version of strerror_r.
+            char buffer[ 2000 ];
+            return strerror_r( err, buffer, sizeof( buffer ) );
+
+        #endif
+
+    } // _err_msg
+
+
+    std::string
+    dir_sep(
+    ) {
+        return "/";
+    } // dir_sep
+
+
+    std::string
+    exe_path(
+    ) {
+
+        static std::string const exe = "/proc/self/exe";
+
+        buffer_t    path( _size );
+        int         count = _count;  // Max number of iterations.
+
+        for ( ; ; ) {
+
+            ssize_t len = readlink( exe.c_str(), & path.front(), path.size() );
+
+            if ( len < 0 ) {
+                // Oops.
+                int err = errno;
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Reading symlink `%s' failed: %s\n",
+                    exe.c_str(), err_msg( err ).c_str()
+                );
+                exit( 2 );
+            }; // if
+
+            if ( len < path.size() ) {
+                // We got the path.
+                path.resize( len );
+                break;
+            }; // if
+
+            // Oops, buffer is too small.
+            if ( count > 0 ) {
+                -- count;
+                // Enlarge the buffer.
+                path.resize( path.size() * 2 );
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Reading symlink `%s' failed: Buffer of %lu bytes is still too small\n",
+                    exe.c_str(),
+                    (unsigned long) path.size()
+                );
+                exit( 2 );
+            }; // if
+
+        }; // forever
+
+        return std::string( & path.front(), path.size() );
+
+    } // exe_path
+
+
+    std::string
+    exe_dir(
+    ) {
+        std::string path = exe_path();
+        // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
+        buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
+        return dirname( & buffer.front() );
+    } // exe_dir
+
+#endif // __linux__
+
+// -------------------------------------------------------------------------------------------------
+// MS Windows
+// -------------------------------------------------------------------------------------------------
+
+#if defined( _WIN32 )
+
+
+    #include <windows.h>
+    #if defined( max )
+        #undef max
+    #endif
+
+    #include <cctype>
+    #include <algorithm>
+
+
+    static
+    std::string
+    _err_msg(
+        int err,
+        int level
+    ) {
+
+        std::string msg;
+
+        LPSTR  buffer = NULL;
+        DWORD  flags  =
+            FORMAT_MESSAGE_ALLOCATE_BUFFER |
+            FORMAT_MESSAGE_FROM_SYSTEM |
+            FORMAT_MESSAGE_IGNORE_INSERTS;
+
+        DWORD len =
+            FormatMessageA(
+                flags,
+                NULL,
+                err,
+                LANG_USER_DEFAULT,
+                reinterpret_cast< LPSTR >( & buffer ),
+                0,
+                NULL
+            );
+
+        if ( buffer == NULL || len == 0 ) {
+
+            int _err = GetLastError();
+            char str[1024] = { 0 };
+            snprintf(str, sizeof(str), "Error 0x%08x (Getting error message failed: %s )", err, ( level < 2 ? _err_msg( _err, level + 1 ).c_str() : "Oops" ));
+            msg = std::string(str);
+
+        } else {
+
+            // Trim trailing whitespace (including `\r' and `\n').
+            while ( len > 0 && isspace( buffer[ len - 1 ] ) ) {
+                -- len;
+            }; // while
+
+            // Drop trailing full stop.
+            if ( len > 0 && buffer[ len - 1 ] == '.' ) {
+                -- len;
+            }; // if
+
+            msg.assign( buffer, len );
+
+        }; //if
+
+        if ( buffer != NULL ) {
+            LocalFree( buffer );
+        }; // if
+
+        return msg;
+
+    } // _get_err_msg
+
+
+    std::string
+    dir_sep(
+    ) {
+        return "\\";
+    } // dir_sep
+
+
+    std::string
+    exe_path(
+    ) {
+
+        buffer_t path( _size );
+        int      count = _count;
+
+        for ( ; ; ) {
+
+            DWORD len = GetModuleFileNameA( NULL, & path.front(), path.size() );
+
+            if ( len == 0 ) {
+                int err = GetLastError();
+                log_error( "ERROR: Getting executable path failed: %s\n", err_msg( err ).c_str() );
+                exit( 2 );
+            }; // if
+
+            if ( len < path.size() ) {
+                path.resize( len );
+                break;
+            }; // if
+
+            // Buffer too small.
+            if ( count > 0 ) {
+                -- count;
+                path.resize( path.size() * 2 );
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Buffer of %lu bytes is still too small\n",
+                    (unsigned long) path.size()
+                );
+                exit( 2 );
+            }; // if
+
+        }; // forever
+
+        return std::string( & path.front(), path.size() );
+
+    } // exe_path
+
+
+    std::string
+    exe_dir(
+    ) {
+
+        std::string exe = exe_path();
+        int count = 0;
+
+        // Splitting path into components.
+        buffer_t drv( _MAX_DRIVE );
+        buffer_t dir( _MAX_DIR   );
+        count = _count;
+#if defined(_MSC_VER)
+        for ( ; ; ) {
+            int rc =
+                _splitpath_s(
+                    exe.c_str(),
+                    & drv.front(), drv.size(),
+                    & dir.front(), dir.size(),
+                    NULL, 0,   // We need neither name
+                    NULL, 0    // nor extension
+                );
+            if ( rc == 0 ) {
+                break;
+            } else if ( rc == ERANGE ) {
+                if ( count > 0 ) {
+                    -- count;
+                    // Buffer is too small, but it is not clear which one.
+                    // So we have to enlarge all.
+                    drv.resize( drv.size() * 2 );
+                    dir.resize( dir.size() * 2 );
+                } else {
+                    log_error(
+                        "ERROR: Getting executable path failed: "
+                        "Splitting path `%s' to components failed: "
+                        "Buffers of %lu and %lu bytes are still too small\n",
+                        exe.c_str(),
+                        (unsigned long) drv.size(),
+                        (unsigned long) dir.size()
+                    );
+                    exit( 2 );
+                }; // if
+            } else {
+                log_error(
+                    "ERROR: Getting executable path failed: "
+                    "Splitting path `%s' to components failed: %s\n",
+                    exe.c_str(),
+                    err_msg( rc ).c_str()
+                );
+                exit( 2 );
+            }; // if
+        }; // forever
+
+#else // __MINGW32__
+
+        // MinGW does not have the "secure" _splitpath_s, use the insecure version instead.
+        _splitpath(
+            exe.c_str(),
+            & drv.front(),
+            & dir.front(),
+            NULL,   // We need neither name
+            NULL    // nor extension
+        );
+#endif // __MINGW32__
+
+        // Combining components back to path.
+        // I failed with "secure" `_makepath_s'. If buffer is too small, instead of returning
+        // ERANGE, `_makepath_s' pops up dialog box and offers to debug the program. D'oh!
+        // So let us try to guess the size of result and go with insecure `_makepath'.
+        buffer_t path( std::max( drv.size() + dir.size(), size_t( _MAX_PATH ) ) + 10 );
+        _makepath( & path.front(), & drv.front(), & dir.front(), NULL, NULL );
+
+        return & path.front();
+
+    } // exe_dir
+
+
+#endif // _WIN32
+
+
+std::string
+err_msg(
+    int err
+) {
+
+    return _err_msg( err, 0 );
+
+} // err_msg
+
+
+// =================================================================================================
+// C interface.
+// =================================================================================================
+
+
+char *
+get_err_msg(
+    int err
+) {
+    char * msg = strdup( err_msg( err ).c_str() );
+    CHECK_PTR( msg );
+    return msg;
+} // get_err_msg
+
+
+char *
+get_dir_sep(
+) {
+    char * sep = strdup( dir_sep().c_str() );
+    CHECK_PTR( sep );
+    return sep;
+} // get_dir_sep
+
+
+char *
+get_exe_path(
+) {
+    char * path = strdup( exe_path().c_str() );
+    CHECK_PTR( path );
+    return path;
+} // get_exe_path
+
+
+char *
+get_exe_dir(
+) {
+    char * dir = strdup( exe_dir().c_str() );
+    CHECK_PTR( dir );
+    return dir;
+} // get_exe_dir
+
+
+// end of file //
--- a/test_common/harness/os_helpers.h
+++ b/test_common/harness/os_helpers.h
@@ -0,0 +1,53 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef __os_helpers_h__
+#define __os_helpers_h__
+
+#include "compat.h"
+
+// -------------------------------------------------------------------------------------------------
+// C++ interface.
+// -------------------------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+
+    #include <string>
+
+    std::string err_msg( int err );
+    std::string dir_sep();
+    std::string exe_path();
+    std::string exe_dir();
+
+#endif // __cplusplus
+
+// -------------------------------------------------------------------------------------------------
+// C interface.
+// -------------------------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+    extern "C" {
+#endif // __cplusplus
+
+char * get_err_msg( int err );  // Returns system error message. Subject to free.
+char * get_dir_sep();           // Returns dir separator. Subject to free.
+char * get_exe_path();          // Returns path of current executable. Subject to free.
+char * get_exe_dir();           // Returns dir of current executable. Subject to free.
+
+#ifdef __cplusplus
+    } // extern "C"
+#endif // __cplusplus
+
+#endif // __os_helpers_h__
--- a/test_common/harness/parseParameters.cpp
+++ b/test_common/harness/parseParameters.cpp
@@ -0,0 +1,130 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "parseParameters.h"
+
+#include "errorHelpers.h"
+#include "testHarness.h"
+#include "ThreadPool.h"
+
+#include <iostream>
+#include <sstream>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <string.h>
+
+using namespace std;
+
+bool             gOfflineCompiler = false;
+bool             gForceSpirVCache = false;
+bool             gForceSpirVGenerate = false;
+std::string      gSpirVPath = ".";
+OfflineCompilerOutputType gOfflineCompilerOutputType;
+
+void helpInfo ()
+{
+  log_info("  '-offlineCompiler <output_type:binary|source|spir_v>': use offline compiler\n");
+  log_info("  '                  output_type binary - \"../build_script_binary.py\" is invoked\n");
+  log_info("  '                  output_type source - \"../build_script_source.py\"  is invoked\n");
+  log_info("  '                  output_type spir_v <mode:generate|cache> - \"../cl_build_script_spir_v.py\" is invoked\n, optional modes: generate, cache");
+  log_info("  '                                     mode generate <path> - force binary generation");
+  log_info("  '                                     mode cache <path> - force reading binary files from cache");
+  log_info("\n");
+}
+
+int parseCustomParam (int argc, const char *argv[], const char *ignore)
+{
+  int delArg = 0;
+
+  for (int i=1; i<argc; i++)
+  {
+    if(ignore != 0)
+    {
+      // skip parameters that require special/different treatment in application
+      // (generic interpretation and parameter removal will not be performed)
+      const char * ptr = strstr(ignore, argv[i]);
+      if(ptr != 0 &&
+        (ptr == ignore || ptr[-1] == ' ') && //first on list or ' ' before
+        (ptr[strlen(argv[i])] == 0 || ptr[strlen(argv[i])] == ' ')) // last on list or ' ' after
+        continue;
+    }
+    if (i < 0) i = 0;
+	  delArg = 0;
+	  if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
+	  	  helpInfo ();	  
+
+    else if (!strcmp(argv[i], "-offlineCompiler"))
+    {
+        log_info(" Offline Compiler enabled\n");
+        delArg = 1;
+        if ((i + 1) < argc)
+        {
+            gOfflineCompiler = true;
+
+            if (!strcmp(argv[i + 1], "binary"))
+            {
+                gOfflineCompilerOutputType = kBinary;
+                delArg++;
+            }
+            else if (!strcmp(argv[i + 1], "source"))
+            {
+                gOfflineCompilerOutputType = kSource;
+                delArg++;
+            }
+            else if (!strcmp(argv[i + 1], "spir_v"))
+            {
+                gOfflineCompilerOutputType = kSpir_v;
+                delArg++;
+                if ((i + 3) < argc)
+                {
+                    if (!strcmp(argv[i + 2], "cache"))
+                    {
+                        gForceSpirVCache = true;
+                        gSpirVPath = argv[i + 3];
+                        log_info(" SpirV reading from cache enabled.\n");
+                        delArg += 2;
+                    }
+                    else if (!strcmp(argv[i + 2], "generate"))
+                    {
+                        gForceSpirVGenerate = true;
+                        gSpirVPath = argv[i + 3];
+                        log_info(" SpirV force generate binaries enabled.\n");
+                        delArg += 2;
+                    }
+                }
+            }
+            else
+            {
+                log_error(" Offline Compiler output type not supported: %s\n", argv[i + 1]);
+                return -1;
+            }
+        }
+        else
+        {
+            log_error(" Offline Compiler parameters are incorrect. Usage:\n");
+            log_error("       -offlineCompiler <input> <output> <output_type:binary | source | spir_v>\n");
+            return -1;
+        }
+    }
+
+    //cleaning parameters from argv tab
+	  for (int j=i; j<argc-delArg; j++)
+		  argv[j] = argv[j+delArg];
+	  argc -= delArg ;
+	  i -= delArg;
+  }
+  return argc;
+}
+
--- a/test_common/harness/parseParameters.h
+++ b/test_common/harness/parseParameters.h
@@ -0,0 +1,37 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _parseParameters_h
+#define _parseParameters_h
+
+#include <string>
+
+extern bool gOfflineCompiler;
+extern bool gForceSpirVCache;
+extern bool gForceSpirVGenerate;
+extern std::string gSpirVPath;
+
+enum OfflineCompilerOutputType
+{
+    kBinary = 0,
+    kSource,
+    kSpir_v
+};
+
+extern OfflineCompilerOutputType gOfflineCompilerOutputType;
+
+extern int parseCustomParam (int argc, const char *argv[], const char *ignore = 0 );
+
+#endif // _parseParameters_h
--- a/test_common/harness/ref_counting.h
+++ b/test_common/harness/ref_counting.h
@@ -0,0 +1,49 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _ref_counting_h
+#define _ref_counting_h
+
+#define MARK_REF_COUNT_BASE( c, type, bigType ) \
+    cl_uint c##_refCount; \
+    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
+    test_error( error, "Unable to check reference count for " #type );
+
+#define TEST_REF_COUNT_BASE( c, type, bigType ) \
+    cl_uint c##_refCount_new; \
+    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
+    test_error( error, "Unable to check reference count for " #type ); \
+    if( c##_refCount != c##_refCount_new ) \
+    {    \
+        log_error( "ERROR: Reference count for " #type " changed! (was %d, now %d)\n", c##_refCount, c##_refCount_new );    \
+        return -1; \
+    }
+
+#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT )
+#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT )
+
+#define MARK_REF_COUNT_DEVICE( c ) MARK_REF_COUNT_BASE( c, Device, DEVICE )
+#define TEST_REF_COUNT_DEVICE( c ) TEST_REF_COUNT_BASE( c, Device, DEVICE )
+
+#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE )
+#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE )
+
+#define MARK_REF_COUNT_PROGRAM( c ) MARK_REF_COUNT_BASE( c, Program, PROGRAM )
+#define TEST_REF_COUNT_PROGRAM( c ) TEST_REF_COUNT_BASE( c, Program, PROGRAM )
+
+#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM )
+#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM )
+
+#endif // _ref_counting_h
--- a/test_common/harness/rounding_mode.c
+++ b/test_common/harness/rounding_mode.c
@@ -0,0 +1,241 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "rounding_mode.h"
+
+#if (defined( __arm__ ) || defined(__aarch64__))
+    #define FPSCR_FZ    (1 << 24)       // Flush-To-Zero mode
+    #define FPSCR_ROUND_MASK (3 << 22)  // Rounding mode:
+
+    #define _ARM_FE_FTZ     0x1000000
+    #define _ARM_FE_NFTZ    0x0
+    #if defined(__aarch64__)
+        #define _FPU_GETCW(cw) __asm__ ("MRS %0,FPCR" : "=r" (cw))
+        #define _FPU_SETCW(cw) __asm__ ("MSR FPCR,%0" : :"ri" (cw))
+    #else
+        #define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw))
+        #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
+    #endif
+#endif
+
+#if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
+#define _ARM_FE_TONEAREST           0x0
+#define _ARM_FE_UPWARD              0x400000
+#define _ARM_FE_DOWNWARD            0x800000
+#define _ARM_FE_TOWARDZERO          0xc00000
+RoundingMode set_round( RoundingMode r, Type outType )
+{
+    static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST,
+                                                          _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
+    static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO,
+                                                          _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
+    const int *p = int_rounds;
+    if( outType == kfloat || outType == kdouble )
+        p = flt_rounds;
+
+    int fpscr = 0;
+    RoundingMode oldRound = get_round();
+
+    _FPU_GETCW(fpscr);
+    _FPU_SETCW( p[r] | (fpscr & ~FPSCR_ROUND_MASK));
+
+    return oldRound;
+}
+
+RoundingMode get_round( void )
+{
+    int fpscr;
+    int oldRound;
+
+    _FPU_GETCW(fpscr);
+    oldRound = (fpscr & FPSCR_ROUND_MASK);
+
+    switch( oldRound )
+    {
+        case _ARM_FE_TONEAREST:
+            return kRoundToNearestEven;
+        case _ARM_FE_UPWARD:
+            return kRoundUp;
+        case _ARM_FE_DOWNWARD:
+            return kRoundDown;
+        case _ARM_FE_TOWARDZERO:
+            return kRoundTowardZero;
+    }
+
+    return kDefaultRoundingMode;
+}
+
+#elif !(defined(_WIN32) && defined(_MSC_VER))
+RoundingMode set_round( RoundingMode r, Type outType )
+{
+    static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
+    static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
+    const int *p = int_rounds;
+    if( outType == kfloat || outType == kdouble )
+        p = flt_rounds;
+    int oldRound = fegetround();
+    fesetround( p[r] );
+
+    switch( oldRound )
+    {
+        case FE_TONEAREST:
+            return kRoundToNearestEven;
+        case FE_UPWARD:
+            return kRoundUp;
+        case FE_DOWNWARD:
+            return kRoundDown;
+        case FE_TOWARDZERO:
+            return kRoundTowardZero;
+        default:
+            abort();    // ??!
+    }
+    return kDefaultRoundingMode;    //never happens
+}
+
+RoundingMode get_round( void )
+{
+    int oldRound = fegetround();
+
+    switch( oldRound )
+    {
+        case FE_TONEAREST:
+            return kRoundToNearestEven;
+        case FE_UPWARD:
+            return kRoundUp;
+        case FE_DOWNWARD:
+            return kRoundDown;
+        case FE_TOWARDZERO:
+            return kRoundTowardZero;
+    }
+
+    return kDefaultRoundingMode;
+}
+
+#else
+RoundingMode set_round( RoundingMode r, Type outType )
+{
+    static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
+    static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
+    const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds;
+    unsigned int oldRound;
+
+    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
+    if (err) {
+        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
+        return kDefaultRoundingMode;    //what else never happens
+    }
+
+    oldRound &= _MCW_RC;
+
+    RoundingMode old =
+        (oldRound == _RC_NEAR)? kRoundToNearestEven :
+        (oldRound == _RC_UP)?   kRoundUp :
+        (oldRound == _RC_DOWN)? kRoundDown :
+        (oldRound == _RC_CHOP)? kRoundTowardZero:
+        kDefaultRoundingMode;
+
+    _controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode
+    return old;    //returning old rounding mode
+}
+
+RoundingMode get_round( void )
+{
+    unsigned int oldRound;
+
+    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
+    oldRound &= _MCW_RC;
+    return
+        (oldRound == _RC_NEAR)? kRoundToNearestEven :
+        (oldRound == _RC_UP)?   kRoundUp :
+        (oldRound == _RC_DOWN)? kRoundDown :
+        (oldRound == _RC_CHOP)? kRoundTowardZero:
+        kDefaultRoundingMode;
+}
+
+#endif
+
+//
+// FlushToZero() sets the host processor into ftz mode.  It is intended to have a remote effect on the behavior of the code in
+// basic_test_conversions.c. Some host processors may not support this mode, which case you'll need to do some clamping in
+// software by testing against FLT_MIN or DBL_MIN in that file.
+//
+// Note: IEEE-754 says conversions are basic operations.  As such they do *NOT* have the behavior in section 7.5.3 of
+// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
+// operators do (e.g. add, subtract, multiply, divide, etc.)
+//
+// Configuring hardware to FTZ mode varies by platform.
+// CAUTION: Some C implementations may also fail to behave properly in this mode.
+//
+//  On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
+//  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
+//          is used for floating point computation! If your OS uses x87, you'll need to figure out how
+//          to turn that off for the conversions code in basic_test_conversions.c so that they flush to
+//          zero properly.  Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
+//          in which case, these function are at liberty to do nothing.
+//
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
+    #include <xmmintrin.h>
+#elif defined( __PPC__ )
+    #include <fpu_control.h>
+#endif
+void *FlushToZero( void )
+{
+#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
+    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
+        union{ int i;  void *p; }u = { _mm_getcsr() };
+        _mm_setcsr( u.i | 0x8040 );
+        return u.p;
+    #elif defined( __arm__ ) || defined(__aarch64__)
+        int fpscr;
+        _FPU_GETCW(fpscr);
+        _FPU_SETCW(fpscr | FPSCR_FZ);
+        return NULL;
+    #elif defined( __PPC__ )
+        fpu_control_t flags = 0;
+        _FPU_GETCW(flags);
+        flags |= _FPU_MASK_NI;
+        _FPU_SETCW(flags);
+        return NULL;
+        #else
+        #error Unknown arch
+    #endif
+#else
+    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
+#endif
+}
+
+// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
+void UnFlushToZero( void *p)
+{
+#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
+    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
+        union{ void *p; int i;  }u = { p };
+        _mm_setcsr( u.i );
+    #elif defined( __arm__ ) || defined(__aarch64__)
+        int fpscr;
+        _FPU_GETCW(fpscr);
+        _FPU_SETCW(fpscr & ~FPSCR_FZ);
+    #elif defined( __PPC__)
+        fpu_control_t flags = 0;
+        _FPU_GETCW(flags);
+        flags &= ~_FPU_MASK_NI;
+        _FPU_SETCW(flags);
+        #else
+        #error Unknown arch
+    #endif
+#else
+    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
+#endif
+}
--- a/test_common/harness/rounding_mode.h
+++ b/test_common/harness/rounding_mode.h
@@ -0,0 +1,69 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef __ROUNDING_MODE_H__
+#define __ROUNDING_MODE_H__
+
+#include "compat.h"
+
+#if (defined(_WIN32) && defined (_MSC_VER))
+#include "errorHelpers.h"
+#include "testHarness.h"
+#endif
+
+typedef enum
+{
+    kDefaultRoundingMode = 0,
+    kRoundToNearestEven,
+    kRoundUp,
+    kRoundDown,
+    kRoundTowardZero,
+
+    kRoundingModeCount
+}RoundingMode;
+
+typedef enum
+{
+    kuchar = 0,
+    kchar = 1,
+    kushort = 2,
+    kshort = 3,
+    kuint = 4,
+    kint = 5,
+    kfloat = 6,
+    kdouble = 7,
+    kulong = 8,
+    klong = 9,
+
+    //This goes last
+    kTypeCount
+}Type;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern RoundingMode set_round( RoundingMode r, Type outType );
+extern RoundingMode get_round( void );
+extern void *FlushToZero( void );
+extern void UnFlushToZero( void *p);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif /* __ROUNDING_MODE_H__ */
--- a/test_common/harness/testHarness.c
+++ b/test_common/harness/testHarness.c
@@ -0,0 +1,851 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testHarness.h"
+#include "compat.h"
+#include <stdio.h>
+#include <string.h>
+#include "threadTesting.h"
+#include "errorHelpers.h"
+#include "kernelHelpers.h"
+#include "fpcontrol.h"
+#include "typeWrappers.h"
+#include "parseParameters.h"
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif
+
+#include <time.h>
+
+#if !defined (__APPLE__)
+#include <CL/cl.h>
+#endif
+
+int gTestsPassed = 0;
+int gTestsFailed = 0;
+cl_uint gRandomSeed = 0;
+cl_uint gReSeed = 0;
+
+int     gFlushDenormsToZero = 0;
+int     gInfNanSupport = 1;
+int     gIsEmbedded = 0;
+int     gIsOpenCL_C_1_0_Device = 0;
+int     gIsOpenCL_1_0_Device = 0;
+int     gHasLong = 1;
+
+#define DEFAULT_NUM_ELEMENTS        0x4000
+
+int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
+                   basefn fnList[], const char *fnNames[],
+                   int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps )
+{
+    return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames, imageSupportRequired, forceNoContextCreation, queueProps,
+                          ( imageSupportRequired ) ? verifyImageSupport : NULL );
+}
+
+int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
+                 basefn fnList[], const char *fnNames[],
+                int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps,
+                DeviceCheckFn deviceCheckFn )
+{
+    test_start();
+
+    cl_device_type    device_type = CL_DEVICE_TYPE_DEFAULT;
+    cl_uint            num_platforms = 0;
+    cl_platform_id     *platforms;
+    cl_device_id       device;
+    int                num_elements = DEFAULT_NUM_ELEMENTS;
+    cl_uint            num_devices = 0;
+    cl_device_id       *devices = NULL;
+    cl_uint            choosen_device_index = 0;
+    cl_uint            choosen_platform_index = 0;
+
+    int            err, ret;
+    char *endPtr;
+    unsigned int            i;
+    int based_on_env_var = 0;
+
+
+    /* Check for environment variable to set device type */
+    char *env_mode = getenv( "CL_DEVICE_TYPE" );
+    if( env_mode != NULL )
+    {
+        based_on_env_var = 1;
+        if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
+            device_type = CL_DEVICE_TYPE_GPU;
+        else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
+            device_type = CL_DEVICE_TYPE_CPU;
+        else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
+            device_type = CL_DEVICE_TYPE_ACCELERATOR;
+        else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
+            device_type = CL_DEVICE_TYPE_DEFAULT;
+        else
+        {
+            log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
+            abort();
+        }
+    }
+
+#if defined( __APPLE__ )
+    {
+        // report on any unusual library search path indirection
+        char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
+        if( libSearchPath )
+            log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
+
+        // report on any unusual framework search path indirection
+        char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
+        if( libSearchPath )
+            log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
+    }
+#endif
+
+    env_mode = getenv( "CL_DEVICE_INDEX" );
+    if( env_mode != NULL )
+    {
+        choosen_device_index = atoi(env_mode);
+    }
+
+    env_mode = getenv( "CL_PLATFORM_INDEX" );
+    if( env_mode != NULL )
+    {
+        choosen_platform_index = atoi(env_mode);
+    }
+
+    /* Process the command line arguments */
+
+    argc = parseCustomParam(argc, argv);
+    if (argc == -1)
+    {
+        test_finish();
+        return 0;
+    }
+
+    /* Special case: just list the tests */
+    if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" )))
+    {
+        log_info( "Usage: %s [<function name>*] [pid<num>] [id<num>] [<device type>]\n", argv[0] );
+        log_info( "\t<function name>\tOne or more of: (wildcard character '*') (default *)\n");
+        log_info( "\tpid<num>\t\tIndicates platform at index <num> should be used (default 0).\n" );
+        log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
+        log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );
+
+        for( i = 0; i < num_fns - 1; i++ )
+        {
+            log_info( "\t\t%s\n", fnNames[ i ] );
+        }
+        test_finish();
+        return 0;
+    }
+
+    /* How are we supposed to seed the random # generators? */
+    if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 )
+    {
+        gRandomSeed = (cl_uint) time( NULL );
+        log_info( "Random seed: %u.\n", gRandomSeed );
+        gReSeed = 1;
+        argc--;
+    }
+    else
+    {
+        log_info(" Initializing random seed to 0.\n");
+    }
+
+    /* Do we have an integer to specify the number of elements to pass to tests? */
+    if( argc > 1 )
+    {
+        ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 );
+        if( endPtr != argv[ argc - 1 ] && *endPtr == 0 )
+        {
+            /* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */
+            /* (hence why we stored the result in ret first) */
+            num_elements = ret;
+            log_info( "Testing with num_elements of %d\n", num_elements );
+            argc--;
+        }
+    }
+
+    /* Do we have a CPU/GPU specification? */
+    if( argc > 1 )
+    {
+        if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 )
+        {
+            device_type = CL_DEVICE_TYPE_GPU;
+            argc--;
+        }
+        else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 )
+        {
+            device_type = CL_DEVICE_TYPE_CPU;
+            argc--;
+        }
+        else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
+        {
+            device_type = CL_DEVICE_TYPE_ACCELERATOR;
+            argc--;
+        }
+        else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
+        {
+            device_type = CL_DEVICE_TYPE_DEFAULT;
+            argc--;
+        }
+    }
+
+    /* Did we choose a specific device index? */
+    if( argc > 1 )
+    {
+        if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' )
+        {
+            choosen_device_index = atoi( &(argv[ argc - 1 ][2]) );
+            argc--;
+        }
+    }
+
+    /* Did we choose a specific platform index? */
+    if( argc > 1 )
+    {
+        if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd')
+        {
+            choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) );
+            argc--;
+        }
+    }
+
+    switch( device_type )
+    {
+        case CL_DEVICE_TYPE_GPU:            log_info( "Requesting GPU device " ); break;
+        case CL_DEVICE_TYPE_CPU:            log_info( "Requesting CPU device " ); break;
+        case CL_DEVICE_TYPE_ACCELERATOR:    log_info( "Requesting Accelerator device " ); break;
+        case CL_DEVICE_TYPE_DEFAULT:        log_info( "Requesting Default device " ); break;
+        default:                            log_error( "Requesting unknown device "); return -1;
+    }
+    log_info( based_on_env_var ? "based on environment variable " : "based on command line " );
+    log_info( "for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index);
+
+#if defined( __APPLE__ )
+#if defined( __i386__ ) || defined( __x86_64__ )
+#define    kHasSSE3                0x00000008
+#define kHasSupplementalSSE3    0x00000100
+#define    kHasSSE4_1              0x00000400
+#define    kHasSSE4_2              0x00000800
+    /* check our environment for a hint to disable SSE variants */
+    {
+        const char *env = getenv( "CL_MAX_SSE" );
+        if( env )
+        {
+            extern int _cpu_capabilities;
+            int mask = 0;
+            if( 0 == strcasecmp( env, "SSE4.1" ) )
+                mask = kHasSSE4_2;
+            else if( 0 == strcasecmp( env, "SSSE3" ) )
+                mask = kHasSSE4_2 | kHasSSE4_1;
+            else if( 0 == strcasecmp( env, "SSE3" ) )
+                mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
+            else if( 0 == strcasecmp( env, "SSE2" ) )
+                mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
+            else
+            {
+                log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env );
+                return -2;
+            }
+
+            log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
+            _cpu_capabilities &= ~mask;
+        }
+    }
+#endif
+#endif
+
+    /* Get the platform */
+    err = clGetPlatformIDs(0, NULL, &num_platforms);
+    if (err) {
+        print_error(err, "clGetPlatformIDs failed");
+        test_finish();
+        return -1;
+    }
+
+    platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) );
+    if (!platforms || choosen_platform_index >= num_platforms) {
+        log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms );
+        test_finish();
+        return -1;
+    }
+    BufferOwningPtr<cl_platform_id> platformsBuf(platforms);
+
+    err = clGetPlatformIDs(num_platforms, platforms, NULL);
+    if (err) {
+        print_error(err, "clGetPlatformIDs failed");
+        test_finish();
+        return -1;
+    }
+
+    /* Get the number of requested devices */
+    err = clGetDeviceIDs(platforms[choosen_platform_index],  device_type, 0, NULL, &num_devices );
+    if (err) {
+        print_error(err, "clGetDeviceIDs failed");
+        test_finish();
+        return -1;
+    }
+
+    devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
+    if (!devices || choosen_device_index >= num_devices) {
+        log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices );
+        test_finish();
+        return -1;
+    }
+    BufferOwningPtr<cl_device_id> devicesBuf(devices);
+
+
+    /* Get the requested device */
+    err = clGetDeviceIDs(platforms[choosen_platform_index],  device_type, num_devices, devices, NULL );
+    if (err) {
+        print_error(err, "clGetDeviceIDs failed");
+        test_finish();
+        return -1;
+    }
+
+    device = devices[choosen_device_index];
+
+    if( printDeviceHeader( device ) != CL_SUCCESS )
+    {
+        test_finish();
+        return -1;
+    }
+
+    cl_device_fp_config fpconfig = 0;
+    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL );
+    if (err) {
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
+        test_finish();
+        return -1;
+    }
+
+    gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM));
+    log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? "NO" : "YES" );
+    log_info( "sizeof( void*) = %d  (host)\n", (int) sizeof( void* ) );
+
+    //detect whether profile of the device is embedded
+    char profile[1024] = "";
+    err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
+    if (err)
+    {
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
+        test_finish();
+        return -1;
+    }
+    gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
+
+    //detect the floating point capabilities
+    cl_device_fp_config floatCapabilities = 0;
+    err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL);
+    if (err)
+    {
+        print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
+        test_finish();
+        return -1;
+    }
+
+    // Check for problems that only embedded will have
+    if( gIsEmbedded )
+    {
+        //If the device is embedded, we need to detect if the device supports Infinity and NaN
+        if ((floatCapabilities & CL_FP_INF_NAN) == 0)
+            gInfNanSupport = 0;
+
+        // check the extensions list to see if ulong and long are supported
+        size_t extensionsStringSize = 0;
+        if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, 0, NULL, &extensionsStringSize ) ))
+        {
+            print_error( err, "Unable to get extensions string size for embedded device" );
+            test_finish();
+            return -1;
+        }
+        char *extensions_string = (char*) malloc(extensionsStringSize);
+        if( NULL == extensions_string )
+        {
+            print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for extensions string for embedded device" );
+            test_finish();
+            return -1;
+        }
+        BufferOwningPtr<char> extensions_stringBuf(extensions_string);
+
+        if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, extensionsStringSize, extensions_string, NULL ) ))
+        {
+            print_error( err, "Unable to get extensions string for embedded device" );
+            test_finish();
+            return -1;
+        }
+
+        if( extensions_string[extensionsStringSize-1] != '\0' )
+        {
+            log_error( "FAILURE: extensions string for embedded device is not NUL terminated" );
+            test_finish();
+            return -1;
+        }
+
+        if( NULL == strstr( extensions_string, "cles_khr_int64" ))
+            gHasLong = 0;
+    }
+
+    if( getenv( "OPENCL_1_0_DEVICE" ) )
+    {
+        char c_version[1024];
+        gIsOpenCL_1_0_Device = 1;
+        memset( c_version, 0, sizeof( c_version ) );
+
+        if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) )
+        {
+            log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err );
+            test_finish();
+            return -1;
+        }
+
+        if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) )
+        {
+            gIsOpenCL_C_1_0_Device = 1;
+            log_info( "Device is a OpenCL C 1.0 device\n" );
+        }
+        else
+            log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" );
+    }
+
+    cl_uint device_address_bits = 0;
+    if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) ))
+    {
+        print_error( err, "Unable to obtain device address bits" );
+        test_finish();
+        return -1;
+    }
+    if( device_address_bits )
+        log_info( "sizeof( void*) = %d  (device)\n", device_address_bits/8 );
+    else
+    {
+        log_error("Invalid device address bit size returned by device.\n");
+        test_finish();
+        return -1;
+    }
+
+
+    /* If we have a device checking function, run it */
+    if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS )
+    {
+        test_finish();
+        return -1;
+    }
+
+    if (num_elements <= 0)
+        num_elements = DEFAULT_NUM_ELEMENTS;
+
+        // On most platforms which support denorm, default is FTZ off. However,
+        // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
+        // This creates issues in result verification. Since spec allows the implementation to either flush or
+        // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
+        // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
+        // where reference is being computed to make sure we get non-flushed reference result. If implementation
+        // returns flushed result, we correctly take care of that in verification code.
+#if defined(__APPLE__) && defined(__arm__)
+        FPU_mode_type oldMode;
+        DisableFTZ( &oldMode );
+#endif
+
+    int error = parseAndCallCommandLineTests( argc, argv, device, num_fns, fnList, fnNames, forceNoContextCreation, queueProps, num_elements );
+
+ #if defined(__APPLE__) && defined(__arm__)
+     // Restore the old FP mode before leaving.
+    RestoreFPState( &oldMode );
+#endif
+
+    return error;
+}
+
+static int find_wildcard_matching_functions( const char *fnNames[], unsigned char fnsToCall[], unsigned int num_fns,
+                                             const char *wildcard )
+{
+    int found_tests = 0;
+    size_t wildcard_length = strlen( wildcard ) - 1; /* -1 for the asterisk */
+
+    for( unsigned int fnIndex = 0; fnIndex < num_fns; fnIndex++ )
+    {
+        if( strncmp( fnNames[ fnIndex ], wildcard, wildcard_length ) == 0 )
+        {
+            if( fnsToCall[ fnIndex ] )
+            {
+                log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ fnIndex ] );
+                return EXIT_FAILURE;
+            }
+
+            fnsToCall[ fnIndex ] = 1;
+            found_tests = 1;
+        }
+    }
+
+    if( !found_tests )
+    {
+        log_error( "ERROR: The wildcard '%s' did not match any test names.\n", wildcard );
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
+
+static int find_argument_matching_function( const char *fnNames[], unsigned char *fnsToCall, unsigned int num_fns,
+                                            const char *argument )
+{
+    unsigned int fnIndex;
+
+    for( fnIndex = 0; fnIndex < num_fns; fnIndex++ )
+    {
+        if( strcmp( argument, fnNames[ fnIndex ] ) == 0 )
+        {
+            if( fnsToCall[ fnIndex ] )
+            {
+                log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ fnIndex ] );
+                return EXIT_FAILURE;
+            }
+            else
+            {
+                fnsToCall[ fnIndex ] = 1;
+                break;
+            }
+        }
+    }
+
+    if( fnIndex == num_fns )
+    {
+        log_error( "ERROR: The argument '%s' did not match any test names.\n", argument );
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
+
+int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
+                                  basefn fnList[], const char *fnNames[], int forceNoContextCreation,
+                                  cl_command_queue_properties queueProps, int num_elements )
+{
+    int ret = EXIT_SUCCESS;
+
+    unsigned char *fnsToCall = ( unsigned char* ) calloc( num_fns, 1 );
+
+    if( argc == 1 )
+    {
+        /* No actual arguments, all tests will be run. */
+        memset( fnsToCall, 1, num_fns );
+    }
+    else
+    {
+        for( int argIndex = 1; argIndex < argc; argIndex++ )
+        {
+            if( strchr( argv[ argIndex ], '*' ) != NULL )
+            {
+                ret = find_wildcard_matching_functions( fnNames, fnsToCall, num_fns, argv[ argIndex ] );
+            }
+            else
+            {
+                if( strcmp( argv[ argIndex ], "all" ) == 0 )
+                {
+                    memset( fnsToCall, 1, num_fns );
+                    break;
+                }
+                else
+                {
+                    ret = find_argument_matching_function( fnNames, fnsToCall, num_fns, argv[ argIndex ] );
+                }
+            }
+
+            if( ret == EXIT_FAILURE )
+            {
+                break;
+            }
+        }
+    }
+
+    if( ret == EXIT_SUCCESS )
+    {
+        ret = callTestFunctions( fnList, fnNames, fnsToCall, num_fns, device, forceNoContextCreation, num_elements, queueProps );
+
+        if( gTestsFailed == 0 )
+        {
+            if( gTestsPassed > 1 )
+            {
+                log_info("PASSED %d of %d tests.\n", gTestsPassed, gTestsPassed);
+            }
+            else if( gTestsPassed > 0 )
+            {
+                log_info("PASSED test.\n");
+            }
+        }
+        else if( gTestsFailed > 0 )
+        {
+            if( gTestsFailed+gTestsPassed > 1 )
+            {
+                log_error("FAILED %d of %d tests.\n", gTestsFailed, gTestsFailed+gTestsPassed);
+            }
+            else
+            {
+                log_error("FAILED test.\n");
+            }
+        }
+    }
+
+    test_finish();
+
+    free( fnsToCall );
+
+    return ret;
+}
+
+int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[],
+                       int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation,
+                       int numElementsToUse, cl_command_queue_properties queueProps )
+{
+    int numErrors = 0;
+
+    for( int i = 0; i < numFunctions; ++i )
+    {
+        if( functionsToCall[ i ] )
+        {
+            /* Skip any unimplemented tests. */
+            if( functionList[ i ] != NULL )
+            {
+                numErrors += callSingleTestFunction( functionList[ i ], functionNames[ i ], deviceToUse,
+                                                     forceNoContextCreation, numElementsToUse, queueProps );
+            }
+            else
+            {
+                log_info( "%s test currently not implemented\n", functionNames[ i ] );
+            }
+        }
+    }
+
+    return numErrors;
+}
+
+void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
+{
+    log_info( "%s\n", errinfo );
+}
+
+// Actual function execution
+int callSingleTestFunction( basefn functionToCall, const char *functionName,
+                           cl_device_id deviceToUse, int forceNoContextCreation,
+                           int numElementsToUse, const cl_queue_properties queueProps )
+{
+    int numErrors = 0, ret;
+    cl_int error;
+    cl_context context = NULL;
+    cl_command_queue queue = NULL;
+    const cl_command_queue_properties cmd_queueProps = (queueProps)?CL_QUEUE_PROPERTIES:0;
+    cl_command_queue_properties queueCreateProps[] = {cmd_queueProps, queueProps, 0};
+
+    /* Create a context to work with, unless we're told not to */
+    if( !forceNoContextCreation )
+    {
+        context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error );
+        if (!context)
+        {
+            print_error( error, "Unable to create testing context" );
+            return 1;
+        }
+
+        queue = clCreateCommandQueueWithProperties( context, deviceToUse, &queueCreateProps[0], &error );
+        if( queue == NULL )
+        {
+            print_error( error, "Unable to create testing command queue" );
+            return 1;
+        }
+    }
+
+    /* Run the test and print the result */
+    log_info( "%s...\n", functionName );
+    fflush( stdout );
+
+    error = check_opencl_version_with_testname(functionName, deviceToUse);
+    test_missing_feature(error, functionName);
+    
+    error = check_functions_for_offline_compiler(functionName, deviceToUse);
+    test_missing_support_offline_cmpiler(error, functionName);
+
+    ret = functionToCall( deviceToUse, context, queue, numElementsToUse);        //test_threaded_function( ptr_basefn_list[i], group, context, num_elements);
+    if( ret == TEST_NOT_IMPLEMENTED )
+    {
+        /* Tests can also let us know they're not implemented yet */
+        log_info("%s test currently not implemented\n\n", functionName);
+    }
+    else
+    {
+        /* Print result */
+        if( ret == 0 ) {
+            log_info( "%s passed\n", functionName );
+            gTestsPassed++;
+        }
+        else
+        {
+            numErrors++;
+            log_error( "%s FAILED\n", functionName );
+            gTestsFailed++;
+        }
+    }
+
+    /* Release the context */
+    if( !forceNoContextCreation )
+    {
+        int error = clFinish(queue);
+        if (error) {
+            log_error("clFinish failed: %d", error);
+            numErrors++;
+        }
+        clReleaseCommandQueue( queue );
+        clReleaseContext( context );
+    }
+
+    return numErrors;
+}
+
+void checkDeviceTypeOverride( cl_device_type *inOutType )
+{
+    /* Check if we are forced to CPU mode */
+    char *force_cpu = getenv( "CL_DEVICE_TYPE" );
+    if( force_cpu != NULL )
+    {
+        if( strcmp( force_cpu, "gpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_GPU" ) == 0 )
+            *inOutType = CL_DEVICE_TYPE_GPU;
+        else if( strcmp( force_cpu, "cpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_CPU" ) == 0 )
+            *inOutType = CL_DEVICE_TYPE_CPU;
+        else if( strcmp( force_cpu, "accelerator" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
+            *inOutType = CL_DEVICE_TYPE_ACCELERATOR;
+        else if( strcmp( force_cpu, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
+            *inOutType = CL_DEVICE_TYPE_DEFAULT;
+    }
+
+    switch( *inOutType )
+    {
+        case CL_DEVICE_TYPE_GPU:            log_info( "Requesting GPU device " ); break;
+        case CL_DEVICE_TYPE_CPU:            log_info( "Requesting CPU device " ); break;
+        case CL_DEVICE_TYPE_ACCELERATOR:    log_info( "Requesting Accelerator device " ); break;
+        case CL_DEVICE_TYPE_DEFAULT:        log_info( "Requesting Default device " ); break;
+        default: break;
+    }
+    log_info( force_cpu != NULL ? "based on environment variable\n" : "based on command line\n" );
+
+#if defined( __APPLE__ )
+    {
+        // report on any unusual library search path indirection
+        char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
+        if( libSearchPath )
+            log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
+
+        // report on any unusual framework search path indirection
+        char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
+        if( libSearchPath )
+            log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
+    }
+#endif
+
+}
+
+#if ! defined( __APPLE__ )
+void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
+{
+    uint32_t pat = ((uint32_t*) src_pattern)[0];
+    size_t count = bytes / 4;
+    size_t i;
+    uint32_t *d = (uint32_t*)dest;
+
+    for( i = 0; i < count; i++ )
+        d[i] = pat;
+
+    d += i;
+
+    bytes &= 3;
+    if( bytes )
+        memcpy( d, src_pattern, bytes );
+}
+#endif
+
+extern cl_device_type GetDeviceType( cl_device_id d )
+{
+    cl_device_type result = -1;
+    cl_int err = clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( result ), &result, NULL );
+    if( CL_SUCCESS != err )
+        log_error( "ERROR: Unable to get device type for device %p\n", d );
+    return result;
+}
+
+
+cl_device_id GetOpposingDevice( cl_device_id device )
+{
+    cl_int error;
+    cl_device_id *otherDevices;
+    cl_uint actualCount;
+    cl_platform_id plat;
+
+    // Get the platform of the device to use for getting a list of devices
+    error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL );
+    if( error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to get device's platform" );
+        return NULL;
+    }
+
+    // Get a list of all devices
+    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount );
+    if( error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to get list of devices size" );
+        return NULL;
+    }
+    otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id));
+    if (NULL == otherDevices) {
+        print_error( error, "Unable to allocate list of other devices." );
+        return NULL;
+    }
+    BufferOwningPtr<cl_device_id> otherDevicesBuf(otherDevices);
+
+    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL );
+    if( error != CL_SUCCESS )
+    {
+        print_error( error, "Unable to get list of devices" );
+        return NULL;
+    }
+
+    if( actualCount == 1 )
+    {
+        return device;    // NULL means error, returning self means we couldn't find another one
+    }
+
+    // Loop and just find one that isn't the one we were given
+    cl_uint i;
+    for( i = 0; i < actualCount; i++ )
+    {
+        if( otherDevices[ i ] != device )
+        {
+            cl_device_type newType;
+            error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), &newType, NULL );
+            if( error != CL_SUCCESS )
+            {
+                print_error( error, "Unable to get device type for other device" );
+                return NULL;
+            }
+            cl_device_id result = otherDevices[ i ];
+            return result;
+        }
+    }
+
+    // Should never get here
+    return NULL;
+}
+
+
--- a/test_common/harness/testHarness.h
+++ b/test_common/harness/testHarness.h
@@ -0,0 +1,102 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _testHarness_h
+#define _testHarness_h
+
+#include "threadTesting.h"
+#include "clImageHelper.h"
+
+#include <string>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern cl_uint gReSeed;
+extern cl_uint gRandomSeed;
+
+// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
+// setup work, and then call each function in turn as dictatated by the passed arguments.
+extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
+                            basefn fnList[], const char *fnNames[],
+                            int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
+
+// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits.
+typedef int (*DeviceCheckFn)( cl_device_id device );
+
+// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
+extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
+                              basefn fnList[], const char *fnNames[],
+                              int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn );
+
+// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
+extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
+                                        basefn *fnList, const char *fnNames[],
+                                        int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements );
+
+// Call this function if you need to do all the setup work yourself, and just need the function list called/
+// managed.
+//    functionList is the actual array of functions
+//    functionNames is an array of strings representing the name of each function
+//    functionsToCall is an array of integers (treated as bools) which tell which function is to be called,
+//       each element at index i, corresponds to the element in functionList at index i
+//    numFunctions is the number of elements in the arrays
+//    contextProps are used to create a testing context for each test
+//    deviceToUse and numElementsToUse are all just passed to each test function
+extern int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[],
+                              int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation,
+                              int numElementsToUse, cl_command_queue_properties queueProps );
+
+// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
+extern int callSingleTestFunction( basefn functionToCall, const char *functionName,
+                                   cl_device_id deviceToUse, int forceNoContextCreation,
+                                   int numElementsToUse, cl_command_queue_properties queueProps );
+
+///// Miscellaneous steps
+
+// Given a pre-existing device type choice, check the environment for an override, then print what
+// choice was made and how (and return the overridden choice, if there is one)
+extern void checkDeviceTypeOverride( cl_device_type *inOutType );
+
+// standard callback function for context pfn_notify
+extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data);
+
+extern cl_device_type GetDeviceType( cl_device_id );
+
+// Given a device (most likely passed in by the harness, but not required), will attempt to find
+// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against.
+// Note that returning NULL means an error was hit, but if no error was hit and the device passed in
+// is the only device available, the SAME device is returned, so check!
+extern cl_device_id GetOpposingDevice( cl_device_id device );
+
+
+extern int      gFlushDenormsToZero;    // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
+extern int      gInfNanSupport;         // This is set to 1 if the device supports infinities and NaNs
+extern int        gIsEmbedded;            // This is set to 1 if the device is an embedded device
+extern int        gHasLong;               // This is set to 1 if the device suppots long and ulong types in OpenCL C.
+extern int      gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0.
+
+#if ! defined( __APPLE__ )
+    void     memset_pattern4(void *, const void *, size_t);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _testHarness_h
+
+
--- a/test_common/harness/test_mt19937.c
+++ b/test_common/harness/test_mt19937.c
@@ -0,0 +1,51 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "mt19937.h"
+#include <stdio.h>
+
+int main( void )
+{
+    MTdata d = init_genrand(42);
+    int i;
+    const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4,
+                                    0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6,
+                                    0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
+                                    0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 };
+    int errcount = 0;
+
+    for( i = 0; i < 65536; i++ )
+    {
+        cl_uint u = genrand_int32( d );
+        if( 0 == (i & 4095) )
+        {
+            if( u != reference[i>>12] )
+            {
+                printf("ERROR: expected *0x%8.8x at %d.  Got 0x%8.8x\n", reference[i>>12], i, u );
+                errcount++;
+            }
+        }
+    }
+
+    free_mtdata(d);
+
+    if( errcount )
+        printf("mt19937 test failed.\n");
+    else
+        printf("mt19937 test passed.\n");
+
+
+    return 0;
+}
--- a/test_common/harness/threadTesting.c
+++ b/test_common/harness/threadTesting.c
@@ -0,0 +1,100 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "compat.h"
+#include "threadTesting.h"
+#include "errorHelpers.h"
+#include <stdio.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <pthread.h>
+#endif
+
+#if 0 // Disabed for now
+
+typedef struct
+{
+    basefn            mFunction;
+    cl_device_id    mDevice;
+    cl_context        mContext;
+    int                mNumElements;
+} TestFnArgs;
+
+////////////////////////////////////////////////////////////////////////////////
+// Thread-based testing. Spawns a new thread to run the given test function,
+// then waits for it to complete. The entire idea is that, if the thread crashes,
+// we can catch it and report it as a failure instead of crashing the entire suite
+////////////////////////////////////////////////////////////////////////////////
+
+void *test_thread_wrapper( void *data )
+{
+    TestFnArgs *args;
+    int retVal;
+    cl_context context;
+
+    args = (TestFnArgs *)data;
+
+    /* Create a new context to use (contexts can't cross threads) */
+    context = clCreateContext(NULL, args->mDeviceGroup);
+    if( context == NULL )
+    {
+        log_error("clCreateContext failed for new thread\n");
+        return (void *)(-1);
+    }
+
+    /* Call function */
+    retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
+
+    clReleaseContext( context );
+
+    return (void *)retVal;
+}
+
+int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
+{
+    int error;
+    pthread_t threadHdl;
+    void *retVal;
+    TestFnArgs args;
+
+
+    args.mFunction = fnToTest;
+    args.mDeviceGroup = deviceGroup;
+    args.mDevice = device;
+    args.mContext = context;
+    args.mNumElements = numElements;
+
+
+    error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
+    if( error != 0 )
+    {
+        log_error( "ERROR: Unable to create thread for testing!\n" );
+        return -1;
+    }
+
+    /* Thread has been started, now just wait for it to complete (or crash) */
+    error = pthread_join( threadHdl, &retVal );
+    if( error != 0 )
+    {
+        log_error( "ERROR: Unable to join testing thread!\n" );
+        return -1;
+    }
+
+    return (int)((intptr_t)retVal);
+}
+#endif
+
+
--- a/test_common/harness/threadTesting.h
+++ b/test_common/harness/threadTesting.h
@@ -0,0 +1,32 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _threadTesting_h
+#define _threadTesting_h
+
+#ifdef __APPLE__
+    #include <OpenCL/opencl.h>
+#else
+    #include <CL/opencl.h>
+#endif
+
+#define TEST_NOT_IMPLEMENTED        -99
+
+typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
+
+#endif // _threadTesting_h
+
+
--- a/test_common/harness/typeWrappers.cpp
+++ b/test_common/harness/typeWrappers.cpp
@@ -0,0 +1,481 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "typeWrappers.h"
+#include "kernelHelpers.h"
+#include "errorHelpers.h"
+#include <stdlib.h>
+#include "clImageHelper.h"
+
+#define ROUND_SIZE_UP( _size, _align )      (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align)))
+
+#if defined( __APPLE__ )
+    #define kPageSize       4096
+    #include <sys/mman.h>
+    #include <stdlib.h>
+#elif defined(__linux__)
+    #include <unistd.h>
+    #define kPageSize  (getpagesize())
+#endif
+
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
+{
+    cl_int err = Create( context, mem_flags, fmt, width );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
+}
+
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
+{
+    cl_int error;
+#if defined( __APPLE__ )
+    int protect_pages = 1;
+    cl_device_id devices[16];
+    size_t number_of_devices;
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+
+    number_of_devices /= sizeof(cl_device_id);
+    for (int i=0; i<(int)number_of_devices; i++) {
+        cl_device_type type;
+        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+        if (type == CL_DEVICE_TYPE_GPU) {
+            protect_pages = 0;
+            break;
+        }
+    }
+
+    if (protect_pages) {
+        size_t pixelBytes = get_pixel_bytes(fmt);
+        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+        size_t rowStride = rowBytes + kPageSize;
+
+        // create backing store
+        backingStoreSize = rowStride + 8 * rowStride;
+        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+
+        // add guard pages
+        size_t row;
+        char *p = (char*) backingStore;
+        char *imagePtr = (char*) backingStore + 4 * rowStride;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+        p += rowBytes;
+        mprotect( p, kPageSize, PROT_NONE );        p += rowStride;
+        p -= rowBytes;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+
+        if(  getenv( "CL_ALIGN_RIGHT" ) )
+        {
+            static int spewEnv = 1;
+            if(spewEnv)
+            {
+                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+                spewEnv = 0;
+            }
+            imagePtr += rowBytes - pixelBytes * width;
+        }
+
+        image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
+    } else {
+        backingStore = NULL;
+        image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
+
+    }
+#else
+
+    backingStore = NULL;
+    image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
+
+#endif
+    return error;
+}
+
+
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
+{
+    cl_int err = Create( context, mem_flags, fmt, width, height );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
+}
+
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
+{
+    cl_int error;
+#if defined( __APPLE__ )
+  int protect_pages = 1;
+  cl_device_id devices[16];
+  size_t number_of_devices;
+  error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+  test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+
+  number_of_devices /= sizeof(cl_device_id);
+  for (int i=0; i<(int)number_of_devices; i++) {
+    cl_device_type type;
+    error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+    if (type == CL_DEVICE_TYPE_GPU) {
+      protect_pages = 0;
+      break;
+    }
+  }
+
+  if (protect_pages) {
+    size_t pixelBytes = get_pixel_bytes(fmt);
+    size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+    size_t rowStride = rowBytes + kPageSize;
+
+    // create backing store
+    backingStoreSize = height * rowStride + 8 * rowStride;
+    backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+
+    // add guard pages
+    size_t row;
+    char *p = (char*) backingStore;
+    char *imagePtr = (char*) backingStore + 4 * rowStride;
+    for( row = 0; row < 4; row++ )
+    {
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+    }
+    p += rowBytes;
+    for( row = 0; row < height; row++ )
+    {
+        mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
+    }
+    p -= rowBytes;
+    for( row = 0; row < 4; row++ )
+    {
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+    }
+
+    if(  getenv( "CL_ALIGN_RIGHT" ) )
+    {
+      static int spewEnv = 1;
+      if(spewEnv)
+      {
+        log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+        spewEnv = 0;
+      }
+      imagePtr += rowBytes - pixelBytes * width;
+    }
+
+      image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
+  } else {
+    backingStore = NULL;
+      image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
+
+  }
+#else
+
+  backingStore = NULL;
+  image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
+
+#endif
+    return error;
+}
+
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
+{
+    cl_int err = Create( context, mem_flags, fmt, width, height, depth );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
+}
+
+cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
+{
+    cl_int error;
+
+#if defined( __APPLE__ )
+  int protect_pages = 1;
+  cl_device_id devices[16];
+  size_t number_of_devices;
+  error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+  test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+
+  number_of_devices /= sizeof(cl_device_id);
+  for (int i=0; i<(int)number_of_devices; i++) {
+    cl_device_type type;
+    error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+    if (type == CL_DEVICE_TYPE_GPU) {
+      protect_pages = 0;
+      break;
+    }
+  }
+
+  if (protect_pages) {
+    size_t pixelBytes = get_pixel_bytes(fmt);
+    size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+    size_t rowStride = rowBytes + kPageSize;
+
+    // create backing store
+    backingStoreSize = height * depth * rowStride + 8 * rowStride;
+    backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+
+    // add guard pages
+    size_t row;
+    char *p = (char*) backingStore;
+    char *imagePtr = (char*) backingStore + 4 * rowStride;
+    for( row = 0; row < 4; row++ )
+    {
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+    }
+    p += rowBytes;
+    for( row = 0; row < height*depth; row++ )
+    {
+        mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
+    }
+    p -= rowBytes;
+    for( row = 0; row < 4; row++ )
+    {
+        mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+    }
+
+    if(  getenv( "CL_ALIGN_RIGHT" ) )
+    {
+        static int spewEnv = 1;
+        if(spewEnv)
+        {
+            log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+            spewEnv = 0;
+        }
+        imagePtr += rowBytes - pixelBytes * width;
+    }
+
+    image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
+  } else {
+    backingStore = NULL;
+    image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
+  }
+#else
+
+    backingStore = NULL;
+    image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
+
+#endif
+
+    return error;
+}
+
+
+clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
+{
+    cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
+    if( errcode_ret != NULL )
+        *errcode_ret = err;
+}
+
+cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
+{
+    cl_int error;
+#if defined( __APPLE__ )
+    int protect_pages = 1;
+    cl_device_id devices[16];
+    size_t number_of_devices;
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+
+    number_of_devices /= sizeof(cl_device_id);
+    for (int i=0; i<(int)number_of_devices; i++) {
+        cl_device_type type;
+        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
+        if (type == CL_DEVICE_TYPE_GPU) {
+            protect_pages = 0;
+            break;
+        }
+    }
+
+    if (protect_pages) {
+        size_t pixelBytes = get_pixel_bytes(fmt);
+        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
+        size_t rowStride = rowBytes + kPageSize;
+
+        // create backing store
+        switch (imageType)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                backingStoreSize = rowStride + 8 * rowStride;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                backingStoreSize = height * rowStride + 8 * rowStride;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                backingStoreSize = height * depth * rowStride + 8 * rowStride;
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                backingStoreSize = arraySize * rowStride + 8 * rowStride;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                backingStoreSize = height * arraySize * rowStride + 8 * rowStride;
+                break;
+        }
+        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+
+        // add guard pages
+        size_t row;
+        char *p = (char*) backingStore;
+        char *imagePtr = (char*) backingStore + 4 * rowStride;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+        p += rowBytes;
+        size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1);
+        for( row = 0; row < sz; row++ )
+        {
+            mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
+        }
+        p -= rowBytes;
+        for( row = 0; row < 4; row++ )
+        {
+            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
+        }
+
+        if(  getenv( "CL_ALIGN_RIGHT" ) )
+        {
+            static int spewEnv = 1;
+            if(spewEnv)
+            {
+                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
+                spewEnv = 0;
+            }
+            imagePtr += rowBytes - pixelBytes * width;
+        }
+
+        switch (imageType)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error );
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error );
+                break;
+        }
+    } else {
+        backingStore = NULL;
+        switch (imageType)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
+                break;
+        }
+
+    }
+#else
+
+    backingStore = NULL;
+    switch (imageType)
+    {
+        case CL_MEM_OBJECT_IMAGE1D:
+            image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
+            break;
+        case CL_MEM_OBJECT_IMAGE2D:
+            image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
+            break;
+        case CL_MEM_OBJECT_IMAGE3D:
+            image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
+            break;
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+            image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
+            break;
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+            image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
+            break;
+    }
+#endif
+    return error;
+}
+
+
+
+/*******
+ * clProtectedArray implementation
+ *******/
+clProtectedArray::clProtectedArray()
+{
+    mBuffer = mValidBuffer = NULL;
+}
+
+clProtectedArray::clProtectedArray( size_t sizeInBytes )
+{
+    mBuffer = mValidBuffer = NULL;
+    Allocate( sizeInBytes );
+}
+
+clProtectedArray::~clProtectedArray()
+{
+    if( mBuffer != NULL ) {
+#if defined( __APPLE__ )
+        int error = munmap( mBuffer, mRealSize );
+      if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
+#else
+    free( mBuffer );
+#endif
+  }
+}
+
+void clProtectedArray::Allocate( size_t sizeInBytes )
+{
+
+#if defined( __APPLE__ )
+
+    // Allocate enough space to: round up our actual allocation to an even number of pages
+    // and allocate two pages on either side
+    mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
+    mRealSize = mRoundedSize + kPageSize * 2;
+
+    // Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
+    mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
+
+    mValidBuffer = mBuffer + kPageSize;
+
+    // Protect guard area from access
+    mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
+    mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
+#else
+  mRoundedSize = mRealSize = sizeInBytes;
+  mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
+#endif
+}
+
+
--- a/test_common/harness/typeWrappers.h
+++ b/test_common/harness/typeWrappers.h
@@ -0,0 +1,332 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _typeWrappers_h
+#define _typeWrappers_h
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if !defined(_WIN32)
+#include <sys/mman.h>
+#endif
+
+#include "compat.h"
+#include <stdio.h>
+#include "mt19937.h"
+#include "errorHelpers.h"
+#include "kernelHelpers.h"
+
+extern "C" cl_uint gReSeed;
+extern "C" cl_uint gRandomSeed;
+
+/* cl_context wrapper */
+
+class clContextWrapper
+{
+    public:
+        clContextWrapper() { mContext = NULL; }
+        clContextWrapper( cl_context program ) { mContext = program; }
+        ~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
+
+        clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
+        operator cl_context() const { return mContext; }
+
+        cl_context * operator&() { return &mContext; }
+
+        bool operator==( const cl_context &rhs ) { return mContext == rhs; }
+
+    protected:
+
+        cl_context mContext;
+};
+
+/* cl_program wrapper */
+
+class clProgramWrapper
+{
+    public:
+        clProgramWrapper() { mProgram = NULL; }
+        clProgramWrapper( cl_program program ) { mProgram = program; }
+        ~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
+
+        clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
+        operator cl_program() const { return mProgram; }
+
+        cl_program * operator&() { return &mProgram; }
+
+        bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
+
+    protected:
+
+        cl_program mProgram;
+};
+
+/* cl_kernel wrapper */
+
+class clKernelWrapper
+{
+    public:
+        clKernelWrapper() { mKernel = NULL; }
+        clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
+        ~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
+
+        clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
+        operator cl_kernel() const { return mKernel; }
+
+        cl_kernel * operator&() { return &mKernel; }
+
+        bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
+
+    protected:
+
+        cl_kernel mKernel;
+};
+
+/* cl_mem (stream) wrapper */
+
+class clMemWrapper
+{
+    public:
+        clMemWrapper() { mMem = NULL; }
+        clMemWrapper( cl_mem mem ) { mMem = mem; }
+        ~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
+
+        clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
+        operator cl_mem() const { return mMem; }
+
+        cl_mem * operator&() { return &mMem; }
+
+        bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
+
+    protected:
+
+        cl_mem mMem;
+};
+
+class clProtectedImage
+{
+    public:
+        clProtectedImage() { image = NULL; backingStore = NULL; }
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
+        clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
+        ~clProtectedImage()
+        {
+            if( image != NULL )
+                clReleaseMemObject( image );
+
+#if defined( __APPLE__ )
+            if(backingStore)
+                munmap(backingStore, backingStoreSize);
+#endif
+        }
+
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
+        cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
+        cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
+
+        clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
+        operator cl_mem() { return image; }
+
+        cl_mem * operator&() { return &image; }
+
+        bool operator==( const cl_mem &rhs ) { return image == rhs; }
+
+    protected:
+        void *backingStore;
+        size_t backingStoreSize;
+        cl_mem  image;
+};
+
+/* cl_command_queue wrapper */
+class clCommandQueueWrapper
+{
+    public:
+        clCommandQueueWrapper() { mMem = NULL; }
+        clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
+  ~clCommandQueueWrapper() { if( mMem != NULL ) { clReleaseCommandQueue( mMem ); } }
+
+        clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
+        operator cl_command_queue() const { return mMem; }
+
+        cl_command_queue * operator&() { return &mMem; }
+
+        bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
+
+    protected:
+
+        cl_command_queue mMem;
+};
+
+/* cl_sampler wrapper */
+class clSamplerWrapper
+{
+    public:
+        clSamplerWrapper() { mMem = NULL; }
+        clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
+        ~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
+
+        clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
+        operator cl_sampler() const { return mMem; }
+
+        cl_sampler * operator&() { return &mMem; }
+
+        bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
+
+    protected:
+
+        cl_sampler mMem;
+};
+
+/* cl_event wrapper */
+class clEventWrapper
+{
+    public:
+        clEventWrapper() { mMem = NULL; }
+        clEventWrapper( cl_event mem ) { mMem = mem; }
+        ~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
+
+        clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
+        operator cl_event() const { return mMem; }
+
+        cl_event * operator&() { return &mMem; }
+
+        bool operator==( const cl_event &rhs ) { return mMem == rhs; }
+
+    protected:
+
+        cl_event mMem;
+};
+
+/* Generic protected memory buffer, for verifying access within bounds */
+class clProtectedArray
+{
+    public:
+        clProtectedArray();
+        clProtectedArray( size_t sizeInBytes );
+        virtual ~clProtectedArray();
+
+        void    Allocate( size_t sizeInBytes );
+
+        operator void *()        { return (void *)mValidBuffer; }
+        operator const void *() const { return (const void *)mValidBuffer; }
+
+    protected:
+
+         char *    mBuffer;
+         char * mValidBuffer;
+        size_t    mRealSize, mRoundedSize;
+};
+
+class RandomSeed
+{
+    public:
+        RandomSeed( cl_uint seed  ){ if(seed) log_info( "(seed = %10.10u) ", seed ); mtData = init_genrand(seed); }
+        ~RandomSeed()
+        {
+            if( gReSeed )
+                gRandomSeed = genrand_int32( mtData );
+            free_mtdata(mtData);
+        }
+
+        operator MTdata ()     {return mtData;}
+
+    protected:
+        MTdata mtData;
+};
+
+
+template <typename T> class BufferOwningPtr
+{
+  BufferOwningPtr(BufferOwningPtr const &); // do not implement
+    void operator=(BufferOwningPtr const &);  // do not implement
+
+    void *ptr;
+    void *map;
+  size_t mapsize;   // Bytes allocated total, pointed to by map.
+  size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr.
+  bool aligned;
+  public:
+  explicit BufferOwningPtr(void *p = 0) : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false) {}
+  explicit BufferOwningPtr(void *p, void *m, size_t s)
+    : ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
+      {
+#if ! defined( __APPLE__ )
+        if(m)
+        {
+            log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
+            abort();
+        }
+#endif
+      }
+    ~BufferOwningPtr() {
+      if (map) {
+#if defined( __APPLE__ )
+        int error = munmap(map, mapsize);
+        if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
+#endif
+      } else {
+          if ( aligned )
+          {
+              align_free(ptr);
+          }
+          else
+          {
+            free(ptr);
+          }
+      }
+    }
+  void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false) {
+      if (map){
+#if defined( __APPLE__ )
+        int error = munmap(map, mapsize);
+        if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
+#else
+        log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" );
+        abort();
+#endif
+      } else {
+          if ( aligned )
+          {
+              align_free(ptr);
+          }
+          else
+          {
+            free(ptr);
+          }
+      }
+      ptr = p;
+      map = m;
+      mapsize = mapsize_;
+      allocsize =  (ptr != NULL) ? allocsize_ : 0; // Force allocsize to zero if ptr is NULL.
+      aligned = aligned_;
+#if ! defined( __APPLE__ )
+        if(m)
+        {
+            log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
+            abort();
+        }
+#endif
+    }
+    operator T*() { return (T*)ptr; }
+
+      size_t getSize() const { return allocsize; };
+};
+
+#endif // _typeWrappers_h
+