Synchronise with Khronos-private Gitlab branch

The maintenance of the conformance tests is moving to Github. This commit contains all the changes that have been done in Gitlab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
2026-03-25 16:29:03 +00:00 · 2019-02-20 16:36:05 +00:00
parent 95196e7fb4
commit d8733efc0f
576 changed files with 212486 additions and 191776 deletions
--- a/clean_tests.py
+++ b/clean_tests.py
@@ -0,0 +1,104 @@
 #!/usr/bin/python
 import sys, os, re
 from subprocess import Popen, PIPE
 from optparse import OptionParser
 # trail_spaces: This function removes trailing whitespace (spaces, tabs and
 #               the line terminator) and re-attaches a single line ending
 def trail_spaces(line):
     """Return line with its trailing whitespace collapsed to one line ending.
     The ending is "\\r\\n" when the line contains a DOS terminator and "\\n"
     otherwise.  A line without trailing whitespace is returned unchanged.
     Note that a line which ends in blanks but has no terminator (typically
     the last line of a file) comes back with a newline appended.
     """
     # Keep the DOS terminator only when the line actually carried one.
     ending = "\r\n" if "\r\n" in line else "\n"
     # Raw string: "\s" is not a valid str escape.  re.sub() leaves the line
     # untouched when nothing matches, and "\s+$" already swallows trailing
     # tabs, so no separate tab pass is needed.
     return re.sub(r"\s+$", ending, line)
 # convert_tabs: This function expands every tab character to 4 spaces
 def convert_tabs(line):
     """Return line with each tab character replaced by four spaces."""
     return line.replace("\t", "    ")
 # convert_lineends: This function converts DOS line endings to Unix ones
 def convert_lineends(line):
     """Return line with every "\\r\\n" pair replaced by a single "\\n"."""
     return line.replace("\r\n", "\n")
 # processfile: This function rewrites one file in place, applying the
 #              clean-ups selected by the flags passed
 def processfile(file, tabs, lineends, trails, verbose):
     """Clean up the file at path `file` in place.
     tabs     -- when true, run convert_tabs() on every line
     lineends -- when true, run convert_lineends() on every line
     trails   -- when true, run trail_spaces() on every line
     verbose  -- when true, print the path before processing it
     The clean-ups are applied to each line in the order listed above and
     the file is then overwritten with the result.
     """
     if verbose:
         # Single-argument call form: prints the same text on Python 2 and 3,
         # whereas the bare print statement is a syntax error on Python 3.
         print("processing file: " + file)
     with open(file, 'r') as fr:
         data = fr.readlines()
     # NOTE(review): on Python 3 text mode translates "\r\n" to "\n" while
     # reading, so there the line endings are converted even when lineends
     # is false -- confirm whether that matters for your interpreter.
     processed_data = []
     for line in data:
         if tabs:
             line = convert_tabs(line)
         if lineends:
             line = convert_lineends(line)
         if trails:
             line = trail_spaces(line)
         processed_data.append(line)
     with open(file, 'w') as fw:
         fw.writelines(processed_data)
 # findfiles: This function collects every code file below the current
 #            directory and hands each one to processfile().
 def findfiles(tabs,lineends,trails,verbose):
     """Process all .c, .cpp, .h and .hpp files under the current directory.
     The four flags are forwarded unchanged to processfile().  The whole
     tree is scanned first; the files are rewritten only afterwards.
     """
     code_suffixes = ('.c', '.cpp', '.h', '.hpp')
     selected = []
     for dirpath, _subdirs, names in os.walk("./"):
         selected.extend(os.path.join(dirpath, name)
                         for name in names
                         if name.endswith(code_suffixes))
     for path in selected:
         processfile(path, tabs, lineends, trails, verbose)
 # Main function
 def main():
     """Parse the command line and clean the selected files.
     Without --git every code file below the current directory is processed
     (see findfiles()).  With --git <SHA1> only the files named by
     `git show --name-only <SHA1>` are processed.
     """
     parser = OptionParser()
     parser.add_option("--notabs", dest="tabs", action="store_false", default=True, help="Disable converting tabs to 4 spaces.")
     parser.add_option("--notrails", dest="trails", action="store_false", default=True, help="Disable removing trailing whitespaces and trailing tabs.")
     parser.add_option("--nolineends", dest="lineends", action="store_false", default=True, help=" Disable converting line endings to Unix from DOS.")
     parser.add_option("--verbose", dest="verbose", action="store_true", default=False, help="Prints out the files being processed.")
     parser.add_option("--git", dest="SHA1", default="", help="Processes only the files present in the particular <SHA1> commit.")
     parser.add_option('-o', action="store", default=True, help="Default: All the code files (.c,.cpp,.h,.hpp) in the current directory and subdirectories will be processed")
     options, _unused_args = parser.parse_args()
     if not options.SHA1:
         findfiles(options.tabs, options.lineends, options.trails, options.verbose)
         return
     # universal_newlines=True makes communicate() return text instead of
     # bytes on Python 3, so the output can be split into path strings.
     pl = Popen(["git", "show", "--pretty=format:", "--name-only", options.SHA1],
                stdout=PIPE, universal_newlines=True)
     cmdout = pl.communicate()[0]
     if pl.returncode != 0:
         # git has already reported the problem on stderr (e.g. unknown SHA1).
         sys.exit(pl.returncode)
     for path in cmdout.splitlines():
         if not path:
             continue
         if not os.path.isfile(path):
             # The commit deleted or renamed this path (or we are not in the
             # directory the path is relative to): nothing to open.
             sys.stderr.write("skipping missing file: " + path + "\n")
             continue
         print(path)
         processfile(path, options.tabs, options.lineends, options.trails, options.verbose)
 # Start the process by calling main, but only when run as a script, so that
 # importing this module to reuse its helpers does not rewrite any files.
 if __name__ == "__main__":
     main()
--- a/test_common/harness/ThreadPool.c
+++ b/test_common/harness/ThreadPool.c
@@ -32,6 +32,9 @@
 #include <pthread.h>
 #include <unistd.h>
 #include <sys/errno.h>
 #ifdef __linux__
 #include <sched.h>
 #endif
 #endif // !_WIN32
 // declarations
@@ -251,7 +254,6 @@ void *ThreadPool_WorkerFunc( void *p )
 {
    cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
    cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
    ThreadPool_AtomicAdd( &gRunning, 1 );
 //    log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
    while( MAX_COUNT > item )
@@ -444,7 +446,6 @@ void ThreadPool_Init(void)
    // Check for manual override of multithreading code. We add this for better debuggability.
    if( getenv( "CL_TEST_SINGLE_THREADED" ) )
    {
        log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
        gThreadCount = 1;
        return;
    }
@@ -458,7 +459,9 @@ void ThreadPool_Init(void)
        GetLogicalProcessorInformation( NULL, &length );
        buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
-        if( buffer != NULL && GetLogicalProcessorInformation( buffer, &length ) == TRUE )
+        if( buffer != NULL )
        {
            if ( GetLogicalProcessorInformation( buffer, &length ) == TRUE )
        {
            PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
            while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
@@ -475,6 +478,7 @@ void ThreadPool_Init(void)
                }
                ++ptr;
            }
            }
            free(buffer);
        }
 #elif defined (__MINGW32__)
@@ -484,6 +488,20 @@ void ThreadPool_Init(void)
            GetSystemInfo( &sysinfo );
            gThreadCount = sysinfo.dwNumberOfProcessors;
        }
 #elif defined (__linux__) && !defined(__ANDROID__)
        cpu_set_t    affinity;
        if ( 0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity) )
        {
 #if !(defined(CPU_COUNT))
        gThreadCount = 1;
 #else
            gThreadCount = CPU_COUNT(&affinity);
 #endif
        }
        else
        {
            gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF);       // Hopefully your system returns logical cpus here, as does MacOS X
        }
 #else // !_WIN32
        gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF);       // Hopefully your system returns logical cpus here, as does MacOS X
 #endif // !_WIN32
@@ -493,6 +511,18 @@ void ThreadPool_Init(void)
            gThreadCount = 2;
    }
    // When working in 32 bit limit the thread number to 12
    // This fix was made due to memory issues in integer_ops test
    // When running integer_ops, the test opens as many threads as the
    // machine has and each thread allocates a fixed amount of memory
    // When running this test on dual socket machine in 32-bit, the
    // process memory is not sufficient and the test fails
    #if defined(_WIN32) && !defined(_M_X64)
        if (gThreadCount > 12) {
            gThreadCount = 12;
        }
    #endif
    //Allow the app to set thread count to <0 for debugging purposes.  This will cause the test to run single threaded.
    if( gThreadCount < 2 )
    {
@@ -532,6 +562,7 @@ void ThreadPool_Init(void)
    }
 #endif // !_WIN32
    gRunning = gThreadCount;
    // init threads
    for( i = 0; i < gThreadCount; i++ )
    {
@@ -745,6 +776,7 @@ cl_int ThreadPool_Do( TPFuncPtr func_ptr,
    gUserInfo = userInfo;
 #if defined( _WIN32 )
    ResetEvent(caller_event);
    _WakeAllConditionVariable( cond_var );
    LeaveCriticalSection( cond_lock );
 #else // !_WIN32
--- a/test_common/harness/compat.h
+++ b/test_common/harness/compat.h
@@ -17,27 +17,51 @@
 #define _COMPAT_H_
 #if defined(_WIN32) && defined (_MSC_VER)
 #include <Windows.h>
 #include <Winbase.h>
 #include <CL/cl.h>
 #include <float.h>
 #include <xmmintrin.h>
 #define MAKE_HEX_FLOAT(x,y,z)  ((float)ldexp( (float)(y), z))
 #define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
 #define MAKE_HEX_LONG(x,y,z)   ((long double) ldexp( (long double)(y), z))
 #define isfinite(x) _finite(x)
 #if !defined(__cplusplus)
 typedef char bool;
 #define inline
 #else
 extern "C" {
 #endif
 #ifdef __cplusplus
    #define EXTERN_C extern "C"
 #else
    #define EXTERN_C
 #endif
 //
 // stdlib.h
 //
 #include <stdlib.h>     // On Windows, _MAX_PATH defined there.
 // llabs appeared in MS C v16 (VS 10/2010).
 #if defined( _MSC_VER ) && _MSC_VER <= 1500
    EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
 #endif
 //
 // stdbool.h
 //
 // stdbool.h appeared in MS C v18 (VS 12/2013). Older MSVC releases get a
 // minimal C-only replacement; every other compiler includes the real header.
 // The version macro is _MSC_VER (leading underscore): a misspelt name is an
 // undefined identifier, evaluates to 0 in #if, and would make this test true
 // for every MSVC version.
 #if defined( _MSC_VER ) && _MSC_VER <= 1700
    #if !defined(__cplusplus)
        typedef char bool;
        #define true  1
        #define false 0
    #endif
 #else
    #include <stdbool.h>
 #endif
 //
 // stdint.h
 //
 // stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
 #if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
 typedef unsigned char       uint8_t;
 typedef char                int8_t;
 typedef unsigned short      uint16_t;
@@ -46,25 +70,83 @@ typedef unsigned int        uint32_t;
 typedef int                 int32_t;
 typedef unsigned long long  uint64_t;
 typedef long long           int64_t;
-
+#else
-#define MAXPATHLEN MAX_PATH
+#ifndef __STDC_LIMIT_MACROS
-
+#define __STDC_LIMIT_MACROS
-typedef unsigned short ushort;
+#endif
-typedef unsigned int   uint;
+    #include <stdint.h>
-typedef unsigned long  ulong;
+#endif
-#define INFINITY    (FLT_MAX + FLT_MAX)
+
-//#define NAN (INFINITY | 1)
+//
-//const static int PINFBITPATT_SP32  = INFINITY;
+// float.h
 //
 #include <float.h>
 //
 // fenv.h
 //
 // fenv.h appeared in MS C v18 (VS 12/2013).
 #if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
    // reimplement fenv.h because windows doesn't have it
    #define FE_INEXACT          0x0020
    #define FE_UNDERFLOW        0x0010
    #define FE_OVERFLOW         0x0008
    #define FE_DIVBYZERO        0x0004
    #define FE_INVALID          0x0001
    #define FE_ALL_EXCEPT       0x003D
    int fetestexcept(int excepts);
    int feclearexcept(int excepts);
 #else
    #include <fenv.h>
 #endif
 //
 // math.h
 //
 #if defined( __INTEL_COMPILER )
    #include <mathimf.h>
 #else
    #include <math.h>
 #endif
 #if defined( _MSC_VER )
    #ifdef __cplusplus
        extern "C" {
    #endif
 #ifndef M_PI
    #define M_PI    3.14159265358979323846264338327950288
 #endif
    #if ! defined( __INTEL_COMPILER )
        #ifndef NAN
            #define NAN  (INFINITY - INFINITY)
        #endif
        #ifndef HUGE_VALF
            #define HUGE_VALF (float)HUGE_VAL
        #endif
        #ifndef INFINITY
            #define INFINITY    (FLT_MAX + FLT_MAX)
        #endif
        #ifndef isfinite
            #define isfinite(x) _finite(x)
        #endif
        #ifndef isnan
 #define    isnan( x )       ((x) != (x))
        #endif
        #ifndef isinf
 #define     isinf( _x)      ((_x) == INFINITY || (_x) == -INFINITY)
        #endif
 double rint( double x);
 float  rintf( float x);
@@ -98,27 +180,6 @@ long double remquol( long double x, long double y, int *quo);
 long double scalblnl(long double x, long n);
 inline long long
 llabs(long long __x) { return __x >= 0 ? __x : -__x; }
 // end of math functions
 uint64_t ReadTime( void );
 double SubtractTime( uint64_t endTime, uint64_t startTime );
 #define sleep(X)   Sleep(1000*X)
 #define snprintf   sprintf_s
 //#define hypotl     _hypot
 float   make_nan();
 float nanf( const char* str);
 double  nan( const char* str);
 long double nanl( const char* str);
 //#if defined USE_BOOST
 //#include <boost/math/tr1.hpp>
 //double hypot(double x, double y);
 float hypotf(float x, float y);
 long double hypotl(long double x, long double y) ;
 double lgamma(double x);
@@ -143,58 +204,190 @@ double round(double x);
 float  roundf(float x);
 long double roundl(long double x);
-int signbit(double x);
+        int cf_signbit(double x);
-int signbitf(float x);
+        int cf_signbitf(float x);
-//bool signbitl(long double x)         { return boost::math::tr1::signbit<long double>(x); }
+// Added in _MSC_VER == 1800 (Visual Studio 2013)
-//#endif // USE_BOOST
+#if _MSC_VER < 1800
        static int signbit(double x) { return  cf_signbit(x); }
 #endif
        static int signbitf(float x) { return cf_signbitf(x); }
 long int lrint (double flt);
 long int lrintf (float flt);
 float   int2float (int32_t ix);
 int32_t float2int (float   fx);
    #endif
    #if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
        // These functions appeared in Intel C v13.
        float  nanf( const char* str);
        double nan( const char* str);
        long double nanl( const char* str);
    #endif
    #ifdef __cplusplus
        }
    #endif
 #endif
 #if defined( __ANDROID__ )
    #define log2(X)  (log(X)/log(2))
 #endif
 //
 // stdio.h
 //
 #if defined(_MSC_VER)
 	// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
 	#if _MSC_VER < 1900
 		#define snprintf   sprintf_s
 	#endif
 #endif
 //
 // unistd.h
 //
 #if defined( _MSC_VER )
    EXTERN_C unsigned int sleep( unsigned int sec );
    EXTERN_C int usleep( int usec );
 #endif
 //
 // syscall.h
 //
 #if defined( __ANDROID__ )
    // Android's bionic does not provide SYS_sysctl wrappers.
    #define SYS__sysctl  __NR__sysctl
 #elif defined( __aarch64__ )
    // Enable deprecated syscalls on arm 64-bit.
    #define __ARCH_WANT_SYSCALL_DEPRECATED
    // And use the NR variant of syscall too.
    #define SYS__sysctl  __NR__sysctl
 #endif
 // Some tests use _malloca, which is defined in malloc.h.
 #if !defined (__APPLE__)
 #include <malloc.h>
 #endif
 //
 // ???
 //
 #if defined( _MSC_VER )
    #define MAXPATHLEN _MAX_PATH
    EXTERN_C uint64_t ReadTime( void );
    EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
 /** Returns the number of leading 0-bits in x,
    starting at the most significant bit position.
    If x is 0, the result is undefined.
 */
-int __builtin_clz(unsigned int pattern);
+    EXTERN_C int __builtin_clz(unsigned int pattern);
 static const double zero=  0.00000000000000000000e+00;
 #define NAN  (INFINITY - INFINITY)
 #define HUGE_VALF (float)HUGE_VAL
 int usleep(int usec);
 // reimplement fenv.h because windows doesn't have it
 #define FE_INEXACT          0x0020
 #define FE_UNDERFLOW        0x0010
 #define FE_OVERFLOW         0x0008
 #define FE_DIVBYZERO        0x0004
 #define FE_INVALID          0x0001
 #define FE_ALL_EXCEPT       0x003D
 int fetestexcept(int excepts);
 int feclearexcept(int excepts);
 #ifdef __cplusplus
 }
 #endif
 #else // !((defined(_WIN32) && defined(_MSC_VER)
 #if defined(__MINGW32__)
 #include <windows.h>
 #define sleep(X)   Sleep(1000*X)
 #endif
 #ifndef MIN
    #define MIN(x,y) (((x)<(y))?(x):(y))
 #endif
 #ifndef MAX
    #define MAX(x,y) (((x)>(y))?(x):(y))
 #endif
 /*
    ------------------------------------------------------------------------------------------------
    WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
    This is a typical usage of the macros:
        double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
     (taken from math_brute_force/reference_math.c). There are two problems:
        1.  There is an error here. On Windows it will produce the incorrect result
            `0x1.5555555555555p+50'. To have a correct result it should be written as
            `MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
            third argument is not obvious -- sometimes it should be the same as exponent of the
            first argument, but sometimes not.
        2.  Information is duplicated. It is easy to make a mistake.
    Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
    ------------------------------------------------------------------------------------------------
 */
 #if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
    #define MAKE_HEX_FLOAT(x,y,z)  ((float)ldexp( (float)(y), z))
    #define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
    #define MAKE_HEX_LONG(x,y,z)   ((long double) ldexp( (long double)(y), z))
 #else
 // Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
 #define MAKE_HEX_FLOAT(x,y,z) x
 #define MAKE_HEX_DOUBLE(x,y,z) x
 #define MAKE_HEX_LONG(x,y,z) x
-#endif // !((defined(_WIN32) && defined(_MSC_VER)
+#endif
 /*
    ------------------------------------------------------------------------------------------------
    HEX_FLT, HEX_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
    double respectively. Arguments:
        sm    -- sign of number,
        int   -- integer part of mantissa (without `0x' prefix),
        fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
        se    -- sign of exponent,
        exp   -- absolute value of (binary) exponent.
    Example:
        double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
    Note:
        We have to pass signs as separate arguments because gcc passes negative integer values
        (e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
        `0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
        literal.
    ------------------------------------------------------------------------------------------------
 */
 #if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
    // If compiler does not support hex floating point literals:
    #define HEX_FLT(  sm, int, fract, se, exp ) sm ldexpf(       (float)( 0x ## int ## fract ## UL  ), se exp + ilogbf(       (float) 0x ## int ) - ilogbf(       ( float )( 0x ## int ## fract ## UL  ) ) )
    #define HEX_DBL(  sm, int, fract, se, exp ) sm ldexp(       (double)( 0x ## int ## fract ## ULL ), se exp + ilogb(       (double) 0x ## int ) - ilogb(       ( double )( 0x ## int ## fract ## ULL ) ) )
    #define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
 #else
    // If compiler supports hex floating point literals: just concatenate all the parts into a literal.
    #define HEX_FLT(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
    #define HEX_DBL(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
    #define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
 #endif
 #if defined(__MINGW32__)
    #include <Windows.h>
    #define sleep(sec) Sleep((sec) * 1000)
 #endif
 #endif // _COMPAT_H_
--- a/test_common/harness/conversions.h
+++ b/test_common/harness/conversions.h
@@ -16,15 +16,14 @@
 #ifndef _conversions_h
 #define _conversions_h
 #include "compat.h"
 #include "errorHelpers.h"
 #include "mt19937.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <float.h>
 #include <string.h>
 #include <sys/types.h>
 #include "compat.h"
 #if defined(__cplusplus)
 extern "C" {
--- a/test_common/harness/fpcontrol.h
+++ b/test_common/harness/fpcontrol.h
@@ -25,7 +25,7 @@
 // rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
 #if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
    typedef int     FPU_mode_type;
-#if defined( __i386__ ) || defined( __x86_64__ )
+#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
    #include <xmmintrin.h>
 #elif defined( __PPC__ )
    #include <fpu_control.h>
@@ -45,6 +45,12 @@
        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
        *mode = fpscr;
        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
        // Add 64 bit support
 #elif defined (__aarch64__)
        unsigned fpcr;
        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
        *mode = fpcr;
        __asm__ volatile ("msr fpcr, %0" :: "r"(fpcr | (1U << 24)));
 #else
        #error ForceFTZ needs an implentation
 #endif
@@ -64,6 +70,12 @@
        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
        *mode = fpscr;
        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
        // Add 64 bit support
 #elif defined (__aarch64__)
        unsigned fpcr;
        __asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
        *mode = fpcr;
        __asm__ volatile ("msr fpcr, %0" :: "r"(fpcr & ~(1U << 24)));
 #else
 #error DisableFTZ needs an implentation
 #endif
@@ -78,6 +90,9 @@
        fpu_control = *mode;
 #elif defined (__arm__)
        __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
        // Add 64 bit support
 #elif defined (__aarch64__)
        __asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
 #else
        #error RestoreFPState needs an implementation
 #endif
--- a/test_common/harness/kernelHelpers.c
+++ b/test_common/harness/kernelHelpers.c
@@ -395,7 +395,7 @@ int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_ob
    }
    free( list );
-    return ( i < count ) ? true : false;
+    return ( i < count ) ? 1 : 0;
 }
 size_t get_pixel_bytes( const cl_image_format *fmt );
@@ -545,8 +545,19 @@ void * align_malloc(size_t size, size_t alignment)
    return _aligned_malloc(size, alignment);
 #elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
    void * ptr = NULL;
    // alignment must be a power of two and a multiple of sizeof(void *).
    if ( alignment < sizeof( void * ) )
    {
        alignment = sizeof( void * );
    }
 #if defined(__ANDROID__)
    ptr = memalign(alignment, size);
    if ( ptr )
        return ptr;
 #else
    if (0 == posix_memalign(&ptr, alignment, size))
        return ptr;
 #endif
    return NULL;
 #elif defined(__MINGW32__)
    return __mingw_aligned_malloc(size, alignment);
@@ -555,6 +566,7 @@ void * align_malloc(size_t size, size_t alignment)
 #endif
 }
 void   align_free(void * ptr)
 {
 #if defined(_WIN32) && defined(_MSC_VER)
--- a/test_common/harness/msvc9.c
+++ b/test_common/harness/msvc9.c
@@ -13,15 +13,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 #if defined(_WIN32) && defined (_MSC_VER)
 #include "compat.h"
 #include <math.h>
 #include <float.h>
 #include <assert.h>
 #include <CL/cl_platform.h>
 #if defined ( _MSC_VER )
 #include <limits.h>
 #include <stdlib.h>
 #include <CL/cl.h>
 #include <windows.h>
 #if ! defined( __INTEL_COMPILER )
 ///////////////////////////////////////////////////////////////////
 //
@@ -387,86 +390,6 @@ long double log2l(long double x)
    return 1.44269504088896340735992468100189214L * log(x);
 }
 ///////////////////////////////////////////////////////////////////
 //
 //                  misc functions
 //
 ///////////////////////////////////////////////////////////////////
 /*
 // This function is commented out because the Windows implementation should never call munmap.
 // If it is calling it, we have a bug. Please file a bugzilla.
 int munmap(void *addr, size_t len)
 {
 // FIXME: this is not correct.  munmap is like free()    http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
    return (int)VirtualAlloc( (LPVOID)addr, len, 
                  MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
 }
 */
 uint64_t ReadTime( void )
 {
    LARGE_INTEGER current;      
    QueryPerformanceCounter(&current);
    return (uint64_t)current.QuadPart;
 }
 double SubtractTime( uint64_t endTime, uint64_t startTime )
 {
    static double PerformanceFrequency = 0.0;
    if (PerformanceFrequency == 0.0) {
        LARGE_INTEGER frequency;
        QueryPerformanceFrequency(&frequency);
        PerformanceFrequency = (double) frequency.QuadPart;
    }
    return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
 }
 float make_nan()
 {
 /* This is the IEEE 754 single-precision format:
    unsigned int mantissa:  22;
    unsigned int quiet_nan:  1;
    unsigned int exponent:   8;
    unsigned int negative:   1; 
 */
     //const static unsigned 
     static const int32_t _nan = 0x7fc00000;
     return *(const float*)(&_nan);
 }
 float nanf( const char* str)
 {
    cl_uint u = atoi( str );
    u |= 0x7fc00000U;
    return *( float*)(&u);
 }
 double nan( const char* str)
 {
    cl_ulong u = atoi( str );
    u |= 0x7ff8000000000000ULL;
    return *( double*)(&u);
 }
 // double check this implementatation 
 long double nanl( const char* str)
 {
    union
    { 
        long double f; 
        struct { cl_ulong m; cl_ushort sexp; }u;
    }u;
    u.u.sexp = 0x7fff;
    u.u.m = 0x8000000000000000ULL | atoi( str );
    return u.f;
 }
 double trunc(double x)
 {
    double absx = fabs(x);
@@ -589,7 +512,167 @@ long double roundl(long double x)
    return x;
 }
-int signbit(double x)               
+float cbrtf( float x )
 {
    float z = pow( fabs((double) x), 1.0 / 3.0 );
    return copysignf( z, x );
 }
 // Cube root of x. pow() is applied to the magnitude |x| and the sign of x is
 // restored afterwards with copysign(), so a negative input yields a negative
 // root.
 double cbrt( double x )
 {
    return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
 }
 // Convert a double to long int, rounding to an integral value first.
 // Inputs at or above LONG_MAX saturate to LONG_MAX.
 // NOTE(review): there is no matching clamp for very negative inputs or NaN;
 // those fall through to the plain casts below.
 long int lrint (double x)
 {
    double absx = fabs(x);
    if( x >= (double) LONG_MAX )
        return LONG_MAX;
    // Below 2^52 a double can still hold fraction bits. Adding and then
    // subtracting 2^52 (carrying the sign of x) makes the floating-point
    // addition drop them -- presumably using the current rounding mode, and
    // assuming the compiler does not fold the two operations away.
    if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
    {
        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
        double rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }
    // |x| >= 2^52: handed straight to the cast.
    return (long int) x;
 }
 // Single-precision counterpart of lrint(): convert a float to long int,
 // rounding to an integral value first. Inputs at or above LONG_MAX saturate
 // to LONG_MAX.
 // NOTE(review): there is no matching clamp for very negative inputs or NaN.
 long int lrintf (float x)
 {
    float absx = fabsf(x);
    if( x >= (float) LONG_MAX )
        return LONG_MAX;
    // Below 2^23 a float can still hold fraction bits. Adding and then
    // subtracting 2^23 (carrying the sign of x) makes the floating-point
    // addition drop them -- presumably using the current rounding mode, and
    // assuming the intermediate is really kept in single precision.
    if( absx < 8388608.0f /* 0x1.0p23f */ )
    {
        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
        float rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }
    // |x| >= 2^23: handed straight to the cast.
    return (long int) x;
 }
 ///////////////////////////////////////////////////////////////////
 //
 //                  fenv functions
 //
 ///////////////////////////////////////////////////////////////////
 #if _MSC_VER < 1900
 // Replacement for fetestexcept(): reads the floating-point status word via
 // _statusfp(), translates each _SW_* bit to the matching FE_* flag, and
 // returns the subset of those flags that is also present in `excepts`.
 int fetestexcept(int excepts)
 {
    unsigned int status = _statusfp();
    return excepts & (
        ((status & _SW_INEXACT) ? FE_INEXACT : 0)      |
        ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)  |
        ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)    |
        ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
        ((status & _SW_INVALID) ? FE_INVALID : 0)
    );
 }
 // Replacement for feclearexcept(): calls _clearfp() and always returns 0.
 // The `excepts` mask is not consulted, so the call is not limited to the
 // flags the caller asked for.
 int feclearexcept(int excepts)
 {
    _clearfp();
    return 0;
 }
 #endif
 #endif // __INTEL_COMPILER
 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1300)
 // Returns the float whose bit pattern is 0x7fc00000, obtained by reading a
 // static 32-bit integer constant through a float pointer.
 float make_nan()
 {
 /* This is the IEEE 754 single-precision format:
    unsigned int mantissa:  22;
    unsigned int quiet_nan:  1;
    unsigned int exponent:   8;
    unsigned int negative:   1;
 */
     //const static unsigned
     static const int32_t _nan = 0x7fc00000;
     return *(const float*)(&_nan);
 }
 // Replacement for nanf(): parses `str` with atoi(), ORs the result with
 // 0x7fc00000 and returns those 32 bits reinterpreted as a float.
 // NOTE(review): `str` is passed to atoi() unchecked -- presumably callers
 // never pass NULL; confirm.
 float nanf( const char* str)
 {
    cl_uint u = atoi( str );
    u |= 0x7fc00000U;
    return *( float*)(&u);
 }
 // Replacement for nan(): parses `str` with atoi(), ORs the result with
 // 0x7ff8000000000000 and returns those 64 bits reinterpreted as a double.
 // NOTE(review): atoi() returns int, so a negative payload would presumably
 // be sign-extended into the upper bits -- confirm callers only pass
 // non-negative values.
 double nan( const char* str)
 {
    cl_ulong u = atoi( str );
    u |= 0x7ff8000000000000ULL;
    return *( double*)(&u);
 }
 // Replacement for nanl(): fills a union that overlays a long double with a
 // 64-bit field `m` followed by a 16-bit field `sexp`, setting sexp to 0x7fff
 // and m to 0x8000000000000000 ORed with atoi(str), then returns the long
 // double view.
 // NOTE(review): double check this implementation -- the field layout looks
 // like it assumes an 80-bit extended long double; confirm for the target
 // compiler.
 long double nanl( const char* str)
 {
    union
    {
        long double f;
        struct { cl_ulong m; cl_ushort sexp; }u;
    }u;
    u.u.sexp = 0x7fff;
    u.u.m = 0x8000000000000000ULL | atoi( str );
    return u.f;
 }
 #endif
 ///////////////////////////////////////////////////////////////////
 //
 //                  misc functions
 //
 ///////////////////////////////////////////////////////////////////
 /*
 // This function is commented out because the Windows implementation should never call munmap.
 // If it is calling it, we have a bug. Please file a bugzilla.
 int munmap(void *addr, size_t len)
 {
 // FIXME: this is not correct.  munmap is like free()    http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
    return (int)VirtualAlloc( (LPVOID)addr, len,
                  MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
 }
 */
 // Returns the current value of the performance counter, as reported by
 // QueryPerformanceCounter(), converted to uint64_t. The value is in counter
 // ticks; SubtractTime() converts a difference of two readings.
 uint64_t ReadTime( void )
 {
    LARGE_INTEGER current;
    QueryPerformanceCounter(&current);
    return (uint64_t)current.QuadPart;
 }
 // Converts the difference between two ReadTime() values into a duration:
 // (endTime - startTime) divided by the performance-counter frequency and
 // multiplied by 1e9 -- presumably nanoseconds, given that
 // QueryPerformanceFrequency() reports ticks per second.
 double SubtractTime( uint64_t endTime, uint64_t startTime )
 {
    // The frequency is queried on the first call and cached in a static.
    // NOTE(review): this lazy initialisation is not synchronised.
    static double PerformanceFrequency = 0.0;
    if (PerformanceFrequency == 0.0) {
        LARGE_INTEGER frequency;
        QueryPerformanceFrequency(&frequency);
        PerformanceFrequency = (double) frequency.QuadPart;
    }
    return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
 }
 int cf_signbit(double x)
 {
    union
    {
@@ -600,7 +683,7 @@ int signbit(double x)
    return u.u >> 63;
 }
-int signbitf(float x)               
+int cf_signbitf(float x)
 {
    union
    {
@@ -611,17 +694,6 @@ int signbitf(float x)
    return u.u >> 31;
 }
 float cbrtf( float x )
 {
    float z = pow( fabs((double) x), 1.0 / 3.0 );
    return copysignf( z, x );
 }
 double cbrt( double x )
 {
    return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
 }
 float int2float (int32_t ix)
 {
    union {
@@ -642,7 +714,7 @@ int32_t float2int (float   fx)
    return u.i;
 }
-#if defined(_MSC_VER) && !defined(_WIN64)
+#if !defined(_WIN64)
 /** Returns the number of leading 0-bits in x,
    starting at the most significant bit position.
    If x is 0, the result is undefined.
@@ -682,45 +754,10 @@ int __builtin_clz(unsigned int pattern)
   return count;
 }
-#endif //defined(_MSC_VER) && !defined(_WIN64)
+#endif // !defined(_WIN64)
 #include <intrin.h>
 #include <emmintrin.h>
 long int lrint (double x)
 {
    double absx = fabs(x);
    if( x >= (double) LONG_MAX )
        return LONG_MAX;
    if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
    {
        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
        double rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }
    return (long int) x;
 }	
 long int lrintf (float x)
 {
    float absx = fabsf(x);
    if( x >= (float) LONG_MAX )
        return LONG_MAX;
    if( absx < 8388608.0f /* 0x1.0p23f */ )
    {
        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
        float rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }
    return (long int) x;
 }
 int usleep(int usec)
 {
@@ -728,22 +765,10 @@ int usleep(int usec)
    return 0;
 }
-int fetestexcept(int excepts)
+unsigned int sleep( unsigned int sec )
 {
-    unsigned int status = _statusfp();
+    Sleep( sec * 1000 );
    return excepts & (
        ((status & _SW_INEXACT) ? FE_INEXACT : 0)      |
        ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)  |
        ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)    |
        ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
        ((status & _SW_INVALID) ? FE_INVALID : 0)
    );    
 }
 int feclearexcept(int excepts)
 {
    _clearfp();
    return 0;
 }
-#endif //defined(_WIN32)
+#endif // defined( _MSC_VER )
--- a/test_common/harness/mt19937.c
+++ b/test_common/harness/mt19937.c
@@ -59,8 +59,14 @@ static void * align_malloc(size_t size, size_t alignment)
    return _aligned_malloc(size, alignment);
 #elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
    void * ptr = NULL;
 #if defined(__ANDROID__)
    ptr = memalign(alignment, size);
    if ( ptr )
        return ptr;
 #else
    if (0 == posix_memalign(&ptr, alignment, size))
        return ptr;
 #endif
    return NULL;
 #elif defined(__MINGW32__)
    return __mingw_aligned_malloc(size, alignment);
--- a/test_common/harness/os_helpers.cpp
+++ b/test_common/harness/os_helpers.cpp
@@ -0,0 +1,564 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 #include "os_helpers.h"
 #include "errorHelpers.h"
 // =================================================================================================
 // C++ interface.
 // =================================================================================================
 #include <cerrno>     // errno, error constants
 #include <climits>    // PATH_MAX
 #include <cstdlib>    // abort, _splitpath, _makepath
 #include <cstring>    // strdup, strerror_r
 #include <sstream>
 #include <vector>
 // Aborts the process when `ptr' is NULL.
 // Wrapped in do { } while ( 0 ) so that `CHECK_PTR( p );' behaves as a single
 // statement, e. g. inside an unbraced if/else (a bare `if { }' would capture the `else').
 #define CHECK_PTR( ptr )        \
    do {                        \
        if ( (ptr) == NULL ) {  \
            abort();            \
        }                       \
    } while ( 0 )
 typedef std::vector< char > buffer_t;
 #if ! defined( PATH_MAX )
    #define PATH_MAX 1000
 #endif
 int const _size  = PATH_MAX + 1;    // Initial buffer size for path.
 int const _count = 8;               // How many times we will try to double buffer size.
 // -------------------------------------------------------------------------------------------------
 // MacOS X
 // -------------------------------------------------------------------------------------------------
 #if defined( __APPLE__ )
    #include <mach-o/dyld.h>    // _NSGetExecutablePath
    #include <libgen.h>         // dirname
    static
    std::string
    _err_msg(
        int err,     // Error number (e. g. errno).
        int level    // Nesting level, for avoiding infinite recursion.
    ) {
        /*
            There are 3 incompatible versions of strerror_r:
                char * strerror_r( int, char *, size_t );  // GNU version
                int    strerror_r( int, char *, size_t );  // BSD version
                int    strerror_r( int, char *, size_t );  // XSI version
            BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
        */
        // BSD version of strerror_r.
        buffer_t buffer( 100 );
        int      count = _count;
        for ( ; ; ) {
            int rc = strerror_r( err, & buffer.front(), buffer.size() );
            if ( rc == EINVAL ) {
                // Error code is not recognized, but anyway we got the message.
                return & buffer.front();
            } else if ( rc == ERANGE ) {
                // Buffer is not enough.
                if ( count > 0 ) {
                    // Enlarge the buffer.
                    -- count;
                    buffer.resize( buffer.size() * 2 );
                } else {
                    std::stringstream ostr;
                    ostr
                        << "Error " << err << " "
                        << "(Getting error message failed: "
                        << "Buffer of " << buffer.size() << " bytes is still too small"
                        << ")";
                    return ostr.str();
                }; // if
            } else if ( rc == 0 ) {
                // We got the message.
                return & buffer.front();
            } else {
                std::stringstream ostr;
                ostr
                    << "Error " << err << " "
                    << "(Getting error message failed: "
                    << ( level < 2 ? _err_msg( rc, level + 1 ) : "Oops" )
                    << ")";
                return ostr.str();
            }; // if
        }; // forever
    } // _err_msg
    // Returns the directory separator used on this platform.
    std::string
    dir_sep(
    ) {
        std::string const sep( "/" );
        return sep;
    } // dir_sep
    std::string
    exe_path(
    ) {
        buffer_t path( _size );
        int      count = _count;
        for ( ; ; ) {
            uint32_t size = path.size();
            int rc = _NSGetExecutablePath( & path.front(), & size );
            if ( rc == 0 ) {
                break;
            }; // if
            if ( count > 0 ) {
                -- count;
                path.resize( size );
            } else {
                log_error(
                    "ERROR: Getting executable path failed: "
                    "_NSGetExecutablePath failed: Buffer of %lu bytes is still too small\n",
                    (unsigned long) path.size()
                );
                exit( 2 );
            }; // if
        }; // forever
        return & path.front();
    } // exe_path
    std::string
    exe_dir(
    ) {
        std::string path = exe_path();
        // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
        buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
        return dirname( & buffer.front() );
    } // exe_dir
 #endif // __APPLE__
 // -------------------------------------------------------------------------------------------------
 // Linux
 // -------------------------------------------------------------------------------------------------
 #if defined( __linux__ )
    #include <cerrno>      // errno
    #include <libgen.h>    // dirname
    #include <unistd.h>    // readlink
    static
    std::string
    _err_msg(
        int err,     // Error number (e. g. errno).
        int level    // Nesting level, for avoiding infinite recursion.
    ) {
        /*
            Returns the system message for `err'.

            There are 3 incompatible versions of strerror_r:

                char * strerror_r( int, char *, size_t );  // GNU version
                int    strerror_r( int, char *, size_t );  // BSD version
                int    strerror_r( int, char *, size_t );  // XSI version

            BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
            Newer XSI implementations return the (positive) error number directly instead of
            -1/errno, so both conventions are accepted below.
        */
        #if defined(__ANDROID__) || ( ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 ) && ! _GNU_SOURCE )
            // XSI version of strerror_r.
            #warning Not tested!
            buffer_t buffer( 200 );
            int      count = _count;
            for ( ; ; ) {
                int rc = strerror_r( err, & buffer.front(), buffer.size() );
                if ( rc == 0 ) {
                    // We got the message.
                    return & buffer.front();
                }; // if
                // Failure: the reason is either in errno (rc == -1) or in rc itself.
                int _err = ( rc == -1 ? errno : rc );
                if ( _err == ERANGE ) {
                    if ( count > 0 ) {
                        // Enlarge the buffer.
                        -- count;
                        buffer.resize( buffer.size() * 2 );
                    } else {
                        std::stringstream ostr;
                        ostr
                            << "Error " << err << " "
                            << "(Getting error message failed: "
                            << "Buffer of " << buffer.size() << " bytes is still too small"
                            << ")";
                        return ostr.str();
                    }; // if
                } else {
                    // Describe the failure itself, but limit the recursion depth.
                    std::stringstream ostr;
                    ostr
                        << "Error " << err << " "
                        << "(Getting error message failed: "
                        << ( level < 2 ? _err_msg( _err, level + 1 ) : "Oops" )
                        << ")";
                    return ostr.str();
                }; // if
            }; // forever
        #else
            // GNU version of strerror_r: returns a pointer to the message, which is copied
            // into the returned std::string before `buffer' goes out of scope.
            char buffer[ 2000 ];
            return strerror_r( err, buffer, sizeof( buffer ) );
        #endif
    } // _err_msg
    // Returns the directory separator used on this platform.
    std::string
    dir_sep(
    ) {
        std::string const sep( "/" );
        return sep;
    } // dir_sep
    std::string
    exe_path(
    ) {
        static std::string const exe = "/proc/self/exe";
        buffer_t    path( _size );
        int         count = _count;  // Max number of iterations.
        for ( ; ; ) {
            ssize_t len = readlink( exe.c_str(), & path.front(), path.size() );
            if ( len < 0 ) {
                // Oops.
                int err = errno;
                log_error(
                    "ERROR: Getting executable path failed: "
                    "Reading symlink `%s' failed: %s\n",
                    exe.c_str(), err_msg( err ).c_str()
                );
                exit( 2 );
            }; // if
            if ( len < path.size() ) {
                // We got the path.
                path.resize( len );
                break;
            }; // if
            // Oops, buffer is too small.
            if ( count > 0 ) {
                -- count;
                // Enlarge the buffer.
                path.resize( path.size() * 2 );
            } else {
                log_error(
                    "ERROR: Getting executable path failed: "
                    "Reading symlink `%s' failed: Buffer of %lu bytes is still too small\n",
                    exe.c_str(),
                    (unsigned long) path.size()
                );
                exit( 2 );
            }; // if
        }; // forever
        return std::string( & path.front(), path.size() );
    } // exe_path
    std::string
    exe_dir(
    ) {
        std::string path = exe_path();
        // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
        buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
        return dirname( & buffer.front() );
    } // exe_dir
 #endif // __linux__
 // -------------------------------------------------------------------------------------------------
 // MS Windows
 // -------------------------------------------------------------------------------------------------
 #if defined( _WIN32 )
    #include <windows.h>
    #if defined( max )
        #undef max
    #endif
    #include <cctype>
    #include <algorithm>
    static
    std::string
    _err_msg(
        int err,     // System error code (e. g. a value obtained from GetLastError()).
        int level    // Nesting level, for avoiding infinite recursion.
    ) {
        // Returns the system message for `err', with trailing whitespace and a trailing
        // full stop removed. If the message cannot be obtained, returns a description of
        // that failure instead.
        std::string msg;
        LPSTR  buffer = NULL;
        DWORD  flags  =
            FORMAT_MESSAGE_ALLOCATE_BUFFER |
            FORMAT_MESSAGE_FROM_SYSTEM |
            FORMAT_MESSAGE_IGNORE_INSERTS;
        // With FORMAT_MESSAGE_ALLOCATE_BUFFER the system allocates the buffer and stores
        // its address through the (reinterpreted) pointer-to-pointer argument.
        DWORD len =
            FormatMessageA(
                flags,
                NULL,
                err,
                LANG_USER_DEFAULT,
                reinterpret_cast< LPSTR >( & buffer ),
                0,
                NULL
            );
        if ( buffer == NULL || len == 0 ) {
            int _err = GetLastError();
            char str[1024] = { 0 };
            snprintf(str, sizeof(str), "Error 0x%08x (Getting error message failed: %s )", err, ( level < 2 ? _err_msg( _err, level + 1 ).c_str() : "Oops" ));
            msg = std::string(str);
        } else {
            // Trim trailing whitespace (including `\r' and `\n').
            // The cast is required: passing a negative `char' (possible in localized
            // messages) to isspace is undefined behaviour.
            while ( len > 0 && isspace( static_cast< unsigned char >( buffer[ len - 1 ] ) ) ) {
                -- len;
            }; // while
            // Drop trailing full stop.
            if ( len > 0 && buffer[ len - 1 ] == '.' ) {
                -- len;
            }; // if
            msg.assign( buffer, len );
        }; //if
        // Release the system-allocated buffer.
        if ( buffer != NULL ) {
            LocalFree( buffer );
        }; // if
        return msg;
    } // _err_msg
    // Returns the directory separator used on this platform.
    std::string
    dir_sep(
    ) {
        std::string const sep( "\\" );
        return sep;
    } // dir_sep
    std::string
    exe_path(
    ) {
        buffer_t path( _size );
        int      count = _count;
        for ( ; ; ) {
            DWORD len = GetModuleFileNameA( NULL, & path.front(), path.size() );
            if ( len == 0 ) {
                int err = GetLastError();
                log_error( "ERROR: Getting executable path failed: %s\n", err_msg( err ).c_str() );
                exit( 2 );
            }; // if
            if ( len < path.size() ) {
                path.resize( len );
                break;
            }; // if
            // Buffer too small.
            if ( count > 0 ) {
                -- count;
                path.resize( path.size() * 2 );
            } else {
                log_error(
                    "ERROR: Getting executable path failed: "
                    "Buffer of %lu bytes is still too small\n",
                    (unsigned long) path.size()
                );
                exit( 2 );
            }; // if
        }; // forever
        return std::string( & path.front(), path.size() );
    } // exe_path
    std::string
    exe_dir(
    ) {
        std::string exe = exe_path();
        int count = 0;
        // Splitting path into components.
        buffer_t drv( _MAX_DRIVE );
        buffer_t dir( _MAX_DIR   );
        count = _count;
 #if defined(_MSC_VER)
        for ( ; ; ) {
            int rc =
                _splitpath_s(
                    exe.c_str(),
                    & drv.front(), drv.size(),
                    & dir.front(), dir.size(),
                    NULL, 0,   // We need neither name
                    NULL, 0    // nor extension
                );
            if ( rc == 0 ) {
                break;
            } else if ( rc == ERANGE ) {
                if ( count > 0 ) {
                    -- count;
                    // Buffer is too small, but it is not clear which one.
                    // So we have to enlarge all.
                    drv.resize( drv.size() * 2 );
                    dir.resize( dir.size() * 2 );
                } else {
                    log_error(
                        "ERROR: Getting executable path failed: "
                        "Splitting path `%s' to components failed: "
                        "Buffers of %lu and %lu bytes are still too small\n",
                        exe.c_str(),
                        (unsigned long) drv.size(),
                        (unsigned long) dir.size()
                    );
                    exit( 2 );
                }; // if
            } else {
                log_error(
                    "ERROR: Getting executable path failed: "
                    "Splitting path `%s' to components failed: %s\n",
                    exe.c_str(),
                    err_msg( rc ).c_str()
                );
                exit( 2 );
            }; // if
        }; // forever
 #else // __MINGW32__
        // MinGW does not have the "secure" _splitpath_s, use the insecure version instead.
        _splitpath(
            exe.c_str(),
            & drv.front(),
            & dir.front(),
            NULL,   // We need neither name
            NULL    // nor extension
        );
 #endif // __MINGW32__
        // Combining components back to path.
        // I failed with "secure" `_makepath_s'. If buffer is too small, instead of returning
        // ERANGE, `_makepath_s' pops up dialog box and offers to debug the program. D'oh!
        // So let us try to guess the size of result and go with insecure `_makepath'.
        buffer_t path( std::max( drv.size() + dir.size(), size_t( _MAX_PATH ) ) + 10 );
        _makepath( & path.front(), & drv.front(), & dir.front(), NULL, NULL );
        return & path.front();
    } // exe_dir
 #endif // _WIN32
 // Returns the system message for error code `err'.
 // Delegates to the platform-specific _err_msg, starting at nesting level 0.
 std::string
 err_msg(
    int err
 ) {
    int const top_level = 0;
    return _err_msg( err, top_level );
 } // err_msg
 // =================================================================================================
 // C interface.
 // =================================================================================================
 // C wrapper around err_msg(): returns a strdup'ed copy of the message.
 // Aborts (via CHECK_PTR) if the copy cannot be allocated.
 char *
 get_err_msg(
    int err
 ) {
    std::string const text = err_msg( err );
    char * msg = strdup( text.c_str() );
    CHECK_PTR( msg );
    return msg;
 } // get_err_msg
 // C wrapper around dir_sep(): returns a strdup'ed copy of the separator.
 // Aborts (via CHECK_PTR) if the copy cannot be allocated.
 char *
 get_dir_sep(
 ) {
    std::string const text = dir_sep();
    char * sep = strdup( text.c_str() );
    CHECK_PTR( sep );
    return sep;
 } // get_dir_sep
 // C wrapper around exe_path(): returns a strdup'ed copy of the executable path.
 // Aborts (via CHECK_PTR) if the copy cannot be allocated.
 char *
 get_exe_path(
 ) {
    std::string const text = exe_path();
    char * path = strdup( text.c_str() );
    CHECK_PTR( path );
    return path;
 } // get_exe_path
 char *
 get_exe_dir(
 ) {
    char * dir = strdup( exe_dir().c_str() );
    CHECK_PTR( dir );
    return dir;
 } // get_exe_dir
 // end of file //
--- a/test_common/harness/os_helpers.h
+++ b/test_common/harness/os_helpers.h
@@ -0,0 +1,53 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 #ifndef __os_helpers_h__
 #define __os_helpers_h__
 #include "compat.h"
 // -------------------------------------------------------------------------------------------------
 // C++ interface.
 // -------------------------------------------------------------------------------------------------
 #ifdef __cplusplus
    #include <string>
    std::string err_msg( int err );
    std::string dir_sep();
    std::string exe_path();
    std::string exe_dir();
 #endif // __cplusplus
 // -------------------------------------------------------------------------------------------------
 // C interface.
 // -------------------------------------------------------------------------------------------------
 #ifdef __cplusplus
    extern "C" {
 #endif // __cplusplus
 char * get_err_msg( int err );  // Returns system error message. Subject to free.
 char * get_dir_sep();           // Returns dir separator. Subject to free.
 char * get_exe_path();          // Returns path of current executable. Subject to free.
 char * get_exe_dir();           // Returns dir of current executable. Subject to free.
 #ifdef __cplusplus
    } // extern "C"
 #endif // __cplusplus
 #endif // __os_helpers_h__
--- a/test_common/harness/parseParameters.cpp
+++ b/test_common/harness/parseParameters.cpp
@@ -0,0 +1,42 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
 // 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 #include "parseParameters.h"
 #include "errorHelpers.h"
 #include <string.h>
 // Returns true when `number' is a positive power of two (1, 2, 4, ...).
 // Zero and negative values are rejected up front; this also keeps `number - 1'
 // from overflowing for INT_MIN, which the bit test alone would accept.
 bool is_power_of_two(int number)
 {
    return number > 0 && !(number & (number - 1));
 }
 // Parses a wimpy reduction factor from the text that follows arg[0], up to a closing ']'.
 // If no ']' is found, neither `arg' nor `wimpyReductionFactor' is touched.
 // Otherwise `arg' is advanced to the ']' and, when the parsed number is a power of two,
 // it is stored in `wimpyReductionFactor'; any other value only produces a warning.
 extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor)
 {
    const char *digits = arg + 1;
    const char *closing = strchr(digits, ']');
    if (closing == 0)
    {
        return;
    }
    const int new_factor = atoi(digits);
    arg = closing; // Advance until ']'
    if (!is_power_of_two(new_factor))
    {
        log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be power of 2. The default value will be used.\n", new_factor);
        return;
    }
    log_info("\n Wimpy reduction factor changed from %d to %d \n", wimpyReductionFactor, new_factor);
    wimpyReductionFactor = new_factor;
 }
--- a/test_common/harness/parseParameters.h
+++ b/test_common/harness/parseParameters.h
@@ -0,0 +1,24 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
 // 
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 #ifndef _parseParameters_h
 #define _parseParameters_h
 #include "compat.h"
 #include <string>
 extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor);
 #endif // _parseParameters_h
--- a/test_common/harness/rounding_mode.c
+++ b/test_common/harness/rounding_mode.c
@@ -15,7 +15,69 @@
 //
 #include "rounding_mode.h"
-#if !(defined(_WIN32) && defined(_MSC_VER))
+#if (defined( __arm__ ) || defined(__aarch64__))
    #define FPSCR_FZ    (1 << 24)       // Flush-To-Zero mode
    #define FPSCR_ROUND_MASK (3 << 22)  // Rounding mode:
    #define _ARM_FE_FTZ     0x1000000
    #define _ARM_FE_NFTZ    0x0
    #if defined(__aarch64__)
        #define _FPU_GETCW(cw) __asm__ ("MRS %0,FPCR" : "=r" (cw))
        #define _FPU_SETCW(cw) __asm__ ("MSR FPCR,%0" : :"ri" (cw))
    #else
        #define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw))
        #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
    #endif
 #endif
 #if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
 #define _ARM_FE_TONEAREST           0x0
 #define _ARM_FE_UPWARD              0x400000
 #define _ARM_FE_DOWNWARD            0x800000
 #define _ARM_FE_TOWARDZERO          0xc00000
 // Sets the rounding-mode bits of the ARM FP control register for mode `r'
 // and returns the mode that was active before the call.
 // Two lookup tables are indexed by `r'; they differ only in their first entry
 // (to-nearest for float/double output, toward-zero otherwise).
 // NOTE(review): the first entry presumably corresponds to kDefaultRoundingMode; confirm against the enum.
 RoundingMode set_round( RoundingMode r, Type outType )
 {
    static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST,
                                                          _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
    static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO,
                                                          _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
    const int isFloatOut = ( outType == kfloat || outType == kdouble );
    const int *table = isFloatOut ? flt_rounds : int_rounds;
    int fpscr = 0;
    int updated;
    // Capture the current mode before the control register is modified.
    RoundingMode previous = get_round();
    _FPU_GETCW(fpscr);
    // Replace only the rounding-mode field; every other control bit is kept.
    updated = table[r] | (fpscr & ~FPSCR_ROUND_MASK);
    _FPU_SETCW( updated );
    return previous;
 }
 // Reads the ARM FP control register and maps its rounding-mode field to a RoundingMode.
 // Returns kDefaultRoundingMode if the field matches none of the four known encodings.
 RoundingMode get_round( void )
 {
    int fpscr;
    int bits;
    _FPU_GETCW(fpscr);
    // Isolate the rounding-mode field.
    bits = fpscr & FPSCR_ROUND_MASK;
    if( bits == _ARM_FE_TONEAREST )
        return kRoundToNearestEven;
    if( bits == _ARM_FE_UPWARD )
        return kRoundUp;
    if( bits == _ARM_FE_DOWNWARD )
        return kRoundDown;
    if( bits == _ARM_FE_TOWARDZERO )
        return kRoundTowardZero;
    return kDefaultRoundingMode;
 }
 #elif !(defined(_WIN32) && defined(_MSC_VER))
 RoundingMode set_round( RoundingMode r, Type outType )
 {
    static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
@@ -135,8 +197,10 @@ void *FlushToZero( void )
        union{ int i;  void *p; }u = { _mm_getcsr() };
        _mm_setcsr( u.i | 0x8040 );
        return u.p;
-    #elif defined( __arm__ )
+    #elif defined( __arm__ ) || defined(__aarch64__)
-        // processor is already in FTZ mode -- do nothing
+        int fpscr;
        _FPU_GETCW(fpscr);
        _FPU_SETCW(fpscr | FPSCR_FZ);
        return NULL;
    #elif defined( __PPC__ )
        fpu_control_t flags = 0;
@@ -159,8 +223,10 @@ void UnFlushToZero( void *p)
    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
        union{ void *p; int i;  }u = { p };
        _mm_setcsr( u.i );
-    #elif defined( __arm__ )
+    #elif defined( __arm__ ) || defined(__aarch64__)
-        // processor is already in FTZ mode -- do nothing
+        int fpscr;
        _FPU_GETCW(fpscr);
        _FPU_SETCW(fpscr & ~FPSCR_FZ);
    #elif defined( __PPC__)
        fpu_control_t flags = 0;
        _FPU_GETCW(flags);
--- a/test_common/harness/rounding_mode.h
+++ b/test_common/harness/rounding_mode.h
@@ -16,15 +16,11 @@
 #ifndef __ROUNDING_MODE_H__
 #define __ROUNDING_MODE_H__
-#include <stdlib.h>
+#include "compat.h"
 #if (defined(_WIN32) && defined (_MSC_VER))
 // needed for _controlfp_s and the rounding modes used in RoundingMode
 #include <float.h>
 #include "errorHelpers.h"
 #include "testHarness.h"
 #else
    #include <fenv.h>
 #endif
 typedef enum
--- a/test_common/harness/threadTesting.c
+++ b/test_common/harness/threadTesting.c
@@ -13,16 +13,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 #include "compat.h"
 #include "threadTesting.h"
 #include "errorHelpers.h"
 #include <stdio.h>
 #include <stdlib.h>
 #if !defined(_WIN32)
 #include <stdbool.h>
 #endif
 #include <math.h>
 #include <string.h>
 #if !defined(_WIN32)
--- a/test_common/miniz/CMakeLists.txt
+++ b/test_common/miniz/CMakeLists.txt
@@ -0,0 +1,8 @@
 set(TARGET_NAME miniz)
 add_library(
  ${TARGET_NAME}
  STATIC
  miniz.c
  miniz.h
 )
--- a/test_common/miniz/miniz.c
+++ b/test_common/miniz/miniz.c
--- a/test_common/miniz/miniz.h
+++ b/test_common/miniz/miniz.h
@@ -0,0 +1,749 @@
 #ifndef MINIZ_HEADER_INCLUDED
 #define MINIZ_HEADER_INCLUDED
 #include <stdlib.h>
 #if defined(__TINYC__) && (defined(__linux) || defined(__linux__))
  // TODO: Work around "error: include file 'sys\utime.h'" when compiling with tcc on Linux
  #define MINIZ_NO_TIME
 #endif
 #if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS)
  #include <time.h>
 #endif
 #if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__)
 // MINIZ_X86_OR_X64_CPU is only used to help set the below macros.
 #define MINIZ_X86_OR_X64_CPU 1
 #endif
 #if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)) || MINIZ_X86_OR_X64_CPU
 // Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. The defined() guards are required: undefined macros evaluate to 0 in #if, so a bare comparison would be 0==0 (true) on compilers that lack them.
 #define MINIZ_LITTLE_ENDIAN 1
 #endif
 #if MINIZ_X86_OR_X64_CPU
 // Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses.
 #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
 #endif
 #if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__)
 // Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions).
 #define MINIZ_HAS_64BIT_REGISTERS 1
 #endif
 // Return status codes. MZ_PARAM_ERROR is non-standard.
 enum {
  MZ_OK = 0,                // success
  MZ_STREAM_END = 1,        // all input has been consumed and all output bytes have been written
  MZ_NEED_DICT = 2,
  MZ_ERRNO = -1,
  MZ_STREAM_ERROR = -2,     // the stream is bogus
  MZ_DATA_ERROR = -3,       // the deflate stream is invalid
  MZ_MEM_ERROR = -4,        // out of memory
  MZ_BUF_ERROR = -5,        // no forward progress is possible (input and/or output buffer exhausted)
  MZ_VERSION_ERROR = -6,
  MZ_PARAM_ERROR = -10000   // a parameter is invalid; non-standard (miniz-specific) code
 };
 typedef unsigned long mz_ulong;
 #ifdef __cplusplus
 extern "C" {
 #endif
 // ------------------- zlib-style API Definitions.
 // mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap.
 void mz_free(void *p);
 #define MZ_ADLER32_INIT (1)
 // mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL.
 mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len);
 #define MZ_CRC32_INIT (0)
 // mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL.
 mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len);
 // Compression strategies.
 enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 };
 // Method
 #define MZ_DEFLATED 8
 #ifndef MINIZ_NO_ZLIB_APIS
 // Heap allocation callbacks.
 // Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long.
 typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size);
 typedef void (*mz_free_func)(void *opaque, void *address);
 typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size);
 #define MZ_VERSION          "9.1.15"
 #define MZ_VERNUM           0x91F0
 #define MZ_VER_MAJOR        9
 #define MZ_VER_MINOR        1
 #define MZ_VER_REVISION     15
 #define MZ_VER_SUBREVISION  0
 // Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs).
 enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 };
 // Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL.
 enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 };
 // Window bits
 #define MZ_DEFAULT_WINDOW_BITS 15
 struct mz_internal_state;
 // Compression/decompression stream struct.
 typedef struct mz_stream_s
 {
  const unsigned char *next_in;     // pointer to next byte to read
  unsigned int avail_in;            // number of bytes available at next_in
  mz_ulong total_in;                // total number of bytes consumed so far
  unsigned char *next_out;          // pointer to next byte to write
  unsigned int avail_out;           // number of bytes that can be written to next_out
  mz_ulong total_out;               // total number of bytes produced so far
  char *msg;                        // error msg (unused)
  struct mz_internal_state *state;  // internal state, allocated by zalloc/zfree
  mz_alloc_func zalloc;             // optional heap allocation function (defaults to malloc)
  mz_free_func zfree;               // optional heap free function (defaults to free)
  void *opaque;                     // heap alloc function user pointer
  int data_type;                    // data_type (unused)
  mz_ulong adler;                   // adler32 of the source or uncompressed data
  mz_ulong reserved;                // not used
 } mz_stream;
 typedef mz_stream *mz_streamp;
 // Returns the version string of miniz.c.
 const char *mz_version(void);
 // mz_deflateInit() initializes a compressor with default options:
 // Parameters:
 //  pStream must point to an initialized mz_stream struct.
 //  level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION].
 //  level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio.
 //  (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.)
 // Return values:
 //  MZ_OK on success.
 //  MZ_STREAM_ERROR if the stream is bogus.
 //  MZ_PARAM_ERROR if the input parameters are bogus.
 //  MZ_MEM_ERROR on out of memory.
 int mz_deflateInit(mz_streamp pStream, int level);
 // mz_deflateInit2() is like mz_deflateInit(), except with more control:
 // Additional parameters:
 //   method must be MZ_DEFLATED
 //   window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer)
 //   mem_level must be between [1, 9] (it's checked but ignored by miniz.c)
 int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy);
 // Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2().
 int mz_deflateReset(mz_streamp pStream);
 // mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible.
 // Parameters:
 //   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
 //   flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH.
 // Return values:
 //   MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full).
 //   MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore.
 //   MZ_STREAM_ERROR if the stream is bogus.
 //   MZ_PARAM_ERROR if one of the parameters is invalid.
 //   MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.)
 int mz_deflate(mz_streamp pStream, int flush);
 // mz_deflateEnd() deinitializes a compressor:
 // Return values:
 //  MZ_OK on success.
 //  MZ_STREAM_ERROR if the stream is bogus.
 int mz_deflateEnd(mz_streamp pStream);
 // mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH.
 mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);
 // Single-call compression functions mz_compress() and mz_compress2():
 // Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure.
 int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
 int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level);
 // mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress().
 mz_ulong mz_compressBound(mz_ulong source_len);
 // Initializes a decompressor.
 int mz_inflateInit(mz_streamp pStream);
 // mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer:
 // window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate).
 int mz_inflateInit2(mz_streamp pStream, int window_bits);
 // Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible.
 // Parameters:
 //   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
 //   flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
 //   On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster).
 //   MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data.
 // Return values:
 //   MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full.
 //   MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified.
 //   MZ_STREAM_ERROR if the stream is bogus.
 //   MZ_DATA_ERROR if the deflate stream is invalid.
 //   MZ_PARAM_ERROR if one of the parameters is invalid.
 //   MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again
 //   with more input data, or with more room in the output buffer (except when using single call decompression, described above).
 int mz_inflate(mz_streamp pStream, int flush);
 // Deinitializes a decompressor.
 int mz_inflateEnd(mz_streamp pStream);
 // Single-call decompression.
 // Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure.
 int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
 // Returns a string description of the specified error code, or NULL if the error code is invalid.
 const char *mz_error(int err);
 // Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports.
 // Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project.
 #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
  typedef unsigned char Byte;
  typedef unsigned int uInt;
  typedef mz_ulong uLong;
  typedef Byte Bytef;
  typedef uInt uIntf;
  typedef char charf;
  typedef int intf;
  typedef void *voidpf;
  typedef uLong uLongf;
  typedef void *voidp;
  typedef void *const voidpc;
  #define Z_NULL                0
  #define Z_NO_FLUSH            MZ_NO_FLUSH
  #define Z_PARTIAL_FLUSH       MZ_PARTIAL_FLUSH
  #define Z_SYNC_FLUSH          MZ_SYNC_FLUSH
  #define Z_FULL_FLUSH          MZ_FULL_FLUSH
  #define Z_FINISH              MZ_FINISH
  #define Z_BLOCK               MZ_BLOCK
  #define Z_OK                  MZ_OK
  #define Z_STREAM_END          MZ_STREAM_END
  #define Z_NEED_DICT           MZ_NEED_DICT
  #define Z_ERRNO               MZ_ERRNO
  #define Z_STREAM_ERROR        MZ_STREAM_ERROR
  #define Z_DATA_ERROR          MZ_DATA_ERROR
  #define Z_MEM_ERROR           MZ_MEM_ERROR
  #define Z_BUF_ERROR           MZ_BUF_ERROR
  #define Z_VERSION_ERROR       MZ_VERSION_ERROR
  #define Z_PARAM_ERROR         MZ_PARAM_ERROR
  #define Z_NO_COMPRESSION      MZ_NO_COMPRESSION
  #define Z_BEST_SPEED          MZ_BEST_SPEED
  #define Z_BEST_COMPRESSION    MZ_BEST_COMPRESSION
  #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
  #define Z_DEFAULT_STRATEGY    MZ_DEFAULT_STRATEGY
  #define Z_FILTERED            MZ_FILTERED
  #define Z_HUFFMAN_ONLY        MZ_HUFFMAN_ONLY
  #define Z_RLE                 MZ_RLE
  #define Z_FIXED               MZ_FIXED
  #define Z_DEFLATED            MZ_DEFLATED
  #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
  #define alloc_func            mz_alloc_func
  #define free_func             mz_free_func
  #define internal_state        mz_internal_state
  #define z_stream              mz_stream
  #define deflateInit           mz_deflateInit
  #define deflateInit2          mz_deflateInit2
  #define deflateReset          mz_deflateReset
  #define deflate               mz_deflate
  #define deflateEnd            mz_deflateEnd
  #define deflateBound          mz_deflateBound
  #define compress              mz_compress
  #define compress2             mz_compress2
  #define compressBound         mz_compressBound
  #define inflateInit           mz_inflateInit
  #define inflateInit2          mz_inflateInit2
  #define inflate               mz_inflate
  #define inflateEnd            mz_inflateEnd
  #define uncompress            mz_uncompress
  #define crc32                 mz_crc32
  #define adler32               mz_adler32
  #define MAX_WBITS             15
  #define MAX_MEM_LEVEL         9
  #define zError                mz_error
  #define ZLIB_VERSION          MZ_VERSION
  #define ZLIB_VERNUM           MZ_VERNUM
  #define ZLIB_VER_MAJOR        MZ_VER_MAJOR
  #define ZLIB_VER_MINOR        MZ_VER_MINOR
  #define ZLIB_VER_REVISION     MZ_VER_REVISION
  #define ZLIB_VER_SUBREVISION  MZ_VER_SUBREVISION
  #define zlibVersion           mz_version
  #define zlib_version          mz_version()
 #endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
 #endif // MINIZ_NO_ZLIB_APIS
 // ------------------- Types and macros
 typedef unsigned char mz_uint8;
 typedef signed short mz_int16;
 typedef unsigned short mz_uint16;
 typedef unsigned int mz_uint32;
 typedef unsigned int mz_uint;
 typedef long long mz_int64;
 typedef unsigned long long mz_uint64;
 typedef int mz_bool;
 #define MZ_FALSE (0)
 #define MZ_TRUE (1)
 // An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message.
 #ifdef _MSC_VER
   #define MZ_MACRO_END while (0, 0)
 #else
   #define MZ_MACRO_END while (0)
 #endif
 // ------------------- ZIP archive reading/writing
 #ifndef MINIZ_NO_ARCHIVE_APIS
 enum
 {
  MZ_ZIP_MAX_IO_BUF_SIZE = 64*1024,             // 64 KiB
  MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260,       // capacity of mz_zip_archive_file_stat.m_filename
  MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256    // capacity of mz_zip_archive_file_stat.m_comment
 };
 typedef struct  // Per-entry information record; filled in through the pStat argument of mz_zip_reader_file_stat().
 {
  mz_uint32 m_file_index;
  mz_uint32 m_central_dir_ofs;
  mz_uint16 m_version_made_by;
  mz_uint16 m_version_needed;
  mz_uint16 m_bit_flag;
  mz_uint16 m_method;
 #ifndef MINIZ_NO_TIME  // NOTE: this field changes the struct layout, so every translation unit must agree on MINIZ_NO_TIME.
  time_t m_time;
 #endif
  mz_uint32 m_crc32;
  mz_uint64 m_comp_size;
  mz_uint64 m_uncomp_size;
  mz_uint16 m_internal_attr;
  mz_uint32 m_external_attr;
  mz_uint64 m_local_header_ofs;
  mz_uint32 m_comment_size;  // presumably the number of valid bytes in m_comment -- TODO confirm against miniz.c
  char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE];      // fixed-size buffer (MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE bytes)
  char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE];   // fixed-size buffer (MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE bytes)
 } mz_zip_archive_file_stat;
 typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n);
 typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n);
 struct mz_zip_internal_state_tag;
 typedef struct mz_zip_internal_state_tag mz_zip_internal_state;
 typedef enum
 {
  MZ_ZIP_MODE_INVALID = 0,
  MZ_ZIP_MODE_READING = 1,
  MZ_ZIP_MODE_WRITING = 2,
  MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3
 } mz_zip_mode;
 typedef struct mz_zip_archive_tag
 {
  mz_uint64 m_archive_size;
  mz_uint64 m_central_directory_file_ofs;
  mz_uint m_total_files;
  mz_zip_mode m_zip_mode;
  mz_uint m_file_offset_alignment;
  mz_alloc_func m_pAlloc;
  mz_free_func m_pFree;
  mz_realloc_func m_pRealloc;
  void *m_pAlloc_opaque;
  mz_file_read_func m_pRead;
  mz_file_write_func m_pWrite;
  void *m_pIO_opaque;
  mz_zip_internal_state *m_pState;
 } mz_zip_archive;
 typedef enum  // Bit flags occupying bits 8-11; the writer APIs accept them OR'd with a compression level (0-10) in level_and_flags.
 {
  MZ_ZIP_FLAG_CASE_SENSITIVE                = 0x0100,  // accepted by mz_zip_reader_locate_file()
  MZ_ZIP_FLAG_IGNORE_PATH                   = 0x0200,  // accepted by mz_zip_reader_locate_file()
  MZ_ZIP_FLAG_COMPRESSED_DATA               = 0x0400,
  MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800
 } mz_zip_flags;
 // ZIP archive reading
 // Inits a ZIP archive reader.
 // These functions read and validate the archive's central directory.
 mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags);
 mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags);
 #ifndef MINIZ_NO_STDIO
 mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags);
 #endif
 // Returns the total number of files in the archive.
 mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip);
 // Returns detailed information about an archive file entry.
 mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat);
 // Determines if an archive file entry is a directory entry.
 mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index);
 mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index);
 // Retrieves the filename of an archive file entry.
 // Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename.
 mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size);
 // Attempts to locate a file in the archive's central directory.
 // Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH
 // Returns -1 if the file cannot be found.
 int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);
 // Extracts an archive file to a memory buffer using no memory allocation.
 mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
 mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
 // Extracts an archive file to a memory buffer.
 mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags);
 mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags);
 // Extracts an archive file to a dynamically allocated heap buffer.
 void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags);
 void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags);
 // Extracts an archive file using a callback function to output the file's data.
 mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);
 mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);
 #ifndef MINIZ_NO_STDIO
 // Extracts an archive file to a disk file and sets its last accessed and modified times.
 // This function only extracts files, not archive directory records.
 mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags);
 mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags);
 #endif
 // Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used.
 mz_bool mz_zip_reader_end(mz_zip_archive *pZip);
 // ZIP archive writing
 #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
 // Inits a ZIP archive writer.
 mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size);
 mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size);
 #ifndef MINIZ_NO_STDIO
 mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning);
 #endif
 // Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive.
 // For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called.
 // For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it).
 // Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL.
 // Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before
 // the archive is finalized the file's central directory will be hosed.
 mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename);
 // Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive.
 // To add a directory entry, call this method with an archive name ending in a forward slash and an empty buffer.
 // level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
 mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags);
 mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32);
 #ifndef MINIZ_NO_STDIO
 // Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive.
 // level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
 mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
 #endif
 // Adds a file to an archive by fully cloning the data from another archive.
 // This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data, and comment fields.
 mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index);
 // Finalizes the archive by writing the central directory records followed by the end of central directory record.
 // After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end().
 // An archive must be manually finalized by calling this function for it to be valid.
 mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip);
 mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize);
 // Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used.
 // Note for the archive to be valid, it must have been finalized before ending.
 mz_bool mz_zip_writer_end(mz_zip_archive *pZip);
 // Misc. high-level helper functions:
 // mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive.
 // level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
 mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
 // Reads a single file from an archive into a heap block.
 // Returns NULL on failure.
 void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint zip_flags);
 #endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
 #endif // #ifndef MINIZ_NO_ARCHIVE_APIS
 // ------------------- Low-level Decompression API Definitions
 // Decompression flags used by tinfl_decompress().
 // TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream.
 // TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input.
 // TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB).
 // TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes.
 enum
 {
  TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
  TINFL_FLAG_HAS_MORE_INPUT = 2,
  TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
  TINFL_FLAG_COMPUTE_ADLER32 = 8
 };
 // High level decompression functions:
 // tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc().
 // On entry:
 //  pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress.
 // On return:
 //  Function returns a pointer to the decompressed data, or NULL on failure.
 //  *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data.
 //  The caller must call mz_free() on the returned block when it's no longer needed.
 void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
 // tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory.
 // Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success.
 #define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
 size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
 // tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer.
 // Returns 1 on success or 0 on failure.
 typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
 int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
 struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor;
 // Max size of LZ dictionary.
 #define TINFL_LZ_DICT_SIZE 32768
 // Return status.
 typedef enum
 {
  TINFL_STATUS_BAD_PARAM = -3,
  TINFL_STATUS_ADLER32_MISMATCH = -2,
  TINFL_STATUS_FAILED = -1,
  TINFL_STATUS_DONE = 0,
  TINFL_STATUS_NEEDS_MORE_INPUT = 1,
  TINFL_STATUS_HAS_MORE_OUTPUT = 2
 } tinfl_status;
 // Initializes the decompressor to its initial state.
 #define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END
 #define tinfl_get_adler32(r) (r)->m_check_adler32
 // Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability.
 // This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output.
 tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags);
 // Internal/private bits follow.
 enum
 {
  TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19,
  TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS
 };
 typedef struct
 {
  mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0];
  mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2];
 } tinfl_huff_table;
 #if MINIZ_HAS_64BIT_REGISTERS
  #define TINFL_USE_64BIT_BITBUF 1
 #endif
 #if TINFL_USE_64BIT_BITBUF
  typedef mz_uint64 tinfl_bit_buf_t;
  #define TINFL_BITBUF_SIZE (64)
 #else
  typedef mz_uint32 tinfl_bit_buf_t;
  #define TINFL_BITBUF_SIZE (32)
 #endif
 struct tinfl_decompressor_tag  // Decompressor state behind the tinfl_decompressor typedef; tinfl_init() must be applied before first use.
 {
  mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES];  // m_state is zeroed by tinfl_init(); m_check_adler32 is what tinfl_get_adler32() reads
  tinfl_bit_buf_t m_bit_buf;  // 64-bit when TINFL_USE_64BIT_BITBUF is set, otherwise 32-bit
  size_t m_dist_from_out_buf_start;
  tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES];
  mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
 };
 // ------------------- Low-level Compression API Definitions
 // Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently).
 #define TDEFL_LESS_MEMORY 0
 // tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search):
 // TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression).
 enum
 {
  TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF
 };
 // TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data.
 // TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers).
 // TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing.
 // TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory).
 // TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1)
 // TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled.
 // TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables.
 // TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks.
 // The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK).
 enum
 {
  TDEFL_WRITE_ZLIB_HEADER             = 0x01000,
  TDEFL_COMPUTE_ADLER32               = 0x02000,
  TDEFL_GREEDY_PARSING_FLAG           = 0x04000,
  TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,
  TDEFL_RLE_MATCHES                   = 0x10000,
  TDEFL_FILTER_MATCHES                = 0x20000,
  TDEFL_FORCE_ALL_STATIC_BLOCKS       = 0x40000,
  TDEFL_FORCE_ALL_RAW_BLOCKS          = 0x80000
 };
 // High level compression functions:
 // tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc().
 // On entry:
 //  pSrc_buf, src_buf_len: Pointer and size of source block to compress.
 //  flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression.
 // On return:
 //  Function returns a pointer to the compressed data, or NULL on failure.
 //  *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data.
 //  The caller must free() the returned block when it's no longer needed.
 void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
 // tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory.
 // Returns 0 on failure.
 size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
 // Compresses an image to a compressed PNG file in memory.
 // On entry:
 //  pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4.
 //  The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory.
 //  level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL
 //  If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps).
 // On return:
 //  Function returns a pointer to the compressed data, or NULL on failure.
 //  *pLen_out will be set to the size of the PNG image file.
 //  The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed.
 void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip);
 void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out);
 // Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time.
 typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
 // tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally.
 mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
 enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 };
 // TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes).
 #if TDEFL_LESS_MEMORY
 enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
 #else
 enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
 #endif
 // The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions.
 // Status codes returned by the low-level tdefl_* functions declared below.
 // Negative values are failures; non-negative values are non-error states.
 typedef enum
 {
  TDEFL_STATUS_BAD_PARAM = -2,
  TDEFL_STATUS_PUT_BUF_FAILED = -1,
  TDEFL_STATUS_OKAY = 0,
  TDEFL_STATUS_DONE = 1
 } tdefl_status;
 // Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums
 typedef enum
 {
  TDEFL_NO_FLUSH = 0,
  TDEFL_SYNC_FLUSH = 2,
  TDEFL_FULL_FLUSH = 3,
  TDEFL_FINISH = 4
 } tdefl_flush;
 // tdefl's compression state structure.
 typedef struct
 {
  // Output callback and its opaque user pointer; same types as the corresponding tdefl_init() parameters.
  tdefl_put_buf_func_ptr m_pPut_buf_func;
  void *m_pPut_buf_user;
  // NOTE(review): presumably the flags passed to tdefl_init() and the probe limits derived from them - confirm in the implementation.
  mz_uint m_flags, m_max_probes[2];
  int m_greedy_parsing;
  mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
  // Cursors into the embedded buffers at the end of this struct (presumably m_lz_code_buf / m_output_buf - confirm).
  mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
  mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer;
  mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish;
  tdefl_status m_prev_return_status;
  // Caller-supplied buffers, sizes and flush mode; same types as the tdefl_compress() parameters.
  const void *m_pIn_buf;
  void *m_pOut_buf;
  size_t *m_pIn_buf_size, *m_pOut_buf_size;
  tdefl_flush m_flush;
  const mz_uint8 *m_pSrc;
  size_t m_src_buf_left, m_out_buf_ofs;
  // Fixed-size working storage embedded directly in the struct; all sizes come from the TDEFL_* enums above,
  // so the struct is large and its size depends on TDEFL_LESS_MEMORY.
  mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];
  mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
  mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
  mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
  mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
  mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
  mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];
  mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
 } tdefl_compressor;
 // Initializes the compressor.
 // There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory.
 // pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression.
 // If pPut_buf_func is NULL the user should always call the tdefl_compress() API.
 // flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.)
 tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
 // Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible.
 tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush);
 // tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr.
 // tdefl_compress_buffer() always consumes the entire input buffer.
 tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush);
 tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
 mz_uint32 tdefl_get_adler32(tdefl_compressor *d);
 // Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't defined, because it uses some of its macros.
 #ifndef MINIZ_NO_ZLIB_APIS
 // Create tdefl_compress() flags given zlib-style compression parameters.
 // level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files)
 // window_bits may be -15 (raw deflate) or 15 (zlib)
 // strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED
 mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy);
 #endif // #ifndef MINIZ_NO_ZLIB_APIS
 #ifdef __cplusplus
 }
 #endif
 #endif // MINIZ_HEADER_INCLUDED
--- a/test_conformance/CMakeLists.txt
+++ b/test_conformance/CMakeLists.txt
@@ -65,3 +65,6 @@ add_subdirectory(select)
 add_subdirectory(thread_dimensions)
 add_subdirectory(vec_align)
 add_subdirectory(vec_step)
 # Add any extension folders
 add_subdirectory(spir)
--- a/test_conformance/allocations/allocation_functions.cpp
+++ b/test_conformance/allocations/allocation_functions.cpp
@@ -27,7 +27,7 @@ int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id de
 }
-int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t *width, size_t *height) {
+int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t *width, size_t *height, size_t* max_size) {
  size_t max_width, max_height, num_pixels, found_width, found_height;
  int error;
@@ -48,34 +48,44 @@ int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t
  num_pixels = size_to_allocate / (sizeof(cl_uint)*4);
-  if (num_pixels > (max_width*max_height))
+  if (num_pixels > (max_width*max_height)) {
    if(NULL != max_size) {
      *max_size = max_width * max_height * sizeof(cl_uint) * 4;
    }
    return FAILED_TOO_BIG;
  }
  // We want a close-to-square aspect ratio.
  // Note that this implicitly assumes that  max width >= max height
  found_width = (int)sqrt( (double) num_pixels );
  if (found_width == 0)
    found_width = 1;
  if( found_width > max_width ) {
    found_width = max_width;
  }
  if (found_width == 0)
    found_width = 1;
  found_height = (size_t)num_pixels/found_width;
  if (found_height > max_height) {
    found_height = max_height;
  }
  if (found_height == 0)
    found_height = 1;
  *width = found_width;
  *height = found_height;
  if(NULL != max_size) {
    *max_size = found_width * found_height * sizeof(cl_uint) * 4;
  }
  return SUCCEEDED;
 }
 int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
  size_t width, height;
  int error;
-  error = find_good_image_size(device_id, size_to_allocate, &width, &height);
+  error = find_good_image_size(device_id, size_to_allocate, &width, &height, NULL);
  if (error != SUCCEEDED)
    return error;
@@ -91,7 +101,7 @@ int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_devic
  size_t width, height;
  int error;
-  error = find_good_image_size(device_id, size_to_allocate, &width, &height);
+  error = find_good_image_size(device_id, size_to_allocate, &width, &height, NULL);
  if (error != SUCCEEDED)
    return error;
@@ -121,7 +131,6 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
  int error, result;
  size_t amount_allocated;
  size_t reduction_amount;
  size_t min_allocation_allowed;
  int current_allocation;
  size_t allocation_this_time, actual_allocation;
@@ -129,13 +138,17 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
  *number_of_mems = 0;
  error = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL);
-  test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
+  test_error_abort(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
  error = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL);
-  test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
+  test_error_abort(error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
-//  log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
+  if (global_mem_size > (cl_ulong)SIZE_MAX) {
-//           max_individual_allocation_size, toMB(max_individual_allocation_size),
+    global_mem_size = (cl_ulong)SIZE_MAX;
-//           global_mem_size, toMB(global_mem_size));
+  }
  //  log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
  //           max_individual_allocation_size, toMB(max_individual_allocation_size),
  //           global_mem_size, toMB(global_mem_size));
  if (size_to_allocate > global_mem_size) {
    log_error("Can not allocate more than the global memory size.\n");
@@ -144,18 +157,32 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
  amount_allocated = 0;
  current_allocation = 0;
-  reduction_amount = (size_t)max_individual_allocation_size/16;
+
-  min_allocation_allowed = (size_t)max_individual_allocation_size/4;
+  // If allocating for images, reduce the maximum allocation size to the maximum image size.
-  if (min_allocation_allowed > size_to_allocate)
+  // If we don't do this, then the value of CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4 can be higher
-    min_allocation_allowed = size_to_allocate/4;
+  // than the maximum image size on systems with 16GB of RAM or more. In this case, we
  // succeed in allocating an image but its size is less than CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4
  // (min_allocation_allowed) and thus we fail the allocation below.
  if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) {
    size_t width;
    size_t height;
    size_t max_size;
    error = find_good_image_size(device_id, size_to_allocate, &width, &height, &max_size);
    if (!(error == SUCCEEDED || error == FAILED_TOO_BIG))
      return error;
    if (max_size < max_individual_allocation_size)
      max_individual_allocation_size = max_size;
  }
  reduction_amount = (size_t)max_individual_allocation_size / 16;
  if (type == BUFFER || type == BUFFER_NON_BLOCKING) log_info("\tAttempting to allocate a buffer of size %gMB.\n", toMB(size_to_allocate));
  else if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) log_info("\tAttempting to allocate a read-only image of size %gMB.\n", toMB(size_to_allocate));
  else if (type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) log_info("\tAttempting to allocate a write-only image of size %gMB.\n", toMB(size_to_allocate));
-//  log_info("\t\t(Reduction size is %gMB per iteration, minimum allowable individual allocation size is %gMB.)\n",
+  //  log_info("\t\t(Reduction size is %gMB per iteration, minimum allowable individual allocation size is %gMB.)\n",
-//           toMB(reduction_amount), toMB(min_allocation_allowed));
+  //           toMB(reduction_amount), toMB(min_allocation_allowed));
-//  if (force_fill && type != IMAGE_WRITE && type != IMAGE_WRITE_NON_BLOCKING) log_info("\t\t(Allocations will be filled with random data for checksum calculation.)\n");
+  //  if (force_fill && type != IMAGE_WRITE && type != IMAGE_WRITE_NON_BLOCKING) log_info("\t\t(Allocations will be filled with random data for checksum calculation.)\n");
  // If we are only doing a single allocation, only allow 1
  int max_to_allocate = multiple_allocations ? MAX_NUMBER_TO_ALLOCATE : 1;
@@ -168,10 +195,10 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
    cl_uint max_image_args;
    error = clGetDeviceInfo(device_id, param_name, sizeof(max_image_args), &max_image_args, NULL);
-    test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS");
+    test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS");
    if ((int)max_image_args < max_to_allocate) {
-      log_info("\t\tMaximum number of images per kernel limited to %d\n",(int)max_image_args);
+      log_info("\t\tMaximum number of images per kernel limited to %d\n", (int)max_image_args);
      max_to_allocate = max_image_args;
    }
  }
@@ -179,24 +206,32 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
  // Try to allocate the requested amount.
  while (amount_allocated != size_to_allocate && current_allocation < max_to_allocate) {
    // Determine how much more is needed
    allocation_this_time = size_to_allocate - amount_allocated;
    // Bound by the individual allocation size
    if (allocation_this_time > max_individual_allocation_size)
      allocation_this_time = (size_t)max_individual_allocation_size;
-    // Try to allocate a chunk of memory
+    // Allocate the largest object possible
    result = FAILED_TOO_BIG;
    //log_info("\t\tTrying sub-allocation %d at size %gMB.\n", current_allocation, toMB(allocation_this_time));
    while (result == FAILED_TOO_BIG && allocation_this_time != 0) {
      // Create the object
      result = do_allocation(context, queue, device_id, allocation_this_time, type, &mems[current_allocation]);
      if (result == SUCCEEDED) {
        // Allocation succeeded, another memory object was added to the array
-        *number_of_mems = (current_allocation+1);
+        *number_of_mems = (current_allocation + 1);
        // Verify the size is correct to within 1MB.
        actual_allocation = get_actual_allocation_size(mems[current_allocation]);
-        if (fabs((double)(allocation_this_time - actual_allocation)) > 1024.0*1024.0) {
+        if (fabs((double)allocation_this_time - (double)actual_allocation) > 1024.0*1024.0) {
-         	log_error("Allocation not of expected size. Expected %gMB, got %gMB.\n", toMB(allocation_this_time), toMB( actual_allocation));
+          log_error("Allocation not of expected size. Expected %gMB, got %gMB.\n", toMB(allocation_this_time), toMB(actual_allocation));
          return FAILED_ABORT;
        }
        // If we are filling the allocation for verification do so
        if (force_fill) {
          //log_info("\t\t\tWriting random values to object and calculating checksum.\n");
@@ -207,10 +242,15 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
          result = fill_mem_with_data(context, device_id, queue, mems[current_allocation], d, blocking_write);
        }
      }
      // If creation failed, try to create a smaller object
      if (result == FAILED_TOO_BIG) {
        //log_info("\t\t\tAllocation %d failed at size %gMB. Trying smaller.\n", current_allocation, toMB(allocation_this_time));
        if (allocation_this_time > reduction_amount)
          allocation_this_time -= reduction_amount;
        else if (reduction_amount > 1) {
          reduction_amount /= 2;
        }
        else {
          allocation_this_time = 0;
        }
@@ -223,8 +263,8 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
      return FAILED_ABORT;
    }
-    if (allocation_this_time < min_allocation_allowed && allocation_this_time < (size_to_allocate-amount_allocated)) {
+    if (!allocation_this_time) {
-      log_info("\t\tFailed to allocate an individual allocation of more than %gMB.\n", toMB(min_allocation_allowed));
+      log_info("\t\tFailed to allocate %gMB across several objects.\n", toMB(size_to_allocate));
      return FAILED_TOO_BIG;
    }
--- a/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/api/CMakeLists.txt
@@ -14,6 +14,7 @@ add_executable(conformance_test_api
   test_platform.cpp
   test_retain.cpp
   test_device_min_data_type_align_size_alignment.cpp
   test_queue_properties.cpp
   test_mem_objects.cpp
   test_bool.c
   test_null_buffer_arg.c
--- a/test_conformance/api/main.c
+++ b/test_conformance/api/main.c
@@ -112,6 +112,7 @@ basefn	basefn_list[] = {
    test_get_image1d_info,
    test_get_image1d_array_info,
    test_get_image2d_array_info,
    test_queue_properties,
 };
@@ -199,7 +200,7 @@ const char    *basefn_names[] = {
    "get_image1d_info",
    "get_image1d_array_info",
    "get_image2d_array_info",
-	
+    "queue_properties",
    "all",
 };
--- a/test_conformance/api/procs.h
+++ b/test_conformance/api/procs.h
@@ -105,4 +105,5 @@ extern int      test_get_image1d_info( cl_device_id deviceID, cl_context context
 extern int      test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
 extern int      test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
 extern int      test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
 extern int      test_queue_properties( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
--- a/test_conformance/api/test_kernel_arg_info.c
+++ b/test_conformance/api/test_kernel_arg_info.c
--- a/test_conformance/api/test_queue_properties.cpp
+++ b/test_conformance/api/test_queue_properties.cpp
@@ -0,0 +1,174 @@
 //
 // Copyright (c) 2018 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 #include "testBase.h"
 #include "../../test_common/harness/typeWrappers.h"
 #include "../../test_common/harness/conversions.h"
 #include <sstream>
 #include <string>
 #include <vector>
 using namespace std;
 /*
 Test for the cl_khr_create_command_queue extension. It validates that devices with OpenCL 1.x can use the clCreateCommandQueueWithPropertiesKHR function.
 Based on device capabilities, the test creates queues with NULL properties, with the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property and
 with the CL_QUEUE_PROFILING_ENABLE property. Finally, a simple kernel is executed on each such queue.
 */
 // OpenCL C source of the vec_cpy kernel used by this test: each work-item
 // copies src[tid] to dst[tid], where tid is its global id in dimension 0.
 const char *queue_test_kernel[] = {
 "__kernel void vec_cpy(__global int *src, __global int *dst)\n"
 "{\n"
 "    int  tid = get_global_id(0);\n"
 "\n"
 "    dst[tid] = src[tid];\n"
 "\n"
 "}\n" };
 // Creates a command queue through clCreateCommandQueueWithPropertiesKHR with
 // the given property list, runs the copy kernel on it and verifies the result.
 //   context        - context in which the queue and the buffers are created
 //   queue_prop_def - queue property list passed to the extension entry point (may be NULL)
 //   deviceID       - device the queue is created for
 //   kernel         - kernel taking (src, dst) buffer arguments
 //   num_elements   - number of int elements to copy (also the global work size)
 // Returns 0 on success and -1 when the extension entry point cannot be
 // resolved or the copied data is wrong. CL errors are reported through
 // test_error (assumed to return early with the error - confirm in the harness).
 int enqueue_kernel(cl_context context, const cl_queue_properties_khr *queue_prop_def, cl_device_id deviceID, clKernelWrapper& kernel, size_t num_elements)
 {
    // Host storage is declared before the CL wrappers so it is destroyed after
    // them: streams[0] uses src as its CL_MEM_USE_HOST_PTR backing store.
    std::vector<int> src(num_elements);
    // Pre-filled with -1 (never a valid expected value) so that a read-back
    // that does not cover every element is detected.
    std::vector<int> dst(num_elements, -1);
    const size_t size_in_bytes = num_elements * sizeof(int);
    clMemWrapper streams[2];
    int error;
    clCreateCommandQueueWithPropertiesKHR_fn clCreateCommandQueueWithPropertiesKHR = NULL;
    cl_platform_id platform;
    clEventWrapper event;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
    clCreateCommandQueueWithPropertiesKHR = (clCreateCommandQueueWithPropertiesKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clCreateCommandQueueWithPropertiesKHR");
    if (clCreateCommandQueueWithPropertiesKHR == NULL)
    {
        log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
        return -1;
    }
    clCommandQueueWrapper queue = clCreateCommandQueueWithPropertiesKHR(context, deviceID, queue_prop_def, &error);
    test_error(error, "clCreateCommandQueueWithPropertiesKHR failed");
    for (size_t i = 0; i < num_elements; ++i)
    {
        src[i] = (int)i;
    }
    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, size_in_bytes, src.data(), &error);
    test_error( error, "clCreateBuffer failed." );
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, size_in_bytes, NULL, &error);
    test_error( error, "clCreateBuffer failed." );
    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
    test_error( error, "clSetKernelArg failed." );
    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
    test_error( error, "clSetKernelArg failed." );
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &num_elements, NULL, 0, NULL, &event);
    test_error( error, "clEnqueueNDRangeKernel failed." );
    error = clWaitForEvents(1, &event);
    test_error(error, "clWaitForEvents failed.");
    // The size argument is in bytes, so the whole destination buffer is read
    // back into storage that is separate from the kernel's source data.
    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, size_in_bytes, dst.data(), 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed." );
    for (size_t i = 0; i < num_elements; ++i)
    {
        if (dst[i] != (int)i)
        {
            log_error("ERROR: Incorrect vector copy result.");
            return -1;
        }
    }
    return 0;
 }
 // Conformance test entry point for cl_khr_create_command_queue.
 //   deviceID     - device under test
 //   context      - context used to build the kernel and create the queues
 //   queue        - harness-provided queue; not used, each case creates its own queue
 //   num_elements - number of elements to copy; values <= 0 fall back to 128
 // Returns 0 when the extension is not reported by the device (test skipped)
 // or when every applicable case passes. Failures are reported through
 // test_error (assumed to return early with the error - confirm in the harness).
 int test_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
    if (num_elements <= 0)
    {
        num_elements = 128;
    }
    int error = 0;
    clProgramWrapper program;
    clKernelWrapper kernel;
    size_t strSize;
    std::string strExt;
    // CL_DEVICE_QUEUE_PROPERTIES yields a bitfield, so it is zero-initialised
    // as an integer rather than with the NULL pointer constant.
    cl_command_queue_properties device_props = 0;
    // Property list handed to the extension: { name, value, terminator }.
    // Only the value slot is changed between the cases below.
    cl_queue_properties_khr queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, 0 };
    // Query extension
    error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &strSize);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed");
    strExt.resize(strSize);
    error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, strExt.size(), &strExt[0], NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed");
    log_info("CL_DEVICE_EXTENSIONS:\n%s\n\n", strExt.c_str());
    if (strExt.find("cl_khr_create_command_queue") == string::npos)
    {
        log_info("extension cl_khr_create_command_queue is not supported.\n");
        return 0;
    }
    error = create_single_kernel_helper(context, &program, &kernel, 1, queue_test_kernel, "vec_cpy");
    test_error(error, "create_single_kernel_helper failed");
    // Case 1: NULL property list.
    log_info("Queue property NULL. Testing ... \n");
    error = enqueue_kernel(context, NULL,deviceID, kernel, (size_t)num_elements);
    test_error(error, "enqueue_kernel failed");
    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
    const bool has_out_of_order = (device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
    const bool has_profiling = (device_props & CL_QUEUE_PROFILING_ENABLE) != 0;
    // Case 2: out-of-order execution, only when the device reports it.
    if (has_out_of_order)
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    } else
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not supported \n");
    }
    // Case 3: profiling, only when the device reports it.
    if (has_profiling)
    {
        log_info("Queue property CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_PROFILING_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    } else
    {
        log_info("Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
    }
    // Case 4: both properties combined, only when the device reports both.
    if (has_out_of_order && has_profiling)
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE & CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_PROFILING_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    }
    else
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE or CL_QUEUE_PROFILING_ENABLE not supported \n");
    }
    return 0;
 }
--- a/test_conformance/basic/run_array
+++ b/test_conformance/basic/run_array
--- a/test_conformance/basic/run_array_image_copy
+++ b/test_conformance/basic/run_array_image_copy
--- a/test_conformance/basic/run_image
+++ b/test_conformance/basic/run_image
--- a/test_conformance/basic/run_multi_read_image
+++ b/test_conformance/basic/run_multi_read_image
--- a/test_conformance/basic/test_async_strided_copy.cpp
+++ b/test_conformance/basic/test_async_strided_copy.cpp
@@ -207,13 +207,12 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
            log_error( "ERROR: Results of copy did not validate!\n" );
            sprintf(values + strlen( values), "%d -> [", i);
            for (int j=0; j<(int)elementSize; j++)
-                sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]);
+                sprintf(values + strlen( values), "%2x ", inchar[j]);
            sprintf(values + strlen(values), "] != [");
            for (int j=0; j<(int)elementSize; j++)
-                sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]);
+                sprintf(values + strlen( values), "%2x ", outchar[j]);
            sprintf(values + strlen(values), "]");
            log_error("%s\n", values);
            return -1;
        }
    }
--- a/test_conformance/basic/test_sizeof.c
+++ b/test_conformance/basic/test_sizeof.c
@@ -126,7 +126,7 @@ const size_table  vector_table[] =
 const char  *ptr_table[] =
 {
-    "void*", 
+    "global void*",
    "size_t",
    "sizeof(int)",      // check return type of sizeof
    "ptrdiff_t"
@@ -356,7 +356,7 @@ int test_sizeof(cl_device_id device, cl_context context, cl_command_queue queue,
        err = get_type_size( context, queue, "half", &test  );
        if( err )
            return err;
-        if( test != 8 )
+        if( test != 2 )
        {
            log_error( "\nFAILED: half has size %lld, but must be 2!\n", test );
            return -1;
--- a/test_conformance/compiler/test_compile.c
+++ b/test_conformance/compiler/test_compile.c
@@ -143,8 +143,8 @@ const char* compile_static_var      = "static constant float foo = 2.78;\n";
 const char* compile_static_struct   = "static constant struct bar {float x, y, z, r; int color; } foo = {3.14159};\n";
 const char* compile_static_function = "static int foo(int x, int y) { return x*x + y*y; }\n";
-const char* compile_regular_var      = "constant float foo;\n";
+const char* compile_regular_var      = "constant float foo = 4.0f;\n";
-const char* compile_regular_struct   = "constant struct bar {float x, y, z, r; int color; } foo;\n";
+const char* compile_regular_struct   = "constant struct bar {float x, y, z, r; int color; } foo = {0.f, 0.f, 0.f, 0.f, 0};\n";
 const char* compile_regular_function = "int foo(int x, int y) { return x*x + y*y; }\n";
 const char* link_static_var_access = // use with compile_static_var
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
@@ -34,6 +34,7 @@ const char *known_extensions[] = {
    "cl_khr_3d_image_writes",
    "cl_khr_byte_addressable_store",
    "cl_khr_fp16",
    "cl_khr_spir",
    //API-only extensions after this point.  If you add above here, modify first_API_extension below.
    "cl_khr_gl_sharing",
@@ -42,13 +43,20 @@ const char *known_extensions[] = {
    "cl_khr_icd",
    "cl_khr_egl_image",
    "cl_khr_egl_event",
    "cl_khr_create_command_queue",
    "cl_khr_priority_hints",
    "cl_khr_throttle_hints",
    "cl_khr_il_program",
    "cl_khr_mipmap_image",
    "cl_khr_mipmap_image_writes",
 };
 size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*);
-size_t first_API_extension = 10;
+size_t first_API_extension = 11;
 const char *known_embedded_extensions[] = {
    "cles_khr_int64",
    "cles_khr_2d_image_array_writes",
    NULL
 };
--- a/test_conformance/contractions/contractions.c
+++ b/test_conformance/contractions/contractions.c
@@ -56,7 +56,7 @@
 #endif
 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
-#include <xmmintrin.h>
+#include <emmintrin.h>
 #endif
 #if defined(__PPC__)
@@ -396,6 +396,8 @@ static void PrintArch( void )
    vlog( "\tARCH:\tx86_64\n" );
 #elif defined( __arm__ )
    vlog( "\tARCH:\tarm\n" );
 #elif defined( __aarch64__ )
    vlog( "\tARCH:\taarch64\n" );
 #else
    vlog( "\tARCH:\tunknown\n" );
 #endif
--- a/test_conformance/conversions/CMakeLists.txt
+++ b/test_conformance/conversions/CMakeLists.txt
@@ -17,9 +17,9 @@ add_executable(conformance_test_conversions
        ../../test_common/harness/msvc9.c
        ../../test_common/harness/mingw_compat.c
        ../../test_common/harness/errorHelpers.c
        ../../test_common/harness/parseParameters.cpp
 )
 if(WIN32)
 set_source_files_properties(
        Sleep.c test_conversions.c basic_test_conversions.c
        ../../test_common/harness/ThreadPool.c
@@ -28,7 +28,6 @@ set_source_files_properties(
        ../../test_common/harness/msvc9.c
        ../../test_common/harness/errorHelpers.c
        PROPERTIES LANGUAGE CXX)
 endif(WIN32)
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)")
 if(NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID)
--- a/test_conformance/conversions/basic_test_conversions.c
+++ b/test_conformance/conversions/basic_test_conversions.c
@@ -743,16 +743,11 @@ static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((
 static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; }
 static void ulong2float( void *out, void *in)
 {
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && defined(_M_X64)
    cl_ulong l = ((cl_ulong*) in)[0];
    float result;
    cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l;
 #if defined(_M_X64)
    _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl));
 #else
 	result = sl;
 #endif
    ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result));
 #else
    cl_ulong l = ((cl_ulong*) in)[0];
--- a/test_conformance/conversions/test_conversions.c
+++ b/test_conformance/conversions/test_conversions.c
@@ -16,6 +16,7 @@
 #include "../../test_common/harness/compat.h"
 #include "../../test_common/harness/rounding_mode.h"
 #include "../../test_common/harness/ThreadPool.h"
 #include "../../test_common/harness/parseParameters.h"
 #if defined (_WIN32)
 #define MAX(x,y) ((x>y)?x:y);
 #define MIN(x,y) ((x<y)?x:y);
@@ -103,6 +104,7 @@ cl_mem          gOutBuffers[ kCallStyleCount ];
 size_t          gComputeDevices = 0;
 uint32_t        gDeviceFrequency = 0;
 int             gWimpyMode = 0;
 int             gWimpyReductionFactor = 128;
 int             gSkipTesting = 0;
 int             gForceFTZ = 0;
 int             gMultithread = 1;
@@ -414,6 +416,9 @@ static int ParseArgs( int argc, const char **argv )
                    case 'w':
                        gWimpyMode ^= 1;
                        break;
                    case '[':
                        parseWimpyReductionFactor(arg, gWimpyReductionFactor);
                        break;
                    case 'z':
                        gForceFTZ ^= 1;
                        break;
@@ -509,6 +514,7 @@ static int ParseArgs( int argc, const char **argv )
        vlog( "*** WARNING: Testing in Wimpy mode!                     ***\n" );
        vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
        vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
        vlog("*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
    }
    return 0;
@@ -535,6 +541,7 @@ static void PrintUsage( void )
    vlog( "\t\t-l\tToggle link check mode. When on, testing is skipped, and we just check to see that the kernels build. (Off by default.)\n" );
    vlog( "\t\t-m\tToggle Multithreading. (On by default.)\n" );
    vlog( "\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very small subset of the tests for each fn. NOT A VALID TEST! (Off by default.)\n" );
    vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor);
    vlog( "\t\t-z\tToggle flush to zero mode  (Default: per device)\n" );
    vlog( "\t\t-#\tTest just vector size given by #, where # is an element of the set {1,2,3,4,8,16}\n" );
    vlog( "\n" );
@@ -556,6 +563,8 @@ static void PrintArch( void )
    vlog( "ARCH:\tx86_64\n" );
 #elif defined( __arm__ )
    vlog( "ARCH:\tarm\n" );
 #elif defined( __aarch64__ )
    vlog( "ARCH:\taarch64\n" );
 #elif defined (_WIN32)
    vlog( "ARCH:\tWindows\n" );
 #else
@@ -1136,6 +1145,7 @@ static int DoTest( Type outType, Type inType, SaturationMode sat, RoundingMode r
    gTestCount++;
    size_t blockCount = BUFFER_SIZE / MAX( gTypeSizes[ inType ], gTypeSizes[ outType ] );
    size_t step = blockCount;
    uint64_t lastCase = 1ULL << (8*gTypeSizes[ inType ]);
    cl_event writeInputBuffer = NULL;
@@ -1194,15 +1204,12 @@ static int DoTest( Type outType, Type inType, SaturationMode sat, RoundingMode r
    if( 8*gTypeSizes[ inType ] > 32 )
        lastCase = 0x100000000ULL;
    if ( gWimpyMode )
        step = (size_t)blockCount * (size_t)gWimpyReductionFactor;
    vlog( "Testing... " );
    fflush(stdout);
-    for( i = 0; i < (uint64_t)lastCase; i += blockCount )
+    for( i = 0; i < (uint64_t)lastCase; i += step )
    {
        if (gWimpyMode) {
            uint64_t blockIndex = (i / blockCount) & 0xFF;
            if (blockIndex != 0 && blockIndex != 0xFF)
                continue;
        }
        if( 0 == ( i & ((lastCase >> 3) -1))) {
            vlog(".");
--- a/test_conformance/geometrics/test_geometrics_double.cpp
+++ b/test_conformance/geometrics/test_geometrics_double.cpp
@@ -200,7 +200,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command
            return -1;
        /* Generate some streams. Note: deliberately do some random data in w to verify that it gets ignored */
-        for( i = 0; i < TEST_SIZE * vecsize; i++ )
+        for( i = 0; i < size * vecsize; i++ )
        {
            inDataA[ i ] = get_random_double( -512.f, 512.f, d );
            inDataB[ i ] = get_random_double( -512.f, 512.f, d );
@@ -234,7 +234,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command
        }
        /* Run the kernel */
-        threads[0] = TEST_SIZE;
+        threads[0] = size;
        error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
        test_error( error, "Unable to get work group size to use" );
--- a/test_conformance/gl/test_images_write_common.cpp
+++ b/test_conformance/gl/test_images_write_common.cpp
@@ -83,6 +83,7 @@ static const char *kernelpattern_image_write_1Darray_half =
 "}\n";
 static const char *kernelpattern_image_write_3D =
 "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n"
 "__kernel void sample_test( __global %s4 *source, write_only image3d_t dest )\n"
 "{\n"
 "    int  tidX   = get_global_id(0);\n"
@@ -96,6 +97,7 @@ static const char *kernelpattern_image_write_3D =
 "}\n";
 static const char *kernelpattern_image_write_3D_half =
 "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n"
 "__kernel void sample_test( __global half4 *source, write_only image3d_t dest )\n"
 "{\n"
 "    int  tidX   = get_global_id(0);\n"
--- a/test_conformance/half/CMakeLists.txt
+++ b/test_conformance/half/CMakeLists.txt
@@ -4,16 +4,15 @@ add_executable(conformance_test_half
        ../../test_common/harness/msvc9.c
        ../../test_common/harness/mingw_compat.c
        ../../test_common/harness/errorHelpers.c
        ../../test_common/harness/parseParameters.cpp
 )
 if(MSVC)
 set_source_files_properties(
          cl_utils.c Test_vLoadHalf.c Test_roundTrip.c
        Test_vStoreHalf.c main.c
        ../../test_common/harness/mingw_compat.c  
        ../../test_common/harness/msvc9.c
        PROPERTIES LANGUAGE CXX)
 endif(MSVC)
 TARGET_LINK_LIBRARIES(conformance_test_half
        ${CLConform_LIBRARIES})
--- a/test_conformance/half/Test_vStoreHalf.c
+++ b/test_conformance/half/Test_vStoreHalf.c
@@ -848,7 +848,7 @@ int Test_vStoreHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const c
    size_t stride = blockCount;
    if (gWimpyMode)
-        stride = 0x10000000U;
+        stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor;
    // we handle 64-bit types a bit differently.
    if( lastCase == 0 )
@@ -1502,7 +1502,7 @@ int Test_vStoreaHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const
    size_t stride = blockCount;
    if (gWimpyMode)
-        stride = 0x10000000U;
+        stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor;
    // we handle 64-bit types a bit differently.
    if( lastCase == 0 )
--- a/test_conformance/half/cl_utils.c
+++ b/test_conformance/half/cl_utils.c
@@ -66,6 +66,7 @@ size_t          gWorkGroupSize = 0;
 int             gTestCount = 0;
 int             gFailCount = 0;
 bool            gWimpyMode = false;
 int             gWimpyReductionFactor = 512;
 int             gTestDouble = 0;
 uint32_t        gDeviceIndex = 0;
--- a/test_conformance/half/cl_utils.h
+++ b/test_conformance/half/cl_utils.h
@@ -88,6 +88,7 @@ extern int             gReportTimes;
 // size of 32 bit ranges to a much smaller set.  This is meant to be used
 // as a smoke test
 extern bool            gWimpyMode;
 extern int             gWimpyReductionFactor;
 uint64_t ReadTime( void );
 double SubtractTime( uint64_t endTime, uint64_t startTime );
--- a/test_conformance/half/main.c
+++ b/test_conformance/half/main.c
@@ -25,6 +25,7 @@
 #endif
 #include "../../test_common/harness/mingw_compat.h"
 #include "../../test_common/harness/parseParameters.h"
 #if defined (__MINGW32__)
 #include <sys/param.h>
 #endif
@@ -217,7 +218,9 @@ static int ParseArgs( int argc, const char **argv )
                    case 'w':  // Wimpy mode
                        gWimpyMode = true;
                        break;
-                        
+                    case '[':
                        parseWimpyReductionFactor( arg, gWimpyReductionFactor);
                        break;
                    default:
                        vlog_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
                        PrintUsage();
@@ -253,6 +256,7 @@ static int ParseArgs( int argc, const char **argv )
        vlog( "*** WARNING: Testing in Wimpy mode!                     ***\n" );
        vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
        vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
        vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
    }
    return 0;
 }
@@ -263,6 +267,7 @@ static void PrintUsage( void )
    vlog( "\t\t-d\tToggle double precision testing (default: on if double supported)\n" );
    vlog( "\t\t-t\tToggle reporting performance data.\n" );
    vlog( "\t\t-w\tRun in wimpy mode\n" );
    vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor);
    vlog( "\t\t-h\tHelp\n" );
    vlog( "\n" );
 }
@@ -282,6 +287,8 @@ static void PrintArch( void )
    vlog( "ARCH:\tx86_64\n" );
 #elif defined( __arm__ )
    vlog( "ARCH:\tarm\n" );
 #elif defined( __aarch64__ )
    /* 64-bit ARM; uses the same "ARCH:\t<name>" format as the other branches here. */
    vlog( "ARCH:\taarch64\n" );
 #else
 #error unknown arch
 #endif
--- a/test_conformance/images/clCopyImage/test_copy_1D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D.cpp
@@ -74,7 +74,6 @@ int test_copy_image_set_1D( cl_device_id device, cl_image_format *format )
    cl_ulong maxAllocSize, memSize;
    image_descriptor imageInfo = { 0 };
    RandomSeed seed(gRandomSeed);
 	size_t rowPadding = gEnablePitch ? 48 : 0;
    size_t pixelSize;
    imageInfo.format = format;
@@ -87,10 +86,15 @@ int test_copy_image_set_1D( cl_device_id device, cl_image_format *format )
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            size_t rowPadding = gEnablePitch ? 48 : 0;
            imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
            if (gEnablePitch)
@@ -119,6 +123,7 @@ int test_copy_image_set_1D( cl_device_id device, cl_image_format *format )
        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
        {
            size_t rowPadding = gEnablePitch ? 48 : 0;
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -142,6 +147,7 @@ int test_copy_image_set_1D( cl_device_id device, cl_image_format *format )
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
            size_t rowPadding = gEnablePitch ? 48 : 0;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
--- a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
@@ -77,7 +77,6 @@ int test_copy_image_set_1D_array( cl_device_id device, cl_image_format *format )
    cl_ulong maxAllocSize, memSize;
    image_descriptor imageInfo = { 0 };
    RandomSeed seed(gRandomSeed);
 	size_t rowPadding = gEnablePitch ? 48 : 0;
    size_t pixelSize;
    imageInfo.format = format;
@@ -90,10 +89,15 @@ int test_copy_image_set_1D_array( cl_device_id device, cl_image_format *format )
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 1D array size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            size_t rowPadding = gEnablePitch ? 48 : 0;
            imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
            if (gEnablePitch)
@@ -126,6 +130,7 @@ int test_copy_image_set_1D_array( cl_device_id device, cl_image_format *format )
        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
        {
            size_t rowPadding = gEnablePitch ? 48 : 0;
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.arraySize = sizes[ idx ][ 2 ];
            imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -151,6 +156,7 @@ int test_copy_image_set_1D_array( cl_device_id device, cl_image_format *format )
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
            size_t rowPadding = gEnablePitch ? 48 : 0;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
--- a/test_conformance/images/clCopyImage/test_copy_2D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D.cpp
@@ -77,7 +77,6 @@ int test_copy_image_set_2D( cl_device_id device, cl_image_format *format )
    cl_ulong maxAllocSize, memSize;
    image_descriptor imageInfo = { 0 };
    RandomSeed seed(gRandomSeed);
 	size_t rowPadding = gEnablePitch ? 48 : 0;
    size_t pixelSize;
    imageInfo.format = format;
@@ -90,10 +89,15 @@ int test_copy_image_set_2D( cl_device_id device, cl_image_format *format )
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            size_t rowPadding = gEnablePitch ? 48 : 0;
            imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
            if (gEnablePitch)
@@ -125,6 +129,7 @@ int test_copy_image_set_2D( cl_device_id device, cl_image_format *format )
        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
        {
            size_t rowPadding = gEnablePitch ? 48 : 0;
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.height = sizes[ idx ][ 1 ];
            imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -149,6 +154,7 @@ int test_copy_image_set_2D( cl_device_id device, cl_image_format *format )
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
            size_t rowPadding = gEnablePitch ? 48 : 0;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
--- a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
@@ -142,8 +142,6 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
    image_descriptor srcImageInfo = { 0 };
    image_descriptor dstImageInfo = { 0 };
    RandomSeed  seed( gRandomSeed );
    size_t rowPadding = gEnablePitch ? 256 : 0;
    size_t slicePadding = gEnablePitch ? 3 : 0;
    srcImageInfo.format = dstImageInfo.format = format;
@@ -154,6 +152,10 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ )
@@ -162,6 +164,9 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
            {
                for( dstImageInfo.arraySize = 4; dstImageInfo.arraySize < 9; dstImageInfo.arraySize++ )
                {
                    size_t rowPadding = gEnablePitch ? 256 : 0;
                    size_t slicePadding = gEnablePitch ? 3 : 0;
                    set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, dstImageInfo.arraySize, rowPadding, slicePadding );
                    set_image_dimensions( &srcImageInfo, dstImageInfo.width, dstImageInfo.height, 0, rowPadding, slicePadding );
                    if( gDebugTrace )
@@ -185,18 +190,27 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
-        size_t numbeOfSizes;
+        size_t numberOfSizes2DArray, numberOfSizes2D;
-        size_t sizes[100][3];
+        size_t sizes2DArray[100][3], sizes2D[100][3];
        // Try to allocate a bit smaller images because we need the 2D ones as well for the copy.
-        get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE2D_ARRAY, dstImageInfo.format);
+        get_max_sizes(&numberOfSizes2DArray, 100, sizes2DArray, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE2D_ARRAY, dstImageInfo.format);
        get_max_sizes(&numberOfSizes2D, 100, sizes2D, maxWidth, maxHeight, 1, 1, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE2D, dstImageInfo.format);
-        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
+        for( size_t i = 0; i < numberOfSizes2D; i++ )
        {
-            set_image_dimensions( &dstImageInfo, sizes[ idx ][ 0 ], sizes[ idx ][ 1 ], sizes[ idx ][ 2 ], rowPadding, slicePadding );
+          for( size_t j = 0; j < numberOfSizes2DArray; j++ )
-            set_image_dimensions( &srcImageInfo, (size_t)random_in_range( (int)sizes[ idx ][ 0 ], (int)maxWidth/4, seed ), (size_t)random_in_range( (int)sizes[ idx ][ 1 ], (int)maxHeight/4, seed ), 0, rowPadding, slicePadding );
+        {
            size_t rowPadding = gEnablePitch ? 256 : 0;
            size_t slicePadding = gEnablePitch ? 3 : 0;
            set_image_dimensions( &dstImageInfo, sizes2DArray[ j ][ 0 ], sizes2DArray[ j ][ 1 ], sizes2DArray[ j ][ 2 ], rowPadding, slicePadding );
            set_image_dimensions( &srcImageInfo, sizes2D[ i ][ 0 ], sizes2D[ i ][ 1 ], 0, rowPadding, slicePadding );
            cl_ulong dstSize = (cl_ulong)dstImageInfo.slicePitch * (cl_ulong)dstImageInfo.arraySize * 4;
-            if( dstSize < maxAllocSize && dstSize < ( memSize / 3 ) )
+            cl_ulong srcSize = (cl_ulong)srcImageInfo.rowPitch * (cl_ulong)srcImageInfo.height * 4;
            if( dstSize < maxAllocSize && dstSize < ( memSize / 3 ) && srcSize < maxAllocSize && srcSize < ( memSize / 3 ) )
            {
                if (reverse)
                    log_info( "Testing %d x %d x %d to %d x %d\n", (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height );
@@ -227,7 +241,7 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
                    log_info("Not testing max size %d x %d to %d x %d x %d due to memory constraints.\n",
                         (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize);
            }
-            
+          }
        }
    }
    else
@@ -235,6 +249,9 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong srcSize, dstSize;
            size_t rowPadding = gEnablePitch ? 256 : 0;
            size_t slicePadding = gEnablePitch ? 3 : 0;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
--- a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
@@ -138,8 +138,6 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, boo
    image_descriptor srcImageInfo = { 0 };
    image_descriptor dstImageInfo = { 0 };
    RandomSeed  seed( gRandomSeed );
    size_t rowPadding = gEnablePitch ? 256 : 0;
    size_t slicePadding = gEnablePitch ? 3 : 0;
    srcImageInfo.format = dstImageInfo.format = format;
@@ -152,6 +150,10 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, boo
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D or 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ )
@@ -160,6 +162,9 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, boo
            {
                for( dstImageInfo.depth = 4; dstImageInfo.depth < 9; dstImageInfo.depth++ )
                {
                    size_t rowPadding = gEnablePitch ? 256 : 0;
                    size_t slicePadding = gEnablePitch ? 3 : 0;
                    set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, dstImageInfo.depth, rowPadding, slicePadding );
                    set_image_dimensions( &srcImageInfo, dstImageInfo.width, dstImageInfo.height, 0, rowPadding, slicePadding );
                    if( gDebugTrace )
@@ -179,18 +184,27 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, boo
    else if( gTestMaxImages )
    {
        // Try a specific set of maximum sizes
-        size_t numbeOfSizes;
+        size_t numberOfSizes3D, numberOfSizes2D;
-        size_t sizes[100][3];
+        size_t sizes3D[100][3], sizes2D[100][3];
        // Try to allocate a bit smaller images because we need the 2D ones as well for the copy.
-        get_max_sizes(&numbeOfSizes, 100, sizes, max3DWidth, max3DHeight, max3DDepth, 1, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE3D, dstImageInfo.format);
+        get_max_sizes(&numberOfSizes3D, 100, sizes3D, max3DWidth, max3DHeight, max3DDepth, 1, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE3D, dstImageInfo.format);
        get_max_sizes(&numberOfSizes2D, 100, sizes2D, maxWidth, maxHeight, 1, 1, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE2D, srcImageInfo.format);
-        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
+        for( size_t i = 0; i < numberOfSizes2D; i++ )
        for( size_t j = 0; j < numberOfSizes3D; j++ )
        {
-            set_image_dimensions( &dstImageInfo, sizes[ idx ][ 0 ], sizes[ idx ][ 1 ], sizes[ idx ][ 2 ], rowPadding, slicePadding );
+            size_t rowPadding = gEnablePitch ? 256 : 0;
-            set_image_dimensions( &srcImageInfo, (size_t)random_in_range( (int)sizes[ idx ][ 0 ], (int)maxWidth/4, seed ), (size_t)random_in_range( (int)sizes[ idx ][ 1 ], (int)maxHeight/4, seed ), 0, rowPadding, slicePadding );
+            size_t slicePadding = gEnablePitch ? 3 : 0;
            set_image_dimensions( &dstImageInfo, sizes3D[ j ][ 0 ], sizes3D[ j ][ 1 ], sizes3D[ j ][ 2 ], rowPadding, slicePadding );
            set_image_dimensions( &srcImageInfo, sizes2D[ i ][ 0 ], sizes2D[ i ][ 1 ], 0, rowPadding, slicePadding );
            cl_ulong dstSize = (cl_ulong)dstImageInfo.slicePitch * (cl_ulong)dstImageInfo.depth * 4;
-            if( dstSize < maxAllocSize && dstSize < ( memSize / 3 ) )
+            cl_ulong srcSize = (cl_ulong)srcImageInfo.rowPitch * (cl_ulong)srcImageInfo.height * 4;
            if( dstSize < maxAllocSize && dstSize < ( memSize / 3 ) && srcSize < maxAllocSize && srcSize < ( memSize / 3 ) )
            {
                log_info( "Testing %d x %d to %d x %d x %d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.depth );
                if( gDebugTrace )
@@ -216,6 +230,9 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, boo
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong srcSize, dstSize;
            size_t rowPadding = gEnablePitch ? 256 : 0;
            size_t slicePadding = gEnablePitch ? 3 : 0;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
--- a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
@@ -42,8 +42,6 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format )
    cl_ulong maxAllocSize, memSize;
    image_descriptor imageInfo = { 0 };
    RandomSeed seed( gRandomSeed );
    size_t rowPadding = gEnablePitch ? 80 : 0;
    size_t slicePadding = gEnablePitch ? 3 : 0;
    size_t pixelSize;
    imageInfo.format = format;
@@ -57,10 +55,17 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format )
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D array size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            size_t rowPadding = gEnablePitch ? 80 : 0;
            size_t slicePadding = gEnablePitch ? 3 : 0;
            imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
            if (gEnablePitch)
@@ -95,6 +100,9 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format )
        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
        {
            size_t rowPadding = gEnablePitch ? 80 : 0;
            size_t slicePadding = gEnablePitch ? 3 : 0;
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.height = sizes[ idx ][ 1 ];
            imageInfo.arraySize = sizes[ idx ][ 2 ];
@@ -108,7 +116,7 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format )
                } while ((imageInfo.rowPitch % pixelSize) != 0);
            }
-            imageInfo.slicePitch = imageInfo.height * (imageInfo.rowPitch + slicePadding);
+            imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding);
            log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
            if( gDebugTrace )
                log_info( "   at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
@@ -121,6 +129,9 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format )
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
            size_t rowPadding = gEnablePitch ? 80 : 0;
            size_t slicePadding = gEnablePitch ? 3 : 0;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
--- a/test_conformance/images/clCopyImage/test_copy_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D.cpp
@@ -42,8 +42,6 @@ int test_copy_image_set_3D( cl_device_id device, cl_image_format *format )
    cl_ulong maxAllocSize, memSize;
    image_descriptor imageInfo = { 0 };
    RandomSeed seed( gRandomSeed );
    size_t rowPadding = gEnablePitch ? 80 : 0;
    size_t slicePadding = gEnablePitch ? 3 : 0;
    size_t pixelSize;
    imageInfo.format = format;
@@ -57,10 +55,17 @@ int test_copy_image_set_3D( cl_device_id device, cl_image_format *format )
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
        {
            size_t rowPadding = gEnablePitch ? 80 : 0;
            size_t slicePadding = gEnablePitch ? 3 : 0;
            imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
            if (gEnablePitch)
@@ -94,6 +99,9 @@ int test_copy_image_set_3D( cl_device_id device, cl_image_format *format )
        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
        {
            size_t rowPadding = gEnablePitch ? 80 : 0;
            size_t slicePadding = gEnablePitch ? 3 : 0;
            imageInfo.width = sizes[ idx ][ 0 ];
            imageInfo.height = sizes[ idx ][ 1 ];
            imageInfo.depth = sizes[ idx ][ 2 ];
@@ -107,7 +115,7 @@ int test_copy_image_set_3D( cl_device_id device, cl_image_format *format )
                } while ((imageInfo.rowPitch % pixelSize) != 0);
            }
-            imageInfo.slicePitch = imageInfo.height * (imageInfo.rowPitch + slicePadding);
+            imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding);
            log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
            if( gDebugTrace )
                log_info( "   at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
@@ -120,6 +128,9 @@ int test_copy_image_set_3D( cl_device_id device, cl_image_format *format )
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
            size_t rowPadding = gEnablePitch ? 80 : 0;
            size_t slicePadding = gEnablePitch ? 3 : 0;
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
--- a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
@@ -167,6 +167,10 @@ int test_copy_image_set_3D_2D_array( cl_device_id device, cl_image_format *forma
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D image array or 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ )
--- a/test_conformance/images/clFillImage/test_fill_1D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D.cpp
@@ -72,9 +72,10 @@ int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, Explic
 {
    size_t maxWidth;
    cl_ulong maxAllocSize, memSize;
-    image_descriptor imageInfo;
+    image_descriptor imageInfo = {0};
    RandomSeed seed(gRandomSeed);
-    size_t rowPadding = gEnablePitch ? 48 : 0;
+    size_t rowPadding_default = 48;
    size_t rowPadding = gEnablePitch ? rowPadding_default : 0;
    size_t pixelSize;
    memset(&imageInfo, 0x0, sizeof(image_descriptor));
@@ -87,6 +88,10 @@ int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, Explic
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if ( gTestSmallImages )
    {
        for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
@@ -95,6 +100,7 @@ int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, Explic
            if (gEnablePitch)
            {
              rowPadding = rowPadding_default;
              do {
                rowPadding++;
                imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -124,6 +130,7 @@ int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, Explic
            if (gEnablePitch)
            {
              rowPadding = rowPadding_default;
              do {
                rowPadding++;
                imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -152,6 +159,7 @@ int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, Explic
                if (gEnablePitch)
                {
                  rowPadding = rowPadding_default;
                  do {
                    rowPadding++;
                    imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
--- a/test_conformance/images/clFillImage/test_fill_1D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D_array.cpp
@@ -74,9 +74,10 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format,
 {
    size_t maxWidth, maxArraySize;
    cl_ulong maxAllocSize, memSize;
-    image_descriptor imageInfo;
+    image_descriptor imageInfo = {0};
    RandomSeed seed(gRandomSeed);
-    size_t rowPadding = gEnablePitch ? 48 : 0;
+    size_t rowPadding_default = 48;
    size_t rowPadding = gEnablePitch ? rowPadding_default : 0;
    size_t pixelSize;
    memset(&imageInfo, 0x0, sizeof(image_descriptor));
@@ -90,6 +91,10 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format,
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 1D array size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if ( gTestSmallImages )
    {
        for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
@@ -98,6 +103,7 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format,
            if (gEnablePitch)
            {
              rowPadding = rowPadding_default;
              do {
                rowPadding++;
                imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -132,6 +138,7 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format,
            if (gEnablePitch)
            {
              rowPadding = rowPadding_default;
              do {
                rowPadding++;
                imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -162,6 +169,7 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format,
                if (gEnablePitch)
                {
                  rowPadding = rowPadding_default;
                  do {
                    rowPadding++;
                    imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
--- a/test_conformance/images/clFillImage/test_fill_2D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D.cpp
@@ -74,9 +74,10 @@ int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, Explic
 {
    size_t maxWidth, maxHeight;
    cl_ulong maxAllocSize, memSize;
-    image_descriptor imageInfo;
+    image_descriptor imageInfo = {0};
    RandomSeed seed(gRandomSeed);
-    size_t rowPadding = gEnablePitch ? 48 : 0;
+    size_t rowPadding_default = 48;
    size_t rowPadding = gEnablePitch ? rowPadding_default : 0;
    size_t pixelSize;
    memset(&imageInfo, 0x0, sizeof(image_descriptor));
@@ -90,6 +91,10 @@ int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, Explic
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if ( gTestSmallImages )
    {
        for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
@@ -98,6 +103,7 @@ int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, Explic
            if (gEnablePitch)
            {
              rowPadding = rowPadding_default;
              do {
                rowPadding++;
                imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -131,6 +137,7 @@ int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, Explic
            if (gEnablePitch)
            {
              rowPadding = rowPadding_default;
              do {
                rowPadding++;
                imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -160,6 +167,7 @@ int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, Explic
                if (gEnablePitch)
                {
                  rowPadding = rowPadding_default;
                  do {
                    rowPadding++;
                    imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
--- a/test_conformance/images/clFillImage/test_fill_2D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
@@ -75,9 +75,10 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
 {
    size_t maxWidth, maxHeight, maxArraySize;
    cl_ulong maxAllocSize, memSize;
-    image_descriptor imageInfo;
+    image_descriptor imageInfo = {0};
    RandomSeed seed( gRandomSeed );
-    size_t rowPadding = gEnablePitch ? 80 : 0;
+    size_t rowPadding_default = 80;
    size_t rowPadding = gEnablePitch ? rowPadding_default : 0;
    size_t slicePadding = gEnablePitch ? 3 : 0;
    size_t pixelSize;
@@ -93,6 +94,10 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D array size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if ( gTestSmallImages )
    {
        for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
@@ -101,6 +106,7 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
            if (gEnablePitch)
            {
              rowPadding = rowPadding_default;
              do {
                rowPadding++;
                imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -137,6 +143,7 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
            if (gEnablePitch)
            {
              rowPadding = rowPadding_default;
              do {
                rowPadding++;
                imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -169,6 +176,7 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
                if (gEnablePitch)
                {
                  rowPadding = rowPadding_default;
                  do {
                    rowPadding++;
                    imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
--- a/test_conformance/images/clFillImage/test_fill_3D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_3D.cpp
@@ -75,9 +75,10 @@ int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, Explic
 {
    size_t maxWidth, maxHeight, maxDepth;
    cl_ulong maxAllocSize, memSize;
-    image_descriptor imageInfo;
+    image_descriptor imageInfo = {0};
    RandomSeed seed( gRandomSeed );
-    size_t rowPadding = gEnablePitch ? 80 : 0;
+    size_t rowPadding_default = 80;
    size_t rowPadding = gEnablePitch ? rowPadding_default : 0;
    size_t slicePadding = gEnablePitch ? 3 : 0;
    size_t pixelSize;
@@ -93,6 +94,10 @@ int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, Explic
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if ( gTestSmallImages )
    {
        for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
@@ -101,6 +106,7 @@ int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, Explic
            if (gEnablePitch)
            {
              rowPadding = rowPadding_default;
              do {
                rowPadding++;
                imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -138,6 +144,7 @@ int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, Explic
            if (gEnablePitch)
            {
              rowPadding = rowPadding_default;
              do {
                rowPadding++;
                imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
@@ -169,6 +176,7 @@ int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, Explic
                if (gEnablePitch)
                {
                  rowPadding = rowPadding_default;
                  do {
                    rowPadding++;
                    imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
--- a/test_conformance/images/clGetInfo/test_1D.cpp
+++ b/test_conformance/images/clGetInfo/test_1D.cpp
@@ -49,6 +49,10 @@ int test_get_image_info_1D( cl_device_id device, cl_image_format *format, cl_mem
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 1D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
--- a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
+++ b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
@@ -47,6 +47,10 @@ int test_get_image_info_1D_array( cl_device_id device, cl_image_format *format,
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 1D array size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
--- a/test_conformance/images/clGetInfo/test_2D.cpp
+++ b/test_conformance/images/clGetInfo/test_2D.cpp
@@ -291,6 +291,10 @@ int test_get_image_info_2D( cl_device_id device, cl_image_format *format, cl_mem
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D width or max image 3D height or max memory allocation size or global memory size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
--- a/test_conformance/images/clGetInfo/test_3D.cpp
+++ b/test_conformance/images/clGetInfo/test_3D.cpp
@@ -50,6 +50,10 @@ int test_get_image_info_3D( cl_device_id device, cl_image_format *format, cl_mem
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
--- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp
@@ -138,6 +138,10 @@ int test_read_image_set_1D( cl_device_id device, cl_image_format *format )
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
--- a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
@@ -144,6 +144,10 @@ int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format )
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
--- a/test_conformance/images/clReadWriteImage/test_read_2D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp
@@ -144,6 +144,10 @@ int test_read_image_set_2D( cl_device_id device, cl_image_format *format )
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
--- a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
@@ -118,6 +118,10 @@ int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format )
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
--- a/test_conformance/images/clReadWriteImage/test_read_3D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp
@@ -118,6 +118,10 @@ int test_read_image_set_3D( cl_device_id device, cl_image_format *format )
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    if( gTestSmallImages )
    {
        for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
--- a/test_conformance/images/image_helpers.cpp
+++ b/test_conformance/images/image_helpers.cpp
@@ -136,7 +136,7 @@ AddressFn   ClampToEdgeLinearFn
 volatile float gFloatHome;
 float           RepeatNormalizedAddressFn( float fValue, size_t maxValue )
 {
-#ifndef _MSC_VER // Use original if not the VS compiler.
+#if !defined( __i386__ ) && !defined( __x86_64__ ) // Use original if not the x86 compiler.
    // General computation for repeat
    return (fValue - floorf( fValue )) * (float) maxValue; // Reduce to [0, 1.f]
 #else // Otherwise, use this instead:
@@ -352,9 +352,14 @@ void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
      int x1_dim = (fixed_dim == 2) ? 1 : 2;
      // Choose two other sizes for these dimensions
      double x1 = fmin(fmin(A/M,maximum_sizes[x1_dim]),other_sizes[(other_size++)%num_other_sizes]);
      double x0 = fmin(fmin(A/M,maximum_sizes[x0_dim]),other_sizes[(other_size++)%num_other_sizes]);
-      
+      // GPUs have certain restrictions on minimum width (row alignment) of images which has given us issues
      // testing small widths in this test (say we set width to 3 for testing, and compute size based on this width and decide
      // it fits within vram ... but GPU driver decides that, due to row alignment requirements, it has to use
      // width of 16 which doesnt fit in vram). For this purpose we are not testing width < 16 for this test.
      if(x0_dim == 0 && x0 < 16)
        x0 = 16;
      double x1 = fmin(fmin(A/M/x0,maximum_sizes[x1_dim]),other_sizes[(other_size++)%num_other_sizes]);
      // Store the size
      sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
      sizes[(*numberOfSizes)][x0_dim]    = (size_t)x0;
@@ -384,13 +389,6 @@ void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
  }
 }
 // Returns nonzero when the exponent field of `a` is all zero bits. That is
 // the case for subnormal values and also for +0 / -0, so callers get "true"
 // for zero as well.
 // NOTE(review): assumes cl_float is a 32-bit IEEE-754 single and cl_int is a
 // 32-bit integer -- confirm against the CL headers.
 int issubnormal(float a) 
 {
    // Reinterpret the float's bit pattern as an integer through a union.
    union { cl_int i; cl_float f; } u;
    u.f = a;
    // 0x7f800000 masks the 8 exponent bits of a binary32 value; the sign and
    // mantissa bits are ignored.
    return (u.i & 0x7f800000U) == 0;
 }
 float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) {
    if (sampler->filter_mode == CL_FILTER_NEAREST)
        return 0.0f;
@@ -1254,15 +1252,11 @@ bool get_integer_coords_offset( float x, float y, float z, float xAddressOffset,
            case CL_ADDRESS_REPEAT:
                x = RepeatNormalizedAddressFn( x, width );
                if (height != 0) {
-                    if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+                    if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
                        y *= (float)height+yAddressOffset;
                    else
                        y = RepeatNormalizedAddressFn( y, height );
                }
                if (depth != 0) {
-                    if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+                    if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
                        z *= (float)depth+zAddressOffset;
                    else
                        z = RepeatNormalizedAddressFn( z, depth );
                }
@@ -1298,15 +1292,11 @@ bool get_integer_coords_offset( float x, float y, float z, float xAddressOffset,
            case CL_ADDRESS_MIRRORED_REPEAT:
                x = MirroredRepeatNormalizedAddressFn( x, width );
                if (height != 0) {
-                    if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+                    if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
                        y *= (float)height+yAddressOffset;
                    else
                        y = MirroredRepeatNormalizedAddressFn( y, height );
                }
                if (depth != 0) {
-                    if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+                    if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
                        z *= (float)depth+zAddressOffset;
                    else
                        z = MirroredRepeatNormalizedAddressFn( z, depth );
                }
@@ -1334,9 +1324,11 @@ bool get_integer_coords_offset( float x, float y, float z, float xAddressOffset,
            default:
                // Also, remultiply to the original coords. This simulates any truncation in
                // the pass to OpenCL
-                x *= (float)width+xAddressOffset;
+                x = (x * (float)width) + xAddressOffset;
-                y *= (float)height+yAddressOffset;
+                if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
-                z *= (float)depth+zAddressOffset;
+                  y = (y * (float)height) + yAddressOffset;
                if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
                  z = (z * (float)depth) + zAddressOffset;
                break;
        }
    }
@@ -1522,16 +1514,12 @@ FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *i
            // The image array types require special care:
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                y = unnormalize_coordinate("array index", y, yAddressOffset, 
                    (float)imageInfo->arraySize, CL_ADDRESS_CLAMP_TO_EDGE, verbose);
                z = 0; // don't care -- unused for 1D arrays
                break;
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                y = unnormalize_coordinate("y", y, yAddressOffset, (float)imageInfo->height,
                    imageSampler->addressing_mode, verbose);
                z = unnormalize_coordinate("array index", z, zAddressOffset, 
                    (float)imageInfo->arraySize, CL_ADDRESS_CLAMP_TO_EDGE, verbose);
                break;
            // Everybody else:
--- a/test_conformance/images/image_helpers.h
+++ b/test_conformance/images/image_helpers.h
@@ -491,7 +491,6 @@ extern char *create_random_image_data( ExplicitType dataType, image_descriptor *
 extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
 extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
 extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
 extern int issubnormal(float);
 #define errMax( _x , _y )       ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )
--- a/test_conformance/images/kernel_read_write/test_read_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp
@@ -748,6 +748,10 @@ int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if( outputType == kInt )
        readFormat = "i";
--- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
@@ -855,6 +855,10 @@ int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format,
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D array size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if( outputType == kInt )
        readFormat = "i";
--- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
@@ -31,6 +31,28 @@ extern cl_mem_flags gMemFlagsToUse;
 #define MAX_TRIES               1
 #define MAX_CLAMPED             1
 // Derive a reduced upper bound for an image dimension from the device
 // maximum, so that the tests using it avoid consuming too much memory.
 // The result is maxDimSize / 128, kept within the closed range [16, 64].
 static size_t reduceImageSizeRange(size_t maxDimSize) {
    const size_t lowerLimit = 16;
    const size_t upperLimit = 64;
    const size_t scaled = maxDimSize / 128;

    // Handle the "too large" case first, then fold the remaining two cases.
    if (scaled > upperLimit)
        return upperLimit;
    return (scaled < lowerLimit) ? lowerLimit : scaled;
 }
 // Derive a reduced upper bound for the depth / array-size dimension from the
 // device maximum: maxDepth / 32, kept within the closed range [8, 32].
 static size_t reduceImageDepth(size_t maxDepth) {
    const size_t scaled = maxDepth / 32;

    if (scaled < (size_t) 8)
        return 8;
    return (scaled > (size_t) 32) ? (size_t) 32 : scaled;
 }
 const char *read2DArrayKernelSourcePattern =
 "__kernel void sample_kernel( read_only image2d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets,  __global %s4 *results )\n"
@@ -834,6 +856,10 @@ int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format,
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if( outputType == kInt )
        readFormat = "i";
@@ -921,6 +947,9 @@ int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format,
    }
    else
    {
        int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
        int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
        int maxArraySizeRange = (int) reduceImageDepth(maxArraySize);
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
@@ -928,9 +957,9 @@ int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format,
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
-                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 128, seed );
+                imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
-                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 128, seed );
+                imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed );
-                imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed );
+                imageInfo.arraySize = (size_t)random_log_in_range( 8, maxArraySizeRange, seed );
                imageInfo.rowPitch = imageInfo.width * pixelSize;
                imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
--- a/test_conformance/images/kernel_read_write/test_read_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_3D.cpp
@@ -31,6 +31,24 @@ extern cl_mem_flags gMemFlagsToUse;
 #define MAX_TRIES               1
 #define MAX_CLAMPED             1
 // Pick a random image dimension for tests that must avoid using too much
 // memory. The value comes from random_log_in_range() with bounds 16 and
 // maxDimSize/32 and is then capped at 128.
 // Note: the cast binds tighter than the division, so maxDimSize is narrowed
 // to int before being divided by 32.
 static size_t reduceImageSizeRange(size_t maxDimSize, RandomSeed& seed) {
   const size_t cap = 128;
   const size_t drawn = random_log_in_range(16, (int) maxDimSize/32, seed);
   return (drawn > cap) ? cap : drawn;
 }
 // Pick a random image depth: a random_log_in_range() draw with bounds 8 and
 // maxDimSize/32, capped at 32. As written, maxDimSize is narrowed to int
 // before the division by 32.
 static size_t reduceImageDepth(size_t maxDimSize, RandomSeed& seed) {
   const size_t cap = 32;
   const size_t drawn = random_log_in_range(8, (int) maxDimSize/32, seed);
   return (drawn > cap) ? cap : drawn;
 }
 const char *read3DKernelSourcePattern =
 "__kernel void sample_kernel( read_only image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets,  __global %s4 *results )\n"
@@ -837,6 +855,10 @@ int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if( outputType == kInt )
        readFormat = "i";
@@ -931,9 +953,9 @@ int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
-                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed );
+                imageInfo.width = reduceImageSizeRange(maxWidth, seed );
-                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed );
+                imageInfo.height = reduceImageSizeRange(maxHeight, seed );
-                imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed );
+                imageInfo.depth = reduceImageDepth(maxDepth, seed );
                imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
                imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
--- a/test_conformance/images/kernel_read_write/test_write_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp
@@ -413,6 +413,10 @@ int test_write_image_1D_set( cl_device_id device, cl_image_format *format, Expli
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if( inputType == kInt )
        readFormat = "i";
--- a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
@@ -422,6 +422,10 @@ int test_write_image_1D_array_set( cl_device_id device, cl_image_format *format,
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if( inputType == kInt )
        readFormat = "i";
--- a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
@@ -30,6 +30,28 @@ extern cl_mem_flags gMemFlagsToUse;
 extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z,
                                ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );
 // Derive a reduced upper bound for an image dimension from the device
 // maximum, so that the tests using it avoid consuming too much memory.
 // The result is maxDimSize / 32, kept within the closed range [16, 128].
 static size_t reduceImageSizeRange(size_t maxDimSize) {
    const size_t lowerLimit = 16;
    const size_t upperLimit = 128;
    const size_t scaled = maxDimSize / 32;

    // Handle the "too large" case first, then fold the remaining two cases.
    if (scaled > upperLimit)
        return upperLimit;
    return (scaled < lowerLimit) ? lowerLimit : scaled;
 }
 // Derive a reduced upper bound for the depth / array-size dimension from the
 // device maximum: maxDepth / 32, kept within the closed range [8, 32].
 static size_t reduceImageDepth(size_t maxDepth) {
    const size_t scaled = maxDepth / 32;

    if (scaled < (size_t) 8)
        return 8;
    return (scaled > (size_t) 32) ? (size_t) 32 : scaled;
 }
 const char *write2DArrayKernelSourcePattern =
 "__kernel void sample_kernel( __global %s4 *input, write_only image2d_array_t output )\n"
 "{\n"
@@ -398,6 +420,10 @@ int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format,
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if( inputType == kInt )
        readFormat = "i";
@@ -472,13 +498,16 @@ int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format,
        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
        {
            cl_ulong size;
            int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
            int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
            int maxArraySizeRange = (int) reduceImageDepth(maxArraySize);
            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
-                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
+                imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, d );
-                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d );
+                imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, d );
-                imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, d );
+                imageInfo.arraySize = (size_t)random_log_in_range( 8, maxArraySizeRange, d );
                imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
                imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
--- a/test_conformance/images/kernel_read_write/test_write_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp
@@ -30,6 +30,24 @@ extern cl_mem_flags gMemFlagsToUse;
 extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z,
                                ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );
 // Utility function to clamp down image sizes for certain tests to avoid
 // using too much memory.
 // Draws a dimension with random_log_in_range(8, maxDimSize/32, seed) and
 // never hands back more than 128.
 static size_t reduceImageSizeRange(size_t maxDimSize, MTdata& seed) {
    const size_t sizeCap = 128;
    const size_t candidate = random_log_in_range(8, (int) maxDimSize/32, seed);
    return (candidate > sizeCap) ? sizeCap : candidate;
 }
 // Draws a depth with random_log_in_range(8, maxDimSize/32, seed) and never
 // hands back more than 32.
 static size_t reduceImageDepth(size_t maxDimSize, MTdata& seed) {
    const size_t depthCap = 32;
    const size_t candidate = random_log_in_range(8, (int) maxDimSize/32, seed);
    return (candidate > depthCap) ? depthCap : candidate;
 }
 const char *write3DKernelSourcePattern =
 "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"
 "__kernel void sample_kernel( __global %s4 *input, write_only image3d_t output )\n"
@@ -397,6 +415,10 @@ int test_write_image_3D_set( cl_device_id device, cl_image_format *format, Expli
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if( inputType == kInt )
        readFormat = "i";
@@ -475,9 +497,9 @@ int test_write_image_3D_set( cl_device_id device, cl_image_format *format, Expli
            // image, the result array, plus offset arrays, will fit in the global ram space
            do
            {
-                imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
+                imageInfo.width = reduceImageSizeRange(maxWidth, d );
-                imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d );
+                imageInfo.height = reduceImageSizeRange(maxHeight, d );
-                imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, d );
+                imageInfo.depth = reduceImageDepth(maxDepth, d );
                imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
                imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
--- a/test_conformance/images/kernel_read_write/test_write_image.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_image.cpp
@@ -422,6 +422,10 @@ int test_write_image_set( cl_device_id device, cl_image_format *format, Explicit
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if( inputType == kInt )
        readFormat = "i";
--- a/test_conformance/images/samplerlessReads/test_read_1D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D.cpp
@@ -166,6 +166,10 @@ int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 1D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if ( outputType == kInt )
    {
--- a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
@@ -172,6 +172,10 @@ int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format,
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if ( outputType == kInt )
    {
--- a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
@@ -185,6 +185,11 @@ int test_read_image_set_1D_buffer( cl_device_id device, cl_image_format *format,
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth1D, NULL );
    test_error( error, "Unable to get max image 1D buffer size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // note: image_buffer test uses image1D for results validation.
    // So the test can't use the biggest possible size for image_buffer if it's bigger than the max image1D size
    maxWidth = (maxWidth > maxWidth1D) ? maxWidth1D : maxWidth;
--- a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
@@ -156,6 +156,10 @@ int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format,
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 2D array size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if ( outputType == kInt )
    {
--- a/test_conformance/images/samplerlessReads/test_read_3D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_3D.cpp
@@ -161,6 +161,10 @@ int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_
    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
    test_error( error, "Unable to get max image 3D size from device" );
    if (memSize > (cl_ulong)SIZE_MAX) {
        memSize = (cl_ulong)SIZE_MAX;
    }
    // Determine types
    if ( outputType == kInt )
    {
--- a/test_conformance/integer_ops/test_int_basic_ops.c
+++ b/test_conformance/integer_ops/test_int_basic_ops.c
@@ -477,10 +477,42 @@ test_integer_ops_threaded(cl_device_id deviceID, cl_context context, cl_command_
 {
    globalThreadData * pThreadInfo = NULL;
    cl_int result=0;
    cl_uint threadcount = GetThreadCount();
    // This test will run threadcount threads concurrently; each thread will
    // execute test_integer_ops() which will allocate 2 OpenCL buffers on the
    // device; each buffer has size num_elements * type_size * vectorSize. We
    // need to make sure that the total device memory allocated by all threads
    // does not exceed the maximum memory on the device. If it does, we decrease
    // num_elements until all threads combined will not over-subscribe device
    // memory.
    cl_ulong maxDeviceGlobalMem;
    result =
        clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
                        sizeof(maxDeviceGlobalMem), &maxDeviceGlobalMem, NULL);
    if (result != CL_SUCCESS) {
      log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n",
                result);
      return result;
    }
    if (maxDeviceGlobalMem > (cl_ulong)SIZE_MAX) {
      maxDeviceGlobalMem = (cl_ulong)SIZE_MAX;
    }
    // Let's not take all device memory - use at most 75% of it
    maxDeviceGlobalMem = (maxDeviceGlobalMem * 3) >> 2;
    // Now reduce num_elements so that the total device memory usage does not
    // exceed 75% of global device memory.
    size_t type_size = get_explicit_type_size(type);
    while ((cl_ulong)threadcount * 4 * num_elements * type_size * vectorSize >
           maxDeviceGlobalMem) {
      num_elements >>= 1;
    }
    uint64_t startIndx = (uint64_t)0;
    uint64_t endIndx = (1ULL<<num_runs_shift);
    uint64_t jobcount = (endIndx-startIndx)/num_elements;
    cl_uint threadcount = GetThreadCount();
    if(jobcount==0)
    {
--- a/test_conformance/math_brute_force/main.c
+++ b/test_conformance/math_brute_force/main.c
@@ -590,6 +590,8 @@ static void PrintArch( void )
        vlog( "\tARCH:\tx86_64\n" );
    #elif defined( __arm__ )
        vlog( "\tARCH:\tarm\n" );
    #elif defined( __aarch64__ )
        vlog( "\tARCH:\taarch64\n" );
    #else
        vlog( "\tARCH:\tunknown\n" );
    #endif
--- a/test_conformance/math_brute_force/reference_math.c
+++ b/test_conformance/math_brute_force/reference_math.c
@@ -1763,7 +1763,7 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
 //    *signgamp = 1;
    ix = hx&0x7fffffff;
    if(ix>=0x7ff00000) return x*x;
-	if((ix|lx)==0) return one/zero;
+    if((ix|lx)==0) return INFINITY;
    if(ix<0x3b900000) {    /* |x|<2**-70, return -log(|x|) */
        if(hx<0) {
 //            *signgamp = -1;
@@ -1772,9 +1772,10 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
    }
    if(hx<0) {
        if(ix>=0x43300000)     /* |x|>=2**52, must be -integer */
-		return one/zero;
+        return INFINITY;
        t = reference_sinpi(x);
-	    if(t==zero) return one/zero; /* -integer */
+        if(t==zero)
        return INFINITY; /* -integer */
        nadj = reference_log(pi/reference_fabs(t*x));
 //        if(t<zero) *signgamp = -1;
        x = -x;
@@ -5414,5 +5415,3 @@ int reference_notl( long double x )
    int r = !x;
    return r;
 }
--- a/test_conformance/math_brute_force/run_math_brute_force_in_parallel.py
+++ b/test_conformance/math_brute_force/run_math_brute_force_in_parallel.py
--- a/test_conformance/printf/CMakeLists.txt
+++ b/test_conformance/printf/CMakeLists.txt
@@ -1,3 +1,5 @@
 add_compile_options(-std=c++11)
 add_executable(conformance_test_printf
 	test_printf.c
 	util_printf.c
--- a/test_conformance/printf/test_printf.c
+++ b/test_conformance/printf/test_printf.c
@@ -25,6 +25,7 @@
 #include <math.h>
 #include <string.h>
 #include <memory>
 #if ! defined( _WIN32)
 #include <sys/sysctl.h>
@@ -349,9 +350,6 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
 //-----------------------------------------
 static bool isLongSupported(cl_device_id device_id)
 {
 	//profile type && device extension for long support checking
 	char *profileType = NULL,*devExt = NULL;
    size_t tempSize = 0;
    cl_int status;
    bool extSupport = true;
@@ -370,7 +368,7 @@ static bool isLongSupported(cl_device_id device_id)
        return false;
    }
-	profileType = new char[tempSize];
+    std::unique_ptr<char[]> profileType(new char[tempSize]);
    if(profileType == NULL)
    {
        log_error("Failed to allocate memory(profileType)");
@@ -381,11 +379,11 @@ static bool isLongSupported(cl_device_id device_id)
        device_id,
        CL_DEVICE_PROFILE,
        sizeof(char) * tempSize,
-		profileType,
+        profileType.get(),
        NULL);
-	if(!strcmp("EMBEDDED_PROFILE",profileType))
+    if(!strcmp("EMBEDDED_PROFILE",profileType.get()))
    {
        // Device extension
        status = clGetDeviceInfo(
@@ -401,7 +399,7 @@ static bool isLongSupported(cl_device_id device_id)
            return false;
        }
-		devExt = new char[tempSize];
+        std::unique_ptr<char[]> devExt(new char[tempSize]);
        if(devExt == NULL)
        {
            log_error("Failed to allocate memory(devExt)");
@@ -412,16 +410,14 @@ static bool isLongSupported(cl_device_id device_id)
            device_id,
            CL_DEVICE_EXTENSIONS,
            sizeof(char) * tempSize,
-			devExt,
+            devExt.get(),
            NULL);
-		extSupport  = (strstr(devExt,"cles_khr_int64") != NULL);
+        extSupport  = (strstr(devExt.get(),"cles_khr_int64") != NULL);
 		delete devExt;
 		delete profileType;
    }
    return extSupport;
 }
 //-----------------------------------------
 // is64bAddressSpace
 //-----------------------------------------
@@ -455,7 +451,9 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
    int err;
    cl_program program;
    cl_kernel  kernel;
-	cl_mem d_out;
+    cl_mem d_out, d_a;
    int has_d_out = 0;
    int has_d_a = 0;
    char _analysisBuffer[ANALYSIS_BUFFER_SIZE];
   // Define an index space (global work size) of threads for execution.
@@ -474,12 +472,13 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
        if(isKernelArgument(allTestCase[testId],testNum))
        {
            int a = 2;
-			cl_mem d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
+            d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
                sizeof(int), &a, &err);
            if(err!= CL_SUCCESS || d_a == NULL) {
                log_error("clCreateBuffer failed\n");
                goto exit;
            }
            has_d_a = 1;
            err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
            if(err!= CL_SUCCESS) {
                log_error("clSetKernelArg failed\n");
@@ -490,11 +489,12 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
        if(isKernelPFormat(allTestCase[testId],testNum))
        {
            d_out = clCreateBuffer(context, CL_MEM_READ_WRITE,
-				sizeof(long), NULL, &err);
+            sizeof(cl_long), NULL, &err);
            if(err!= CL_SUCCESS || d_out == NULL) {
                log_error("clCreateBuffer failed\n");
                goto exit;
            }
            has_d_out = 1;
            err  = clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_out);
            if(err!= CL_SUCCESS) {
                log_error("clSetKernelArg failed\n");
@@ -562,6 +562,12 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
            err = ++s_test_fail;
    }
 exit:
  if(has_d_out)
    if(clReleaseMemObject(d_out) != CL_SUCCESS)
      log_error("clReleaseMemObject failed\n");
    if(has_d_a)
      if(clReleaseMemObject(d_a) != CL_SUCCESS)
        log_error("clReleaseMemObject failed\n");
    if(clReleaseKernel(kernel) != CL_SUCCESS)
        log_error("clReleaseKernel failed\n");
    if(clReleaseProgram(program) != CL_SUCCESS)
@@ -598,6 +604,8 @@ static void printArch( void )
    log_info( "ARCH:\tx86_64\n" );
 #elif defined( __arm__ )
    log_info( "ARCH:\tarm\n" );
 #elif defined( __aarch64__ )
    vlog( "\tARCH:\taarch64\n" );
 #else
 #error unknown arch
 #endif
--- a/test_conformance/printf/util_printf.c
+++ b/test_conformance/printf/util_printf.c
@@ -618,11 +618,6 @@ struct printDataGenParameters printStringGenParameters[] = {
    //%% specification
    {"%s","\"%%\""},
 	//null string
 	{"%s","(void*)0"}
 };
 //---------------------------------------------------------
@@ -638,9 +633,6 @@ const char * correctBufferString[] = {
    "f",
    "%%",
 	"(null)"
 };
 //---------------------------------------------------------
@@ -865,10 +857,8 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
        char* eCorrectBuffer = strstr((char*)pTestCase->_correctBuffer[testId],correctExp);
        if(eCorrectBuffer == NULL)
            return false;
        eCorrectBuffer+=2;
        exp += 2;
        //Exponent always contains at least two digits
        if(strlen(exp) < 2)
            return false;
@@ -878,7 +868,10 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
    return strcmp(eCorrectBuffer,exp);
    }
    if(!strcmp(pTestCase->_correctBuffer[testId],"inf"))
-       return strcmp(analysisBuffer,"inf")&&strcmp(analysisBuffer,"infinity");
+        return strcmp(analysisBuffer,"inf")&&strcmp(analysisBuffer,"infinity")&&strcmp(analysisBuffer,"1.#INF00")&&strcmp(analysisBuffer,"Inf");
    if(!strcmp(pTestCase->_correctBuffer[testId],"nan") || !strcmp(pTestCase->_correctBuffer[testId],"-nan")) {
        return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)")&&strcmp(analysisBuffer,"-nan(ind)");
    }
    return strcmp(analysisBuffer,pTestCase->_correctBuffer[testId]);
 }
--- a/test_conformance/run_conformance.py
+++ b/test_conformance/run_conformance.py
--- a/test_conformance/select/CMakeLists.txt
+++ b/test_conformance/select/CMakeLists.txt
@@ -5,6 +5,8 @@ add_executable(conformance_test_select
        ../../test_common/harness/msvc9.c
        ../../test_common/harness/kernelHelpers.c
        ../../test_common/harness/errorHelpers.c
        ../../test_common/harness/parseParameters.cpp
        ../../test_common/harness/testHarness.c
 )
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)" AND NOT MSVC)
@@ -12,15 +14,14 @@ set_source_files_properties(
        COMPILE_FLAGS -msse2)
 endif()
 if(WIN32)
 set_source_files_properties(
        test_select.c
        util_select.c
        ../../test_common/harness/msvc9.c
        ../../test_common/harness/kernelHelpers.c
        ../../test_common/harness/errorHelpers.c
        ../../test_common/harness/testHarness.c
        PROPERTIES LANGUAGE CXX)
 endif(WIN32)
 TARGET_LINK_LIBRARIES(conformance_test_select
        ${CLConform_LIBRARIES})
--- a/test_conformance/select/test_select.c
+++ b/test_conformance/select/test_select.c
@@ -34,14 +34,11 @@
 #include <limits.h>
 #include "test_select.h"
 #if defined(_WIN32)
 #include "../../test_common/harness/testHarness.h"
 #endif
 #include "../../test_common/harness/kernelHelpers.h"
 #include "../../test_common/harness/mt19937.h"
-cl_uint gRandomSeed = 0;
+#include "../../test_common/harness/parseParameters.h"
 cl_uint gIsEmbedded = 0;
 //-----------------------------------------
 // Static functions
@@ -79,6 +76,7 @@ static int doTest(cl_command_queue queue, cl_context context,
 // range.  Otherwise, we test a subset of the range
 // [-min_short, min_short]
 static bool  s_wimpy_mode = false;
 static int s_wimpy_reduction_factor = 256;
 // Tests are broken into the major test which is based on the
 // src and cmp type and their corresponding vector types and
@@ -352,7 +350,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
    cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE;
    size_t block_elements = BUFFER_SIZE / type_size[stype];
-    size_t step = s_wimpy_mode ? 256 : 1;
+    size_t step = s_wimpy_mode ? s_wimpy_reduction_factor : 1;
    cl_ulong cmp_stride = block_elements * step;
    // It is more efficient to create the tests all at once since we
@@ -519,6 +517,7 @@ static void printUsage( void )
    log_info("test_select:  [-cghw] [test_name|start_test_num] \n");
    log_info("  default is to run the full test on the default device\n");
    log_info("  -w run in wimpy mode (smoke test)\n");
    log_info("  -[2^n] Set wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", s_wimpy_reduction_factor);
    log_info("  test_name will run only one test of that name\n");
    log_info("  start_test_num will start running from that num\n");
 }
@@ -539,6 +538,8 @@ static void printArch( void )
    log_info( "ARCH:\tx86_64\n" );
 #elif defined( __arm__ )
    log_info( "ARCH:\tarm\n" );
 #elif defined( __aarch64__ )
    log_info( "ARCH:\taarch64\n" );
 #else
 #error unknown arch
 #endif
@@ -554,12 +555,6 @@ static void printArch( void )
 #endif
 }
 // Error-notification callback: writes the errinfo string, followed by a
 // newline, through log_info. private_info, cb and user_data are not used.
 void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
 {
    log_info( "%s\n", errinfo );
 }
 //-----------------------------------------
 // main
 //-----------------------------------------
@@ -620,6 +615,9 @@ int main(int argc, char* argv[]) {
                    case 'w':  // Wimpy mode
                        s_wimpy_mode = true;
                        break;
                    case '[':
                        parseWimpyReductionFactor(arg, s_wimpy_reduction_factor);
                        break;
                    default:
                        log_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
                        printUsage();
@@ -694,6 +692,7 @@ int main(int argc, char* argv[]) {
        log_info("*** WARNING: Testing in Wimpy mode!                     ***\n");
        log_info("*** Wimpy mode is not sufficient to verify correctness. ***\n");
        log_info("*** It gives warm fuzzy feelings and then nevers calls. ***\n\n");
        log_info("*** Wimpy Reduction Factor: %-27u ***\n\n", s_wimpy_reduction_factor);
    }
    cl_context context = clCreateContext(NULL, 1, &device_id, notify_callback, NULL, NULL);
--- a/test_conformance/spir/CMakeLists.txt
+++ b/test_conformance/spir/CMakeLists.txt
@@ -0,0 +1,94 @@
 # install_spir_artifacts(<suite_name>)
 # Adds an install rule that places "<suite_name>.zip" into ${CLConf_OUT_DIR}
 # as part of the OpenCLCTS install component.
 function(install_spir_artifacts suite_name)
   install(
     FILES "${suite_name}.zip"
     DESTINATION "${CLConf_OUT_DIR}"
     COMPONENT OpenCLCTS)
 endfunction()
 # SPIR conformance test executable: the suite's own sources plus the shared
 # harness files and miniz from test_common.
 add_executable(
    conformance_test_spir
    main.cpp
    datagen.cpp
    run_build_test.cpp
    run_services.cpp
    kernelargs.cpp
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/msvc9.c
    ../../test_common/harness/os_helpers.cpp
    ../../test_common/harness/testHarness.c
    ../../test_common/miniz/miniz.c)
 # When the processor name matches x86/X86/amd64/AMD64 and the compiler is not
 # MSVC, request -msse2.
 # NOTE(review): no source files are listed before COMPILE_FLAGS, so this call
 # does not appear to name any file to apply -msse2 to - confirm which sources
 # were intended.
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)" AND NOT MSVC)
 set_source_files_properties(
        COMPILE_FLAGS -msse2)
 endif()
 # Build this target with C++ exceptions and RTTI enabled, using the flag
 # spelling that matches the toolchain (UNIX compilers vs. MSVC).
 if(UNIX)
    set_target_properties(conformance_test_spir PROPERTIES
 	   COMPILE_FLAGS "-fexceptions -frtti")
 elseif(MSVC)
    set_target_properties(conformance_test_spir PROPERTIES
       COMPILE_FLAGS "/GR /EHs /EHc")
 endif()
 # Link against the libraries listed in CLConform_LIBRARIES.
 TARGET_LINK_LIBRARIES(conformance_test_spir
 	${CLConform_LIBRARIES})
 # Names of the SPIR test suites. Each suite ships as "<name>.zip" next to this
 # CMakeLists.txt. This single list drives both the post-build copy and the
 # install rules below, so the two can no longer drift apart.
 set(SPIR_TEST_SUITES
     api
     atomics
     basic
     compile_and_link
     commonfns
     conversions
     geometrics
     enum_values
     half
     kernel_attributes
     kernel_image_methods
     images_kernel_read_write
     images_samplerlessRead
     integer_ops
     math_brute_force
     printf
     profiling
     relationals
     select
     sampler_enumeration
     vec_align
     vec_step
     binary_type)
 # Need to copy khr.csv and the spir zips to sit beside the executable.
 # Successive POST_BUILD commands on the same target are appended, so the
 # copies still run in list order after the target is built.
 add_custom_command(TARGET conformance_test_spir POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/khr.csv" "$<TARGET_FILE_DIR:conformance_test_spir>/khr.csv")
 foreach(spir_suite ${SPIR_TEST_SUITES})
     add_custom_command(TARGET conformance_test_spir POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${spir_suite}.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/${spir_suite}.zip")
 endforeach()
 add_dependencies(OpenCLCTS conformance_test_spir)
 # Install the test executable itself.
 install(TARGETS conformance_test_spir
    DESTINATION "${CLConf_OUT_DIR}"
    COMPONENT OpenCLCTS)
 # Install every suite archive alongside the executable.
 foreach(spir_suite ${SPIR_TEST_SUITES})
     install_spir_artifacts(${spir_suite})
 endforeach()
 install(FILES "khr.csv" DESTINATION "${CLConf_OUT_DIR}" COMPONENT OpenCLCTS)
 #Add any other runtime directories you need here.
 # end of file #
--- a/test_conformance/spir/Makefile
+++ b/test_conformance/spir/Makefile
@@ -0,0 +1,45 @@
 # Optional ATF support: when BUILD_WITH_ATF is defined, add the ATF framework
 # to the link line and pass -DUSE_ATF to the compiler.
 ifdef BUILD_WITH_ATF
 ATF = -framework ATF
 USE_ATF = -DUSE_ATF
 endif
 # Sources of the test itself plus the shared miniz and harness files.
 # NOTE(review): this lists harness/os_helpers.c, while the CMake build of the
 # same test lists harness/os_helpers.cpp - confirm which file name is correct.
 SRCS = main.cpp datagen.cpp kernelargs.cpp run_build_test.cpp run_services.cpp \
 			../../test_common/miniz/miniz.c \
 			../../test_common/harness/testHarness.c \
 			../../test_common/harness/errorHelpers.c \
 			../../test_common/harness/typeWrappers.cpp \
 			../../test_common/harness/mt19937.c \
 			../../test_common/harness/os_helpers.c \
 			../../test_common/harness/kernelHelpers.c
 # Absolute paths of all sources; object names are derived from these below.
 SOURCES = $(abspath $(SRCS))
 # Library search path: the OpenCL framework's library directory and the
 # current directory.
 LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
 LIBPATH += -L.
 FRAMEWORK = ${SOURCES}
 HEADERS = 
 # Name of the executable produced by this Makefile.
 TARGET = test_spir
 INCLUDE = 
 # Flags shared by C and C++ compilation; the commented-out line below is an
 # alternative flag set that defines USE_LOCAL_THREADS.
 COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
 #COMPILERFLAGS = -c -Wall -g -DUSE_LOCAL_THREADS
 # The C++ driver is used as CC, so it also performs the final link.
 CC = c++
 CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
 CXXFLAGS= $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
 LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
 # Derive the object list in two passes: first swap the .c suffix for .o, then
 # swap the .cpp suffix for .o on the result.
 OBJECTS := ${SOURCES:.c=.o}
 OBJECTS := ${OBJECTS:.cpp=.o}
 TARGETOBJECT =
 # `all` and `clean` name actions, not files. Declaring them phony keeps make
 # from treating them as up to date if a file with either name ever appears in
 # this directory.
 .PHONY: all clean
 # Default goal: build the test executable.
 all: $(TARGET)
 # Link every object file into the test executable.
 $(TARGET): $(OBJECTS)
 	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
 # Remove the executable and all object files.
 clean:
 	rm -f $(TARGET) $(OBJECTS)
 # Fallback for any requested target that has no rule: report it instead of
 # failing with make's generic error.
 .DEFAULT:
 	@echo The target \"$@\" does not exist in Makefile.
--- a/test_conformance/spir/api.zip
+++ b/test_conformance/spir/api.zip
--- a/test_conformance/spir/atomics.zip
+++ b/test_conformance/spir/atomics.zip
--- a/test_conformance/spir/basic.zip
+++ b/test_conformance/spir/basic.zip
--- a/test_conformance/spir/binary_type.zip
+++ b/test_conformance/spir/binary_type.zip
--- a/test_conformance/spir/commonfns.zip
+++ b/test_conformance/spir/commonfns.zip
--- a/test_conformance/spir/compile_and_link.zip
+++ b/test_conformance/spir/compile_and_link.zip
--- a/Show More
+++ b/Show More