diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2b975bb5..207b8789 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,7 +18,7 @@ set(CLConform_VERSION "${CLConform_VERSION_MAJOR}.${CLConform_VERSION_MINOR}")
 set(CLConform_VERSION_FULL
     "${CLConform_VERSION}.${CLConform_VERSION_MICRO}${CLConform_VERSION_EXTRA}")
 
-cmake_minimum_required(VERSION 2.8)
+cmake_minimum_required(VERSION 3.1)
 
 add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_0_APIS=1)
 add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_1_APIS=1)
@@ -77,6 +77,11 @@ else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
 endif()
 
+# Clang gives C++11 narrowing warnings so surpress these for now
+if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++11-narrowing")
+endif()
+
 list(APPEND CLConform_LIBRARIES ${OPENCL_LIBRARIES})
 if(ANDROID)
     list(APPEND CLConform_LIBRARIES m)
diff --git a/build_android.py b/build_android.py
old mode 100644
new mode 100755
index 2bd7f5c8..e1b8c1e9
--- a/build_android.py
+++ b/build_android.py
@@ -108,7 +108,9 @@ def install_android_cmake():
     print "input: "
     if get_input():
         print("installing android-cmake")
-        subprocess.call(['git', 'clone', 'https://github.com/taka-no-me/android-cmake'])
+        #subprocess.call(['git', 'clone', 'https://github.com/taka-no-me/android-cmake'])
+        # Use a newer fork of android-cmake which has been updated to support Clang. GCC is deprecated in newer NDKs and C11 atomics conformance doesn't build with NDK > 10.
+        subprocess.call(['git', 'clone', 'https://github.com/daewoong-jang/android-cmake'])
         args.android_cmake = os.path.join(args.src_dir,"android-cmake")
     else:
         exit()
diff --git a/build_lnx.sh b/build_lnx.sh
old mode 100644
new mode 100755
diff --git a/clean_tests.py b/clean_tests.py
old mode 100644
new mode 100755
diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h
index b0d3cf75..23445c8e 100644
--- a/test_common/harness/compat.h
+++ b/test_common/harness/compat.h
@@ -207,7 +207,10 @@ long double roundl(long double x);
         int cf_signbit(double x);
         int cf_signbitf(float x);
 
+// Added in _MSC_VER == 1800 (Visual Studio 2013)
+#if _MSC_VER < 1800
         static int signbit(double x) { return  cf_signbit(x); }
+#endif
         static int signbitf(float x) { return cf_signbitf(x); }
 
 long int lrint (double flt);
@@ -241,8 +244,11 @@ int32_t float2int (float   fx);
 // stdio.h
 //
 
-#if defined( _MSC_VER )
-    #define snprintf   sprintf_s
+#if defined(_MSC_VER)
+	// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
+	#if _MSC_VER < 1900
+		#define snprintf   sprintf_s
+	#endif
 #endif
 
 
diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp
index 7fe7945c..cdb7028c 100644
--- a/test_common/harness/imageHelpers.cpp
+++ b/test_common/harness/imageHelpers.cpp
@@ -479,16 +479,6 @@ struct AddressingTable
 
 static AddressingTable  sAddressingTable;
 
-bool alpha_is_x(cl_image_format *format){
-    switch (format->image_channel_order) {
-        case CL_RGBx:
-        case CL_sRGBx:
-            return true;
-        default:
-            return false;
-    }
-}
-
 bool is_sRGBA_order(cl_channel_order image_channel_order){
     switch (image_channel_order) {
         case CL_sRGB:
@@ -508,19 +498,21 @@ int has_alpha(cl_image_format *format) {
         case CL_R:
             return 0;
         case CL_A:
-        case CL_Rx:
             return 1;
+        case CL_Rx:
+            return 0;
         case CL_RG:
             return 0;
         case CL_RA:
-        case CL_RGx:
             return 1;
+        case CL_RGx:
+            return 0;
         case CL_RGB:
         case CL_sRGB:
             return 0;
         case CL_RGBx:
         case CL_sRGBx:
-            return 1;
+            return 0;
         case CL_RGBA:
             return 1;
         case CL_BGRA:
@@ -719,13 +711,6 @@ void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
   }
 }
 
-int issubnormal(float a)
-{
-    union { cl_int i; cl_float f; } u;
-    u.f = a;
-    return (u.i & 0x7f800000U) == 0;
-}
-
 float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) {
     if (sampler->filter_mode == CL_FILTER_NEAREST)
         return 0.0f;
@@ -1412,16 +1397,9 @@ void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
                || ( depth_lod != 0 && z >= (int)depth_lod )
                || ( imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize ) )
     {
-        // Border color
-        if (imageInfo->format->image_channel_order == CL_DEPTH)
-        {
-            outData[ 0 ] = 0;
-        }
-        else {
             outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
             if (!has_alpha(imageInfo->format))
-                outData[3] = alpha_is_x(imageInfo->format) ? 0 : 1;
-        }
+                outData[3] = 1;
         return;
     }
 
diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h
index 83ff1377..ba80ea77 100644
--- a/test_common/harness/imageHelpers.h
+++ b/test_common/harness/imageHelpers.h
@@ -136,8 +136,6 @@ extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *d
 
 int has_alpha(cl_image_format *format);
 
-extern bool alpha_is_x(cl_image_format *format);
-
 extern bool is_sRGBA_order(cl_channel_order image_channel_order);
 
 inline float calculate_array_index( float coord, float extent );
@@ -594,7 +592,6 @@ extern char *create_random_image_data( ExplicitType dataType, image_descriptor *
 extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
 extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
 extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
-extern int issubnormal(float);
 
 
 #define errMax( _x , _y )       ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )
diff --git a/test_common/harness/kernelHelpers.c b/test_common/harness/kernelHelpers.c
index 77c40571..ce7fd7fe 100644
--- a/test_common/harness/kernelHelpers.c
+++ b/test_common/harness/kernelHelpers.c
@@ -645,14 +645,14 @@ size_t get_pixel_bytes( const cl_image_format *fmt )
     return 0;
 }
 
-int verifyImageSupport( cl_device_id device )
+test_status verifyImageSupport( cl_device_id device )
 {
     if( checkForImageSupport( device ) )
     {
         log_error( "ERROR: Device does not supported images as required by this test!\n" );
-        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
+        return TEST_FAIL;
     }
-    return 0;
+    return TEST_PASS;
 }
 
 int checkForImageSupport( cl_device_id device )
diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h
index 692f17b3..9feaca87 100644
--- a/test_common/harness/kernelHelpers.h
+++ b/test_common/harness/kernelHelpers.h
@@ -17,6 +17,7 @@
 #define _kernelHelpers_h
 
 #include "compat.h"
+#include "testHarness.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -90,8 +91,8 @@ extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl
 /* Helper to get pixel size for a pixel format */
 size_t get_pixel_bytes( const cl_image_format *fmt );
 
-/* Verify the given device supports images. 0 means you're good to go, otherwise an error */
-extern int verifyImageSupport( cl_device_id device );
+/* Verify the given device supports images. */
+extern test_status verifyImageSupport( cl_device_id device );
 
 /* Checks that the given device supports images. Same as verify, but doesn't print an error */
 extern int checkForImageSupport( cl_device_id device );
diff --git a/test_common/harness/msvc9.c b/test_common/harness/msvc9.c
index 093bb978..3b168453 100644
--- a/test_common/harness/msvc9.c
+++ b/test_common/harness/msvc9.c
@@ -566,6 +566,7 @@ long int lrintf (float x)
 //
 ///////////////////////////////////////////////////////////////////
 
+#if _MSC_VER < 1900
 int fetestexcept(int excepts)
 {
     unsigned int status = _statusfp();
@@ -583,6 +584,7 @@ int feclearexcept(int excepts)
     _clearfp();
     return 0;
 }
+#endif
 
 #endif // __INTEL_COMPILER
 
diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp
new file mode 100644
index 00000000..13e5c05d
--- /dev/null
+++ b/test_common/harness/parseParameters.cpp
@@ -0,0 +1,42 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "parseParameters.h"
+#include "errorHelpers.h"
+#include <string.h>
+
+bool is_power_of_two(int number)
+{
+    return number && !(number & (number - 1));
+}
+
+extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor)
+{
+    const char *arg_temp = strchr(&arg[1], ']');
+    if (arg_temp != 0)
+    {
+        int new_factor = atoi(&arg[1]);
+        arg = arg_temp; // Advance until ']'
+        if (is_power_of_two(new_factor))
+        {
+            log_info("\n Wimpy reduction factor changed from %d to %d \n", wimpyReductionFactor, new_factor);
+            wimpyReductionFactor = new_factor;
+        }
+        else
+        {
+            log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be power of 2. The default value will be used.\n", new_factor);
+        }
+    }
+}
diff --git a/test_common/harness/parseParameters.h b/test_common/harness/parseParameters.h
new file mode 100644
index 00000000..212a9764
--- /dev/null
+++ b/test_common/harness/parseParameters.h
@@ -0,0 +1,24 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _parseParameters_h
+#define _parseParameters_h
+
+#include "compat.h"
+#include <string>
+
+extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor);
+
+#endif // _parseParameters_h
diff --git a/test_common/harness/testHarness.c b/test_common/harness/testHarness.c
index c1b8bfa0..30358df9 100644
--- a/test_common/harness/testHarness.c
+++ b/test_common/harness/testHarness.c
@@ -135,7 +135,7 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
         log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
         log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );
 
-        for( i = 0; i < num_fns - 1; i++ )
+        for( i = 0; i < num_fns; i++ )
         {
             log_info( "\t\t%s\n", fnNames[ i ] );
         }
@@ -431,10 +431,18 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
 
 
     /* If we have a device checking function, run it */
-    if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS )
+    if( ( deviceCheckFn != NULL ) )
     {
-        test_finish();
-        return -1;
+        test_status status = deviceCheckFn( device );
+        switch (status)
+        {
+            case TEST_PASS:
+                break;
+            case TEST_FAIL:
+                return 1;
+            case TEST_SKIP:
+                return 0;
+        }
     }
 
     if (num_elements <= 0)
diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h
index c2620647..7c1d9cb2 100644
--- a/test_common/harness/testHarness.h
+++ b/test_common/harness/testHarness.h
@@ -23,6 +23,13 @@
 extern "C" {
 #endif
 
+typedef enum test_status
+{
+    TEST_PASS = 0,
+    TEST_FAIL = 1,
+    TEST_SKIP = 2,
+} test_status;
+
 extern cl_uint gReSeed;
 extern cl_uint gRandomSeed;
 
@@ -32,8 +39,8 @@ extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
                             basefn fnList[], const char *fnNames[],
                             int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
 
-// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits.
-typedef int (*DeviceCheckFn)( cl_device_id device );
+// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than TEST_PASS, the harness exits.
+typedef test_status (*DeviceCheckFn)( cl_device_id device );
 
 // Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
 extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
diff --git a/test_conformance/SVM/CMakeLists.txt b/test_conformance/SVM/CMakeLists.txt
index 62a3b62c..694bcc2a 100644
--- a/test_conformance/SVM/CMakeLists.txt
+++ b/test_conformance/SVM/CMakeLists.txt
@@ -1,3 +1,4 @@
+set(CMAKE_CXX_STANDARD 11)
 set(MODULE_NAME SVM)
 
 set(${MODULE_NAME}_SOURCES
diff --git a/test_conformance/SVM/main.cpp b/test_conformance/SVM/main.cpp
index 209b1ed2..8ab0c392 100644
--- a/test_conformance/SVM/main.cpp
+++ b/test_conformance/SVM/main.cpp
@@ -226,7 +226,7 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt
     }
 
     error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
-    test_error(error,"clGetDeviceInfo failed for CL_DEVICE_MEM_SHARING");
+    test_error(error,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
     if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER |  CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS)))
     {
       log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value");
diff --git a/test_conformance/SVM/test_allocate_shared_buffer.cpp b/test_conformance/SVM/test_allocate_shared_buffer.cpp
index 7d555c8d..14262cb7 100644
--- a/test_conformance/SVM/test_allocate_shared_buffer.cpp
+++ b/test_conformance/SVM/test_allocate_shared_buffer.cpp
@@ -97,6 +97,10 @@ int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_c
           log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc");
           return -1;
         }
+        err = clEnqueueUnmapMemObject(queues[0], buf, pBufData2, 0, NULL, NULL);
+        test_error(err, "clEnqueueUnmapMemObject failed");
+        err = clFinish(queues[0]);
+        test_error(err, "clFinish failed");
       }
     }
 
diff --git a/test_conformance/SVM/test_enqueue_api.cpp b/test_conformance/SVM/test_enqueue_api.cpp
index 6a04e955..f92872d6 100644
--- a/test_conformance/SVM/test_enqueue_api.cpp
+++ b/test_conformance/SVM/test_enqueue_api.cpp
@@ -17,10 +17,15 @@
 #include "../../test_common/harness/mt19937.h"
 
 #include <vector>
+#include <atomic>
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif 
 
 typedef struct
 {
-  cl_uint status;
+  std::atomic<cl_uint> status;
   cl_uint num_svm_pointers;
   std::vector<void *> svm_pointers;
 } CallbackData;
@@ -62,7 +67,7 @@ void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_point
     clSVMFree(context, svm_pointers[i]);
   }
 
-  data->status = 1;
+  data->status.store(1, std::memory_order_release);
 }
 
 int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements)
@@ -231,7 +236,9 @@ int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue
   test_error(error, "clFinish failed");
 
   //wait for the callback
-  while(data.status == 0) { }
+  while(data.status.load(std::memory_order_acquire) == 0) { 
+    usleep(1);
+  }
 
   //check if number of SVM pointers returned in the callback matches with expected
   if (data.num_svm_pointers != buffers.size())
diff --git a/test_conformance/api/test_api_min_max.c b/test_conformance/api/test_api_min_max.c
index 36046751..ab3f10ca 100644
--- a/test_conformance/api/test_api_min_max.c
+++ b/test_conformance/api/test_api_min_max.c
@@ -1303,7 +1303,6 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
     int error;
     clProgramWrapper program;
     clKernelWrapper kernel;
-    clMemWrapper            streams[3];
     size_t    threads[1], localThreads[1];
     cl_int *constantData, *resultData;
     cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize;
@@ -1324,12 +1323,12 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
 
     log_info("Reported max constant buffer size of %lld bytes.\n", maxSize);
 
-    // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
+    // Limit test buffer size to 1/8 of CL_DEVICE_GLOBAL_MEM_SIZE
     error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
     test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
 
-    if (maxSize > maxGlobalSize / 4)
-        maxSize = maxGlobalSize / 4;
+    if (maxSize > maxGlobalSize / 8)
+        maxSize = maxGlobalSize / 8;
 
     error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
     test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
@@ -1358,6 +1357,7 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
         for(i=0; i<(int)(numberOfInts); i++)
             constantData[i] = (int)genrand_int32(d);
 
+        clMemWrapper streams[3];
         streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, constantData, &error);
         test_error( error, "Creating test array failed" );
         streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeToAllocate, NULL, &error);
@@ -1427,7 +1427,7 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
 
     if (allocPassed) {
         if (currentSize < maxSize/PASSING_FRACTION) {
-            log_error("Failed to allocate at least 1/4 of the reported constant size.\n");
+            log_error("Failed to allocate at least 1/8 of the reported constant size.\n");
             return -1;
         } else if (currentSize != maxSize) {
             log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize);
diff --git a/test_conformance/basic/run_array b/test_conformance/basic/run_array
old mode 100644
new mode 100755
diff --git a/test_conformance/basic/run_array_image_copy b/test_conformance/basic/run_array_image_copy
old mode 100644
new mode 100755
diff --git a/test_conformance/basic/run_image b/test_conformance/basic/run_image
old mode 100644
new mode 100755
diff --git a/test_conformance/basic/run_multi_read_image b/test_conformance/basic/run_multi_read_image
old mode 100644
new mode 100755
diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp
index ec65101d..ca657787 100644
--- a/test_conformance/basic/test_async_strided_copy.cpp
+++ b/test_conformance/basic/test_async_strided_copy.cpp
@@ -209,14 +209,13 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
             log_error( "ERROR: Results of copy did not validate!\n" );
             sprintf(values + strlen( values), "%d -> [", i);
             for (int j=0; j<(int)elementSize; j++)
-                sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]);
+                sprintf(values + strlen( values), "%2x ", inchar[j]);
             sprintf(values + strlen(values), "] != [");
             for (int j=0; j<(int)elementSize; j++)
-                sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]);
+                sprintf(values + strlen( values), "%2x ", outchar[j]);
             sprintf(values + strlen(values), "]");
             log_error("%s\n", values);
-
-               return -1;
+            return -1;
         }
     }
 
diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp
index 9a3eb99c..243d2361 100644
--- a/test_conformance/basic/test_progvar.cpp
+++ b/test_conformance/basic/test_progvar.cpp
@@ -872,12 +872,14 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
 
     // We need to create 5 random values of the given type,
     // and read 4 of them back.
-    cl_uchar CL_ALIGNED(ALIGNMENT) write_data[NUM_TESTED_VALUES * sizeof(cl_ulong16)];
-    cl_uchar CL_ALIGNED(ALIGNMENT) read_data[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)];
+    const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
+    const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16);
+    cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
+    cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
 
-    clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(write_data), write_data, &status ) );
+    clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
     test_error_ret(status,"Failed to allocate write buffer",status);
-    clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(read_data), read_data, &status ) );
+    clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
     test_error_ret(status,"Failed to allocate read buffer",status);
 
     status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
@@ -892,7 +894,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
             // Generate new random data to push through.
             // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
 
-            cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, sizeof(write_data), 0, 0, 0, 0);
+            cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
 
             if ( ti.is_bool() ) {
                 // For boolean, random data cast to bool isn't very random.
@@ -904,7 +906,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
                 }
                 bool_iter++;
             } else {
-                l_set_randomly( write_data, sizeof(write_data), rand_state );
+                l_set_randomly( write_data, write_data_size, rand_state );
             }
             status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
 
@@ -913,7 +915,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
             status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
 
             // Determine the expected values.
-            cl_uchar expected[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)];
+            cl_uchar expected[read_data_size];
             memset( expected, -1, sizeof(expected) );
             l_copy( expected, 0, write_data, 0, ti );
             l_copy( expected, 1, write_data, 1, ti );
@@ -930,8 +932,8 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
                 for ( unsigned i = 0; i <  NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
             }
 
-            cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0);
-            memset( read_data, -1, sizeof(read_data) );
+            cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
+            memset(read_data, -1, read_data_size);
             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
 
             // Now run the kernel
@@ -940,7 +942,7 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
             status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
             status = clFinish(queue); test_error_ret(status,"finish",status);
 
-            read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0);
+            read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
 
             if ( ti.is_bool() ) {
                 // Collapse down to one bit.
@@ -959,7 +961,8 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co
     }
 
     if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
-
+    align_free(write_data);
+    align_free(read_data);
     return err;
 }
 
@@ -1018,12 +1021,14 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
 
     // We need to create 5 random values of the given type,
     // and read 4 of them back.
-    cl_uchar CL_ALIGNED(ALIGNMENT) write_data[NUM_TESTED_VALUES * sizeof(cl_ulong16)];
-    cl_uchar CL_ALIGNED(ALIGNMENT) read_data[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)];
+    const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
+    const size_t read_data_size = (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16);
 
-    clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(write_data), write_data, &status ) );
+    cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
+    cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
+    clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
     test_error_ret(status,"Failed to allocate write buffer",status);
-    clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(read_data), read_data, &status ) );
+    clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
     test_error_ret(status,"Failed to allocate read buffer",status);
 
     status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
@@ -1043,7 +1048,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
             // Generate new random data to push through.
             // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
 
-            cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, sizeof(write_data), 0, 0, 0, 0);
+            cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
 
             if ( ti.is_bool() ) {
                 // For boolean, random data cast to bool isn't very random.
@@ -1055,7 +1060,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
                 }
                 bool_iter++;
             } else {
-                l_set_randomly( write_data, sizeof(write_data), rand_state );
+                l_set_randomly( write_data, write_data_size, rand_state );
             }
             status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
 
@@ -1071,7 +1076,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
             status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
 
             // Determine the expected values.
-            cl_uchar expected[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)];
+            cl_uchar expected[read_data_size];
             memset( expected, -1, sizeof(expected) );
             if ( iteration ) {
                 l_copy( expected, 0, write_data, 0, ti );
@@ -1102,8 +1107,8 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
 
             clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
 
-            cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0);
-            memset( read_data, -1, sizeof(read_data) );
+            cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
+            memset( read_data, -1, read_data_size );
             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
 
             // Now run the kernel
@@ -1117,7 +1122,7 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
             status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
             status = clFinish(queue); test_error_ret(status,"finish",status);
 
-            read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(read_data), 0, 0, 0, 0);
+            read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
 
             if ( ti.is_bool() ) {
                 // Collapse down to one bit.
@@ -1139,6 +1144,8 @@ static int l_init_write_read_for_type( cl_device_id device, cl_context context,
     }
 
     if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+    align_free(write_data);
+    align_free(read_data);
 
     return err;
 }
@@ -1352,6 +1359,13 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
             print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
             return status;
         }
+
+        status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0);
+        if(check_error(status, "Failed to compile program for user type test (%s)", IGetErrorString(status)))
+        {
+            print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
+            return status;
+        }
     }
 
 
@@ -1372,12 +1386,12 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
     test_error_ret(status,"Failed to create reader kernel for user type test",status);
 
     // Set up data.
-    cl_uchar CL_ALIGNED(ALIGNMENT) uchar_data;
-    cl_uint CL_ALIGNED(ALIGNMENT) uint_data;
+    cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT);
+    cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT);
 
-    clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(uchar_data), &uchar_data, &status ) );
+    clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status ) );
     test_error_ret(status,"Failed to allocate uchar buffer",status);
-    clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(uint_data), &uint_data, &status ) );
+    clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uint), uint_data, &status ) );
     test_error_ret(status,"Failed to allocate uint buffer",status);
 
     status = clSetKernelArg(reader,0,sizeof(cl_mem),&uchar_mem); test_error_ret(status,"set arg",status);
@@ -1387,18 +1401,18 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
     cl_uint expected_uint = 42;
     for ( unsigned iter = 0; iter < 5 ; iter++ ) { // Must go around at least twice
         // Read back data
-        uchar_data = -1;
-        uint_data = -1;
+        *uchar_data = -1;
+        *uint_data = -1;
         const size_t one = 1;
         status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
         status = clFinish(queue); test_error_ret(status,"finish",status);
 
-        cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(uint_data), 0, 0, 0, 0);
-        cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(uchar_data), 0, 0, 0, 0);
+        cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, 0, 0, 0);
+        cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0, 0, 0);
 
-        if ( expected_uchar != uchar_data || expected_uint != uint_data ) {
+        if ( expected_uchar != *uchar_data || expected_uint != *uint_data ) {
             log_error("FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
-                    iter, (int)uchar_data, uint_data, (int)expected_uchar, expected_uint );
+                    iter, (int)*uchar_data, *uint_data, (int)expected_uchar, expected_uint );
             err |= 1;
         }
 
@@ -1410,16 +1424,17 @@ static int l_user_type( cl_device_id device, cl_context context, cl_command_queu
         expected_uint++;
 
         // Write the new values into persistent store.
-        uchar_data = expected_uchar;
-        uint_data = expected_uint;
-        status = clSetKernelArg(writer,0,sizeof(uchar_data),&uchar_data); test_error_ret(status,"set arg",status);
-        status = clSetKernelArg(writer,1,sizeof(uint_data),&uint_data); test_error_ret(status,"set arg",status);
+        *uchar_data = expected_uchar;
+        *uint_data = expected_uint;
+        status = clSetKernelArg(writer,0,sizeof(cl_uchar),uchar_data); test_error_ret(status,"set arg",status);
+        status = clSetKernelArg(writer,1,sizeof(cl_uint),uint_data); test_error_ret(status,"set arg",status);
         status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
         status = clFinish(queue); test_error_ret(status,"finish",status);
     }
 
     if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
-
+    align_free(uchar_data);
+    align_free(uint_data);
     return err;
 }
 
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index 72c88b19..5ca7f6ca 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -1566,6 +1566,7 @@ public:
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryScopeStr;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrderScopeStr;
+  using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::UseSVM;
   using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
   CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType, useSVM)
   {
@@ -1606,7 +1607,7 @@ public:
       program += "    atomic_work_item_fence(" +
                  std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
                  "memory_order_acquire," +
-                 std::string(LocalMemory() ? "memory_scope_work_group" : "memory_scope_device") +
+                 std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
                  ");\n";
 
     program +=
@@ -1632,7 +1633,7 @@ public:
       program += "      atomic_work_item_fence(" +
                  std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") +
                  "memory_order_release," +
-                 std::string(LocalMemory() ? "memory_scope_work_group" : "memory_scope_device") +
+                 std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) +
                  ");\n";
 
     program +=
diff --git a/test_conformance/compatibility/test_common/harness/compat.h b/test_conformance/compatibility/test_common/harness/compat.h
index f69c8886..d177f87a 100644
--- a/test_conformance/compatibility/test_common/harness/compat.h
+++ b/test_conformance/compatibility/test_common/harness/compat.h
@@ -13,32 +13,63 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
+/*
+    Header compat.h should be used instead of stdlib.h, stdbool.h, stdint.h, float.h, fenv.h,
+    math.h. It provides workarounds if these headers are not available or not complete.
+
+    Important: It should be included before math.h, directly or indirectly, because Intel mathimf.h
+    is not compatible with Microsoft math.h. Including math.h before mathimf.h causes compile-time
+    error.
+*/
 #ifndef _COMPAT_H_
 #define _COMPAT_H_
 
 #if defined(_WIN32) && defined (_MSC_VER)
-
 #include <Windows.h>
-#include <Winbase.h>
-#include <CL/cl.h>
-#include <float.h>
-#include <xmmintrin.h>
-#include <math.h>
-
-#define MAKE_HEX_FLOAT(x,y,z)  ((float)ldexp( (float)(y), z))
-#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
-#define MAKE_HEX_LONG(x,y,z)   ((long double) ldexp( (long double)(y), z))
-
-#define isfinite(x) _finite(x)
-
-#if !defined(__cplusplus)
-typedef char bool;
-#define inline
-
-#else
-extern "C" {
 #endif
 
+#ifdef __cplusplus
+    #define EXTERN_C extern "C"
+#else
+    #define EXTERN_C
+#endif
+
+
+//
+// stdlib.h
+//
+
+#include <stdlib.h>     // On Windows, _MAX_PATH defined there.
+
+// llabs appeared in MS C v16 (VS 10/2010).
+#if defined( _MSC_VER ) && _MSC_VER <= 1500
+    EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
+#endif
+
+
+//
+// stdbool.h
+//
+
+// stdbool.h appeared in MS C v18 (VS 12/2013).
+#if defined( _MSC_VER ) && MSC_VER <= 1700
+#if !defined(__cplusplus)
+typedef char bool;
+        #define true  1
+        #define false 0
+    #endif
+#else
+    #include <stdbool.h>
+#endif
+
+
+
+//
+// stdint.h
+//
+
+// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
+#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
 typedef unsigned char       uint8_t;
 typedef char                int8_t;
 typedef unsigned short      uint16_t;
@@ -47,25 +78,83 @@ typedef unsigned int        uint32_t;
 typedef int                 int32_t;
 typedef unsigned long long  uint64_t;
 typedef long long           int64_t;
-
-#define MAXPATHLEN MAX_PATH
-
-typedef unsigned short ushort;
-typedef unsigned int   uint;
-typedef unsigned long  ulong;
+#else
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+    #include <stdint.h>
+#endif
 
 
-#define INFINITY    (FLT_MAX + FLT_MAX)
-//#define NAN (INFINITY | 1)
-//const static int PINFBITPATT_SP32  = INFINITY;
+
+//
+// float.h
+//
+
+#include <float.h>
+
+
+
+//
+// fenv.h
+//
+
+// fenv.h appeared in MS C v18 (VS 12/2013).
+#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
+    // reimplement fenv.h because windows doesn't have it
+    #define FE_INEXACT          0x0020
+    #define FE_UNDERFLOW        0x0010
+    #define FE_OVERFLOW         0x0008
+    #define FE_DIVBYZERO        0x0004
+    #define FE_INVALID          0x0001
+    #define FE_ALL_EXCEPT       0x003D
+    int fetestexcept(int excepts);
+    int feclearexcept(int excepts);
+#else
+    #include <fenv.h>
+#endif
+
+
+//
+// math.h
+//
+
+#if defined( __INTEL_COMPILER )
+    #include <mathimf.h>
+#else
+    #include <math.h>
+#endif
+
+#if defined( _MSC_VER )
+
+    #ifdef __cplusplus
+        extern "C" {
+    #endif
 
 #ifndef M_PI
     #define M_PI    3.14159265358979323846264338327950288
 #endif
 
+    #if ! defined( __INTEL_COMPILER )
 
+        #ifndef NAN
+            #define NAN  (INFINITY - INFINITY)
+        #endif
+        #ifndef HUGE_VALF
+            #define HUGE_VALF (float)HUGE_VAL
+        #endif
+        #ifndef INFINITY
+            #define INFINITY    (FLT_MAX + FLT_MAX)
+        #endif
+        #ifndef isfinite
+            #define isfinite(x) _finite(x)
+        #endif
+        #ifndef isnan
 #define    isnan( x )       ((x) != (x))
+        #endif
+        #ifndef isinf
 #define     isinf( _x)      ((_x) == INFINITY || (_x) == -INFINITY)
+        #endif
 
 double rint( double x);
 float  rintf( float x);
@@ -99,27 +188,6 @@ long double remquol( long double x, long double y, int *quo);
 
 long double scalblnl(long double x, long n);
 
-inline long long
-llabs(long long __x) { return __x >= 0 ? __x : -__x; }
-
-
-// end of math functions
-
-uint64_t ReadTime( void );
-double SubtractTime( uint64_t endTime, uint64_t startTime );
-
-#define sleep(X)   Sleep(1000*X)
-#define snprintf   sprintf_s
-//#define hypotl     _hypot
-
-float   make_nan();
-float nanf( const char* str);
-double  nan( const char* str);
-long double nanl( const char* str);
-
-//#if defined USE_BOOST
-//#include <boost/math/tr1.hpp>
-//double hypot(double x, double y);
 float hypotf(float x, float y);
 long double hypotl(long double x, long double y) ;
 double lgamma(double x);
@@ -144,67 +212,179 @@ double round(double x);
 float  roundf(float x);
 long double roundl(long double x);
 
-int signbit(double x);
-int signbitf(float x);
+        int cf_signbit(double x);
+        int cf_signbitf(float x);
 
-//bool signbitl(long double x)         { return boost::math::tr1::signbit<long double>(x); }
-//#endif // USE_BOOST
+        static int signbit(double x) { return  cf_signbit(x); }
+        static int signbitf(float x) { return cf_signbitf(x); }
 
 long int lrint (double flt);
 long int lrintf (float flt);
 
-
 float   int2float (int32_t ix);
 int32_t float2int (float   fx);
 
+    #endif
+
+    #if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
+        // These functions appeared in Intel C v13.
+        float  nanf( const char* str);
+        double nan( const char* str);
+        long double nanl( const char* str);
+    #endif
+
+    #ifdef __cplusplus
+        }
+    #endif
+
+#endif
+
+#if defined( __ANDROID__ )
+    #define log2(X)  (log(X)/log(2))
+#endif
+
+
+
+//
+// stdio.h
+//
+
+#if defined( _MSC_VER )
+    #define snprintf   sprintf_s
+#endif
+
+
+
+//
+// unistd.h
+//
+
+#if defined( _MSC_VER )
+    EXTERN_C unsigned int sleep( unsigned int sec );
+    EXTERN_C int usleep( int usec );
+#endif
+
+
+
+//
+// syscall.h
+//
+
+#if defined( __ANDROID__ )
+    // Android bionic's isn't providing SYS_sysctl wrappers.
+    #define SYS__sysctl  __NR__sysctl
+#endif
+
+
+
+// Some tests use _malloca which defined in malloc.h.
+#if !defined (__APPLE__)
+#include <malloc.h>
+#endif
+
+
+//
+// ???
+//
+
+#if defined( _MSC_VER )
+
+    #define MAXPATHLEN _MAX_PATH
+
+    EXTERN_C uint64_t ReadTime( void );
+    EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
+
 /** Returns the number of leading 0-bits in x,
     starting at the most significant bit position.
     If x is 0, the result is undefined.
 */
-int __builtin_clz(unsigned int pattern);
-
-
-static const double zero=  0.00000000000000000000e+00;
-#define NAN  (INFINITY - INFINITY)
-#define HUGE_VALF (float)HUGE_VAL
-
-int usleep(int usec);
-
-// reimplement fenv.h because windows doesn't have it
-#define FE_INEXACT          0x0020
-#define FE_UNDERFLOW        0x0010
-#define FE_OVERFLOW         0x0008
-#define FE_DIVBYZERO        0x0004
-#define FE_INVALID          0x0001
-#define FE_ALL_EXCEPT       0x003D
-
-int fetestexcept(int excepts);
-int feclearexcept(int excepts);
-
-#ifdef __cplusplus
-}
-#endif
-
-#else // !((defined(_WIN32) && defined(_MSC_VER)
-#if defined(__MINGW32__)
-#include <windows.h>
-#define sleep(X)   Sleep(1000*X)
+    EXTERN_C int __builtin_clz(unsigned int pattern);
 
 #endif
-#if defined(__linux__) || defined(__MINGW32__) || defined(__APPLE__)
-#ifndef __STDC_LIMIT_MACROS
-#define __STDC_LIMIT_MACROS
+
+#ifndef MIN
+    #define MIN(x,y) (((x)<(y))?(x):(y))
 #endif
-#include <fenv.h>
-#include <math.h>
-#include <float.h>
-#include <stdint.h>
+#ifndef MAX
+    #define MAX(x,y) (((x)>(y))?(x):(y))
 #endif
+
+
+/*
+    ------------------------------------------------------------------------------------------------
+    WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
+
+    This is a typical usage of the macros:
+
+        double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
+
+     (taken from math_brute_force/reference_math.c). There are two problems:
+
+        1.  There is an error here. On Windows in will produce incorrect result
+            `0x1.5555555555555p+50'. To have a correct result it should be written as
+            `MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
+            third argument is not obvious -- sometimes it should be the same as exponent of the
+            first argument, but sometimes not.
+
+        2.  Information is duplicated. It is easy to make a mistake.
+
+    Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
+    ------------------------------------------------------------------------------------------------
+*/
+#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
+
+    #define MAKE_HEX_FLOAT(x,y,z)  ((float)ldexp( (float)(y), z))
+    #define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
+    #define MAKE_HEX_LONG(x,y,z)   ((long double) ldexp( (long double)(y), z))
+
+#else
+
+// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
 #define MAKE_HEX_FLOAT(x,y,z) x
 #define MAKE_HEX_DOUBLE(x,y,z) x
 #define MAKE_HEX_LONG(x,y,z) x
 
-#endif // !((defined(_WIN32) && defined(_MSC_VER)
+#endif
 
 
+/*
+    ------------------------------------------------------------------------------------------------
+    HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
+    double respectively. Arguments:
+
+        sm    -- sign of number,
+        int   -- integer part of mantissa (without `0x' prefix),
+        fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
+        se    -- sign of exponent,
+        exp   -- absolute value of (binary) exponent.
+
+    Example:
+
+        double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
+
+    Note:
+
+        We have to pass signs as separate arguments because gcc pass negative integer values
+        (e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
+        `0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
+        literal.
+    ------------------------------------------------------------------------------------------------
+*/
+#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
+    // If compiler does not support hex floating point literals:
+    #define HEX_FLT(  sm, int, fract, se, exp ) sm ldexpf(       (float)( 0x ## int ## fract ## UL  ), se exp + ilogbf(       (float) 0x ## int ) - ilogbf(       ( float )( 0x ## int ## fract ## UL  ) ) )
+    #define HEX_DBL(  sm, int, fract, se, exp ) sm ldexp(       (double)( 0x ## int ## fract ## ULL ), se exp + ilogb(       (double) 0x ## int ) - ilogb(       ( double )( 0x ## int ## fract ## ULL ) ) )
+    #define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
+#else
+    // If compiler supports hex floating point literals: just concatenate all the parts into a literal.
+    #define HEX_FLT(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
+    #define HEX_DBL(  sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
+    #define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
+#endif
+
+#if defined(__MINGW32__)
+    #include <Windows.h>
+    #define sleep(sec) Sleep((sec) * 1000)
+#endif
+
 #endif // _COMPAT_H_
diff --git a/test_conformance/compatibility/test_common/harness/kernelHelpers.c b/test_conformance/compatibility/test_common/harness/kernelHelpers.c
index 3680c06e..42f41aa4 100644
--- a/test_conformance/compatibility/test_common/harness/kernelHelpers.c
+++ b/test_conformance/compatibility/test_common/harness/kernelHelpers.c
@@ -475,14 +475,14 @@ size_t get_pixel_bytes( const cl_image_format *fmt )
     return 0;
 }
 
-int verifyImageSupport( cl_device_id device )
+test_status verifyImageSupport( cl_device_id device )
 {
     if( checkForImageSupport( device ) )
     {
         log_error( "ERROR: Device does not supported images as required by this test!\n" );
-        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
+        return TEST_FAIL;
     }
-    return 0;
+    return TEST_PASS;
 }
 
 int checkForImageSupport( cl_device_id device )
diff --git a/test_conformance/compatibility/test_common/harness/kernelHelpers.h b/test_conformance/compatibility/test_common/harness/kernelHelpers.h
index 59c01b5f..09515e28 100644
--- a/test_conformance/compatibility/test_common/harness/kernelHelpers.h
+++ b/test_conformance/compatibility/test_common/harness/kernelHelpers.h
@@ -17,6 +17,7 @@
 #define _kernelHelpers_h
 
 #include "compat.h"
+#include "testHarness.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -84,8 +85,8 @@ extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl
 /* Helper to get pixel size for a pixel format */
 size_t get_pixel_bytes( const cl_image_format *fmt );
 
-/* Verify the given device supports images. 0 means you're good to go, otherwise an error */
-extern int verifyImageSupport( cl_device_id device );
+/* Verify the given device supports images. */
+extern test_status verifyImageSupport( cl_device_id device );
 
 /* Checks that the given device supports images. Same as verify, but doesn't print an error */
 extern int checkForImageSupport( cl_device_id device );
diff --git a/test_conformance/compatibility/test_common/harness/msvc9.c b/test_conformance/compatibility/test_common/harness/msvc9.c
index 7d559bce..093bb978 100644
--- a/test_conformance/compatibility/test_common/harness/msvc9.c
+++ b/test_conformance/compatibility/test_common/harness/msvc9.c
@@ -13,15 +13,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#if defined(_WIN32) && defined (_MSC_VER)
-
 #include "compat.h"
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-#include <CL/cl_platform.h>
 
+#if defined ( _MSC_VER )
 
+#include <limits.h>
+#include <stdlib.h>
+
+#include <CL/cl.h>
+
+#include <windows.h>
+
+#if ! defined( __INTEL_COMPILER )
 
 ///////////////////////////////////////////////////////////////////
 //
@@ -387,86 +390,6 @@ long double log2l(long double x)
     return 1.44269504088896340735992468100189214L * log(x);
 }
 
-///////////////////////////////////////////////////////////////////
-//
-//                  misc functions
-//
-///////////////////////////////////////////////////////////////////
-
-/*
-// This function is commented out because the Windows implementation should never call munmap.
-// If it is calling it, we have a bug. Please file a bugzilla.
-int munmap(void *addr, size_t len)
-{
-// FIXME: this is not correct.  munmap is like free()    http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
-
-    return (int)VirtualAlloc( (LPVOID)addr, len,
-                  MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
-}
-*/
-
-uint64_t ReadTime( void )
-{
-    LARGE_INTEGER current;
-    QueryPerformanceCounter(&current);
-    return (uint64_t)current.QuadPart;
-}
-
-double SubtractTime( uint64_t endTime, uint64_t startTime )
-{
-    static double PerformanceFrequency = 0.0;
-
-    if (PerformanceFrequency == 0.0) {
-        LARGE_INTEGER frequency;
-        QueryPerformanceFrequency(&frequency);
-        PerformanceFrequency = (double) frequency.QuadPart;
-    }
-
-    return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
-}
-
-float make_nan()
-{
-/* This is the IEEE 754 single-precision format:
-    unsigned int mantissa:  22;
-    unsigned int quiet_nan:  1;
-    unsigned int exponent:   8;
-    unsigned int negative:   1;
-*/
-     //const static unsigned
-     static const int32_t _nan = 0x7fc00000;
-     return *(const float*)(&_nan);
-}
-
-float nanf( const char* str)
-{
-    cl_uint u = atoi( str );
-    u |= 0x7fc00000U;
-    return *( float*)(&u);
-}
-
-
-double nan( const char* str)
-{
-    cl_ulong u = atoi( str );
-    u |= 0x7ff8000000000000ULL;
-    return *( double*)(&u);
-}
-
-// double check this implementatation
-long double nanl( const char* str)
-{
-    union
-    {
-        long double f;
-        struct { cl_ulong m; cl_ushort sexp; }u;
-    }u;
-    u.u.sexp = 0x7fff;
-    u.u.m = 0x8000000000000000ULL | atoi( str );
-
-    return u.f;
-}
-
 double trunc(double x)
 {
     double absx = fabs(x);
@@ -589,7 +512,165 @@ long double roundl(long double x)
     return x;
 }
 
-int signbit(double x)
+float cbrtf( float x )
+{
+    float z = pow( fabs((double) x), 1.0 / 3.0 );
+    return copysignf( z, x );
+}
+
+double cbrt( double x )
+{
+    return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
+}
+
+long int lrint (double x)
+{
+    double absx = fabs(x);
+
+    if( x >= (double) LONG_MAX )
+        return LONG_MAX;
+
+    if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
+    {
+        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
+        double rounded = x + magic;
+        rounded -= magic;
+        return (long int) rounded;
+    }
+
+    return (long int) x;
+}
+
+long int lrintf (float x)
+{
+    float absx = fabsf(x);
+
+    if( x >= (float) LONG_MAX )
+        return LONG_MAX;
+
+    if( absx < 8388608.0f /* 0x1.0p23f */ )
+    {
+        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
+        float rounded = x + magic;
+        rounded -= magic;
+        return (long int) rounded;
+    }
+
+    return (long int) x;
+}
+
+
+///////////////////////////////////////////////////////////////////
+//
+//                  fenv functions
+//
+///////////////////////////////////////////////////////////////////
+
+int fetestexcept(int excepts)
+{
+    unsigned int status = _statusfp();
+    return excepts & (
+        ((status & _SW_INEXACT) ? FE_INEXACT : 0)      |
+        ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)  |
+        ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)    |
+        ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
+        ((status & _SW_INVALID) ? FE_INVALID : 0)
+    );
+}
+
+int feclearexcept(int excepts)
+{
+    _clearfp();
+    return 0;
+}
+
+#endif // __INTEL_COMPILER
+
+#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
+
+float make_nan()
+{
+/* This is the IEEE 754 single-precision format:
+    unsigned int mantissa:  22;
+    unsigned int quiet_nan:  1;
+    unsigned int exponent:   8;
+    unsigned int negative:   1;
+*/
+     //const static unsigned
+     static const int32_t _nan = 0x7fc00000;
+     return *(const float*)(&_nan);
+}
+
+float nanf( const char* str)
+{
+    cl_uint u = atoi( str );
+    u |= 0x7fc00000U;
+    return *( float*)(&u);
+}
+
+
+double nan( const char* str)
+{
+    cl_ulong u = atoi( str );
+    u |= 0x7ff8000000000000ULL;
+    return *( double*)(&u);
+}
+
+// double check this implementatation
+long double nanl( const char* str)
+{
+    union
+    {
+        long double f;
+        struct { cl_ulong m; cl_ushort sexp; }u;
+    }u;
+    u.u.sexp = 0x7fff;
+    u.u.m = 0x8000000000000000ULL | atoi( str );
+
+    return u.f;
+}
+
+#endif
+
+///////////////////////////////////////////////////////////////////
+//
+//                  misc functions
+//
+///////////////////////////////////////////////////////////////////
+
+/*
+// This function is commented out because the Windows implementation should never call munmap.
+// If it is calling it, we have a bug. Please file a bugzilla.
+int munmap(void *addr, size_t len)
+{
+// FIXME: this is not correct.  munmap is like free()    http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
+
+    return (int)VirtualAlloc( (LPVOID)addr, len,
+                  MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
+}
+*/
+
+uint64_t ReadTime( void )
+{
+    LARGE_INTEGER current;
+    QueryPerformanceCounter(&current);
+    return (uint64_t)current.QuadPart;
+}
+
+double SubtractTime( uint64_t endTime, uint64_t startTime )
+{
+    static double PerformanceFrequency = 0.0;
+
+    if (PerformanceFrequency == 0.0) {
+        LARGE_INTEGER frequency;
+        QueryPerformanceFrequency(&frequency);
+        PerformanceFrequency = (double) frequency.QuadPart;
+    }
+
+    return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
+}
+
+int cf_signbit(double x)
 {
     union
     {
@@ -600,7 +681,7 @@ int signbit(double x)
     return u.u >> 63;
 }
 
-int signbitf(float x)
+int cf_signbitf(float x)
 {
     union
     {
@@ -611,17 +692,6 @@ int signbitf(float x)
     return u.u >> 31;
 }
 
-float cbrtf( float x )
-{
-    float z = pow( fabs((double) x), 1.0 / 3.0 );
-    return copysignf( z, x );
-}
-
-double cbrt( double x )
-{
-    return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
-}
-
 float int2float (int32_t ix)
 {
     union {
@@ -642,7 +712,7 @@ int32_t float2int (float   fx)
     return u.i;
 }
 
-#if defined(_MSC_VER) && !defined(_WIN64)
+#if !defined(_WIN64)
 /** Returns the number of leading 0-bits in x,
     starting at the most significant bit position.
     If x is 0, the result is undefined.
@@ -682,45 +752,10 @@ int __builtin_clz(unsigned int pattern)
    return count;
 }
 
-#endif //defined(_MSC_VER) && !defined(_WIN64)
+#endif // !defined(_WIN64)
 
 #include <intrin.h>
 #include <emmintrin.h>
-long int lrint (double x)
-{
-    double absx = fabs(x);
-
-    if( x >= (double) LONG_MAX )
-        return LONG_MAX;
-
-    if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
-    {
-        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
-        double rounded = x + magic;
-        rounded -= magic;
-        return (long int) rounded;
-    }
-
-    return (long int) x;
-}
-
-long int lrintf (float x)
-{
-    float absx = fabsf(x);
-
-    if( x >= (float) LONG_MAX )
-        return LONG_MAX;
-
-    if( absx < 8388608.0f /* 0x1.0p23f */ )
-    {
-        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
-        float rounded = x + magic;
-        rounded -= magic;
-        return (long int) rounded;
-    }
-
-    return (long int) x;
-}
 
 int usleep(int usec)
 {
@@ -728,22 +763,10 @@ int usleep(int usec)
     return 0;
 }
 
-int fetestexcept(int excepts)
+unsigned int sleep( unsigned int sec )
 {
-    unsigned int status = _statusfp();
-    return excepts & (
-        ((status & _SW_INEXACT) ? FE_INEXACT : 0)      |
-        ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)  |
-        ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)    |
-        ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
-        ((status & _SW_INVALID) ? FE_INVALID : 0)
-    );
-}
-
-int feclearexcept(int excepts)
-{
-    _clearfp();
+    Sleep( sec * 1000 );
     return 0;
 }
 
-#endif //defined(_WIN32)
+#endif // defined( _MSC_VER )
diff --git a/test_conformance/compatibility/test_common/harness/testHarness.c b/test_conformance/compatibility/test_common/harness/testHarness.c
index 1994399d..af05c8d5 100644
--- a/test_conformance/compatibility/test_common/harness/testHarness.c
+++ b/test_conformance/compatibility/test_common/harness/testHarness.c
@@ -439,10 +439,18 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
 
 
     /* If we have a device checking function, run it */
-    if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS )
+    if( ( deviceCheckFn != NULL ) )
     {
-        test_finish();
-        return -1;
+        test_status status = deviceCheckFn( device );
+        switch (status)
+        {
+            case TEST_PASS:
+                break;
+            case TEST_FAIL:
+                return 1;
+            case TEST_SKIP:
+                return 0;
+        }
     }
 
     if (num_elements <= 0)
diff --git a/test_conformance/compatibility/test_common/harness/testHarness.h b/test_conformance/compatibility/test_common/harness/testHarness.h
index c2620647..7c1d9cb2 100644
--- a/test_conformance/compatibility/test_common/harness/testHarness.h
+++ b/test_conformance/compatibility/test_common/harness/testHarness.h
@@ -23,6 +23,13 @@
 extern "C" {
 #endif
 
+typedef enum test_status
+{
+    TEST_PASS = 0,
+    TEST_FAIL = 1,
+    TEST_SKIP = 2,
+} test_status;
+
 extern cl_uint gReSeed;
 extern cl_uint gRandomSeed;
 
@@ -32,8 +39,8 @@ extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
                             basefn fnList[], const char *fnNames[],
                             int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
 
-// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits.
-typedef int (*DeviceCheckFn)( cl_device_id device );
+// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than TEST_PASS, the harness exits.
+typedef test_status (*DeviceCheckFn)( cl_device_id device );
 
 // Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
 extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
diff --git a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt
index 50dd64ef..4af76989 100644
--- a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt
@@ -20,6 +20,7 @@ set(${MODULE_NAME}_SOURCES
          test_mem_object_info.cpp
          test_null_buffer_arg.c
          test_kernel_arg_info.c
+         test_queue_properties.cpp
          ../../test_common/harness/errorHelpers.c
          ../../test_common/harness/threadTesting.c
          ../../test_common/harness/testHarness.c
diff --git a/test_conformance/compatibility/test_conformance/api/main.c b/test_conformance/compatibility/test_conformance/api/main.c
index 1870bf8f..a8e9e993 100644
--- a/test_conformance/compatibility/test_conformance/api/main.c
+++ b/test_conformance/compatibility/test_conformance/api/main.c
@@ -113,6 +113,7 @@ basefn    basefn_list[] = {
     test_get_image1d_info,
     test_get_image1d_array_info,
     test_get_image2d_array_info,
+    test_queue_properties,
 };
 
 
@@ -200,6 +201,7 @@ const char    *basefn_names[] = {
     "get_image1d_info",
     "get_image1d_array_info",
     "get_image2d_array_info",
+    "queue_properties",
 };
 
 ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
diff --git a/test_conformance/compatibility/test_conformance/api/procs.h b/test_conformance/compatibility/test_conformance/api/procs.h
index ff796a38..e0a85356 100644
--- a/test_conformance/compatibility/test_conformance/api/procs.h
+++ b/test_conformance/compatibility/test_conformance/api/procs.h
@@ -105,4 +105,5 @@ extern int      test_get_image1d_info( cl_device_id deviceID, cl_context context
 extern int      test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
 extern int      test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
 extern int      test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
+extern int      test_queue_properties( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
 
diff --git a/test_conformance/compatibility/test_conformance/api/test_api_min_max.c b/test_conformance/compatibility/test_conformance/api/test_api_min_max.c
index aa733996..2dc43d74 100644
--- a/test_conformance/compatibility/test_conformance/api/test_api_min_max.c
+++ b/test_conformance/compatibility/test_conformance/api/test_api_min_max.c
@@ -1319,12 +1319,12 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
 
     log_info("Reported max constant buffer size of %lld bytes.\n", maxSize);
 
-    // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
+    // Limit test buffer size to 1/8 of CL_DEVICE_GLOBAL_MEM_SIZE
     error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
     test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
 
-    if (maxSize > maxGlobalSize / 4)
-        maxSize = maxGlobalSize / 4;
+    if (maxSize > maxGlobalSize / 8)
+        maxSize = maxGlobalSize / 8;
 
     error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
     test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
@@ -1422,7 +1422,7 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
 
     if (allocPassed) {
         if (currentSize < maxSize/PASSING_FRACTION) {
-            log_error("Failed to allocate at least 1/4 of the reported constant size.\n");
+            log_error("Failed to allocate at least 1/8 of the reported constant size.\n");
             return -1;
         } else if (currentSize != maxSize) {
             log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize);
diff --git a/test_conformance/compatibility/test_conformance/api/test_queue_properties.cpp b/test_conformance/compatibility/test_conformance/api/test_queue_properties.cpp
new file mode 100644
index 00000000..3b368077
--- /dev/null
+++ b/test_conformance/compatibility/test_conformance/api/test_queue_properties.cpp
@@ -0,0 +1,174 @@
+//
+// Copyright (c) 2018 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+
+#include "testBase.h"
+#include "../../test_common/harness/typeWrappers.h"
+#include "../../test_common/harness/conversions.h"
+#include <sstream>
+#include <string>
+#include <vector>
+
+using namespace std;
+/*
+The test against cl_khr_create_command_queue extension. It validates if devices with Opencl 1.X can use clCreateCommandQueueWithPropertiesKHR function.
+Based on device capabilities test will create queue with NULL properties, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property and
+CL_QUEUE_PROFILING_ENABLE property. Finally simple kernel will be executed on such queue.
+*/
+
+const char *queue_test_kernel[] = {
+"__kernel void vec_cpy(__global int *src, __global int *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = src[tid];\n"
+"\n"
+"}\n" };
+
+int enqueue_kernel(cl_context context, const cl_queue_properties_khr *queue_prop_def, cl_device_id deviceID, clKernelWrapper& kernel, size_t num_elements)
+{
+    clMemWrapper streams[2];
+    int error;
+    std::vector<int> buf(num_elements);
+    clCreateCommandQueueWithPropertiesKHR_fn clCreateCommandQueueWithPropertiesKHR = NULL;
+    cl_platform_id platform;
+    clEventWrapper event;
+
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
+
+    clCreateCommandQueueWithPropertiesKHR = (clCreateCommandQueueWithPropertiesKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clCreateCommandQueueWithPropertiesKHR");
+    if (clCreateCommandQueueWithPropertiesKHR == NULL)
+    {
+        log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
+        return -1;
+    }
+
+    clCommandQueueWrapper queue = clCreateCommandQueueWithPropertiesKHR(context, deviceID, queue_prop_def, &error);
+    test_error(error, "clCreateCommandQueueWithPropertiesKHR failed");
+
+    for (int i = 0; i < num_elements; ++i)
+    {
+        buf[i] = i;
+    }
+
+    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, num_elements * sizeof(int), buf.data(), &error);
+    test_error( error, "clCreateBuffer failed." );
+    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, num_elements * sizeof(int), NULL, &error);
+    test_error( error, "clCreateBuffer failed." );
+
+    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+    test_error( error, "clSetKernelArg failed." );
+
+    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+    test_error( error, "clSetKernelArg failed." );
+
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &num_elements, NULL, 0, NULL, &event);
+    test_error( error, "clEnqueueNDRangeKernel failed." );
+    
+    error = clWaitForEvents(1, &event);
+    test_error(error, "clWaitForEvents failed.");
+    
+    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, num_elements, buf.data(), 0, NULL, NULL);
+    test_error( error, "clEnqueueReadBuffer failed." );
+
+    for (int i = 0; i < num_elements; ++i)
+    {
+        if (buf[i] != i)
+        {
+            log_error("ERROR: Incorrect vector copy result.");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+int test_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    if (num_elements <= 0)
+    {
+        num_elements = 128;
+    }
+    int error = 0;
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    size_t strSize;
+    std::string strExt(0, '\0');
+    cl_queue_properties_khr device_props = NULL;
+    cl_queue_properties_khr queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, 0 };
+
+    // Query extension
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &strSize);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed");
+    strExt.resize(strSize);
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, strExt.size(), &strExt[0], NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed");
+    log_info("CL_DEVICE_EXTENSIONS:\n%s\n\n", strExt.c_str());
+
+    if (strExt.find("cl_khr_create_command_queue") == string::npos)
+    {
+        log_info("extension cl_khr_create_command_queue is not supported.\n");
+        return 0;
+    }
+
+    error = create_single_kernel_helper(context, &program, &kernel, 1, queue_test_kernel, "vec_cpy");
+    test_error(error, "create_single_kernel_helper failed");
+
+    log_info("Queue property NULL. Testing ... \n");
+    error = enqueue_kernel(context, NULL,deviceID, kernel, (size_t)num_elements);
+    test_error(error, "enqueue_kernel failed");
+
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
+
+    if (device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
+    {
+        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE supported. Testing ... \n");
+        queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
+        test_error(error, "enqueue_kernel failed");
+    } else
+    {
+        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not supported \n");
+    }
+
+    if (device_props & CL_QUEUE_PROFILING_ENABLE)
+    {
+        log_info("Queue property CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n");
+        queue_prop_def[1] = CL_QUEUE_PROFILING_ENABLE;
+        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
+        test_error(error, "enqueue_kernel failed");
+    } else
+    {
+        log_info("Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
+    }
+
+    if (device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE && device_props & CL_QUEUE_PROFILING_ENABLE)
+    {
+        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE & CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n");
+        queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_PROFILING_ENABLE;
+        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
+        test_error(error, "enqueue_kernel failed");
+    }
+    else
+    {
+        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE or CL_QUEUE_PROFILING_ENABLE not supported \n");
+    }
+
+    return 0;
+}
\ No newline at end of file
diff --git a/test_conformance/compatibility/test_conformance/basic/run_array b/test_conformance/compatibility/test_conformance/basic/run_array
old mode 100644
new mode 100755
diff --git a/test_conformance/compatibility/test_conformance/basic/run_array_image_copy b/test_conformance/compatibility/test_conformance/basic/run_array_image_copy
old mode 100644
new mode 100755
diff --git a/test_conformance/compatibility/test_conformance/basic/run_image b/test_conformance/compatibility/test_conformance/basic/run_image
old mode 100644
new mode 100755
diff --git a/test_conformance/compatibility/test_conformance/basic/run_multi_read_image b/test_conformance/compatibility/test_conformance/basic/run_multi_read_image
old mode 100644
new mode 100755
diff --git a/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp
index 6db06398..dca52b5e 100644
--- a/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp
@@ -202,10 +202,10 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
             log_error( "ERROR: Results of copy did not validate!\n" );
             sprintf(values + strlen( values), "%d -> [", i);
             for (int j=0; j<(int)elementSize; j++)
-                sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]);
+                sprintf(values + strlen( values), "%2x ", inchar[j]);
             sprintf(values + strlen(values), "] != [");
             for (int j=0; j<(int)elementSize; j++)
-                sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]);
+                sprintf(values + strlen( values), "%2x ", outchar[j]);
             sprintf(values + strlen(values), "]");
             log_error("%s\n", values);
 
diff --git a/test_conformance/compatibility/test_conformance/basic/test_imagedim.c b/test_conformance/compatibility/test_conformance/basic/test_imagedim.c
index 2dbaaa7b..81de4cbc 100644
--- a/test_conformance/compatibility/test_conformance/basic/test_imagedim.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_imagedim.c
@@ -16,6 +16,7 @@
 #include "../../test_common/harness/compat.h"
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -112,6 +113,10 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que
     log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n",
            max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0));
 
+    if (max_mem_size > (cl_ulong)SIZE_MAX) {
+        max_mem_size = (cl_ulong)SIZE_MAX;
+    }
+
     cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
     test_error(err, "clCreateSampler failed");
 
@@ -182,7 +187,7 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que
 
             size_t origin[3] = {0,0,0};
             size_t region[3] = {img_width, img_height, 1};
-            err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
+            err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
             if (err != CL_SUCCESS)
             {
                 log_error("clWriteImage failed\n");
@@ -324,6 +329,10 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
     max_img_width = (int)max_image2d_width;
     max_img_height = (int)max_image2d_height;
 
+  if (max_mem_size > (cl_ulong)SIZE_MAX) {
+    max_mem_size = (cl_ulong)SIZE_MAX;
+  }
+
     // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
     //  and we want to consume 1/4 of global memory (this is the minimum required to be
     //  supported by the spec)
@@ -351,6 +360,9 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
             max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0));
 
     d = init_genrand( gRandomSeed );
+    input_ptr = generate_8888_image(max_img_width, max_img_height, d);
+    output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height);
+
     int plus_minus;
     for (plus_minus=0; plus_minus < 3; plus_minus++)
     {
@@ -390,9 +402,6 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
                       break;
                 }
 
-                input_ptr = generate_8888_image(effective_img_width, effective_img_height, d);
-                output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * effective_img_width * effective_img_height);
-
                 img_format.image_channel_order = CL_RGBA;
                 img_format.image_channel_data_type = CL_UNORM_INT8;
                 streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, effective_img_width, effective_img_height, 0, NULL, NULL);
@@ -419,7 +428,7 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
 
                   size_t origin[3] = {0,0,0};
                   size_t region[3] = {effective_img_width, effective_img_height, 1};
-                  err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
+                  err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
                 if (err != CL_SUCCESS)
                 {
                     log_error("clWriteImage failed\n");
@@ -484,14 +493,14 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
 
                 clReleaseMemObject(streams[0]);
                 clReleaseMemObject(streams[1]);
-                free(input_ptr);
-                free(output_ptr);
             }
         }
 
   }
 
     // cleanup
+    free(input_ptr);
+    free(output_ptr);
     free_mtdata(d);
     clReleaseSampler(sampler);
     clReleaseKernel(kernel);
diff --git a/test_conformance/compatibility/test_conformance/basic/test_sizeof.c b/test_conformance/compatibility/test_conformance/basic/test_sizeof.c
index bece1b94..9c56bb58 100644
--- a/test_conformance/compatibility/test_conformance/basic/test_sizeof.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_sizeof.c
@@ -133,7 +133,7 @@ const size_table  vector_table[] =
 
 const char  *ptr_table[] =
 {
-    "void*",
+    "global void*",
     "size_t",
     "sizeof(int)",      // check return type of sizeof
     "ptrdiff_t"
diff --git a/test_conformance/compatibility/test_conformance/images/image_helpers.h b/test_conformance/compatibility/test_conformance/images/image_helpers.h
index d2132c7e..cd0eff6c 100644
--- a/test_conformance/compatibility/test_conformance/images/image_helpers.h
+++ b/test_conformance/compatibility/test_conformance/images/image_helpers.h
@@ -481,7 +481,6 @@ extern char *create_random_image_data( ExplicitType dataType, image_descriptor *
 extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
 extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
 extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
-extern int issubnormal(float);
 
 
 #define errMax( _x , _y )       ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )
diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
index 3a03e1d5..cfb01a34 100644
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
@@ -54,6 +54,9 @@ const char *known_extensions[] = {
     "cl_khr_egl_image",
     "cl_khr_egl_event",
     "cl_khr_il_program",
+    "cl_khr_create_command_queue",
+    "cl_khr_throttle_hints",
+    "cl_khr_priority_hints",
 };
 
 size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*);
diff --git a/test_conformance/contractions/contractions.c b/test_conformance/contractions/contractions.c
index 6fb444b9..cb9069b0 100644
--- a/test_conformance/contractions/contractions.c
+++ b/test_conformance/contractions/contractions.c
@@ -515,6 +515,8 @@ static void PrintArch( void )
     vlog( "\tARCH:\tx86_64\n" );
 #elif defined( __arm__ )
     vlog( "\tARCH:\tarm\n" );
+#elif defined( __aarch64__ )
+    vlog( "\tARCH:\taarch64\n" );
 #else
     vlog( "\tARCH:\tunknown\n" );
 #endif
diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt
index 35acc7a6..4c65c69d 100644
--- a/test_conformance/conversions/CMakeLists.txt
+++ b/test_conformance/conversions/CMakeLists.txt
@@ -19,6 +19,7 @@ set (${MODULE_NAME}_SOURCES
       ../../test_common/harness/errorHelpers.c
       ../../test_common/harness/kernelHelpers.c
       ../../test_common/harness/testHarness.c
+      ../../test_common/harness/parseParameters.cpp
 )
 
 if(ANDROID)
diff --git a/test_conformance/conversions/basic_test_conversions.c b/test_conformance/conversions/basic_test_conversions.c
index 73f38633..bfd31b27 100644
--- a/test_conformance/conversions/basic_test_conversions.c
+++ b/test_conformance/conversions/basic_test_conversions.c
@@ -751,16 +751,11 @@ static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((
 static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; }
 static void ulong2float( void *out, void *in)
 {
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && defined(_M_X64)
     cl_ulong l = ((cl_ulong*) in)[0];
     float result;
-
     cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l;
-#if defined(_M_X64)
     _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl));
-#else
-    result = sl;
-#endif
     ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result));
 #else
     cl_ulong l = ((cl_ulong*) in)[0];
diff --git a/test_conformance/conversions/test_conversions.c b/test_conformance/conversions/test_conversions.c
index 5b835343..fa04909b 100644
--- a/test_conformance/conversions/test_conversions.c
+++ b/test_conformance/conversions/test_conversions.c
@@ -18,6 +18,7 @@
 #include "../../test_common/harness/ThreadPool.h"
 #include "../../test_common/harness/testHarness.h"
 #include "../../test_common/harness/kernelHelpers.h"
+#include "../../test_common/harness/parseParameters.h"
 #if !defined(_WIN32) && !defined(__ANDROID__)
 #include <sys/sysctl.h>
 #endif
@@ -98,6 +99,7 @@ cl_mem          gOutBuffers[ kCallStyleCount ];
 size_t          gComputeDevices = 0;
 uint32_t        gDeviceFrequency = 0;
 int             gWimpyMode = 0;
+int             gWimpyReductionFactor = 128;
 int             gSkipTesting = 0;
 int             gForceFTZ = 0;
 int             gMultithread = 1;
@@ -438,6 +440,9 @@ static int ParseArgs( int argc, const char **argv )
                     case 'w':
                         gWimpyMode ^= 1;
                         break;
+                    case '[':
+                        parseWimpyReductionFactor(arg, gWimpyReductionFactor);
+                        break;
                     case 'z':
                         gForceFTZ ^= 1;
                         break;
@@ -540,6 +545,7 @@ static int ParseArgs( int argc, const char **argv )
         vlog( "*** WARNING: Testing in Wimpy mode!                     ***\n" );
         vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
         vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
+        vlog("*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
     }
 
     return 0;
@@ -566,6 +572,7 @@ static void PrintUsage( void )
     vlog( "\t\t-l\tToggle link check mode. When on, testing is skipped, and we just check to see that the kernels build. (Off by default.)\n" );
     vlog( "\t\t-m\tToggle Multithreading. (On by default.)\n" );
     vlog( "\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very small subset of the tests for each fn. NOT A VALID TEST! (Off by default.)\n" );
+    vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor);
     vlog( "\t\t-z\tToggle flush to zero mode  (Default: per device)\n" );
     vlog( "\t\t-#\tTest just vector size given by #, where # is an element of the set {1,2,3,4,8,16}\n" );
     vlog( "\n" );
@@ -1243,15 +1250,12 @@ static int DoTest( Type outType, Type inType, SaturationMode sat, RoundingMode r
     if ( !gWimpyMode && gIsEmbedded )
       step = blockCount * EMBEDDED_REDUCTION_FACTOR;
 
+    if ( gWimpyMode )
+        step = (size_t)blockCount * (size_t)gWimpyReductionFactor;
     vlog( "Testing... " );
     fflush(stdout);
     for( i = 0; i < (uint64_t)lastCase; i += step )
     {
-        if (gWimpyMode) {
-            uint64_t blockIndex = (i / blockCount) & 0xFF;
-            if (blockIndex != 0 && blockIndex != 0xFF)
-                continue;
-        }
 
         if( 0 == ( i & ((lastCase >> 3) -1))) {
             vlog(".");
diff --git a/test_conformance/d3d10/harness.cpp b/test_conformance/d3d10/harness.cpp
index 5caee9bb..13829760 100644
--- a/test_conformance/d3d10/harness.cpp
+++ b/test_conformance/d3d10/harness.cpp
@@ -200,7 +200,7 @@ cl_int HarnessD3D10_CreateDevice(IDXGIAdapter* pAdapter, ID3D10Device **ppDevice
         pAdapter,
         D3D10_DRIVER_TYPE_HARDWARE,
         NULL,
-        D3D10_CREATE_DEVICE_DEBUG,
+        0,
         D3D10_SDK_VERSION,
         &sd,
         &HarnessD3D10_pSwapChain,
diff --git a/test_conformance/device_execution/execute_block.cpp b/test_conformance/device_execution/execute_block.cpp
index e10b7c6e..99589605 100644
--- a/test_conformance/device_execution/execute_block.cpp
+++ b/test_conformance/device_execution/execute_block.cpp
@@ -928,15 +928,16 @@ static const char* block_barrier[] =
     NL, "  size_t gid = get_group_id(0);"
     NL, "  size_t idx = gid*lsz;"
     NL, ""
+    NL, "  res[tid]=lsz;"
+    NL, "  barrier(CLK_GLOBAL_MEM_FENCE);"
     NL, "  int (^kernelBlock)(int) = ^(int a)"
     NL, "  {"
-    NL, "    atomic_inc(res+idx);"
+    NL, "    atomic_dec(res+idx);"
     NL, "    barrier(CLK_GLOBAL_MEM_FENCE);"
-    NL, "    return (int)abs(a - b) - (res[idx] != lsz ? 0 : 1);"
+    NL, "    return (int)abs(a - b) - (res[idx] != 0 ? 0 : 1);"
     NL, "  };"
     NL, ""
     NL, "  int d = kernelBlock(2);"
-    NL, "  barrier(CLK_GLOBAL_MEM_FENCE);"
     NL, "  res[tid] = d;"
     NL, "}"
     NL
diff --git a/test_conformance/device_execution/main.c b/test_conformance/device_execution/main.c
index 3aa67de3..6d247d02 100644
--- a/test_conformance/device_execution/main.c
+++ b/test_conformance/device_execution/main.c
@@ -65,7 +65,7 @@ ct_assert(arr_size(commonfn_names) == arr_size(basefn_list))
 
 static const int num_commonfns = arr_size(commonfn_names);
 
-int deviceCheck(cl_device_id device)
+test_status deviceCheck(cl_device_id device)
 {
     static const char expected_cl[] =  "OpenCL 2.0";
     static const char expected_clc[] = "OpenCL C 2.0";
@@ -77,7 +77,7 @@ int deviceCheck(cl_device_id device)
     if(res != CL_SUCCESS || ret_len < strlen(expected_cl) || strncmp(version, expected_cl, strlen(expected_cl)))
     {
         log_info("Device does not support '%s'. Skipping the test.\n", expected_cl);
-        return CL_INVALID_DEVICE;
+        return TEST_FAIL;
     }
 
     version[0] = 0;
@@ -87,10 +87,10 @@ int deviceCheck(cl_device_id device)
     if(res != CL_SUCCESS || ret_len < strlen(expected_clc) || strncmp(version, expected_clc, strlen(expected_clc)))
     {
         log_info("Device does not support '%s'. Skipping the test.\n", expected_clc);
-        return CL_INVALID_DEVICE;
+        return TEST_FAIL;
     }
 
-    return CL_SUCCESS;
+    return TEST_PASS;
 }
 
 int
diff --git a/test_conformance/device_partition/Jamfile b/test_conformance/device_partition/Jamfile
index 0e1d4d6a..3bf4b29f 100644
--- a/test_conformance/device_partition/Jamfile
+++ b/test_conformance/device_partition/Jamfile
@@ -1,32 +1,32 @@
-project
-    : requirements
-      <toolset>gcc:<cflags>-xc++
-      <toolset>msvc:<cflags>"/TP"
-    ;
-
-exe test_device_partition
-    : main.c
-      test_device_partition.cpp
-      ../../test_common/harness/errorHelpers.c
-      ../../test_common/harness/threadTesting.c
-      ../../test_common/harness/testHarness.c
-      ../../test_common/harness/kernelHelpers.c
-      ../../test_common/harness/genericThread.cpp
-      ../../test_common/harness/mt19937.c
-      ../../test_common/harness/conversions.c
-      ../../test_common/harness/typeWrappers.cpp
-    : <target-os>windows:<source>../../test_common/harness/msvc9.c
-    ;
-
-install dist
-    : test_device_partition
-    : <variant>debug:<location>$(DIST)/debug/tests/conformance/1.2/x86/device_partition
-      <variant>release:<location>$(DIST)/release/tests/conformance/1.2/x86/device_partition
-    ;
-
-install dist
-    : test_device_partition
-    : <variant>debug:<location>$(DIST)/debug/tests/conformance/1.2/x86_64/device_partition
-      <variant>release:<location>$(DIST)/release/tests/conformance/1.2/x86_64/device_partition
-      <address-model>64
-    ;
+project
+    : requirements
+      <toolset>gcc:<cflags>-xc++
+      <toolset>msvc:<cflags>"/TP"
+    ;
+
+exe test_device_partition
+    : main.c
+      test_device_partition.cpp
+      ../../test_common/harness/errorHelpers.c
+      ../../test_common/harness/threadTesting.c
+      ../../test_common/harness/testHarness.c
+      ../../test_common/harness/kernelHelpers.c
+      ../../test_common/harness/genericThread.cpp
+      ../../test_common/harness/mt19937.c
+      ../../test_common/harness/conversions.c
+      ../../test_common/harness/typeWrappers.cpp
+    : <target-os>windows:<source>../../test_common/harness/msvc9.c
+    ;
+
+install dist
+    : test_device_partition
+    : <variant>debug:<location>$(DIST)/debug/tests/conformance/1.2/x86/device_partition
+      <variant>release:<location>$(DIST)/release/tests/conformance/1.2/x86/device_partition
+    ;
+
+install dist
+    : test_device_partition
+    : <variant>debug:<location>$(DIST)/debug/tests/conformance/1.2/x86_64/device_partition
+      <variant>release:<location>$(DIST)/release/tests/conformance/1.2/x86_64/device_partition
+      <address-model>64
+    ;
diff --git a/test_conformance/geometrics/test_geometrics_double.cpp b/test_conformance/geometrics/test_geometrics_double.cpp
index 2d258d5d..34bd1933 100644
--- a/test_conformance/geometrics/test_geometrics_double.cpp
+++ b/test_conformance/geometrics/test_geometrics_double.cpp
@@ -203,7 +203,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command
             return -1;
 
         /* Generate some streams. Note: deliberately do some random data in w to verify that it gets ignored */
-        for( i = 0; i < TEST_SIZE * vecsize; i++ )
+        for( i = 0; i < size * vecsize; i++ )
         {
             inDataA[ i ] = get_random_double( -512.f, 512.f, d );
             inDataB[ i ] = get_random_double( -512.f, 512.f, d );
@@ -237,7 +237,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command
         }
 
         /* Run the kernel */
-        threads[0] = TEST_SIZE;
+        threads[0] = size;
 
         error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
         test_error( error, "Unable to get work group size to use" );
diff --git a/test_conformance/gl/test_image_methods.cpp b/test_conformance/gl/test_image_methods.cpp
index fa0c00a6..06a3f393 100644
--- a/test_conformance/gl/test_image_methods.cpp
+++ b/test_conformance/gl/test_image_methods.cpp
@@ -34,7 +34,8 @@ typedef struct image_kernel_data
   cl_int numSamples;
 };
 
-static const char *methodTestKernelPattern =
+static const char *methodTestKernelPattern = 
+"%s"
 "typedef struct {\n"
 "    int width;\n"
 "    int height;\n"
@@ -75,6 +76,8 @@ static const char *channelOrderConstLine =
 "   outData->expectedChannelOrder = CLK_%s;\n";
 static const char *numSamplesKernelLine =
 "   outData->numSamples = get_image_num_samples( input );\n";
+static const char *enableMSAAKernelLine =
+"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n";
 
 static int verify(cl_int input, cl_int kernelOutput, const char * description)
 {
@@ -185,6 +188,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma
   bool doImageChannelOrder = false;
   bool doImageDim = false;
   bool doNumSamples = false;
+  bool doMSAA = false;
   switch(target) {
     case GL_TEXTURE_2D:
       imageType = "image2d_depth_t";
@@ -206,6 +210,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma
       break;
     case GL_TEXTURE_2D_MULTISAMPLE:
       doNumSamples = true;
+      doMSAA = true;
       if(format.formattype == GL_DEPTH_COMPONENT) {
         doImageWidth = true;
         imageType = "image2d_msaa_depth_t";
@@ -214,6 +219,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma
       }
       break;
     case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+      doMSAA = true;
       if(format.formattype == GL_DEPTH_COMPONENT) {
         doImageWidth = true;
         imageType = "image2d_msaa_array_depth_t";
@@ -244,9 +250,11 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma
     }
   }
 
-    // Create a program to run against
-    sprintf( programSrc, methodTestKernelPattern,
-            imageType,
+	// Create a program to run against
+	sprintf(programSrc, 
+          methodTestKernelPattern, 
+          ( doMSAA ) ? enableMSAAKernelLine : "",
+	        imageType,
           ( doArraySize ) ? arraySizeKernelLine : "",
           ( doImageWidth ) ? imageWidthKernelLine : "",
           ( doImageHeight ) ? imageHeightKernelLine : "",
@@ -265,7 +273,7 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma
   if (error)
     print_error(error, "clFinish failed.\n");
     const char *ptr = programSrc;
-    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
+    error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" );
     test_error( error, "Unable to create kernel to test against" );
 
     // Create an output buffer
diff --git a/test_conformance/gl/test_images_read_common.cpp b/test_conformance/gl/test_images_read_common.cpp
index 068b9a29..f9b3e302 100644
--- a/test_conformance/gl/test_images_read_common.cpp
+++ b/test_conformance/gl/test_images_read_common.cpp
@@ -107,6 +107,7 @@ static const char *kernelpattern_image_read_2darray_depth =
 "}\n";
 
 static const char *kernelpattern_image_multisample_read_2d =
+"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"
 "__kernel void sample_test( read_only image2d_msaa_t source, sampler_t sampler, __global %s4 *results )\n"
 "{\n"
 "    int  tidX = get_global_id(0);\n"
@@ -121,6 +122,7 @@ static const char *kernelpattern_image_multisample_read_2d =
 "}\n";
 
 static const char *kernelpattern_image_multisample_read_2d_depth =
+  "#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"
   "__kernel void sample_test( read_only image2d_msaa_depth_t source, sampler_t sampler, __global %s *results )\n"
 "{\n"
 "    int  tidX = get_global_id(0);\n"
@@ -135,6 +137,7 @@ static const char *kernelpattern_image_multisample_read_2d_depth =
 "}\n";
 
 static const char *kernelpattern_image_multisample_read_2darray =
+"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"
 "__kernel void sample_test( read_only image2d_array_msaa_t source, sampler_t sampler, __global %s4 *results )\n"
 "{\n"
 "    int  tidX = get_global_id(0);\n"
@@ -151,6 +154,7 @@ static const char *kernelpattern_image_multisample_read_2darray =
 "}\n";
 
 static const char *kernelpattern_image_multisample_read_2darray_depth =
+  "#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n"
   "__kernel void sample_test( read_only image2d_array_msaa_depth_t source, sampler_t sampler, __global %s *results )\n"
 "{\n"
 "    int  tidX = get_global_id(0);\n"
@@ -248,8 +252,8 @@ int test_cl_image_read( cl_context context, cl_command_queue queue,
     get_kernel_suffix( outFormat ) );
 
   programPtr = kernelSource;
-  if( create_single_kernel_helper( context, &program, &kernel, 1,
-    (const char **)&programPtr, "sample_test" ) )
+  if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1,
+    (const char **)&programPtr, "sample_test", "-cl-std=CL2.0" ) )
   {
     return -1;
   }
diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp
index 7b91f585..1345f46f 100644
--- a/test_conformance/gl/test_images_write_common.cpp
+++ b/test_conformance/gl/test_images_write_common.cpp
@@ -333,8 +333,8 @@ int test_cl_image_write( cl_context context, cl_command_queue queue,
     get_explicit_type_name( *outType ), suffix, convert);
 
   programPtr = kernelSource;
-  if( create_single_kernel_helper( context, &program, &kernel, 1,
-    (const char **)&programPtr, "sample_test" ) )
+  if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1,
+    (const char **)&programPtr, "sample_test", "-cl-std=CL2.0" ) )
   {
       return -1;
   }
diff --git a/test_conformance/gles/main.cpp b/test_conformance/gles/main.cpp
index e7aeb938..0b61702a 100644
--- a/test_conformance/gles/main.cpp
+++ b/test_conformance/gles/main.cpp
@@ -101,16 +101,14 @@ const char    *basefn_names[] = {
     "images_write_cube",
     "renderbuffer_read",
     "renderbuffer_write",
-  "renderbuffer_getinfo",
-    "all"
+  "renderbuffer_getinfo"
 };
 
 const char    *basefn_names32[] = {
-    "fence_sync",
-  "all"
+    "fence_sync"
 };
 
-ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0]) - 1) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
+ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
 
 int    num_fns = sizeof(basefn_names) / sizeof(char *);
 int num_fns32 = sizeof(basefn_names32) / sizeof(char *);
@@ -386,17 +384,17 @@ int main(int argc, const char *argv[])
 // Intentional falling through
 cleanup:
 
-    // Cleanup EGL
-    glEnv->terminate_egl_display();
-
     // Always make sure that OpenCL context is released properly when the test exit
     if(sCurrentContext)
     {
         clReleaseContext( sCurrentContext );
         sCurrentContext = NULL;
     }
+
+    // Cleanup EGL
+    glEnv->terminate_egl_display();
+
     delete glEnv;
 
-
     return error;
-}
\ No newline at end of file
+}
diff --git a/test_conformance/half/CMakeLists.txt b/test_conformance/half/CMakeLists.txt
index 46931eb2..6e92a888 100644
--- a/test_conformance/half/CMakeLists.txt
+++ b/test_conformance/half/CMakeLists.txt
@@ -11,6 +11,7 @@ set(${MODULE_NAME}_SOURCES
         ../../test_common/harness/kernelHelpers.c
         ../../test_common/harness/ThreadPool.c
         ../../test_common/harness/testHarness.c
+        ../../test_common/harness/parseParameters.cpp
 )
 
 include(../CMakeCommon.txt)
diff --git a/test_conformance/half/Test_roundTrip.c b/test_conformance/half/Test_roundTrip.c
index d941405b..f55f65d3 100644
--- a/test_conformance/half/Test_roundTrip.c
+++ b/test_conformance/half/Test_roundTrip.c
@@ -161,7 +161,7 @@ int Test_roundTrip( cl_device_id deviceID, cl_context context, cl_command_queue
 
     // Figure out how many elements are in a work block
     size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float));
-    size_t blockCount = (size_t)getBufferSize(gDevice) / elementSize; //elementSize is a power of two
+    size_t blockCount = (size_t)gBufferSize / elementSize; //elementSize is a power of two
     uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half
     size_t stride = blockCount;
 
diff --git a/test_conformance/half/Test_vLoadHalf.c b/test_conformance/half/Test_vLoadHalf.c
index 81c76ead..3f8b53cd 100644
--- a/test_conformance/half/Test_vLoadHalf.c
+++ b/test_conformance/half/Test_vLoadHalf.c
@@ -454,7 +454,7 @@ int Test_vLoadHalf_private( bool aligned )
 
     // Figure out how many elements are in a work block
     size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float));
-    size_t blockCount = getBufferSize(gDevice) / elementSize; // elementSize is power of 2
+    size_t blockCount = gBufferSize / elementSize; // elementSize is power of 2
     uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half
 
     // we handle 64-bit types a bit differently.
@@ -504,7 +504,7 @@ int Test_vLoadHalf_private( bool aligned )
                  continue;
                  }
                  */
-                memset_pattern4( gOut_single, &pattern, getBufferSize(gDevice));
+                memset_pattern4( gOut_single, &pattern, gBufferSize);
                 if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_single, CL_TRUE, 0, count * sizeof( float ), gOut_single, 0, NULL, NULL)) )
                 {
                     vlog_error( "Failure in clWriteArray\n" );
diff --git a/test_conformance/half/Test_vStoreHalf.c b/test_conformance/half/Test_vStoreHalf.c
index 80b74ea2..5d9a1b0a 100644
--- a/test_conformance/half/Test_vStoreHalf.c
+++ b/test_conformance/half/Test_vStoreHalf.c
@@ -1045,7 +1045,7 @@ int Test_vStoreHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const c
     size_t stride = blockCount;
 
     if (gWimpyMode)
-        stride = 0x10000000U;
+        stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor;
 
     // we handle 64-bit types a bit differently.
     if( lastCase == 0 )
@@ -1654,7 +1654,7 @@ int Test_vStoreaHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const
     size_t stride = blockCount;
 
     if (gWimpyMode)
-        stride = 0x10000000U;
+        stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor;
 
     // we handle 64-bit types a bit differently.
     if( lastCase == 0 )
diff --git a/test_conformance/half/cl_utils.c b/test_conformance/half/cl_utils.c
index 0c587805..4a6123b7 100644
--- a/test_conformance/half/cl_utils.c
+++ b/test_conformance/half/cl_utils.c
@@ -61,8 +61,10 @@ size_t          gMaxThreadGroupSize = 0;
 size_t          gWorkGroupSize = 0;
 int             gFailCount = 0;
 bool            gWimpyMode = false;
+int             gWimpyReductionFactor = 512;
 int             gTestDouble = 0;
 uint32_t        gDeviceIndex = 0;
+size_t          gBufferSize = 0;
 
 #if defined( __APPLE__ )
 int             gReportTimes = 1;
@@ -178,17 +180,19 @@ int InitCL( void )
 #if defined( __APPLE__ )
     // FIXME: use clProtectedArray
 #endif
+    gBufferSize = getBufferSize(gDevice);
+
     //Allocate buffers
-    gIn_half   = malloc( getBufferSize(gDevice)/2  );
+    gIn_half   = malloc( gBufferSize/2  );
     gOut_half = malloc( BUFFER_SIZE/2  );
     gOut_half_reference = malloc( BUFFER_SIZE/2  );
     gOut_half_reference_double = malloc( BUFFER_SIZE/2  );
     gIn_single   = malloc( BUFFER_SIZE );
-    gOut_single = malloc( getBufferSize(gDevice)  );
-    gOut_single_reference = malloc( getBufferSize(gDevice)  );
+    gOut_single = malloc( gBufferSize  );
+    gOut_single_reference = malloc( gBufferSize  );
     gIn_double   = malloc( 2*BUFFER_SIZE  );
-    // gOut_double = malloc( (2*getBufferSize(gDevice))  );
-    // gOut_double_reference = malloc( (2*getBufferSize(gDevice))  );
+    // gOut_double = malloc( (2*gBufferSize)  );
+    // gOut_double_reference = malloc( (2*gBufferSize)  );
 
     if ( NULL == gIn_half ||
      NULL == gOut_half ||
@@ -201,7 +205,7 @@ int InitCL( void )
          )
         return -3;
 
-    gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, getBufferSize(gDevice) / 2, NULL, &error);
+    gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, gBufferSize / 2, NULL, &error);
     if( gInBuffer_half == NULL )
     {
         vlog_error( "clCreateArray failed for input (%d)\n", error );
@@ -229,7 +233,7 @@ int InitCL( void )
         return -5;
     }
 
-    gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, getBufferSize(gDevice), NULL, &error );
+    gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, gBufferSize, NULL, &error );
     if( gOutBuffer_single == NULL )
     {
         vlog_error( "clCreateArray failed for output (%d)\n", error );
@@ -237,7 +241,7 @@ int InitCL( void )
     }
 
 #if 0
-    gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*getBufferSize(gDevice)), NULL, &error );
+    gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*gBufferSize), NULL, &error );
     if( gOutBuffer_double == NULL )
     {
         vlog_error( "clCreateArray failed for output (%d)\n", error );
@@ -310,6 +314,15 @@ void ReleaseCL(void)
     // clReleaseMemObject(gOutBuffer_double);
     clReleaseCommandQueue(gQueue);
     clReleaseContext(gContext);
+
+    free(gIn_half);
+    free(gOut_half);
+    free(gOut_half_reference);
+    free(gOut_half_reference_double);
+    free(gIn_single);
+    free(gOut_single);
+    free(gOut_single_reference);
+    free(gIn_double);
 }
 
 cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned) {
@@ -427,21 +440,30 @@ size_t getBufferSize(cl_device_id device_id)
 
     if(s_initialized == 0 || s_device_id != device_id)
     {
-        cl_ulong result;
+        cl_ulong result, maxGlobalSize;
         cl_int err = clGetDeviceInfo (device_id,
                                       CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
                                       sizeof(result), (void *)&result,
                                       NULL);
         if(err)
         {
-            vlog_error("clGetDeviceInfo() failed\n");
+            vlog_error("clGetDeviceInfo(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) failed\n");
             s_result = 64*1024;
             goto exit;
         }
+        log_info("Const buffer size is %llx (%llu)\n", result, result);
+        err = clGetDeviceInfo (device_id,
+                               CL_DEVICE_GLOBAL_MEM_SIZE,
+                               sizeof(maxGlobalSize), (void *)&maxGlobalSize,
+                               NULL);
+        if(err)
+        {
+            vlog_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed\n");
+            goto exit;
+        }
         result = result / 2;
-        if (result > BUFFER_SIZE)
-            result = BUFFER_SIZE;
-        log_info("Using const buffer size 0x%lx (%lu)\n", (unsigned long)result, (unsigned long)result);
+        if(maxGlobalSize < result * 10)
+            result = result / 10;
         s_initialized = 1;
         s_device_id = device_id;
         s_result = result;
diff --git a/test_conformance/half/cl_utils.h b/test_conformance/half/cl_utils.h
index 6299d3de..aba31e2f 100644
--- a/test_conformance/half/cl_utils.h
+++ b/test_conformance/half/cl_utils.h
@@ -71,11 +71,13 @@ extern size_t          gWorkGroupSize;
 extern int             gFailCount;
 extern int             gTestDouble;
 extern int             gReportTimes;
+extern size_t          gBufferSize;
 
 // gWimpyMode indicates if we run the test in wimpy mode where we limit the
 // size of 32 bit ranges to a much smaller set.  This is meant to be used
 // as a smoke test
 extern bool            gWimpyMode;
+extern int             gWimpyReductionFactor;
 
 uint64_t ReadTime( void );
 double SubtractTime( uint64_t endTime, uint64_t startTime );
diff --git a/test_conformance/half/main.c b/test_conformance/half/main.c
index 21b2226f..a09639c3 100644
--- a/test_conformance/half/main.c
+++ b/test_conformance/half/main.c
@@ -28,6 +28,7 @@
 
 #include "../../test_common/harness/testHarness.h"
 #include "../../test_common/harness/mingw_compat.h"
+#include "../../test_common/harness/parseParameters.h"
 #if defined (__MINGW32__)
 #include <sys/param.h>
 #endif
@@ -260,7 +261,9 @@ static int ParseArgs( int argc, const char **argv )
                     case 'w':  // Wimpy mode
                         gWimpyMode = true;
                         break;
-
+                    case '[':
+                        parseWimpyReductionFactor( arg, gWimpyReductionFactor);
+                        break;
                     default:
                         vlog_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
                         PrintUsage();
@@ -302,6 +305,7 @@ static int ParseArgs( int argc, const char **argv )
         vlog( "*** WARNING: Testing in Wimpy mode!                     ***\n" );
         vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
         vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
+        vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
     }
     return 0;
 }
@@ -312,6 +316,7 @@ static void PrintUsage( void )
     vlog( "\t\t-d\tToggle double precision testing (default: on if double supported)\n" );
     vlog( "\t\t-t\tToggle reporting performance data.\n" );
     vlog( "\t\t-w\tRun in wimpy mode\n" );
+    vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor);
     vlog( "\t\t-h\tHelp\n" );
     for( int i = 0; i < num_fns; i++ )
     {
@@ -334,6 +339,8 @@ static void PrintArch( void )
     vlog( "ARCH:\tx86_64\n" );
 #elif defined( __arm__ )
     vlog( "ARCH:\tarm\n" );
+#elif defined( __aarch64__ )
+    vlog( "\tARCH:\taarch64\n" );
 #else
 #error unknown arch
 #endif
diff --git a/test_conformance/headers/CMakeLists.txt b/test_conformance/headers/CMakeLists.txt
index 1fdccaa6..82e2d397 100644
--- a/test_conformance/headers/CMakeLists.txt
+++ b/test_conformance/headers/CMakeLists.txt
@@ -3,6 +3,7 @@ set(HEADERS_SOURCES
     ../../test_common/harness/errorHelpers.c
     ../../test_common/harness/kernelHelpers.c
     ../../test_common/harness/testHarness.c
+    ../../test_common/harness/msvc9.c
 )
 
 set_source_files_properties(${HEADERS_SOURCES} PROPERTIES LANGUAGE CXX)
@@ -24,6 +25,8 @@ set(CL_H_SOURCES
     test_cl.h.c
 )
 
+set_source_files_properties(${CL_H_SOURCES} PROPERTIES LANGUAGE CXX)
+
 set(CL_H_OUT ${CONFORMANCE_PREFIX}cl_h${CONFORMANCE_SUFFIX})
 
 add_executable(
@@ -41,6 +44,8 @@ set(CL_PLATFORM_H_SOURCES
     test_cl_platform.h.c
 )
 
+set_source_files_properties(${CL_PLATFORM_H_SOURCES} PROPERTIES LANGUAGE CXX)
+
 set(CL_PLATFORM_H_OUT ${CONFORMANCE_PREFIX}cl_platform_h${CONFORMANCE_SUFFIX})
 
 add_executable(
@@ -58,6 +63,8 @@ set(CL_GL_H_SOURCES
     test_cl_gl.h.c
 )
 
+set_source_files_properties(${CL_GL_H_SOURCES} PROPERTIES LANGUAGE CXX)
+
 set(CL_GL_H_OUT ${CONFORMANCE_PREFIX}cl_gl_h${CONFORMANCE_SUFFIX})
 
 add_executable(
@@ -75,6 +82,8 @@ set(OPENCL_H_SOURCES
     test_opencl.h.c
 )
 
+set_source_files_properties(${OPENCL_H_SOURCES} PROPERTIES LANGUAGE CXX)
+
 set(OPENCL_H_OUT ${CONFORMANCE_PREFIX}opencl_h${CONFORMANCE_SUFFIX})
 
 add_executable(
@@ -88,5 +97,4 @@ TARGET_LINK_LIBRARIES(${OPENCL_H_OUT} ${CLConform_LIBRARIES})
 
 ########################################################################################
 
-
 # end of file #
diff --git a/test_conformance/images/clFillImage/test_fill_2D_array.cpp b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
index 2ec35f6b..d88a49f0 100644
--- a/test_conformance/images/clFillImage/test_fill_2D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
@@ -152,9 +152,27 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
 
             imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding);
 
-            log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
-            if ( gDebugTrace )
-                log_info( "   at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            cl_ulong size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
+
+            while (size > maxAllocSize || (size * 3) > memSize) {
+                if (imageInfo.arraySize == 1) {
+                    // arraySize cannot be 0.
+                    break;
+                }
+                imageInfo.arraySize--;
+                size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
+            }
+
+            while (size > maxAllocSize || (size * 3) > memSize) {
+                imageInfo.height--;
+                imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
+                size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
+            }
+
+            log_info( "Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize);
+
             if ( test_fill_image_2D_array( device, &imageInfo, outputType, seed ) )
                 return -1;
         }
diff --git a/test_conformance/images/clGetInfo/main.cpp b/test_conformance/images/clGetInfo/main.cpp
index df3a03e1..3deeed49 100644
--- a/test_conformance/images/clGetInfo/main.cpp
+++ b/test_conformance/images/clGetInfo/main.cpp
@@ -57,11 +57,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i
 
     return test_image_set( device, CL_MEM_OBJECT_IMAGE3D );
 }
-int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY );
 }
-int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY );
 }
@@ -70,16 +70,16 @@ basefn basefn_list[] = {
     test_1D,
     test_2D,
     test_3D,
-    test_1DArray,
-    test_2DArray,
+    test_1Darray,
+    test_2Darray,
 };
 
 const char *basefn_names[] = {
     "1D",
     "2D",
     "3D",
-    "1DArray",
-    "2DArray",
+    "1Darray",
+    "2Darray",
 };
 
 ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
diff --git a/test_conformance/images/clReadWriteImage/main.cpp b/test_conformance/images/clReadWriteImage/main.cpp
index 260ba791..cfea18e2 100644
--- a/test_conformance/images/clReadWriteImage/main.cpp
+++ b/test_conformance/images/clReadWriteImage/main.cpp
@@ -52,11 +52,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i
 {
     return test_image_set( device, CL_MEM_OBJECT_IMAGE3D );
 }
-int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY );
 }
-int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY );
 }
@@ -65,16 +65,16 @@ basefn basefn_list[] = {
     test_1D,
     test_2D,
     test_3D,
-    test_1DArray,
-    test_2DArray,
+    test_1Darray,
+    test_2Darray,
 };
 
 const char *basefn_names[] = {
     "1D",
     "2D",
     "3D",
-    "1DArray",
-    "2DArray",
+    "1Darray",
+    "2Darray",
 };
 
 ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
diff --git a/test_conformance/images/kernel_image_methods/main.cpp b/test_conformance/images/kernel_image_methods/main.cpp
index 22cdd698..e10da63d 100644
--- a/test_conformance/images/kernel_image_methods/main.cpp
+++ b/test_conformance/images/kernel_image_methods/main.cpp
@@ -52,11 +52,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i
 {
     return test_image_set( device, CL_MEM_OBJECT_IMAGE3D );
 }
-int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY );
 }
-int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY );
 }
@@ -65,16 +65,16 @@ basefn basefn_list[] = {
     test_1D,
     test_2D,
     test_3D,
-    test_1DArray,
-    test_2DArray,
+    test_1Darray,
+    test_2Darray,
 };
 
 const char *basefn_names[] = {
     "1D",
     "2D",
     "3D",
-    "1DArray",
-    "2DArray",
+    "1Darray",
+    "2Darray",
 };
 
 ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
diff --git a/test_conformance/images/samplerlessReads/main.cpp b/test_conformance/images/samplerlessReads/main.cpp
index 587e612d..f14d4e1f 100644
--- a/test_conformance/images/samplerlessReads/main.cpp
+++ b/test_conformance/images/samplerlessReads/main.cpp
@@ -65,11 +65,11 @@ int test_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, i
 {
     return test_image_set( device, CL_MEM_OBJECT_IMAGE3D );
 }
-int test_1DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_1Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY );
 }
-int test_2DArray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_2Darray(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY );
 }
@@ -78,16 +78,16 @@ basefn basefn_list[] = {
     test_1D,
     test_2D,
     test_3D,
-    test_1DArray,
-    test_2DArray,
+    test_1Darray,
+    test_2Darray,
 };
 
 const char *basefn_names[] = {
     "1D",
     "2D",
     "3D",
-    "1DArray",
-    "2DArray",
+    "1Darray",
+    "2Darray",
 };
 
 ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt
index 2f4900c2..9eb2f8ae 100644
--- a/test_conformance/math_brute_force/CMakeLists.txt
+++ b/test_conformance/math_brute_force/CMakeLists.txt
@@ -23,6 +23,10 @@ set(${MODULE_NAME}_SOURCES
     ../../test_common/harness/ThreadPool.c
     ../../test_common/harness/mt19937.c
     ../../test_common/harness/msvc9.c
+    ../../test_common/harness/kernelHelpers.c
+    ../../test_common/harness/errorHelpers.c
+    ../../test_common/harness/testHarness.c
+    ../../test_common/harness/parseParameters.cpp
 )
 
 
@@ -46,26 +50,7 @@ set_source_files_properties(
 endif(NOT ANDROID)
 
 set_source_files_properties(
-    FunctionList.c
-    Sleep.c
-    binary.c
-    binaryOperator.c
-    Utility.c
-    binary_i.c
-    binary_two_results_i.c
-    i_unary.c
-    macro_binary.c
-    macro_unary.c
-    mad.c
-    main.c
-    reference_math.c
-    ternary.c
-    unary.c
-    unary_two_results.c
-    unary_two_results_i.c unary_u.c
-    ../../test_common/harness/rounding_mode.c
-    ../../test_common/harness/ThreadPool.c
-    ../../test_common/harness/msvc9.c
+    ${MODULE_NAME}_SOURCES
     PROPERTIES LANGUAGE CXX)
 
 if(CMAKE_COMPILER_IS_GNUCC)
diff --git a/test_conformance/math_brute_force/FunctionList.h b/test_conformance/math_brute_force/FunctionList.h
index 346654fb..b5ddb715 100644
--- a/test_conformance/math_brute_force/FunctionList.h
+++ b/test_conformance/math_brute_force/FunctionList.h
@@ -85,7 +85,7 @@ typedef struct Func
   float           relaxed_error;
   int             ftz;
   int             relaxed;
-  const ::vtbl    *vtbl;
+  const vtbl      *vtbl_ptr;
 }Func;
 
 
diff --git a/test_conformance/math_brute_force/Utility.h b/test_conformance/math_brute_force/Utility.h
index 13651037..0dd1826c 100644
--- a/test_conformance/math_brute_force/Utility.h
+++ b/test_conformance/math_brute_force/Utility.h
@@ -26,11 +26,7 @@
 #include <stdio.h>
 #include "../../test_common/harness/rounding_mode.h"
 #include "../../test_common/harness/fpcontrol.h"
-
-#if defined( _WIN32) && defined (_MSC_VER)
 #include "../../test_common/harness/testHarness.h"
-#endif
-
 #include "../../test_common/harness/ThreadPool.h"
 #define BUFFER_SIZE         (1024*1024*2)
 
@@ -112,7 +108,7 @@ extern "C" {
 float Abs_Error( float test, double reference );
 float Ulp_Error( float test, double reference );
 //float Ulp_Error_Half( float test, double reference );
-float Ulp_Error_Double( double test, long double reference );
+float Bruteforce_Ulp_Error_Double( double test, long double reference );
 #ifdef __cplusplus
 } //extern "C"
 #endif
diff --git a/test_conformance/math_brute_force/binary.c b/test_conformance/math_brute_force/binary.c
index 5800aaf5..7bf136ff 100644
--- a/test_conformance/math_brute_force/binary.c
+++ b/test_conformance/math_brute_force/binary.c
@@ -233,6 +233,7 @@ typedef struct TestInfo
     cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
     ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
     cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
     cl_uint     step;                               // step between each chunk and the next.
     cl_uint     scale;                              // stride between individual test values
     float       ulps;                               // max_allowed ulps
@@ -268,6 +269,16 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
         test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
     test_info.f = f;
     test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
     test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -341,7 +352,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
     // Run the kernels
     if( !gSkipCorrectnessTesting )
     {
-        error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
 
         // Accumulate the arithmetic errors
         for( i = 0; i < test_info.threadCount; i++ )
@@ -991,6 +1002,16 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
     test_info.f = f;
     test_info.ulps = f->double_ulps;
     test_info.ftz = f->ftz || gForceFTZ;
@@ -1063,7 +1084,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
 
     if( !gSkipCorrectnessTesting )
     {
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
 
         // Accumulate the arithmetic errors
         for( i = 0; i < test_info.threadCount; i++ )
@@ -1359,7 +1380,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
             {
                 cl_double test = ((cl_double*) q)[j];
                 long double correct = func.f_ff( s[j], s2[j] );
-                float err = Ulp_Error_Double( test, correct );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
                 int fail = ! (fabsf(err) <= ulps);
 
                 if( fail && ftz )
@@ -1399,8 +1420,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                         {
                             long double correct2 = func.f_ff( 0.0, s2[j] );
                             long double correct3 = func.f_ff( -0.0, s2[j] );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                             fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                             if( fabsf( err2 ) < fabsf(err ) )
                                 err = err2;
@@ -1422,10 +1443,10 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                                 correct3 = func.f_ff( -0.0, 0.0 );
                                 long double correct4 = func.f_ff( 0.0, -0.0 );
                                 long double correct5 = func.f_ff( -0.0, -0.0 );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                 fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
                                                  (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
                                 if( fabsf( err2 ) < fabsf(err ) )
@@ -1451,8 +1472,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                         {
                             long double correct2 = func.f_ff( s[j], 0.0 );
                             long double correct3 = func.f_ff( s[j], -0.0 );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                             fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                             if( fabsf( err2 ) < fabsf(err ) )
                                 err = err2;
diff --git a/test_conformance/math_brute_force/binaryOperator.c b/test_conformance/math_brute_force/binaryOperator.c
index 718afd3d..09946cbe 100644
--- a/test_conformance/math_brute_force/binaryOperator.c
+++ b/test_conformance/math_brute_force/binaryOperator.c
@@ -207,6 +207,7 @@ typedef struct TestInfo
     cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
     ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
     cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
     cl_uint     step;                               // step between each chunk and the next.
     cl_uint     scale;                              // stride between individual test values
     float       ulps;                               // max_allowed ulps
@@ -260,6 +261,16 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
     }
 
     test_info.step = test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
     test_info.f = f;
     test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
     test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -329,7 +340,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
 
     if( !gSkipCorrectnessTesting )
     {
-        error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
 
         // Accumulate the arithmetic errors
         for( i = 0; i < test_info.threadCount; i++ )
@@ -501,63 +512,51 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
     int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
     int indx = (totalSpecialValueCount - 1) / buffer_elements;
 
-    if( job_id <= (cl_uint)indx )
-    { // test edge cases
-        float *fp = (float *)p;
-        float *fp2 = (float *)p2;
+
+    if( job_id <= (cl_uint)indx ) {
+        // Insert special values
         uint32_t x, y;
 
-    x = (job_id * buffer_elements) % specialValuesFloatCount;
-    y = (job_id * buffer_elements) / specialValuesFloatCount;
+        x = (job_id * buffer_elements) % specialValuesFloatCount;
+        y = (job_id * buffer_elements) / specialValuesFloatCount;
 
-        for( ; j < buffer_elements; j++ )
-        {
-            fp[j] = specialValuesFloat[x];
-            fp2[j] = specialValuesFloat[y];
-            if( ++x >= specialValuesFloatCount )
-            {
+        for( ; j < buffer_elements; j++ ) {
+            p[j] = ((cl_uint *)specialValuesFloat)[x];
+            p2[j] = ((cl_uint *)specialValuesFloat)[y];
+            ++x;
+            if (x >= specialValuesFloatCount) {
                 x = 0;
                 y++;
-                if( y >= specialValuesFloatCount )
+                if (y >= specialValuesFloatCount)
                     break;
             }
-            if(gTestFastRelaxed && strcmp(name,"divide") == 0 )
-            {
-              float fpj = *(float*)&fp[j];
-              float fpj2 = *(float*)&fp2[j];
-              if(fabs(fpj) > 0x5E800000 ) //[2^-62,2^62]
-              {
-                fp[j] = NAN;
-              }
-              if( fabs(fpj2) > 0x5E800000 ) //[2^-62,2^62]
-              {
-                fp2[j] = NAN;
-              }
+            if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
+                cl_uint pj = p[j] & 0x7fffffff;
+                cl_uint p2j = p2[j] & 0x7fffffff;
+                // Replace values outside [2^-62, 2^62] with QNaN
+                if (pj < 0x20800000 || pj > 0x5e800000)
+                    p[j] = 0x7fc00000;
+                if (p2j < 0x20800000 || p2j > 0x5e800000)
+                    p2[j] = 0x7fc00000;
+            }
         }
     }
-    }
 
-    //Init any remaining values.
+    // Init any remaining values.
     for( ; j < buffer_elements; j++ )
     {
         p[j] = genrand_int32(d);
         p2[j] = genrand_int32(d);
 
-        if(gTestFastRelaxed)
-        {
-          if( strcmp(name,"divide")==0){
-            float pj = *(float*)&p[j];
-            float pj2 = *(float*)&p2[j];
-            if(fabs(pj) > 0x5E800000 ) //[2^-62,2^62]
-            {
-              p[j] = NAN;
-            }
-            if( fabs(pj2) > 0x5E800000 ) //[2^-62,2^62]
-            {
-              p2[j] = NAN;
-            }
-          }
-    }
+        if (gTestFastRelaxed && strcmp(name,"divide") == 0) {
+            cl_uint pj = p[j] & 0x7fffffff;
+            cl_uint p2j = p2[j] & 0x7fffffff;
+            // Replace values outside [2^-62, 2^62] with QNaN
+            if (pj < 0x20800000 || pj > 0x5e800000)
+                p[j] = 0x7fc00000;
+            if (p2j < 0x20800000 || p2j > 0x5e800000)
+                p2[j] = 0x7fc00000;
+        }
     }
 
     if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
@@ -950,6 +949,16 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
     }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
     test_info.f = f;
     test_info.ulps = f->double_ulps;
     test_info.ftz = f->ftz || gForceFTZ;
@@ -1020,7 +1029,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
 
     if( !gSkipCorrectnessTesting )
     {
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
 
         // Accumulate the arithmetic errors
         for( i = 0; i < test_info.threadCount; i++ )
@@ -1315,7 +1324,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
             {
                 cl_double test = ((cl_double*) q)[j];
                 long double correct = func.f_ff( s[j], s2[j] );
-                float err = Ulp_Error_Double( test, correct );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
                 int fail = ! (fabsf(err) <= ulps);
 
                 if( fail && ftz )
@@ -1334,8 +1343,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                     {
                         long double correct2 = func.f_ff( 0.0, s2[j] );
                         long double correct3 = func.f_ff( -0.0, s2[j] );
-                        float err2 = Ulp_Error_Double( test, correct2  );
-                        float err3 = Ulp_Error_Double( test, correct3  );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                         fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                         if( fabsf( err2 ) < fabsf(err ) )
                             err = err2;
@@ -1357,10 +1366,10 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                             correct3 = func.f_ff( -0.0, 0.0 );
                             long double correct4 = func.f_ff( 0.0, -0.0 );
                             long double correct5 = func.f_ff( -0.0, -0.0 );
-                            err2 = Ulp_Error_Double( test, correct2  );
-                            err3 = Ulp_Error_Double( test, correct3  );
-                            float err4 = Ulp_Error_Double( test, correct4  );
-                            float err5 = Ulp_Error_Double( test, correct5  );
+                            err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                            float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                             fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) &&
                                              (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps)));
                             if( fabsf( err2 ) < fabsf(err ) )
@@ -1386,8 +1395,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                     {
                         long double correct2 = func.f_ff( s[j], 0.0 );
                         long double correct3 = func.f_ff( s[j], -0.0 );
-                        float err2 = Ulp_Error_Double( test, correct2  );
-                        float err3 = Ulp_Error_Double( test, correct3  );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                         fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                         if( fabsf( err2 ) < fabsf(err ) )
                             err = err2;
diff --git a/test_conformance/math_brute_force/binary_i.c b/test_conformance/math_brute_force/binary_i.c
index b72d117f..314d65dd 100644
--- a/test_conformance/math_brute_force/binary_i.c
+++ b/test_conformance/math_brute_force/binary_i.c
@@ -230,6 +230,7 @@ typedef struct TestInfo
     cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
     ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
     cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
     cl_uint     step;                               // step between each chunk and the next.
     cl_uint     scale;                              // stride between individual test values
     float       ulps;                               // max_allowed ulps
@@ -262,6 +263,16 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
         test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
     test_info.f = f;
     test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
     test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -330,7 +341,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
     }
 
     // Run the kernels
-    error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+    error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
 
 
     // Accumulate the arithmetic errors
@@ -758,6 +769,16 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
     test_info.f = f;
     test_info.ulps = f->double_ulps;
     test_info.ftz = f->ftz || gForceFTZ;
@@ -831,7 +852,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
 
     // Run the kernels
     if( !gSkipCorrectnessTesting )
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
 
 
     // Accumulate the arithmetic errors
@@ -1128,7 +1149,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
             {
                 cl_double test = ((cl_double*) q)[j];
                 long double correct = func.f_fi( s[j], s2[j] );
-                float err = Ulp_Error_Double( test, correct );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
                 int fail = ! (fabsf(err) <= ulps);
 
                 if( fail && ftz )
@@ -1146,8 +1167,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                     {
                         long double correct2 = func.f_fi( 0.0, s2[j] );
                         long double correct3 = func.f_fi( -0.0, s2[j] );
-                        float err2 = Ulp_Error_Double( test, correct2  );
-                        float err3 = Ulp_Error_Double( test, correct3  );
+                        float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                        float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                         fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                         if( fabsf( err2 ) < fabsf(err ) )
                             err = err2;
diff --git a/test_conformance/math_brute_force/binary_two_results_i.c b/test_conformance/math_brute_force/binary_two_results_i.c
index ab06c450..b1e80ede 100644
--- a/test_conformance/math_brute_force/binary_two_results_i.c
+++ b/test_conformance/math_brute_force/binary_two_results_i.c
@@ -871,7 +871,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
                     double test = ((double*) q)[j];
                     int correct2 = INT_MIN;
                     long double correct = f->dfunc.f_ffpI( s[j], s2[j], &correct2 );
-                    float err = Ulp_Error_Double( test, correct );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
                     int64_t iErr;
 
                     // in case of remquo, we only care about the sign and last seven bits of
@@ -907,8 +907,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
                             int correct3i, correct4i;
                             long double correct3 = f->dfunc.f_ffpI( 0.0, s2[j], &correct3i );
                             long double correct4 = f->dfunc.f_ffpI( -0.0, s2[j], &correct4i );
-                            float err2 = Ulp_Error_Double( test, correct3  );
-                            float err3 = Ulp_Error_Double( test, correct4  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
                             int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
                             int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
                             fail =  fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));
@@ -937,10 +937,10 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
                                 correct4 = f->dfunc.f_ffpI( -0.0, 0.0, &correct4i );
                                 long double correct7 = f->dfunc.f_ffpI( 0.0, -0.0, &correct7i );
                                 long double correct8 = f->dfunc.f_ffpI( -0.0, -0.0, &correct8i );
-                                err2 = Ulp_Error_Double( test, correct3  );
-                                err3 = Ulp_Error_Double( test, correct4  );
-                                float err4 = Ulp_Error_Double( test, correct7  );
-                                float err5 = Ulp_Error_Double( test, correct8  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct8  );
                                 iErr3 = (long long) q2[j] - (long long) correct3i;
                                 iErr4 = (long long) q2[j] - (long long) correct4i;
                                 int64_t iErr7 = (long long) q2[j] - (long long) correct7i;
@@ -979,8 +979,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d)
                             int correct3i, correct4i;
                             long double correct3 = f->dfunc.f_ffpI( s[j], 0.0, &correct3i );
                             long double correct4 = f->dfunc.f_ffpI( s[j], -0.0, &correct4i );
-                            float err2 = Ulp_Error_Double( test, correct3  );
-                            float err3 = Ulp_Error_Double( test, correct4  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
                             int64_t iErr3 = (long long) q2[j] - (long long) correct3i;
                             int64_t iErr4 = (long long) q2[j] - (long long) correct4i;
                             fail =  fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)));
diff --git a/test_conformance/math_brute_force/macro_binary.c b/test_conformance/math_brute_force/macro_binary.c
index e489c598..b65948ee 100644
--- a/test_conformance/math_brute_force/macro_binary.c
+++ b/test_conformance/math_brute_force/macro_binary.c
@@ -222,6 +222,7 @@ typedef struct TestInfo
     cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
     ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
     cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
     cl_uint     step;                               // step between each chunk and the next.
     cl_uint     scale;                              // stride between individual test values
     int         ftz;                                // non-zero if running in flush to zero mode
@@ -249,6 +250,16 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
         test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
     test_info.f = f;
     test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
 
@@ -319,7 +330,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
     // Run the kernels
     if( !gSkipCorrectnessTesting )
     {
-        error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
 
         if( error )
             goto exit;
@@ -749,6 +760,16 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
     }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
     test_info.f = f;
     test_info.ftz = f->ftz || gForceFTZ;
 
@@ -820,7 +841,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
 
     if( !gSkipCorrectnessTesting )
     {
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
 
         if( error )
             goto exit;
diff --git a/test_conformance/math_brute_force/macro_unary.c b/test_conformance/math_brute_force/macro_unary.c
index 74e1dece..95af882a 100644
--- a/test_conformance/math_brute_force/macro_unary.c
+++ b/test_conformance/math_brute_force/macro_unary.c
@@ -193,6 +193,7 @@ typedef struct TestInfo
     cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
     ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
     cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
     cl_uint     step;                               // step between each chunk and the next.
     cl_uint     scale;                              // stride between individual test values
     int         ftz;                                // non-zero if running in flush to zero mode
@@ -220,6 +221,16 @@ int TestMacro_Int_Float(const Func *f, MTdata d)
         test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
     test_info.f = f;
     test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
     // cl_kernels aren't thread safe, so we make one for each vector size for every thread
@@ -279,7 +290,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d)
 
     if( !gSkipCorrectnessTesting )
     {
-        error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
 
         if( error )
             goto exit;
@@ -602,6 +613,16 @@ int TestMacro_Int_Double(const Func *f, MTdata d)
     }
 
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
     test_info.f = f;
     test_info.ftz = f->ftz || gForceFTZ;
 
@@ -664,7 +685,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d)
 
     if( !gSkipCorrectnessTesting )
     {
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
 
         if( error )
             goto exit;
diff --git a/test_conformance/math_brute_force/mad.c b/test_conformance/math_brute_force/mad.c
index 844da097..652ab360 100644
--- a/test_conformance/math_brute_force/mad.c
+++ b/test_conformance/math_brute_force/mad.c
@@ -785,7 +785,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                 {
                     double test = ((double*) q)[j];
                     long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
-                    float err = Ulp_Error_Double( test, correct );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
                     int fail = ! (fabsf(err) <= f->double_ulps);
 
                     if( fail && ftz )
@@ -803,8 +803,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                         { // look at me,
                             long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
                             long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                             fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                             if( fabsf( err2 ) < fabsf(err ) )
                                 err = err2;
@@ -826,10 +826,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                                 correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
                                 long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
                                 long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                 fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                  (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                 if( fabsf( err2 ) < fabsf(err ) )
@@ -860,14 +860,14 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                                     long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
                                     long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
                                     long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
-                                    err2 = Ulp_Error_Double( test, correct2  );
-                                    err3 = Ulp_Error_Double( test, correct3  );
-                                    err4 = Ulp_Error_Double( test, correct4  );
-                                    err5 = Ulp_Error_Double( test, correct5  );
-                                    float err6 = Ulp_Error_Double( test, correct6  );
-                                    float err7 = Ulp_Error_Double( test, correct7  );
-                                    float err8 = Ulp_Error_Double( test, correct8  );
-                                    float err9 = Ulp_Error_Double( test, correct9  );
+                                    err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                    err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                    err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                    err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                    float err6 = Bruteforce_Ulp_Error_Double( test, correct6  );
+                                    float err7 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                    float err8 = Bruteforce_Ulp_Error_Double( test, correct8  );
+                                    float err9 = Bruteforce_Ulp_Error_Double( test, correct9  );
                                     fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                      (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
                                                      (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) &&
@@ -907,10 +907,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                                 correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
                                 long double correct4 = f->dfunc.f_fff( 0.0,  s2[j], -0.0 );
                                 long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                 fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                  (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                 if( fabsf( err2 ) < fabsf(err ) )
@@ -936,8 +936,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                         {
                             long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
                             long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                             fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                             if( fabsf( err2 ) < fabsf(err ) )
                                 err = err2;
@@ -959,10 +959,10 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                                 correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
                                 long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
                                 long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                 fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                  (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                 if( fabsf( err2 ) < fabsf(err ) )
@@ -988,8 +988,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d)
                         {
                             long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
                             long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                             fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                             if( fabsf( err2 ) < fabsf(err ) )
                                 err = err2;
diff --git a/test_conformance/math_brute_force/main.c b/test_conformance/math_brute_force/main.c
index 6e34071b..92600e9a 100644
--- a/test_conformance/math_brute_force/main.c
+++ b/test_conformance/math_brute_force/main.c
@@ -13,6 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
+#include "../../test_common/harness/parseParameters.h"
 #include "Utility.h"
 
 #include <stdio.h>
@@ -39,6 +40,8 @@
 #include <sys/param.h>
 #endif
 
+#include "../../test_common/harness/testHarness.h"
+
 #define kPageSize           4096
 #define DOUBLE_REQUIRED_FEATURES    ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM  )
 
@@ -51,10 +54,11 @@ cl_context      gContext = NULL;
 cl_command_queue gQueue = NULL;
 int             gTestCount = 0;
 int             gFailCount = 0;
-int32_t         gStartTestNumber = -1;
-int32_t         gEndTestNumber = -1;
+static int32_t  gStartTestNumber;
+static int32_t  gEndTestNumber;
 int             gSkipCorrectnessTesting = 0;
 int             gStopOnError = 0;
+static bool     gSkipRestOfTests;
 #if defined( __APPLE__ )
 int             gMeasureTimes = 1;
 #else
@@ -78,8 +82,6 @@ int             gDeviceILogb0 = 1;
 int             gDeviceILogbNaN = 1;
 int             gCheckTininessBeforeRounding = 1;
 int             gIsInRTZMode = 0;
-int             gInfNanSupport = 1;
-int             gIsEmbedded = 0;
 uint32_t        gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
 uint32_t        gMinVectorSizeIndex = 0;
 const char      *method[] = { "Best", "Average" };
@@ -100,7 +102,7 @@ uint32_t        gSimdSize = 1;
 uint32_t        gDeviceFrequency = 0;
 cl_uint         chosen_device_index = 0;
 cl_uint         chosen_platform_index = 0;
-cl_uint         gRandomSeed = 0;
+static MTdata   gMTdata;
 cl_device_fp_config gFloatCapabilities = 0;
 cl_device_fp_config gDoubleCapabilities = 0;
 int             gWimpyReductionFactor = 32;
@@ -136,11 +138,786 @@ static int IsTininessDetectedBeforeRounding( void );
 static int IsInRTZMode( void );         //expensive. Please check gIsInRTZMode global instead.
 static void TestFinishAtExit(void);
 
+
+int doTest( const char* name )
+{
+    if( gSkipRestOfTests )
+    {
+        vlog( "Skipping function because of an earlier error.\n" );
+        return 1;
+    }
+
+    int error = 0;
+    const Func* func_data = NULL;
+
+    for( size_t i = 0; i < functionListCount; i++ )
+    {
+        const Func* const temp_func = functionList + i;
+        if( strcmp( temp_func->name, name ) == 0 )
+        {
+            if( i < gStartTestNumber || i > gEndTestNumber )
+            {
+                vlog( "Skipping function #%d\n", i );
+                return 0;
+            }
+
+            func_data = temp_func;
+            break;
+        }
+    }
+
+    if( func_data == NULL )
+    {
+        vlog( "Function '%s' doesn't exist!\n", name );
+        exit( EXIT_FAILURE );
+    }
+
+    if( func_data->func.p == NULL )
+    {
+        vlog( "'%s' is missing implementation, skipping function.\n", func_data->name );
+        return 0;
+    }
+
+    // if correctly rounded divide & sqrt are supported by the implementation
+    // then test it; otherwise skip the test
+    if( strcmp( func_data->name, "sqrt_cr" ) == 0 || strcmp( func_data->name, "divide_cr" ) == 0 )
+    {
+        if( ( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) == 0 )
+        {
+            vlog( "Correctly rounded divide and sqrt are not supported, skipping function.\n" );
+            return 0;
+        }
+    }
+
+    {
+        extern int my_ilogb(double);
+        if( 0 == strcmp( "ilogb", func_data->name ) )
+        {
+            InitILogbConstants();
+        }
+
+        if ( gTestFastRelaxed )
+        {
+            if( func_data->relaxed )
+            {
+                gTestCount++;
+                vlog( "%3d: ", gTestCount );
+                if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata )  )
+                {
+                    gFailCount++;
+                    error++;
+                    if( gStopOnError )
+                    {
+                        gSkipRestOfTests = true;
+                        return error;
+                    }
+                }
+            }
+        }
+
+        if( gTestFloat )
+        {
+            int testFastRelaxedTmp = gTestFastRelaxed;
+            gTestFastRelaxed = 0;
+
+            gTestCount++;
+            vlog( "%3d: ", gTestCount );
+            if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata )  )
+            {
+                gFailCount++;
+                error++;
+                if( gStopOnError )
+                {
+                    gTestFastRelaxed = testFastRelaxedTmp;
+                    gSkipRestOfTests = true;
+                    return error;
+                }
+            }
+            gTestFastRelaxed = testFastRelaxedTmp;
+        }
+
+        if( gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc && NULL != func_data->dfunc.p )
+        {
+            //Disable fast-relaxed-math for double precision floating-point
+            int testFastRelaxedTmp = gTestFastRelaxed;
+            gTestFastRelaxed = 0;
+
+            gTestCount++;
+            vlog( "%3d: ", gTestCount );
+            if( func_data->vtbl_ptr->DoubleTestFunc( func_data, gMTdata )  )
+            {
+                gFailCount++;
+                error++;
+                if( gStopOnError )
+                {
+                    gTestFastRelaxed = testFastRelaxedTmp;
+                    gSkipRestOfTests = true;
+                    return error;
+                }
+            }
+
+            //Re-enable testing fast-relaxed-math mode
+            gTestFastRelaxed = testFastRelaxedTmp;
+        }
+
+#if defined( __APPLE__ )
+        {
+            if( gHasBasicDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc && NULL != func_data->dfunc.p)
+            {
+                //Disable fast-relaxed-math for double precision floating-point
+                int testFastRelaxedTmp = gTestFastRelaxed;
+                gTestFastRelaxed = 0;
+
+                int isBasicTest = 0;
+                for( j = 0; j < gNumBasicDoubleFuncs; j++ ) {
+                    if( 0 == strcmp(gBasicDoubleFuncs[j], func_data->name ) ) {
+                        isBasicTest = 1;
+                        break;
+                    }
+                }
+                if (isBasicTest) {
+                    gTestCount++;
+                    if( gTestFloat )
+                        vlog( "    " );
+                    if( func_data->vtbl->DoubleTestFunc( func_data, gMTdata )  )
+                    {
+                        gFailCount++;
+                        error++;
+                        if( gStopOnError )
+                        {
+                            gTestFastRelaxed = testFastRelaxedTmp;
+                            gSkipRestOfTests = true;
+                            return error;
+                        }
+                    }
+                }
+
+                //Re-enable testing fast-relaxed-math mode
+                gTestFastRelaxed = testFastRelaxedTmp;
+            }
+        }
+#endif
+    }
+
+    return error;
+}
+
+int test_acos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "acos" );
+}
+int test_acosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "acosh" );
+}
+int test_acospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "acospi" );
+}
+int test_asin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "asin" );
+}
+int test_asinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "asinh" );
+}
+int test_asinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "asinpi" );
+}
+int test_atan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "atan" );
+}
+int test_atanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "atanh" );
+}
+int test_atanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "atanpi" );
+}
+int test_atan2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "atan2" );
+}
+int test_atan2pi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "atan2pi" );
+}
+int test_cbrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "cbrt" );
+}
+int test_ceil( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "ceil" );
+}
+int test_copysign( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "copysign" );
+}
+int test_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "cos" );
+}
+int test_cosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "cosh" );
+}
+int test_cospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "cospi" );
+}
+int test_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "exp" );
+}
+int test_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "exp2" );
+}
+int test_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "exp10" );
+}
+int test_expm1( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "expm1" );
+}
+int test_fabs( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fabs" );
+}
+int test_fdim( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fdim" );
+}
+int test_floor( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "floor" );
+}
+int test_fma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fma" );
+}
+int test_fmax( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fmax" );
+}
+int test_fmin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fmin" );
+}
+int test_fmod( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fmod" );
+}
+int test_fract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "fract" );
+}
+int test_frexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "frexp" );
+}
+int test_hypot( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "hypot" );
+}
+int test_ilogb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "ilogb" );
+}
+int test_isequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isequal" );
+}
+int test_isfinite( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isfinite" );
+}
+int test_isgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isgreater" );
+}
+int test_isgreaterequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isgreaterequal" );
+}
+int test_isinf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isinf" );
+}
+int test_isless( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isless" );
+}
+int test_islessequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "islessequal" );
+}
+int test_islessgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "islessgreater" );
+}
+int test_isnan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isnan" );
+}
+int test_isnormal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isnormal" );
+}
+int test_isnotequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isnotequal" );
+}
+int test_isordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isordered" );
+}
+int test_isunordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "isunordered" );
+}
+int test_ldexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "ldexp" );
+}
+int test_lgamma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "lgamma" );
+}
+int test_lgamma_r( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "lgamma_r" );
+}
+int test_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "log" );
+}
+int test_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "log2" );
+}
+int test_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "log10" );
+}
+int test_log1p( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "log1p" );
+}
+int test_logb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "logb" );
+}
+int test_mad( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "mad" );
+}
+int test_maxmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "maxmag" );
+}
+int test_minmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "minmag" );
+}
+int test_modf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "modf" );
+}
+int test_nan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "nan" );
+}
+int test_nextafter( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "nextafter" );
+}
+int test_pow( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "pow" );
+}
+int test_pown( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "pown" );
+}
+int test_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "powr" );
+}
+int test_remainder( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "remainder" );
+}
+int test_remquo( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "remquo" );
+}
+int test_rint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "rint" );
+}
+int test_rootn( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "rootn" );
+}
+int test_round( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "round" );
+}
+int test_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "rsqrt" );
+}
+int test_signbit( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "signbit" );
+}
+int test_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sin" );
+}
+int test_sincos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sincos" );
+}
+int test_sinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sinh" );
+}
+int test_sinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sinpi" );
+}
+int test_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sqrt" );
+}
+int test_sqrt_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "sqrt_cr" );
+}
+int test_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "tan" );
+}
+int test_tanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "tanh" );
+}
+int test_tanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "tanpi" );
+}
+int test_trunc( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "trunc" );
+}
+int test_half_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_cos" );
+}
+int test_half_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_divide" );
+}
+int test_half_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_exp" );
+}
+int test_half_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_exp2" );
+}
+int test_half_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_exp10" );
+}
+int test_half_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_log" );
+}
+int test_half_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_log2" );
+}
+int test_half_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_log10" );
+}
+int test_half_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_powr" );
+}
+int test_half_recip( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_recip" );
+}
+int test_half_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_rsqrt" );
+}
+int test_half_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_sin" );
+}
+int test_half_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_sqrt" );
+}
+int test_half_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "half_tan" );
+}
+int test_add( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "add" );
+}
+int test_subtract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "subtract" );
+}
+int test_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "divide" );
+}
+int test_divide_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "divide_cr" );
+}
+int test_multiply( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "multiply" );
+}
+int test_assignment( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "assignment" );
+}
+int test_not( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+{
+    return doTest( "not" );
+}
+
+basefn basefn_list[] = {
+    test_acos,
+    test_acosh,
+    test_acospi,
+    test_asin,
+    test_asinh,
+    test_asinpi,
+    test_atan,
+    test_atanh,
+    test_atanpi,
+    test_atan2,
+    test_atan2pi,
+    test_cbrt,
+    test_ceil,
+    test_copysign,
+    test_cos,
+    test_cosh,
+    test_cospi,
+    test_exp,
+    test_exp2,
+    test_exp10,
+    test_expm1,
+    test_fabs,
+    test_fdim,
+    test_floor,
+    test_fma,
+    test_fmax,
+    test_fmin,
+    test_fmod,
+    test_fract,
+    test_frexp,
+    test_hypot,
+    test_ilogb,
+    test_isequal,
+    test_isfinite,
+    test_isgreater,
+    test_isgreaterequal,
+    test_isinf,
+    test_isless,
+    test_islessequal,
+    test_islessgreater,
+    test_isnan,
+    test_isnormal,
+    test_isnotequal,
+    test_isordered,
+    test_isunordered,
+    test_ldexp,
+    test_lgamma,
+    test_lgamma_r,
+    test_log,
+    test_log2,
+    test_log10,
+    test_log1p,
+    test_logb,
+    test_mad,
+    test_maxmag,
+    test_minmag,
+    test_modf,
+    test_nan,
+    test_nextafter,
+    test_pow,
+    test_pown,
+    test_powr,
+    test_remainder,
+    test_remquo,
+    test_rint,
+    test_rootn,
+    test_round,
+    test_rsqrt,
+    test_signbit,
+    test_sin,
+    test_sincos,
+    test_sinh,
+    test_sinpi,
+    test_sqrt,
+    test_sqrt_cr,
+    test_tan,
+    test_tanh,
+    test_tanpi,
+    test_trunc,
+    test_half_cos,
+    test_half_divide,
+    test_half_exp,
+    test_half_exp2,
+    test_half_exp10,
+    test_half_log,
+    test_half_log2,
+    test_half_log10,
+    test_half_powr,
+    test_half_recip,
+    test_half_rsqrt,
+    test_half_sin,
+    test_half_sqrt,
+    test_half_tan,
+    test_add,
+    test_subtract,
+    test_divide,
+    test_divide_cr,
+    test_multiply,
+    test_assignment,
+    test_not,
+};
+
+const char *basefn_names[] = {
+    "acos",
+    "acosh",
+    "acospi",
+    "asin",
+    "asinh",
+    "asinpi",
+    "atan",
+    "atanh",
+    "atanpi",
+    "atan2",
+    "atan2pi",
+    "cbrt",
+    "ceil",
+    "copysign",
+    "cos",
+    "cosh",
+    "cospi",
+    "exp",
+    "exp2",
+    "exp10",
+    "expm1",
+    "fabs",
+    "fdim",
+    "floor",
+    "fma",
+    "fmax",
+    "fmin",
+    "fmod",
+    "fract",
+    "frexp",
+    "hypot",
+    "ilogb",
+    "isequal",
+    "isfinite",
+    "isgreater",
+    "isgreaterequal",
+    "isinf",
+    "isless",
+    "islessequal",
+    "islessgreater",
+    "isnan",
+    "isnormal",
+    "isnotequal",
+    "isordered",
+    "isunordered",
+    "ldexp",
+    "lgamma",
+    "lgamma_r",
+    "log",
+    "log2",
+    "log10",
+    "log1p",
+    "logb",
+    "mad",
+    "maxmag",
+    "minmag",
+    "modf",
+    "nan",
+    "nextafter",
+    "pow",
+    "pown",
+    "powr",
+    "remainder",
+    "remquo",
+    "rint",
+    "rootn",
+    "round",
+    "rsqrt",
+    "signbit",
+    "sin",
+    "sincos",
+    "sinh",
+    "sinpi",
+    "sqrt",
+    "sqrt_cr",
+    "tan",
+    "tanh",
+    "tanpi",
+    "trunc",
+    "half_cos",
+    "half_divide",
+    "half_exp",
+    "half_exp2",
+    "half_exp10",
+    "half_log",
+    "half_log2",
+    "half_log10",
+    "half_powr",
+    "half_recip",
+    "half_rsqrt",
+    "half_sin",
+    "half_sqrt",
+    "half_tan",
+    "add",
+    "subtract",
+    "divide",
+    "divide_cr",
+    "multiply",
+    "assignment",
+    "not",
+};
+
+ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
+
+const int num_fns = sizeof(basefn_names) / sizeof(char *);
+
 #pragma mark -
 
 int main (int argc, const char * argv[])
 {
-    unsigned int i, j, error = 0;
+    int error;
 
     test_start();
     atexit(TestFinishAtExit);
@@ -178,7 +955,7 @@ int main (int argc, const char * argv[])
             vlog( "   \t                                        ");
         if( gWimpyMode )
             vlog( "   " );
-        for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
+        for( int i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
             vlog( "\t  float%s", sizeNames[i] );
     }
     else
@@ -192,147 +969,21 @@ int main (int argc, const char * argv[])
 
     vlog( "\n-----------------------------------------------------------------------------------------------------------\n" );
 
-    uint32_t start = 0;
-    if( gStartTestNumber > (int) start )
+    gMTdata = init_genrand( gRandomSeed );
+    if( gEndTestNumber == 0 )
     {
-        vlog( "Skipping to test %d...\n", gStartTestNumber );
-        start = gStartTestNumber;
+        gEndTestNumber = functionListCount;
     }
 
-    uint32_t stop = (uint32_t) functionListCount;
-    MTdata d = init_genrand( gRandomSeed );
-    if( gStartTestNumber <= gEndTestNumber && -1 != gEndTestNumber && (int) functionListCount > gEndTestNumber + 1)
-        stop = gEndTestNumber + 1;
-
     FPU_mode_type oldMode;
     DisableFTZ( &oldMode );
 
-    for( i = start; i < stop; i++ )
-    {
-        const Func *f = functionList + i;
-
-        // If the user passed a list of functions to run, make sure we are in that list
-        if( gTestNameCount )
-        {
-            for( j = 0; j < gTestNameCount; j++ )
-                if( 0 == strcmp(gTestNames[j], f->name ) )
-                    break;
-
-            // If this function doesn't match any on the list skip to the next function
-            if( j == gTestNameCount )
-                continue;
-        }
-
-        // if correctly rounded divide & sqrt are supported by the implementation
-        // then test it; otherwise skip the test
-        if (!strcmp(f->name, "sqrt_cr") || !strcmp(f->name, "divide_cr"))
-        {
-            if(( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) == 0 )
-                continue;
-
-        }
-
-
-        {
-            extern int my_ilogb(double);
-            if( 0 == strcmp( "ilogb", f->name) )
-                InitILogbConstants();
-
-            if ( gTestFastRelaxed )
-            {
-                if( f->relaxed )
-                {
-                    gTestCount++;
-                    vlog( "%3d: ", gTestCount );
-                    if( f->vtbl->TestFunc( f, d )  )
-                    {
-                        gFailCount++;
-                        error++;
-                        if( gStopOnError )
-                            break;
-                    }
-                }
-                }
-
-            if( gTestFloat )
-            {
-                int testFastRelaxedTmp = gTestFastRelaxed;
-                gTestFastRelaxed = 0;
-                gTestCount++;
-                vlog( "%3d: ", gTestCount );
-                if( f->vtbl->TestFunc( f, d )  )
-                {
-                    gFailCount++;
-                    error++;
-                    if( gStopOnError )
-                    {
-                        gTestFastRelaxed = testFastRelaxedTmp;
-                        break;
-                    }
-                }
-                gTestFastRelaxed = testFastRelaxedTmp;
-            }
-
-            if( gHasDouble && NULL != f->vtbl->DoubleTestFunc && NULL != f->dfunc.p )
-            {
-                //Disable fast-relaxed-math for double precision floating-point
-                int testFastRelaxedTmp = gTestFastRelaxed;
-                gTestFastRelaxed = 0;
-
-                gTestCount++;
-                vlog( "%3d: ", gTestCount );
-                if( f->vtbl->DoubleTestFunc( f, d )  )
-                {
-                    gFailCount++;
-                    error++;
-                    if( gStopOnError )
-                        break;
-                }
-
-                //Re-enable testing fast-relaxed-math mode
-                gTestFastRelaxed = testFastRelaxedTmp;
-            }
-
-#if defined( __APPLE__ )
-            {
-                if( gHasBasicDouble && NULL != f->vtbl->DoubleTestFunc && NULL != f->dfunc.p)
-                {
-                    //Disable fast-relaxed-math for double precision floating-point
-                    int testFastRelaxedTmp = gTestFastRelaxed;
-                    gTestFastRelaxed = 0;
-
-                    int isBasicTest = 0;
-                    for( j = 0; j < gNumBasicDoubleFuncs; j++ ) {
-                        if( 0 == strcmp(gBasicDoubleFuncs[j], f->name ) ) {
-                            isBasicTest = 1;
-                            break;
-                        }
-                    }
-                    if (isBasicTest) {
-                        gTestCount++;
-                        if( gTestFloat )
-                            vlog( "    " );
-                        if( f->vtbl->DoubleTestFunc( f, d )  )
-                        {
-                            gFailCount++;
-                            error++;
-                            if( gStopOnError )
-                                break;
-                        }
-                    }
-
-                    //Re-enable testing fast-relaxed-math mode
-                    gTestFastRelaxed = testFastRelaxedTmp;
-                }
-            }
-#endif
-        }
-    }
+    int ret = parseAndCallCommandLineTests( gTestNameCount, gTestNames, NULL, num_fns, basefn_list, basefn_names, true, 0, 0 );
 
     RestoreFPState( &oldMode );
 
-    free_mtdata(d); d = NULL;
-    vlog( "\ndone.\n" );
+    free_mtdata(gMTdata);
+    free(gTestNames);
 
     int error_code = clFinish(gQueue);
     if (error_code)
@@ -341,16 +992,16 @@ int main (int argc, const char * argv[])
     if (gFailCount == 0)
     {
         if (gTestCount > 1)
-            vlog("PASSED %d of %d tests.\n", gTestCount, gTestCount);
+            vlog("PASSED %d of %d sub-tests.\n", gTestCount, gTestCount);
         else
-            vlog("PASSED test.\n");
+            vlog("PASSED sub-test.\n");
     }
     else if (gFailCount > 0)
     {
         if (gTestCount > 1)
-            vlog_error("FAILED %d of %d tests.\n", gFailCount, gTestCount);
+            vlog_error("FAILED %d of %d sub-tests.\n", gFailCount, gTestCount);
         else
-            vlog_error("FAILED test.\n");
+            vlog_error("FAILED sub-test.\n");
     }
 
     ReleaseCL();
@@ -363,23 +1014,22 @@ int main (int argc, const char * argv[])
     vlog( "time: %f s\n", time );
 #endif
 
-
-    if (gFailCount > 0)
-        return -1;
-    return error;
+    return ret;
 }
 
 static int ParseArgs( int argc, const char **argv )
 {
     int i;
     gTestNames = (const char**) calloc( argc - 1, sizeof( char*) );
-    gTestNameCount = 0;
+    if( NULL == gTestNames )
+    {
+        vlog( "Failed to allocate memory for gTestNames array.\n" );
+        return 1;
+    }
+    gTestNames[0] = argv[0];
+    gTestNameCount = 1;
     int singleThreaded = 0;
 
-    // Parse arg list
-    if( NULL == gTestNames && argc > 1 )
-        return -1;
-
     { // Extract the app name
         strncpy( appName, argv[0], MAXPATHLEN );
 
@@ -490,24 +1140,7 @@ static int ParseArgs( int argc, const char **argv )
                         break;
 
                     case '[':
-                        // wimpy reduction factor can be set with the option -[2^n]
-                        // Default factor is 32, and n practically can be from 1 to 10
-                        {
-                            const char *arg_temp = strchr(&arg[1], ']');
-                            if( arg_temp != 0)
-                            {
-                                int new_factor = atoi(&arg[1]);
-                                arg=arg_temp; // Advance until ']'
-                                if(new_factor && !(new_factor & (new_factor - 1)))
-                                {
-                                    vlog( " WimpyReduction factor changed from %d to %d \n",gWimpyReductionFactor, new_factor);
-                                    gWimpyReductionFactor = new_factor;
-                                }else
-                                {
-                                     vlog( " Error in WimpyReduction factor %d, must be power of 2 \n",gWimpyReductionFactor);
-                                }
-                            }
-                        }
+                        parseWimpyReductionFactor(arg, gWimpyReductionFactor);
                         break;
 
                     case 'z':
@@ -573,7 +1206,7 @@ static int ParseArgs( int argc, const char **argv )
             long number = strtol( arg, &t, 0 );
             if( t != arg )
             {
-                if( -1 == gStartTestNumber )
+                if( 0 == gStartTestNumber )
                     gStartTestNumber = (int32_t) number;
                 else
                     gEndTestNumber = gStartTestNumber + (int32_t) number;
@@ -596,19 +1229,18 @@ static int ParseArgs( int argc, const char **argv )
                 if (k >= functionListCount)
                 {
                     //It may be a device type or rundomize parameter
-                  if( 0 == strcmp(arg, "CL_DEVICE_TYPE_CPU")) {
+                    if( 0 == strcmp(arg, "CL_DEVICE_TYPE_CPU")) {
                         gDeviceType = CL_DEVICE_TYPE_CPU;
-                } else if( 0 == strcmp(arg, "CL_DEVICE_TYPE_GPU")) {
+                    } else if( 0 == strcmp(arg, "CL_DEVICE_TYPE_GPU")) {
                         gDeviceType = CL_DEVICE_TYPE_GPU;
-                } else if( 0 == strcmp(arg, "CL_DEVICE_TYPE_ACCELERATOR")) {
+                    } else if( 0 == strcmp(arg, "CL_DEVICE_TYPE_ACCELERATOR")) {
                         gDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
-                } else if( 0 == strcmp(arg, "randomize")) {
+                    } else if( 0 == strcmp(arg, "randomize")) {
                         gRandomSeed = (cl_uint) time( NULL );
                         vlog( "\nRandom seed: %u.\n", gRandomSeed );
-                } else {
-                        vlog_error("\nInvalid function name: %s\n", arg);
-                              test_finish();
-                              exit(-1);
+                    } else {
+                        gTestNames[gTestNameCount] = arg;
+                        gTestNameCount++;
                     }
                 }
             }
@@ -690,6 +1322,8 @@ static void PrintArch( void )
         vlog( "\tARCH:\tx86_64\n" );
     #elif defined( __arm__ )
         vlog( "\tARCH:\tarm\n" );
+    #elif defined( __aarch64__ )
+        vlog( "\tARCH:\taarch64\n");
     #else
         vlog( "\tARCH:\tunknown\n" );
     #endif
@@ -770,7 +1404,7 @@ static void PrintUsage( void )
     vlog( "\n" );
 }
 
-static void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
+static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
 {
     vlog( "%s  (%p, %zd, %p)\n", errinfo, private_info, cb, user_data );
 }
@@ -979,7 +1613,7 @@ static int InitCL( void )
     else
         isEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE"); // we will verify this with a kernel below
 
-    gContext = clCreateContext( NULL, 1, &gDevice, notify_callback, NULL, &error );
+    gContext = clCreateContext( NULL, 1, &gDevice, bruteforce_notify_callback, NULL, &error );
     if( NULL == gContext || error )
     {
         vlog_error( "clCreateContext failed. (%d) \n", error );
@@ -1137,7 +1771,7 @@ static int InitCL( void )
         vlog( "\t\t         All double results that do not match the reference result have their reported\n" );
         vlog( "\t\t         error inflated by 0.5 ulps to account for the fact that this system\n" );
         vlog( "\t\t         can not accurately represent the right result to an accuracy closer\n" );
-        vlog( "\t\t         than half an ulp. See comments in Ulp_Error_Double() for more details.\n\n" );
+        vlog( "\t\t         than half an ulp. See comments in Bruteforce_Ulp_Error_Double() for more details.\n\n" );
     }
 #if defined( __APPLE__ )
     vlog( "\tTesting basic double precision? %s\n", no_yes[0 != gHasBasicDouble] );
@@ -1512,7 +2146,7 @@ const char *sizeNames[ VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
 const int  sizeValues[ VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
 
 // TODO: There is another version of Ulp_Error_Double defined in test_common/harness/errorHelpers.c
-float Ulp_Error_Double( double test, long double reference )
+float Bruteforce_Ulp_Error_Double( double test, long double reference )
 {
 //Check for Non-power-of-two and NaN
 
@@ -1609,74 +2243,6 @@ float Ulp_Error_Double( double test, long double reference )
     return result;
 }
 
-
-float Ulp_Error( float test, double reference )
-{
-    union{ double d; uint64_t u; }u;     u.d = reference;
-    double testVal = test;
-
-  // Note: This function presumes that someone has already tested whether the result is correctly,
-  // rounded before calling this function.  That test:
-  //
-  //    if( (float) reference == test )
-  //        return 0.0f;
-  //
-  // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
-  // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
-  // results.
-
-
-    if( isinf( reference ) )
-    {
-        if( testVal == reference )
-            return 0.0f;
-
-        return (float) (testVal - reference );
-    }
-
-    if( isinf( testVal) )
-    { // infinite test value, but finite (but possibly overflowing in float) reference.
-      //
-      // The function probably overflowed prematurely here. Formally, the spec says this is
-      // an infinite ulp error and should not be tolerated. Unfortunately, this would mean
-      // that the internal precision of some half_pow implementations would have to be 29+ bits
-      // at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
-      // is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which
-      // after rounding to single is 4*32 = 128, which will ultimately result in premature
-      // overflow, even though a good faith representation would be correct to within 2**-29
-      // interally.
-
-      // In the interest of not requiring the implementation go to extraordinary lengths to
-      // deliver a half precision function, we allow premature overflow within the limit
-      // of the allowed ulp error. Towards, that end, we "pretend" the test value is actually
-      // 2**128, the next value that would appear in the number line if float had sufficient range.
-        testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal );
-
-      // Note that the same hack may not work in long double, which is not guaranteed to have
-      // more range than double.  It is not clear that premature overflow should be tolerated for
-      // double.
-    }
-
-    if( u.u & 0x000fffffffffffffULL )
-    { // Non-power of two and NaN
-        if( isnan( reference ) && isnan( test ) )
-            return 0.0f;    // if we are expecting a NaN, any NaN is fine
-
-        // The unbiased exponent of the ulp unit place
-        int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 );
-
-        // Scale the exponent of the error
-        return (float) scalbn( testVal - reference, ulp_exp );
-    }
-
-    // reference is a normal power of two or a zero
-    // The unbiased exponent of the ulp unit place
-    int ulp_exp =  FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 );
-
-    // Scale the exponent of the error
-    return (float) scalbn( testVal - reference, ulp_exp );
-}
-
 float Abs_Error( float test, double reference )
 {
   if( isnan(test) && isnan(reference) )
@@ -1786,25 +2352,6 @@ cl_uint RoundUpToNextPowerOfTwo( cl_uint x )
     return x+x;
 }
 
-#if !defined( __APPLE__ )
-void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
-{
-  uint32_t pat = ((uint32_t*) src_pattern)[0];
-  size_t count = bytes / 4;
-  size_t i;
-  uint32_t *d = (uint32_t*)dest;
-
-  for( i = 0; i < count; i++ )
-    d[i] = pat;
-
-  d += i;
-
-  bytes &= 3;
-  if( bytes )
-    memcpy( d, src_pattern, bytes );
-}
-#endif
-
 void TestFinishAtExit(void) {
   test_finish();
 }
diff --git a/test_conformance/math_brute_force/reference_math.c b/test_conformance/math_brute_force/reference_math.c
index d90f4508..6e8f79aa 100644
--- a/test_conformance/math_brute_force/reference_math.c
+++ b/test_conformance/math_brute_force/reference_math.c
@@ -1790,7 +1790,7 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
 //    *signgamp = 1;
     ix = hx&0x7fffffff;
     if(ix>=0x7ff00000) return x*x;
-    if((ix|lx)==0) return one/zero;
+    if((ix|lx)==0) return INFINITY;
     if(ix<0x3b900000) {    /* |x|<2**-70, return -log(|x|) */
         if(hx<0) {
 //            *signgamp = -1;
@@ -1799,9 +1799,9 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
     }
     if(hx<0) {
         if(ix>=0x43300000)     /* |x|>=2**52, must be -integer */
-        return one/zero;
+        return INFINITY;
         t = reference_sinpi(x);
-        if(t==zero) return one/zero; /* -integer */
+        if(t==zero) return INFINITY; /* -integer */
         nadj = reference_log(pi/reference_fabs(t*x));
 //        if(t<zero) *signgamp = -1;
         x = -x;
diff --git a/test_conformance/math_brute_force/run_math_brute_force_in_parallel.py b/test_conformance/math_brute_force/run_math_brute_force_in_parallel.py
old mode 100644
new mode 100755
diff --git a/test_conformance/math_brute_force/ternary.c b/test_conformance/math_brute_force/ternary.c
index a90271d9..8ac48d64 100644
--- a/test_conformance/math_brute_force/ternary.c
+++ b/test_conformance/math_brute_force/ternary.c
@@ -1010,7 +1010,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                 {
                     double test = ((double*) q)[j];
                     long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] );
-                    float err = Ulp_Error_Double( test, correct );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
                     int fail = ! (fabsf(err) <= f->double_ulps);
 
                     if( fail && ftz )
@@ -1028,8 +1028,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                         { // look at me,
                             long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] );
                             long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                             fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                             if( fabsf( err2 ) < fabsf(err ) )
                                 err = err2;
@@ -1051,10 +1051,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                                 correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] );
                                 long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] );
                                 long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                 fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                  (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                 if( fabsf( err2 ) < fabsf(err ) )
@@ -1085,14 +1085,14 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                                     long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f );
                                     long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f );
                                     long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f );
-                                    err2 = Ulp_Error_Double( test, correct2  );
-                                    err3 = Ulp_Error_Double( test, correct3  );
-                                    err4 = Ulp_Error_Double( test, correct4  );
-                                    err5 = Ulp_Error_Double( test, correct5  );
-                                    float err6 = Ulp_Error_Double( test, correct6  );
-                                    float err7 = Ulp_Error_Double( test, correct7  );
-                                    float err8 = Ulp_Error_Double( test, correct8  );
-                                    float err9 = Ulp_Error_Double( test, correct9  );
+                                    err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                    err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                    err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                    err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
+                                    float err6 = Bruteforce_Ulp_Error_Double( test, correct6  );
+                                    float err7 = Bruteforce_Ulp_Error_Double( test, correct7  );
+                                    float err8 = Bruteforce_Ulp_Error_Double( test, correct8  );
+                                    float err9 = Bruteforce_Ulp_Error_Double( test, correct9  );
                                     fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                      (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) &&
                                                      (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) &&
@@ -1132,10 +1132,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                                 correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 );
                                 long double correct4 = f->dfunc.f_fff( 0.0,  s2[j], -0.0 );
                                 long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                 fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                  (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                 if( fabsf( err2 ) < fabsf(err ) )
@@ -1161,8 +1161,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                         {
                             long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] );
                             long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                             fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                             if( fabsf( err2 ) < fabsf(err ) )
                                 err = err2;
@@ -1184,10 +1184,10 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                                 correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 );
                                 long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 );
                                 long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 );
-                                err2 = Ulp_Error_Double( test, correct2  );
-                                err3 = Ulp_Error_Double( test, correct3  );
-                                float err4 = Ulp_Error_Double( test, correct4  );
-                                float err5 = Ulp_Error_Double( test, correct5  );
+                                err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                                err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                                float err4 = Bruteforce_Ulp_Error_Double( test, correct4  );
+                                float err5 = Bruteforce_Ulp_Error_Double( test, correct5  );
                                 fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) &&
                                                  (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)));
                                 if( fabsf( err2 ) < fabsf(err ) )
@@ -1213,8 +1213,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d)
                         {
                             long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 );
                             long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                             fail =  fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)));
                             if( fabsf( err2 ) < fabsf(err ) )
                                 err = err2;
diff --git a/test_conformance/math_brute_force/unary.c b/test_conformance/math_brute_force/unary.c
index d8b3dbae..fc802327 100644
--- a/test_conformance/math_brute_force/unary.c
+++ b/test_conformance/math_brute_force/unary.c
@@ -200,6 +200,7 @@ typedef struct TestInfo
     cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
     ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
     cl_uint     threadCount;                        // Number of worker threads
+    cl_uint     jobCount;                           // Number of jobs
     cl_uint     step;                               // step between each chunk and the next.
     cl_uint     scale;                              // stride between individual test values
     float       ulps;                               // max_allowed ulps
@@ -234,6 +235,16 @@ int TestFunc_Float_Float(const Func *f, MTdata d)
         test_info.scale =  (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
     }
     test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
     test_info.f = f;
     test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
     test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -309,7 +320,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d)
 
     if( !gSkipCorrectnessTesting || skipTestingRelaxed)
     {
-        error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestFloat, test_info.jobCount, &test_info );
 
         // Accumulate the arithmetic errors
         for( i = 0; i < test_info.threadCount; i++ )
@@ -892,7 +903,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
             {
                 cl_double test = ((cl_double*) q)[j];
                 long double correct = func.f_f( s[j] );
-                float err = Ulp_Error_Double( test, correct );
+                float err = Bruteforce_Ulp_Error_Double( test, correct );
                 int fail = ! (fabsf(err) <= ulps);
 
                 if( fail )
@@ -912,8 +923,8 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
                         {
                             long double correct2 = func.f_f( 0.0L );
                             long double correct3 = func.f_f( -0.0L );
-                            float err2 = Ulp_Error_Double( test, correct2  );
-                            float err3 = Ulp_Error_Double( test, correct3  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct2  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct3  );
                             fail =  fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                             if( fabsf( err2 ) < fabsf(err ) )
                                 err = err2;
@@ -997,7 +1008,16 @@ int TestFunc_Double_Double(const Func *f, MTdata d)
         test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
         test_info.scale =  (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
     }
-   test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        //there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
 
     test_info.f = f;
     test_info.ulps = f->double_ulps;
@@ -1062,7 +1082,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d)
 
     if( !gSkipCorrectnessTesting )
     {
-        error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
+        error = ThreadPool_Do( TestDouble, test_info.jobCount, &test_info );
 
         // Accumulate the arithmetic errors
         for( i = 0; i < test_info.threadCount; i++ )
diff --git a/test_conformance/math_brute_force/unary_two_results.c b/test_conformance/math_brute_force/unary_two_results.c
index a3fb307d..26756edf 100644
--- a/test_conformance/math_brute_force/unary_two_results.c
+++ b/test_conformance/math_brute_force/unary_two_results.c
@@ -800,8 +800,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d)
                     double test2 = ((double*) q2)[j];
                     long double correct2;
                     long double correct = f->dfunc.f_fpf( s[j], &correct2 );
-                    float err = Ulp_Error_Double( test, correct );
-                    float err2 = Ulp_Error_Double( test2, correct2 );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
+                    float err2 = Bruteforce_Ulp_Error_Double( test2, correct2 );
                     int fail = ! (fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps);
                     if( ftz )
                     {
@@ -837,10 +837,10 @@ int TestFunc_Double2_Double(const Func *f, MTdata d)
                             long double correct2p, correct2n;
                             long double correctp = f->dfunc.f_fpf( 0.0, &correct2p );
                             long double correctn = f->dfunc.f_fpf( -0.0, &correct2n );
-                            float errp = Ulp_Error_Double( test, correctp  );
-                            float err2p = Ulp_Error_Double( test, correct2p  );
-                            float errn = Ulp_Error_Double( test, correctn  );
-                            float err2n = Ulp_Error_Double( test, correct2n  );
+                            float errp = Bruteforce_Ulp_Error_Double( test, correctp  );
+                            float err2p = Bruteforce_Ulp_Error_Double( test, correct2p  );
+                            float errn = Bruteforce_Ulp_Error_Double( test, correctn  );
+                            float err2n = Bruteforce_Ulp_Error_Double( test, correct2n  );
                             fail =  fail && ((!(fabsf(errp) <= f->double_ulps)) && (!(fabsf(err2p) <= f->double_ulps))    &&
                                             ((!(fabsf(errn) <= f->double_ulps)) && (!(fabsf(err2n) <= f->double_ulps))) );
                             if( fabsf( errp ) < fabsf(err ) )
diff --git a/test_conformance/math_brute_force/unary_two_results_i.c b/test_conformance/math_brute_force/unary_two_results_i.c
index c4eea8f1..cdd24f8e 100644
--- a/test_conformance/math_brute_force/unary_two_results_i.c
+++ b/test_conformance/math_brute_force/unary_two_results_i.c
@@ -633,7 +633,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d)
                     double test = ((double*) q)[j];
                     int correct2 = INT_MIN;
                     long double correct = f->dfunc.f_fpI( s[j], &correct2 );
-                    float err = Ulp_Error_Double( test, correct );
+                    float err = Bruteforce_Ulp_Error_Double( test, correct );
                     cl_long iErr = (long long) q2[j] - (long long) correct2;
                     int fail = ! (fabsf(err) <= f->double_ulps && abs_cl_long( iErr ) <= maxiError );
                     if( ftz )
@@ -652,8 +652,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d)
                             int correct5, correct6;
                             long double correct3 = f->dfunc.f_fpI( 0.0, &correct5 );
                             long double correct4 = f->dfunc.f_fpI( -0.0, &correct6 );
-                            float err2 = Ulp_Error_Double( test, correct3  );
-                            float err3 = Ulp_Error_Double( test, correct4  );
+                            float err2 = Bruteforce_Ulp_Error_Double( test, correct3  );
+                            float err3 = Bruteforce_Ulp_Error_Double( test, correct4  );
                             cl_long iErr2 = (long long) q2[j] - (long long) correct5;
                             cl_long iErr3 = (long long) q2[j] - (long long) correct6;
 
diff --git a/test_conformance/math_brute_force/unary_u.c b/test_conformance/math_brute_force/unary_u.c
index 131d1e7e..336fdd24 100644
--- a/test_conformance/math_brute_force/unary_u.c
+++ b/test_conformance/math_brute_force/unary_u.c
@@ -567,7 +567,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d)
                 {
                     double test = ((double*) q)[j];
                     long double correct = f->dfunc.f_u( s[j] );
-                    float err = Ulp_Error_Double(test, correct);
+                    float err = Bruteforce_Ulp_Error_Double(test, correct);
                     int fail = ! (fabsf(err) <= f->double_ulps);
 
                     // half_sin/cos/tan are only valid between +-2**16, Inf, NaN
diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
index d9366293..16ddad44 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
@@ -613,6 +613,28 @@ void TestNonUniformWorkGroup::showTestInfo () {
   }
 }
 
+size_t TestNonUniformWorkGroup::adjustLocalArraySize (size_t localArraySize) {
+  // In case if localArraySize is too big, sometimes we can not run kernel because of lack
+  // of resources due to kernel itself requires some local memory to run
+  int err;
+
+  cl_ulong kernelLocalMemSize = 0;
+  err = clGetKernelWorkGroupInfo(_testKernel, _device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernelLocalMemSize), &kernelLocalMemSize, NULL);
+  test_error(err, "clGetKernelWorkGroupInfo failed");
+
+  cl_ulong deviceLocalMemSize = 0;
+  err = clGetDeviceInfo(_device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(deviceLocalMemSize), &deviceLocalMemSize, NULL);
+  test_error(err, "clGetDeviceInfo failed");
+
+  if (kernelLocalMemSize + localArraySize > deviceLocalMemSize) {
+    size_t adjustedLocalArraySize = deviceLocalMemSize - kernelLocalMemSize;
+    log_info("localArraySize was adjusted from %lu to %lu\n", localArraySize, adjustedLocalArraySize);
+    localArraySize = adjustedLocalArraySize;
+  }
+
+  return localArraySize;
+}
+
 int TestNonUniformWorkGroup::runKernel () {
   int err;
 
@@ -630,7 +652,8 @@ int TestNonUniformWorkGroup::runKernel () {
   test_error(err, "clSetKernelArg failed");
 
   //creating local buffer
-  err = clSetKernelArg(_testKernel, 1, localArraySize*sizeof(unsigned int), NULL);
+  localArraySize = adjustLocalArraySize(localArraySize*sizeof(unsigned int));
+  err = clSetKernelArg(_testKernel, 1, localArraySize, NULL);
   test_error(err, "clSetKernelArg failed");
 
   clMemWrapper testGlobalArray = clCreateBuffer(_context, CL_MEM_READ_WRITE, _numOfGlobalWorkItems*sizeof(cl_uint), NULL, &err);
diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
index e7f261b5..a077296d 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
@@ -116,6 +116,7 @@ private:
   void verifyData (DataContainerAttrib * reference, DataContainerAttrib * results, short regionNumber);
   void calculateExpectedValues ();
   void showTestInfo ();
+  size_t adjustLocalArraySize(size_t localArraySize);
 };
 
 // Class responsible for running subtest scenarios in test function
diff --git a/test_conformance/printf/test_printf.c b/test_conformance/printf/test_printf.c
index a5c5d585..cd5f38cd 100644
--- a/test_conformance/printf/test_printf.c
+++ b/test_conformance/printf/test_printf.c
@@ -17,6 +17,7 @@
 
 #include <string.h>
 #include <errno.h>
+#include <memory>
 
 #if ! defined( _WIN32)
 #if ! defined( __ANDROID__ )
@@ -356,9 +357,6 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
 //-----------------------------------------
 static bool isLongSupported(cl_device_id device_id)
 {
-    //profile type && device extention for long support checking
-    char *profileType = NULL,*devExt = NULL;
-
     size_t tempSize = 0;
     cl_int status;
     bool extSupport = true;
@@ -377,7 +375,7 @@ static bool isLongSupported(cl_device_id device_id)
         return false;
     }
 
-    profileType = new char[tempSize];
+    std::unique_ptr<char[]> profileType(new char[tempSize]);
     if(profileType == NULL)
     {
         log_error("Failed to allocate memory(profileType)");
@@ -388,11 +386,11 @@ static bool isLongSupported(cl_device_id device_id)
         device_id,
         CL_DEVICE_PROFILE,
         sizeof(char) * tempSize,
-        profileType,
+        profileType.get(),
         NULL);
 
 
-    if(!strcmp("EMBEDDED_PROFILE",profileType))
+    if(!strcmp("EMBEDDED_PROFILE",profileType.get()))
     {
         // Device extention
         status = clGetDeviceInfo(
@@ -408,7 +406,7 @@ static bool isLongSupported(cl_device_id device_id)
             return false;
         }
 
-        devExt = new char[tempSize];
+        std::unique_ptr<char[]> devExt(new char[tempSize]);
         if(devExt == NULL)
         {
             log_error("Failed to allocate memory(devExt)");
@@ -419,13 +417,10 @@ static bool isLongSupported(cl_device_id device_id)
             device_id,
             CL_DEVICE_EXTENSIONS,
             sizeof(char) * tempSize,
-            devExt,
+            devExt.get(),
             NULL);
 
-        extSupport  = (strstr(devExt,"cles_khr_int64") != NULL);
-
-        delete devExt;
-        delete profileType;
+        extSupport  = (strstr(devExt.get(),"cles_khr_int64") != NULL);
     }
     return extSupport;
 }
@@ -501,7 +496,8 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
     int err;
     cl_program program;
     cl_kernel  kernel;
-    cl_mem d_out;
+    cl_mem d_out = NULL;
+    cl_mem d_a = NULL;
     char _analysisBuffer[ANALYSIS_BUFFER_SIZE];
     cl_uint out32 = 0;
     cl_ulong out64 = 0;
@@ -523,7 +519,7 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
         if(isKernelArgument(allTestCase[testId],testNum))
         {
             int a = 2;
-            cl_mem d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
+            d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
                 sizeof(int), &a, &err);
             if(err!= CL_SUCCESS || d_a == NULL) {
                 log_error("clCreateBuffer failed\n");
@@ -613,6 +609,10 @@ exit:
         log_error("clReleaseKernel failed\n");
     if(clReleaseProgram(program) != CL_SUCCESS)
         log_error("clReleaseProgram failed\n");
+    if(d_out)
+	    clReleaseMemObject(d_out);
+    if(d_a)
+	    clReleaseMemObject(d_a);
     ++s_test_cnt;
 
 
@@ -639,6 +639,8 @@ static void printArch( void )
     log_info( "ARCH:\tx86_64\n" );
 #elif defined( __arm__ )
     log_info( "ARCH:\tarm\n" );
+#elif defined( __aarch64__ )
+    log_info( "ARCH:\taarch64\n" );
 #else
 #error unknown arch
 #endif
@@ -757,7 +759,6 @@ int test_float_15(cl_device_id deviceID, cl_context context, cl_command_queue qu
 {
     return doTest(gQueue, gContext, TYPE_FLOAT, 15, gDevice);
 }
-#if ! defined( __ANDROID__ )
 int test_float_16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return doTest(gQueue, gContext, TYPE_FLOAT, 16, gDevice);
@@ -766,7 +767,6 @@ int test_float_17(cl_device_id deviceID, cl_context context, cl_command_queue qu
 {
     return doTest(gQueue, gContext, TYPE_FLOAT, 17, gDevice);
 }
-#endif
 int test_float_18(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return doTest(gQueue, gContext, TYPE_FLOAT, 18, gDevice);
@@ -857,10 +857,6 @@ int test_string_2(cl_device_id deviceID, cl_context context, cl_command_queue qu
 {
     return doTest(gQueue, gContext, TYPE_STRING, 2, gDevice);
 }
-int test_string_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_STRING, 3, gDevice);
-}
 
 
 int test_vector_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
@@ -933,10 +929,8 @@ basefn basefn_list[] = {
     test_float_13,
     test_float_14,
     test_float_15,
-#if ! defined( __ANDROID__ )
     test_float_16,
     test_float_17,
-#endif
     test_float_18,
     test_float_19,
     test_float_20,
@@ -962,7 +956,6 @@ basefn basefn_list[] = {
     test_string_0,
     test_string_1,
     test_string_2,
-    test_string_3,
 
     test_vector_0,
     test_vector_1,
@@ -1004,10 +997,8 @@ const char *basefn_names[] = {
     "float_13",
     "float_14",
     "float_15",
-#if ! defined( __ANDROID__ )
     "float_16",
     "float_17",
-#endif
     "float_18",
     "float_19",
     "float_20",
@@ -1033,7 +1024,6 @@ const char *basefn_names[] = {
     "string_0",
     "string_1",
     "string_2",
-    "string_3",
 
     "vector_0",
     "vector_1",
diff --git a/test_conformance/printf/util_printf.c b/test_conformance/printf/util_printf.c
index 5bd9be1a..544072a7 100644
--- a/test_conformance/printf/util_printf.c
+++ b/test_conformance/printf/util_printf.c
@@ -212,8 +212,6 @@ struct printDataGenParameters printFloatGenParameters[] = {
 
     {"%+#21.15E","789456123.0"},
 
-#if ! defined( __ANDROID__ )
-
     //Double argument representing floating-point,in [-]xh.hhhhpAd style
 
     {"%.6a","0.1"},
@@ -222,8 +220,6 @@ struct printDataGenParameters printFloatGenParameters[] = {
 
     {"%10.2a","9990.235"},
 
-#endif
-
     //Infinity (1.0/0.0)
 
     {"%f","1.0f/0.0f"},
@@ -275,14 +271,10 @@ const char* correctBufferFloat[] = {
 
     "+7.894561230000000E+8",
 
-#if ! defined( __ANDROID__ )
-
     "0x1.99999ap-4",
 
     "0x1.38p+13",
 
-#endif
-
     "inf",
 
     "-nan",
@@ -622,10 +614,6 @@ struct printDataGenParameters printStringGenParameters[] = {
 
     {"%s","\"%%\""},
 
-    //null string
-
-    {"%s","(void*)0"}
-
 };
 
 //---------------------------------------------------------
@@ -887,7 +875,7 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
     if(!strcmp(pTestCase->_correctBuffer[testId],"inf"))
     return strcmp(analysisBuffer,"inf")&&strcmp(analysisBuffer,"infinity")&&strcmp(analysisBuffer,"1.#INF00")&&strcmp(analysisBuffer,"Inf");
     if(!strcmp(pTestCase->_correctBuffer[testId],"nan") || !strcmp(pTestCase->_correctBuffer[testId],"-nan")) {
-      return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)");
+       return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)")&&strcmp(analysisBuffer,"-nan(ind)");
     }
     return strcmp(analysisBuffer,pTestCase->_correctBuffer[testId]);
 }
diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py
old mode 100644
new mode 100755
diff --git a/test_conformance/select/CMakeLists.txt b/test_conformance/select/CMakeLists.txt
index d8d90229..6f25c466 100644
--- a/test_conformance/select/CMakeLists.txt
+++ b/test_conformance/select/CMakeLists.txt
@@ -8,6 +8,7 @@ set(${MODULE_NAME}_SOURCES
         ../../test_common/harness/msvc9.c
         ../../test_common/harness/kernelHelpers.c
         ../../test_common/harness/errorHelpers.c
+        ../../test_common/harness/parseParameters.cpp
 )
 
 include(../CMakeCommon.txt)
diff --git a/test_conformance/select/test_select.c b/test_conformance/select/test_select.c
index 8aa39a2a..a94d7229 100644
--- a/test_conformance/select/test_select.c
+++ b/test_conformance/select/test_select.c
@@ -27,12 +27,10 @@
 #include <limits.h>
 #include "test_select.h"
 
-
 #include "../../test_common/harness/testHarness.h"
-
-
 #include "../../test_common/harness/kernelHelpers.h"
 #include "../../test_common/harness/mt19937.h"
+#include "../../test_common/harness/parseParameters.h"
 
 
 //-----------------------------------------
@@ -57,6 +55,9 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont
 static int doTest(cl_command_queue queue, cl_context context,
                   Type stype, Type cmptype, cl_device_id device);
 
+
+static void printUsage( void );
+
 //-----------------------------------------
 // Definitions and initializations
 //-----------------------------------------
@@ -71,6 +72,7 @@ static int doTest(cl_command_queue queue, cl_context context,
 // range.  Otherwise, we test a subset of the range
 // [-min_short, min_short]
 static bool  s_wimpy_mode = false;
+static int s_wimpy_reduction_factor = 256;
 
 // Tests are broken into the major test which is based on the
 // src and cmp type and their corresponding vector types and
@@ -344,7 +346,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
 
     cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE;
     size_t block_elements = BUFFER_SIZE / type_size[stype];
-    size_t step = s_wimpy_mode ? 256 : 1;
+    size_t step = s_wimpy_mode ? s_wimpy_reduction_factor : 1;
     cl_ulong cmp_stride = block_elements * step;
 
     // It is more efficient to create the tests all at once since we
@@ -506,99 +508,152 @@ exit:
     return err;
 }
 
-static void printUsage( void )
+int test_select_uchar_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
-    log_info("test_select:  [-cghw] [test_name|start_test_num] \n");
-    log_info("  default is to run the full test on the default device\n");
-    log_info("  -w run in wimpy mode (smoke test)\n");
-    log_info("  test_name will run only one test of that name\n");
-    log_info("  start_test_num will start running from that num\n");
+    return doTest(queue, context, kuchar, kuchar, deviceID);
+}
+int test_select_uchar_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kuchar, kchar, deviceID);
+}
+int test_select_char_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kchar, kuchar, deviceID);
+}
+int test_select_char_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kchar, kchar, deviceID);
+}
+int test_select_ushort_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kushort, kushort, deviceID);
+}
+int test_select_ushort_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kushort, kshort, deviceID);
+}
+int test_select_short_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kshort, kushort, deviceID);
+}
+int test_select_short_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kshort, kshort, deviceID);
+}
+int test_select_uint_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kuint, kuint, deviceID);
+}
+int test_select_uint_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kuint, kint, deviceID);
+}
+int test_select_int_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kint, kuint, deviceID);
+}
+int test_select_int_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kint, kint, deviceID);
+}
+int test_select_float_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kfloat, kuint, deviceID);
+}
+int test_select_float_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kfloat, kint, deviceID);
+}
+int test_select_ulong_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kulong, kulong, deviceID);
+}
+int test_select_ulong_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kulong, klong, deviceID);
+}
+int test_select_long_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, klong, kulong, deviceID);
+}
+int test_select_long_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, klong, klong, deviceID);
+}
+int test_select_double_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kdouble, kulong, deviceID);
+}
+int test_select_double_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+{
+    return doTest(queue, context, kdouble, klong, deviceID);
 }
 
-static void printArch( void )
+basefn basefn_list[] = {
+    test_select_uchar_uchar,
+    test_select_uchar_char,
+    test_select_char_uchar,
+    test_select_char_char,
+    test_select_ushort_ushort,
+    test_select_ushort_short,
+    test_select_short_ushort,
+    test_select_short_short,
+    test_select_uint_uint,
+    test_select_uint_int,
+    test_select_int_uint,
+    test_select_int_int,
+    test_select_float_uint,
+    test_select_float_int,
+    test_select_ulong_ulong,
+    test_select_ulong_long,
+    test_select_long_ulong,
+    test_select_long_long,
+    test_select_double_ulong,
+    test_select_double_long,
+};
+
+const char *basefn_names[] = {
+    "select_uchar_uchar",
+    "select_uchar_char",
+    "select_char_uchar",
+    "select_char_char",
+    "select_ushort_ushort",
+    "select_ushort_short",
+    "select_short_ushort",
+    "select_short_short",
+    "select_uint_uint",
+    "select_uint_int",
+    "select_int_uint",
+    "select_int_int",
+    "select_float_uint",
+    "select_float_int",
+    "select_ulong_ulong",
+    "select_ulong_long",
+    "select_long_ulong",
+    "select_long_long",
+    "select_double_ulong",
+    "select_double_long",
+};
+
+ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
+
+int num_fns = sizeof(basefn_names) / sizeof(char *);
+
+int main(int argc, char* argv[])
 {
-    log_info( "sizeof( void*) = %d\n", (int) sizeof( void *) );
+    const char ** argList = (const char **)calloc( argc, sizeof( char*) );
 
-#if defined( __APPLE__ )
-
-#if defined( __ppc__ )
-    log_info( "ARCH:\tppc\n" );
-#elif defined( __ppc64__ )
-    log_info( "ARCH:\tppc64\n" );
-#elif defined( __i386__ )
-    log_info( "ARCH:\ti386\n" );
-#elif defined( __x86_64__ )
-    log_info( "ARCH:\tx86_64\n" );
-#elif defined( __arm__ )
-    log_info( "ARCH:\tarm\n" );
-#else
-#error unknown arch
-#endif
-
-    int type = 0;
-    size_t typeSize = sizeof( type );
-    sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 );
-    log_info( "cpu type:\t%d\n", type );
-    typeSize = sizeof( type );
-    sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 );
-    log_info( "cpu subtype:\t%d\n", type );
-
-#endif
-}
-
-
-
-
-//-----------------------------------------
-// main
-//-----------------------------------------
-int main(int argc, char* argv[]) {
-    int i;
-    cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;
-    cl_platform_id platform_id;
-    long           test_start_num = 0;   // start test number
-    const char*    exec_testname = NULL;
-    cl_device_id      device_id;
-    uint32_t       device_frequency = 0;
-    uint32_t       compute_devices = 0;
-
-
-    test_start();
-
-    // Maybe we want turn off sleep
-
-    // Check the environmental to see if there is device preference
-    char *device_env = getenv("CL_DEVICE_TYPE");
-    if (device_env != NULL) {
-        if( strcmp( device_env, "gpu" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_GPU" ) == 0 )
-            device_type = CL_DEVICE_TYPE_GPU;
-        else if( strcmp( device_env, "cpu" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_CPU" ) == 0 )
-            device_type = CL_DEVICE_TYPE_CPU;
-        else if( strcmp( device_env, "accelerator" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
-            device_type = CL_DEVICE_TYPE_ACCELERATOR;
-        else if( strcmp( device_env, "default" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
-            device_type = CL_DEVICE_TYPE_DEFAULT;
-        else
-        {
-            log_error( "Unknown CL_DEVICE_TYPE environment variable: %s.\nAborting...\n", device_env );
-            abort();
-        }
+    if( NULL == argList )
+    {
+        log_error( "Failed to allocate memory for argList array.\n" );
+        return 1;
     }
 
-    // Check for the wimpy mode environment variable
-    if (getenv("CL_WIMPY_MODE")) {
-      log_info("*** Detected CL_WIMPY_MODE env\n");
-      s_wimpy_mode = 1;
-    }
+    argList[0] = argv[0];
+    size_t argCount = 1;
 
-    // Determine if we want to run a particular test or if we want to
-    // start running from a certain point and if we want to run on cpu/gpu
-    // usage: test_selects [test_name] [start test num] [run_long]
-    // default is to run all tests on the gpu and be short
-    // test names are of the form select_[src/dest type]_[cmp_type]
-    // In the long test, we run the full range for any type >= 32 bits
-    // and 32 bits subset for the 64 bit value.
-    for (i=1; i < argc; ++i) {
+    for( int i = 1; i < argc; ++i )
+    {
         const char *arg = argv[i];
         if (arg == NULL)
             break;
@@ -612,177 +667,55 @@ int main(int argc, char* argv[]) {
                     case 'h':
                         printUsage();
                         return 0;
-                    case 'w':  // Wimpy mode
+                    case 'w':
                         s_wimpy_mode = true;
                         break;
+                    case '[':
+                        parseWimpyReductionFactor(arg, s_wimpy_reduction_factor);
+                        break;
                     default:
-                        log_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
-                        printUsage();
-                        return 0;
+                        break;
                 }
                 arg++;
             }
         }
-        else {
-            char* t = NULL;
-            long num = strtol(argv[i], &t, 0);
-            if (t != argv[i])
-                test_start_num = num;
-            else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_CPU" ) )
-                device_type = CL_DEVICE_TYPE_CPU;
-            else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_GPU" ) )
-                device_type = CL_DEVICE_TYPE_GPU;
-            else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_ACCELERATOR" ) )
-                device_type = CL_DEVICE_TYPE_ACCELERATOR;
-            else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_DEFAULT" ) )
-                device_type = CL_DEVICE_TYPE_DEFAULT;
-            else if( 0 == strcmp( argv[i], "randomize" ) ) {
-                gRandomSeed = (cl_uint) time( NULL );
-                log_info("\nRandom seed: %u.\n", gRandomSeed );
-            } else {
-                // assume it is a test name that we want to execute
-                exec_testname = argv[i];
-            }
+        else
+        {
+            argList[argCount] = arg;
+            argCount++;
         }
     }
 
-
-    int err;
-
-    // Get platform
-    err = clGetPlatformIDs(1, &platform_id, NULL);
-    checkErr(err,"clGetPlatformIDs failed");
-
-    // Get Device information
-    err = clGetDeviceIDs(platform_id, device_type, 1, &device_id, 0);
-    checkErr(err,"clGetComputeDevices");
-
-    err =  clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(cl_device_type), &device_type, NULL);
-    checkErr(err,"clGetComputeConfigInfo 1");
-
-    size_t config_size = sizeof( device_frequency );
-#if MULTITHREAD
-    if( (err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, config_size, &compute_devices, NULL )) )
-#endif
-        compute_devices = 1;
-
-    config_size = sizeof(device_frequency);
-    if((err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, config_size, &device_frequency, NULL )))
-        device_frequency = 1;
-
-    //detect whether profile of the device is embedded
-    char profile[1024] = "";
-    if( (err = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) ){}
-    else if( strstr(profile, "EMBEDDED_PROFILE" ) )
-    {
-        gIsEmbedded = 1;
+    if (getenv("CL_WIMPY_MODE")) {
+        s_wimpy_mode = true;
     }
 
-
-    log_info( "\nCompute Device info:\n" );
-    log_info( "\tProcessing with %d devices\n", compute_devices );
-    log_info( "\tDevice Frequency: %d MHz\n", device_frequency );
-
-    printDeviceHeader( device_id );
-    printArch();
-
     log_info( "Test binary built %s %s\n", __DATE__, __TIME__ );
     if (s_wimpy_mode) {
         log_info("\n");
         log_info("*** WARNING: Testing in Wimpy mode!                     ***\n");
         log_info("*** Wimpy mode is not sufficient to verify correctness. ***\n");
         log_info("*** It gives warm fuzzy feelings and then nevers calls. ***\n\n");
+        log_info("*** Wimpy Reduction Factor: %-27u ***\n\n", s_wimpy_reduction_factor);
     }
 
-    cl_context context = clCreateContext(NULL, 1, &device_id, notify_callback, NULL, NULL);
-    checkNull(context, "clCreateContext");
+    int err = runTestHarness( argCount, argList, num_fns, basefn_list, basefn_names, false, false, 0 );
 
-    cl_command_queue queue = clCreateCommandQueueWithProperties(context, device_id, 0, NULL);
-    checkNull(queue, "clCreateCommandQueue");
+    free( argList );
 
-
-    if (exec_testname) {
-        // Parse name
-        // Skip the first part of the name
-        bool success = false;
-        if (strncmp(exec_testname, "select_", 7) == 0) {
-            int i;
-            Type src_type = kTypeCount;
-            Type cmp_type = kTypeCount;
-            char* sptr = (char *)strchr(exec_testname, '_');
-            if (sptr) {
-                for (++sptr, i=0; i < kTypeCount; i++) {
-                    if (strncmp(sptr, type_name[i], strlen(type_name[i])) == 0) {
-                        src_type = (Type)i;
-                        break;
-                    }
-                }
-                sptr = strchr(sptr, '_');
-                if (sptr) {
-                    for (++sptr, i=0; i < kTypeCount; i++) {
-                        if (strncmp(sptr, type_name[i], strlen(type_name[i])) == 0) {
-                            cmp_type = (Type)i;
-                            break;
-                        }
-                    }
-                }
-            }
-            if (src_type != kTypeCount && cmp_type != kTypeCount) {
-                success = true;
-                log_info("Testing only select_%s_%s\n",
-                         type_name[src_type], type_name[cmp_type]);
-                if (doTest(queue, context, src_type, cmp_type, device_id) != 0)
-                    log_error("*** select_%s_%s FAILED ***\n\n",
-                              type_name[src_type], type_name[cmp_type]);
-            }
-        }
-        if (!success) {
-            log_error("can not find test:%s", exec_testname);
-            return -1;
-        }
-    }
-    else {
-        int src_type, j;
-        int test_num;
-        test_num = 0;
-        for (src_type = 0; src_type < kTypeCount; ++src_type) {
-            for (j = 0; j < 2; ++j) {
-                Type cmp_type = ctype[src_type][j];
-                if (++test_num < test_start_num) {
-                    log_info("%d) skipping select_%s_%s\n", test_num,
-                             type_name[src_type], type_name[cmp_type]);
-                }
-                else {
-                    log_info("%d) Testing select_%s_%s\n",
-                             test_num, type_name[src_type], type_name[cmp_type]);
-                    if (doTest(queue, context, (Type)src_type, cmp_type, device_id) != 0)
-                        log_error("*** %d) select_%s_%s FAILED ***\n\n", test_num,
-                                  type_name[src_type], type_name[cmp_type]);
-                }
-            }
-        }
-    }
-
-    int error = clFinish(queue);
-    if (error) {
-        log_error("clFinish failed: %d\n", error);
-    }
-
-    clReleaseContext(context);
-    clReleaseCommandQueue(queue);
-
-    if (s_test_fail == 0) {
-        if (s_test_cnt > 1)
-            log_info("PASSED %d of %d tests.\n", s_test_cnt, s_test_cnt);
-        else
-            log_info("PASSED test.\n");
-    } else if (s_test_fail > 0) {
-        if (s_test_cnt > 1)
-            log_error("FAILED %d of %d tests.\n", s_test_fail, s_test_cnt);
-        else
-            log_error("FAILED test.\n");
-    }
-
-    test_finish();
-    return s_test_fail;
+    return err;
+}
+
+static void printUsage( void )
+{
+    log_info("test_select:  [-w] <optional: test_names> \n");
+    log_info("\tdefault is to run the full test on the default device\n");
+    log_info("\t-w run in wimpy mode (smoke test)\n");
+    log_info("\t-[2^n] Set wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", s_wimpy_reduction_factor);
+    log_info("\n");
+    log_info("Test names:\n");
+    for( int i = 0; i < num_fns; i++ )
+    {
+        log_info( "\t%s\n", basefn_names[i] );
+    }
 }
diff --git a/test_conformance/spir/basic.zip b/test_conformance/spir/basic.zip
index d60636fb..14c7a3f4 100644
Binary files a/test_conformance/spir/basic.zip and b/test_conformance/spir/basic.zip differ
diff --git a/test_conformance/spir/half.zip b/test_conformance/spir/half.zip
index f0d3e8ff..0b1deca1 100644
Binary files a/test_conformance/spir/half.zip and b/test_conformance/spir/half.zip differ
diff --git a/test_conformance/spir/images_kernel_read_write.zip b/test_conformance/spir/images_kernel_read_write.zip
index cbb84d89..dc701137 100644
Binary files a/test_conformance/spir/images_kernel_read_write.zip and b/test_conformance/spir/images_kernel_read_write.zip differ
diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp
index 1c1ce6b3..52628e37 100644
--- a/test_conformance/spir/main.cpp
+++ b/test_conformance/spir/main.cpp
@@ -346,9 +346,12 @@ bool test_suite(cl_device_id device, cl_uint size_t_width, const char *folder,
     {
         std::cout << "Failed tests:" << std::endl;
         std::for_each(ErrList.begin(), ErrList.end(), printError);
+        std::cout << std::endl;
+        return false;
     }
+
     std::cout << std::endl;
-    return tests_passed == number_of_tests;
+    return true;
 }
 
 static std::string getTestFolder(const std::string& TS)
@@ -840,7 +843,6 @@ bool test_basic (cl_device_id device, cl_uint size_t_width, const char *folder)
         "test_sizeof.sizeof_uintptr_t",
         "test_sizeof.sizeof_image2d_t",
         "test_sizeof.sizeof_image3d_t",
-        "test_sizeof.sizeof_sampler_t",
         "test_sizeof.sizeof_double",
         "test_sizeof.sizeof_double2",
         "test_sizeof.sizeof_double4",
diff --git a/test_conformance/spir/profiling.zip b/test_conformance/spir/profiling.zip
index 17f1fbec..c792bbec 100644
Binary files a/test_conformance/spir/profiling.zip and b/test_conformance/spir/profiling.zip differ
diff --git a/test_conformance/spir/run_build_test.cpp b/test_conformance/spir/run_build_test.cpp
index 4dd3f0d4..b602303d 100644
--- a/test_conformance/spir/run_build_test.cpp
+++ b/test_conformance/spir/run_build_test.cpp
@@ -378,7 +378,10 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
 
     if (strstr(test_name, "div_cr") || strstr(test_name, "sqrt_cr")) {
       if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
+      {
+        std::cout << "Skipped. CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT is not supported." << std::endl;
         return true;
+      }
       else {
         bcoptions += " -cl-fp32-correctly-rounded-divide-sqrt";
         cloptions += " -cl-fp32-correctly-rounded-divide-sqrt";
diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp
index 5a289126..9a0bf037 100644
--- a/test_conformance/subgroups/main.cpp
+++ b/test_conformance/subgroups/main.cpp
@@ -40,15 +40,15 @@ ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_lis
 
 static const int num_fns = sizeof(basefn_names) / sizeof(char *);
 
-static int
+static test_status
 checkSubGroupsExtension(cl_device_id device)
 {
     if (!is_extension_available(device, "cl_khr_subgroups")) {
         log_info("Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
-        return CL_INVALID_DEVICE;
+        return TEST_SKIP;
     }
 
-    return CL_SUCCESS;
+    return TEST_PASS;
 }
 
 int
diff --git a/test_extensions/media_sharing/test_create_context.cpp b/test_extensions/media_sharing/test_create_context.cpp
index 42deaa04..5637bc54 100644
--- a/test_extensions/media_sharing/test_create_context.cpp
+++ b/test_extensions/media_sharing/test_create_context.cpp
@@ -40,6 +40,13 @@ int context_create(cl_device_id deviceID, cl_context context, cl_command_queue q
 
   while (deviceWrapper->AdapterNext())
   {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
     if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
     {
       std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
@@ -68,7 +75,6 @@ int context_create(cl_device_id deviceID, cl_context context, cl_command_queue q
       0,
     };
 
-    cl_int error;
     clContextWrapper ctx;
     switch(functionCreate)
     {
@@ -230,13 +236,20 @@ int context_create(cl_device_id deviceID, cl_context context, cl_command_queue q
     }
   }
 
-  if (!deviceWrapper->Status())
+  if (deviceWrapper->Status() != DEVICE_PASS)
   {
     std::string adapterName;
     AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
     log_error("%s init failed\n", adapterName.c_str());
     result.ResultSub(CResult::TEST_FAIL);
-    return result.Result();
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
   }
 
   return result.Result();
diff --git a/test_extensions/media_sharing/test_functions_api.cpp b/test_extensions/media_sharing/test_functions_api.cpp
index 21fe403d..cdc6ce86 100644
--- a/test_extensions/media_sharing/test_functions_api.cpp
+++ b/test_extensions/media_sharing/test_functions_api.cpp
@@ -51,6 +51,13 @@ int api_functions(cl_device_id deviceID, cl_context context, cl_command_queue qu
   //iterates through all devices
   while (deviceWrapper->AdapterNext())
   {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
     if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
     {
       std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
@@ -81,7 +88,6 @@ int api_functions(cl_device_id deviceID, cl_context context, cl_command_queue qu
       0,
     };
 
-    cl_int error;
     clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
     if (error != CL_SUCCESS)
     {
@@ -506,13 +512,20 @@ int api_functions(cl_device_id deviceID, cl_context context, cl_command_queue qu
     }
   }
 
-  if (!deviceWrapper->Status())
+  if (deviceWrapper->Status() != DEVICE_PASS)
   {
     std::string adapterName;
     AdapterToString(adapterType, adapterName);
-    log_error("%s init failed\n", adapterName.c_str());
-    result.ResultSub(CResult::TEST_FAIL);
-    return result.Result();
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
+      log_error("%s init failed\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_FAIL);
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
   }
 
   return result.Result();
diff --git a/test_extensions/media_sharing/test_functions_kernel.cpp b/test_extensions/media_sharing/test_functions_kernel.cpp
index eff297dc..51ac0b70 100644
--- a/test_extensions/media_sharing/test_functions_kernel.cpp
+++ b/test_extensions/media_sharing/test_functions_kernel.cpp
@@ -71,6 +71,13 @@ int kernel_functions(cl_device_id deviceID, cl_context context, cl_command_queue
 
   while (deviceWrapper->AdapterNext())
   {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
     if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
     {
       std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
@@ -109,7 +116,6 @@ int kernel_functions(cl_device_id deviceID, cl_context context, cl_command_queue
       0,
     };
 
-    cl_int error;
     clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
     if (error != CL_SUCCESS)
     {
@@ -335,13 +341,20 @@ int kernel_functions(cl_device_id deviceID, cl_context context, cl_command_queue
     }
   }
 
-  if (!deviceWrapper->Status())
+  if (deviceWrapper->Status() != DEVICE_PASS)
   {
-    std::string adapter;
-    AdapterToString(adapterType, adapter);
-    log_error("%s init failed\n", adapter.c_str());
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+  {
+      log_error("%s init failed\n", adapterName.c_str());
     result.ResultSub(CResult::TEST_FAIL);
-    return result.Result();
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
   }
 
   return result.Result();
diff --git a/test_extensions/media_sharing/test_get_device_ids.cpp b/test_extensions/media_sharing/test_get_device_ids.cpp
index 68fdbf74..f8947ea6 100644
--- a/test_extensions/media_sharing/test_get_device_ids.cpp
+++ b/test_extensions/media_sharing/test_get_device_ids.cpp
@@ -53,6 +53,12 @@ int get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue q
     std::vector<void *> mediaDevices;
     mediaDevices.push_back(deviceWrapper->Device());
 
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result)))
+    {
+      return result.Result();
+    }
+
     cl_uint devicesAllNum = 0;
     error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0],
       CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum);
@@ -140,13 +146,20 @@ int get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue q
     }
   }
 
-  if (!deviceWrapper->Status())
+  if (deviceWrapper->Status() != DEVICE_PASS)
   {
-    std::string adapter;
-    AdapterToString(adapterType, adapter);
-    log_error("%s init failed\n", adapter.c_str());
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+  {
+      log_error("%s init failed\n", adapterName.c_str());
     result.ResultSub(CResult::TEST_FAIL);
-    return result.Result();
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
   }
 
   return result.Result();
diff --git a/test_extensions/media_sharing/test_interop_sync.cpp b/test_extensions/media_sharing/test_interop_sync.cpp
index d15e4fbf..6831a14d 100644
--- a/test_extensions/media_sharing/test_interop_sync.cpp
+++ b/test_extensions/media_sharing/test_interop_sync.cpp
@@ -40,6 +40,13 @@ int interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queu
 
   while (deviceWrapper->AdapterNext())
   {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
     if (surfaceFormat != SURFACE_FORMAT_NV12 &&
       !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
     {
@@ -71,7 +78,7 @@ int interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queu
       0,
     };
 
-    cl_int error;
+
     clContextWrapper ctx;
     switch(functionCreate)
     {
@@ -258,13 +265,21 @@ int interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queu
     }
   }
 
-  if (!deviceWrapper->Status())
+  if (deviceWrapper->Status() != DEVICE_PASS)
   {
     std::string adapterName;
     AdapterToString(adapterType, adapterName);
+
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
     log_error("%s init failed\n", adapterName.c_str());
     result.ResultSub(CResult::TEST_FAIL);
-    return result.Result();
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
   }
 
   return result.Result();
diff --git a/test_extensions/media_sharing/test_memory_access.cpp b/test_extensions/media_sharing/test_memory_access.cpp
index 10fe7543..5aabaf6f 100644
--- a/test_extensions/media_sharing/test_memory_access.cpp
+++ b/test_extensions/media_sharing/test_memory_access.cpp
@@ -45,6 +45,13 @@ int memory_access(cl_device_id deviceID, cl_context context, cl_command_queue qu
   //iterates through all devices
   while (deviceWrapper->AdapterNext())
   {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
     if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat))
     {
       std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
@@ -81,7 +88,6 @@ int memory_access(cl_device_id deviceID, cl_context context, cl_command_queue qu
       0,
     };
 
-    cl_int error;
     clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
     if (error != CL_SUCCESS)
     {
@@ -357,13 +363,20 @@ int memory_access(cl_device_id deviceID, cl_context context, cl_command_queue qu
     }
   }
 
-  if (!deviceWrapper->Status())
+  if (deviceWrapper->Status() != DEVICE_PASS)
   {
     std::string adapterName;
     AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+    {
     log_error("%s init failed\n", adapterName.c_str());
     result.ResultSub(CResult::TEST_FAIL);
-    return result.Result();
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
   }
 
   return result.Result();
diff --git a/test_extensions/media_sharing/test_other_data_types.cpp b/test_extensions/media_sharing/test_other_data_types.cpp
index 0e0bce35..a2aa7cbd 100644
--- a/test_extensions/media_sharing/test_other_data_types.cpp
+++ b/test_extensions/media_sharing/test_other_data_types.cpp
@@ -65,13 +65,19 @@ int other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue
 
   while (deviceWrapper->AdapterNext())
   {
+    cl_int error;
+    //check if the test can be run on the adapter
+    if (CL_SUCCESS != (error = deviceExistForCLTest(gPlatformIDdetected, adapterType, deviceWrapper->Device(), result, sharedHandle)))
+    {
+      return result.Result();
+    }
+
     cl_context_properties contextProperties[] = {
       CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected,
       AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(),
       0,
     };
 
-    cl_int error;
     clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error);
     if (error != CL_SUCCESS)
     {
@@ -419,13 +425,20 @@ int other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue
     }
   }
 
-  if (!deviceWrapper->Status())
+  if (deviceWrapper->Status() != DEVICE_PASS)
   {
-    std::string adapter;
-    AdapterToString(adapterType, adapter);
-    log_error("%s init failed\n", adapter.c_str());
+    std::string adapterName;
+    AdapterToString(adapterType, adapterName);
+    if (deviceWrapper->Status() == DEVICE_FAIL)
+  {
+      log_error("%s init failed\n", adapterName.c_str());
     result.ResultSub(CResult::TEST_FAIL);
-    return result.Result();
+    }
+    else
+    {
+      log_error("%s init incomplete due to unsupported device\n", adapterName.c_str());
+      result.ResultSub(CResult::TEST_NOTSUPPORTED);
+    }
   }
 
   return result.Result();
diff --git a/test_extensions/media_sharing/utils.cpp b/test_extensions/media_sharing/utils.cpp
index b32e9556..41899b76 100644
--- a/test_extensions/media_sharing/utils.cpp
+++ b/test_extensions/media_sharing/utils.cpp
@@ -1935,3 +1935,36 @@ float convert_half_to_float( unsigned short halfValue )
   outFloat.bits = ( sign << 31 ) | ( exponent << 23 ) | mantissa;
   return outFloat.floatValue;
 }
+
+cl_int deviceExistForCLTest(cl_platform_id platform,
+     cl_dx9_media_adapter_type_khr media_adapters_type,
+     void *media_adapters,
+     CResult &result,
+     TSharedHandleType sharedHandle /*default SHARED_HANDLE_ENABLED*/
+     )
+{
+    cl_int _error;
+    cl_uint devicesAllNum = 0;
+    std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no";
+    std::string adapterStr;
+    AdapterToString(media_adapters_type, adapterStr);
+
+    _error = clGetDeviceIDsFromDX9MediaAdapterKHR(platform, 1,
+        &media_adapters_type, &media_adapters, CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesAllNum);
+
+    if (_error != CL_SUCCESS)
+    {
+        if(_error != CL_DEVICE_NOT_FOUND)
+        {
+           log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(_error));
+           result.ResultSub(CResult::TEST_ERROR);
+        }
+        else
+        {
+          log_info("Skipping test case, device type is not supported by a device (adapter type: %s, shared handle: %s)\n", adapterStr.c_str(), sharedHandleStr.c_str());
+          result.ResultSub(CResult::TEST_NOTSUPPORTED);
+        }
+    }
+
+    return _error;
+}
diff --git a/test_extensions/media_sharing/utils.h b/test_extensions/media_sharing/utils.h
index 4731b268..295a2d25 100644
--- a/test_extensions/media_sharing/utils.h
+++ b/test_extensions/media_sharing/utils.h
@@ -15,17 +15,19 @@
 //
 #ifndef __UTILS_KHR_MEDIA_H
 #define __UTILS_KHR_MEDIA_H
-
+#include "../../test_common/harness/compat.h"
+#include <iostream>
 #include "wrappers.h"
 #include "CL/cl_dx9_media_sharing.h"
-
 #include "../../test_common/harness/typeWrappers.h"
-
-#include <iostream>
 #include <string>
 #include <memory>
 #include <vector>
 
+
+
+
+
 extern clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR;
 extern clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR;
 extern clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR;
@@ -168,4 +170,5 @@ cl_ushort convert_float_to_half( float f );
 float convert_half_to_float( unsigned short halfValue );
 int DetectFloatToHalfRoundingMode( cl_command_queue );
 
+cl_int deviceExistForCLTest(cl_platform_id platform,cl_dx9_media_adapter_type_khr media_adapters_type,void *media_adapters,CResult &result,TSharedHandleType sharedHandle=SHARED_HANDLE_DISABLED);
 #endif  // __UTILS_KHR_MEDIA_H
diff --git a/test_extensions/media_sharing/wrappers.cpp b/test_extensions/media_sharing/wrappers.cpp
index 27907ca3..af13633b 100644
--- a/test_extensions/media_sharing/wrappers.cpp
+++ b/test_extensions/media_sharing/wrappers.cpp
@@ -141,7 +141,7 @@ CDeviceWrapper::~CDeviceWrapper()
 
 #if defined(_WIN32)
 CD3D9Wrapper::CD3D9Wrapper():
-_d3d9(NULL), _d3dDevice(NULL), _status(true), _adapterIdx(0), _adapterFound(false)
+_d3d9(NULL), _d3dDevice(NULL), _status(DEVICE_PASS), _adapterIdx(0), _adapterFound(false)
 {
   WindowInit();
 
@@ -149,7 +149,7 @@ _d3d9(NULL), _d3dDevice(NULL), _status(true), _adapterIdx(0), _adapterFound(fals
   if (!_d3d9)
   {
     log_error("Direct3DCreate9 failed\n");
-    _status = false;
+    _status = DEVICE_FAIL;
   }
 }
 
@@ -169,16 +169,16 @@ void CD3D9Wrapper::Destroy()
   _d3dDevice = 0;
 }
 
-bool CD3D9Wrapper::Init()
+cl_int CD3D9Wrapper::Init()
 {
   if (!WindowHandle())
   {
     log_error("D3D9: Window is not created\n");
-    _status = false;
-    return false;
+    _status = DEVICE_FAIL;
+    return DEVICE_FAIL;
   }
 
-  if(!_d3d9 || !_status || !_adapterFound)
+  if(!_d3d9 || DEVICE_PASS  != _status || !_adapterFound)
     return false;
 
   _d3d9->GetAdapterDisplayMode(_adapterIdx - 1, &_d3ddm);
@@ -201,8 +201,8 @@ bool CD3D9Wrapper::Init()
     processingType, &d3dParams, &_d3dDevice) ) )
   {
     log_error("CreateDevice failed\n");
-    _status = false;
-    return false;
+    _status = DEVICE_FAIL;
+    return DEVICE_FAIL;
   }
 
   _d3dDevice->BeginScene();
@@ -232,14 +232,14 @@ D3DADAPTER_IDENTIFIER9 CD3D9Wrapper::Adapter()
   return _adapter;
 }
 
-bool CD3D9Wrapper::Status() const
+TDeviceStatus CD3D9Wrapper::Status() const
 {
   return _status;
 }
 
 bool CD3D9Wrapper::AdapterNext()
 {
-  if (!_status)
+  if (DEVICE_PASS != _status)
     return false;
 
   _adapterFound = false;
@@ -253,7 +253,7 @@ bool CD3D9Wrapper::AdapterNext()
     if(FAILED(_d3d9->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter)))
     {
       log_error("D3D9: GetAdapterIdentifier failed\n");
-      _status = false;
+      _status = DEVICE_FAIL;
       return false;
     }
 
@@ -262,7 +262,7 @@ bool CD3D9Wrapper::AdapterNext()
     Destroy();
     if(!Init())
     {
-      _status = false;
+      _status = DEVICE_FAIL;
       _adapterFound = false;
     }
     break;
@@ -278,7 +278,7 @@ unsigned int CD3D9Wrapper::AdapterIdx() const
 
 
 CD3D9ExWrapper::CD3D9ExWrapper():
-_d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(true), _adapterIdx(0), _adapterFound(false)
+_d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(DEVICE_PASS), _adapterIdx(0), _adapterFound(false)
 {
   WindowInit();
 
@@ -286,7 +286,7 @@ _d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(true), _adapterIdx(0), _adapterFound(
   if (FAILED(result) || !_d3d9Ex)
   {
     log_error("Direct3DCreate9Ex failed\n");
-    _status = false;
+    _status = DEVICE_FAIL;
   }
 }
 
@@ -319,17 +319,17 @@ D3DADAPTER_IDENTIFIER9 CD3D9ExWrapper::Adapter()
   return _adapter;
 }
 
-bool CD3D9ExWrapper::Init()
+cl_int CD3D9ExWrapper::Init()
 {
   if (!WindowHandle())
   {
     log_error("D3D9EX: Window is not created\n");
-    _status = false;
-    return false;
+    _status = DEVICE_FAIL;
+    return DEVICE_FAIL;
   }
 
-  if(!_d3d9Ex || !_status || !_adapterFound)
-    return false;
+  if(!_d3d9Ex || DEVICE_FAIL == _status || !_adapterFound)
+    return DEVICE_FAIL;
 
   RECT rect;
   GetClientRect(WindowHandle(),&rect);
@@ -353,15 +353,15 @@ bool CD3D9ExWrapper::Init()
     processingType, &d3dParams, NULL, &_d3dDeviceEx) ) )
   {
     log_error("CreateDeviceEx failed\n");
-    _status = false;
-    return false;
+    _status = DEVICE_FAIL;
+    return DEVICE_FAIL;
   }
 
   _d3dDeviceEx->BeginScene();
   _d3dDeviceEx->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0);
   _d3dDeviceEx->EndScene();
 
-  return true;
+  return DEVICE_PASS;
 }
 
 void CD3D9ExWrapper::Destroy()
@@ -371,14 +371,14 @@ void CD3D9ExWrapper::Destroy()
   _d3dDeviceEx = 0;
 }
 
-bool CD3D9ExWrapper::Status() const
+TDeviceStatus CD3D9ExWrapper::Status() const
 {
   return _status;
 }
 
 bool CD3D9ExWrapper::AdapterNext()
 {
-  if (!_status)
+  if (DEVICE_FAIL == _status)
     return false;
 
   _adapterFound = false;
@@ -392,7 +392,7 @@ bool CD3D9ExWrapper::AdapterNext()
     if(FAILED(_d3d9Ex->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter)))
     {
       log_error("D3D9EX: GetAdapterIdentifier failed\n");
-      _status = false;
+      _status = DEVICE_FAIL;
       return false;
     }
 
@@ -400,8 +400,8 @@ bool CD3D9ExWrapper::AdapterNext()
     Destroy();
     if(!Init())
     {
-      _status = false;
-      _adapterFound = false;
+      _status = DEVICE_FAIL;
+      _adapterFound = _status;
     }
 
     break;
@@ -416,7 +416,7 @@ unsigned int CD3D9ExWrapper::AdapterIdx() const
 }
 
 CDXVAWrapper::CDXVAWrapper():
-_dxvaDevice(NULL), _status(true), _adapterFound(false)
+_dxvaDevice(NULL), _status(DEVICE_PASS), _adapterFound(false)
 {
   _status = _d3d9.Status();
 }
@@ -431,19 +431,24 @@ void * CDXVAWrapper::Device() const
   return _dxvaDevice;
 }
 
-bool CDXVAWrapper::Status() const
+TDeviceStatus CDXVAWrapper::Status() const
 {
-  return _status && _d3d9.Status();
+    if(_status == DEVICE_FAIL || _d3d9.Status() == DEVICE_FAIL)
+        return DEVICE_FAIL;
+    else if(_status == DEVICE_NOTSUPPORTED || _d3d9.Status() == DEVICE_NOTSUPPORTED)
+        return DEVICE_NOTSUPPORTED;
+    else
+        return DEVICE_PASS;
 }
 
 bool CDXVAWrapper::AdapterNext()
 {
-  if (!_status)
+  if (DEVICE_PASS != _status)
     return false;
 
   _adapterFound = _d3d9.AdapterNext();
   _status = _d3d9.Status();
-  if (!_status)
+  if (DEVICE_PASS != _status)
   {
     _adapterFound = false;
     return false;
@@ -454,7 +459,7 @@ bool CDXVAWrapper::AdapterNext()
 
   DXVAHDDestroy();
   _status = DXVAHDInit();
-  if (!_status)
+  if (DEVICE_PASS != _status)
   {
     _adapterFound = false;
     return false;
@@ -463,10 +468,10 @@ bool CDXVAWrapper::AdapterNext()
   return true;
 }
 
-bool CDXVAWrapper::DXVAHDInit()
+TDeviceStatus CDXVAWrapper::DXVAHDInit()
 {
-  if (!_status || !_d3d9.Status() || !_adapterFound)
-    return false;
+  if ((_status == DEVICE_FAIL) || (_d3d9.Status() == DEVICE_FAIL) || !_adapterFound)
+    return DEVICE_FAIL;
 
   DXVAHD_RATIONAL fps = { VIDEO_FPS, 1 };
 
@@ -480,20 +485,27 @@ bool CDXVAWrapper::DXVAHDInit()
   desc.OutputHeight = WindowHeight();
 
 #ifdef USE_SOFTWARE_PLUGIN
-  _status = false;
-  return false;
+  _status = DEVICE_FAIL;
+  return DEVICE_FAIL;
 #endif
 
   HRESULT hr = DXVAHD_CreateDevice(static_cast<IDirect3DDevice9Ex *>(_d3d9.Device()),
     &desc, DXVAHD_DEVICE_USAGE_PLAYBACK_NORMAL, NULL, &_dxvaDevice);
   if(FAILED(hr))
   {
+    if (hr == E_NOINTERFACE)
+    {
+      log_error("DXVAHD_CreateDevice skipped due to no supported devices!\n");
+      _status = DEVICE_NOTSUPPORTED;
+    }
+    else
+    {
     log_error("DXVAHD_CreateDevice failed\n");
-    _status = false;
-    return false;
+    _status = DEVICE_FAIL;
+    }
   }
 
-  return true;
+  return _status;
 }
 
 void CDXVAWrapper::DXVAHDDestroy()
diff --git a/test_extensions/media_sharing/wrappers.h b/test_extensions/media_sharing/wrappers.h
index bafc8a0a..45b70326 100644
--- a/test_extensions/media_sharing/wrappers.h
+++ b/test_extensions/media_sharing/wrappers.h
@@ -18,10 +18,37 @@
 
 #if defined(_WIN32)
 #include <d3d9.h>
+#if defined (__MINGW32__)
+#include <rpcsal.h>
+typedef unsigned char UINT8;
+#define __out
+#define __in
+#define __inout
+#define __out_bcount(size)
+#define __out_bcount_opt(size)
+#define __in_opt
+#define __in_ecount(size)
+#define __in_ecount_opt(size)
+#define __out_opt
+#define __out_ecount(size)
+#define __out_ecount_opt(size)
+#define __in_bcount_opt(size)
+#define __inout_opt
+#define __inout_bcount(size)
+#define __in_bcount(size)
+#define __deref_out
+#endif
 #include <dxvahd.h>
 #include <tchar.h>
 #endif
 
+enum TDeviceStatus
+{
+  DEVICE_NOTSUPPORTED,
+  DEVICE_PASS,
+  DEVICE_FAIL,
+};
+
 class CDeviceWrapper {
 public:
   enum TAccelerationType
@@ -36,7 +63,7 @@ public:
   virtual bool AdapterNext() = 0;
   virtual unsigned int AdapterIdx() const = 0;
   virtual void *Device() const = 0;
-  virtual bool Status() const = 0;
+  virtual TDeviceStatus Status() const = 0;
   virtual void *D3D() const = 0;
 
 #if defined(_WIN32)
@@ -81,7 +108,7 @@ public:
   virtual bool AdapterNext();
   virtual unsigned int AdapterIdx() const;
   virtual void *Device() const;
-  virtual bool Status() const;
+  virtual TDeviceStatus Status() const;
   virtual void *D3D() const;
 
 private:
@@ -89,13 +116,13 @@ private:
   LPDIRECT3DDEVICE9 _d3dDevice;
   D3DDISPLAYMODE _d3ddm;
   D3DADAPTER_IDENTIFIER9 _adapter;
-  bool _status;
+  TDeviceStatus _status;
   unsigned int _adapterIdx;
   bool _adapterFound;
 
   D3DFORMAT Format();
   D3DADAPTER_IDENTIFIER9 Adapter();
-  bool Init();
+  int Init();
   void Destroy();
 };
 
@@ -107,7 +134,7 @@ public:
   virtual bool AdapterNext();
   virtual unsigned int AdapterIdx() const;
   virtual void *Device() const;
-  virtual bool Status() const;
+  virtual TDeviceStatus Status() const;
   virtual void *D3D() const;
 
 private:
@@ -115,13 +142,13 @@ private:
   LPDIRECT3DDEVICE9EX _d3dDeviceEx;
   D3DDISPLAYMODEEX _d3ddmEx;
   D3DADAPTER_IDENTIFIER9 _adapter;
-  bool _status;
+  TDeviceStatus _status;
   unsigned int _adapterIdx;
   bool _adapterFound;
 
   D3DFORMAT Format();
   D3DADAPTER_IDENTIFIER9 Adapter();
-  bool Init();
+  int Init();
   void Destroy();
 };
 
@@ -133,21 +160,21 @@ public:
   virtual bool AdapterNext();
   virtual unsigned int AdapterIdx() const;
   virtual void *Device() const;
-  virtual bool Status() const;
+  virtual TDeviceStatus Status() const;
   virtual void *D3D() const;
   const CD3D9ExWrapper &D3D9() const;
 
 private:
   CD3D9ExWrapper _d3d9;
   IDXVAHD_Device *_dxvaDevice;
-  bool _status;
+  TDeviceStatus _status;
   bool _adapterFound;
 
   static const D3DFORMAT RENDER_TARGET_FORMAT;
   static const D3DFORMAT VIDEO_FORMAT;
   static const unsigned int VIDEO_FPS;
 
-  bool DXVAHDInit();
+  TDeviceStatus DXVAHDInit();
   void DXVAHDDestroy();
 };