diff --git a/CMakeLists.txt b/CMakeLists.txt
index 40deed8c..898f62e9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.5.1)
+cmake_minimum_required(VERSION 3.12.0)
 
 set( CONFORMANCE_SUFFIX "" )
 set(CLConform_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
@@ -107,6 +107,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang"
     add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive
     add_cxx_flag_if_supported(-Wno-unknown-pragmas) # Issue #785
     add_cxx_flag_if_supported(-Wno-error=asm-operand-widths) # Issue #784
+    add_cxx_flag_if_supported(-Wno-strict-aliasing)  # Issue 2234
 
     # -msse -mfpmath=sse to force gcc to use sse for float math,
     # avoiding excess precision problems that cause tests like int2float
diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp
index b354baeb..3de7e948 100644
--- a/test_common/harness/imageHelpers.cpp
+++ b/test_common/harness/imageHelpers.cpp
@@ -2415,6 +2415,12 @@ int debug_find_vector_in_image(void *imagePtr, image_descriptor *imageInfo,
                 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
             depth = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
             break;
+        default:
+            log_error("ERROR: Invalid imageInfo->type = %d\n", imageInfo->type);
+            width = 0;
+            depth = 0;
+            height = 0;
+            break;
     }
 
     row_pitch = width * get_pixel_size(imageInfo->format);
@@ -3661,6 +3667,11 @@ void copy_image_data(image_descriptor *srcImageInfo,
                     ? (srcImageInfo->height >> src_lod)
                     : 1;
                 break;
+            default:
+                log_error("ERROR: Invalid srcImageInfo->type = %d\n",
+                          srcImageInfo->type);
+                src_lod = 0;
+                break;
         }
         src_mip_level_offset = compute_mip_level_offset(srcImageInfo, src_lod);
         src_row_pitch_lod =
@@ -3707,6 +3718,11 @@ void copy_image_data(image_descriptor *srcImageInfo,
                     ? (dstImageInfo->height >> dst_lod)
                     : 1;
                 break;
+            default:
+                log_error("ERROR: Invalid dstImageInfo->num_mip_levels = %d\n",
+                          dstImageInfo->num_mip_levels);
+                dst_lod = 0;
+                break;
         }
         dst_mip_level_offset = compute_mip_level_offset(dstImageInfo, dst_lod);
         dst_row_pitch_lod =
diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp
index c64c5901..b7087511 100644
--- a/test_common/harness/os_helpers.cpp
+++ b/test_common/harness/os_helpers.cpp
@@ -577,7 +577,7 @@ char* get_temp_filename()
     close(fd);
 #elif defined(_WIN32)
     UINT ret = GetTempFileName(".", "tmp", 0, gFileName);
-    if (ret == 0) return gFileName;
+    if (ret == 0) return strdup(gFileName);
 #else
     MTdata d = init_genrand((cl_uint)time(NULL));
     sprintf(gFileName, "tmpfile.%u", genrand_int32(d));
diff --git a/test_common/harness/typeWrappers.cpp b/test_common/harness/typeWrappers.cpp
index e6520b1c..ed7d3ce3 100644
--- a/test_common/harness/typeWrappers.cpp
+++ b/test_common/harness/typeWrappers.cpp
@@ -348,7 +348,7 @@ cl_int clProtectedImage::Create(cl_context context,
                                 const cl_image_format *fmt, size_t width,
                                 size_t height, size_t depth, size_t arraySize)
 {
-    cl_int error;
+    cl_int error = 0;
 #if defined(__APPLE__)
     int protect_pages = 1;
     cl_device_id devices[16];
diff --git a/test_conformance/SVM/test_enqueue_api.cpp b/test_conformance/SVM/test_enqueue_api.cpp
index 20fa4432..83e0b9af 100644
--- a/test_conformance/SVM/test_enqueue_api.cpp
+++ b/test_conformance/SVM/test_enqueue_api.cpp
@@ -160,8 +160,8 @@ REGISTER_TEST(svm_enqueue_api)
             error = clSetUserEventStatus(userEvent, CL_COMPLETE);
             test_error(error, "clSetUserEventStatus failed");
 
-            cl_uchar *src_ptr;
-            cl_uchar *dst_ptr;
+            cl_uchar *src_ptr = nullptr;
+            cl_uchar *dst_ptr = nullptr;
             if (test_case.srcAlloc == host)
             {
                 src_ptr = srcHostData.data();
diff --git a/test_conformance/allocations/allocation_execute.cpp b/test_conformance/allocations/allocation_execute.cpp
index f01dfd8f..3af7f85e 100644
--- a/test_conformance/allocations/allocation_execute.cpp
+++ b/test_conformance/allocations/allocation_execute.cpp
@@ -26,13 +26,18 @@ const char *buffer_kernel_pattern = {
     "\tint tid = get_global_id(0);\n"
     "\tuint r = 0;\n"
     "\t%s i;\n"
-    "\tfor(i=(%s)tid*(%s)per_item; i<(%s)(1+tid)*(%s)per_item; i++) {\n"
     "%s"
-    "\t}\n"
     "\tresult[tid] = r;\n"
     "}\n"
 };
 
+const char *accumulate_pattern = {
+    "\t%s end%d = min((%s)(1+tid)*(%s)per_item, array_sizes[%d]);\n"
+    "\tfor(i=(%s)tid*(%s)per_item; i<end%d; i++) {\n"
+    "\t\tr += buffer%d[i];\n"
+    "\t}\n"
+};
+
 const char *image_kernel_pattern = {
     "__kernel void sample_test(%s __global uint *result)\n"
     "{\n"
@@ -159,7 +164,7 @@ int check_image(cl_command_queue queue, cl_mem mem)
 int execute_kernel(cl_context context, cl_command_queue *queue,
                    cl_device_id device_id, int test, cl_mem mems[],
                    int number_of_mems_used, int verify_checksum,
-                   unsigned int number_of_work_itmes)
+                   unsigned int number_of_work_items)
 {
 
     char *argument_string;
@@ -174,24 +179,38 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
     cl_uint per_item;
     cl_uint per_item_uint;
     cl_uint final_result;
-    std::vector<cl_uint> returned_results(number_of_work_itmes);
+    std::vector<cl_uint> returned_results(number_of_work_items);
     clEventWrapper event;
     cl_int event_status;
 
     // Allocate memory for the kernel source
+    char *used_pattern = nullptr;
+    if (test == BUFFER || test == BUFFER_NON_BLOCKING)
+    {
+        used_pattern = (char *)accumulate_pattern;
+    }
+    else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING)
+    {
+        used_pattern = (char *)read_pattern;
+    }
+    else if (test == IMAGE_WRITE || test == IMAGE_WRITE_NON_BLOCKING)
+    {
+        used_pattern = (char *)write_pattern;
+    }
     argument_string =
         (char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE * 64);
     access_string = (char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE
-                                   * (strlen(read_pattern) + 10));
+                                   * (strlen(used_pattern) + 10));
     kernel_string = (char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE
-                                       * (strlen(read_pattern) + 10 + 64)
+                                       * (strlen(used_pattern) + 10 + 64)
                                    + 1024);
+
     argument_string[0] = '\0';
     access_string[0] = '\0';
     kernel_string[0] = '\0';
 
     // Zero the results.
-    for (i = 0; i < number_of_work_itmes; i++) returned_results[i] = 0;
+    for (i = 0; i < number_of_work_items; i++) returned_results[i] = 0;
 
     // detect if device supports ulong/int64
     // detect whether profile of the device is embedded
@@ -209,13 +228,6 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
     // Build the kernel source
     if (test == BUFFER || test == BUFFER_NON_BLOCKING)
     {
-        for (i = 0; i < number_of_mems_used; i++)
-        {
-            sprintf(argument_string + strlen(argument_string),
-                    " __global uint *buffer%d, ", i);
-            sprintf(access_string + strlen(access_string),
-                    "\t\tif (i<array_sizes[%d]) r += buffer%d[i];\n", i, i);
-        }
         char type[10];
         if (support64)
         {
@@ -225,8 +237,15 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
         {
             sprintf(type, "uint");
         }
+        for (i = 0; i < number_of_mems_used; i++)
+        {
+            sprintf(argument_string + strlen(argument_string),
+                    " __global uint *buffer%d, ", i);
+            sprintf(access_string + strlen(access_string), accumulate_pattern,
+                    type, i, type, type, i, type, type, i, i);
+        }
         sprintf(kernel_string, buffer_kernel_pattern, argument_string, type,
-                type, type, type, type, type, access_string);
+                type, access_string);
     }
     else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING)
     {
@@ -282,14 +301,14 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
     // Set the result
     result_mem =
         clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-                       sizeof(cl_uint) * number_of_work_itmes,
+                       sizeof(cl_uint) * number_of_work_items,
                        returned_results.data(), &error);
     test_error(error, "clCreateBuffer failed");
     error = clSetKernelArg(kernel, i, sizeof(result_mem), &result_mem);
     test_error(error, "clSetKernelArg failed");
 
     // Thread dimensions for execution
-    global_dims[0] = number_of_work_itmes;
+    global_dims[0] = number_of_work_items;
     global_dims[1] = 1;
     global_dims[2] = 1;
 
@@ -427,7 +446,7 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
     // Verify the checksum.
     // Read back the result
     error = clEnqueueReadBuffer(*queue, result_mem, CL_TRUE, 0,
-                                sizeof(cl_uint) * number_of_work_itmes,
+                                sizeof(cl_uint) * number_of_work_items,
                                 returned_results.data(), 0, NULL, NULL);
     test_error_abort(error, "clEnqueueReadBuffer failed");
     final_result = 0;
@@ -436,7 +455,7 @@ int execute_kernel(cl_context context, cl_command_queue *queue,
     {
         // For buffers or read images we are just looking at the sum of what
         // each thread summed up
-        for (i = 0; i < number_of_work_itmes; i++)
+        for (i = 0; i < number_of_work_items; i++)
         {
             final_result += returned_results[i];
         }
diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp
index 50533d62..e0310bb8 100644
--- a/test_conformance/allocations/main.cpp
+++ b/test_conformance/allocations/main.cpp
@@ -125,7 +125,7 @@ int doTest(cl_device_id device, cl_context context, cl_command_queue queue,
     int number_of_mems_used;
     cl_ulong max_individual_allocation_size = g_max_individual_allocation_size;
     cl_ulong global_mem_size = g_global_mem_size;
-    unsigned int number_of_work_itmes = 8192 * 32;
+    unsigned int number_of_work_items = 8192 * 32;
     const bool allocate_image =
         (alloc_type != BUFFER) && (alloc_type != BUFFER_NON_BLOCKING);
 
@@ -183,7 +183,7 @@ int doTest(cl_device_id device, cl_context context, cl_command_queue queue,
                  g_reduction_percentage);
         g_max_size = (size_t)((double)g_max_size
                               * (double)g_reduction_percentage / 100.0);
-        number_of_work_itmes = 8192 * 2;
+        number_of_work_items = 8192 * 2;
     }
 
     // Round to nearest MB.
@@ -220,7 +220,7 @@ int doTest(cl_device_id device, cl_context context, cl_command_queue queue,
                 error =
                     execute_kernel(context, &queue, device, alloc_type, mems,
                                    number_of_mems_used, g_write_allocations,
-                                   number_of_work_itmes);
+                                   number_of_work_items);
             }
 
             // If we failed to allocate more than 1/8th of the requested amount
diff --git a/test_conformance/api/test_wg_suggested_local_work_size.cpp b/test_conformance/api/test_wg_suggested_local_work_size.cpp
index f8464638..9b4a0b5e 100644
--- a/test_conformance/api/test_wg_suggested_local_work_size.cpp
+++ b/test_conformance/api/test_wg_suggested_local_work_size.cpp
@@ -208,7 +208,7 @@ int do_test_work_group_suggested_local_size(
     bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr,
     cl_ulong max_local_mem_size, size_t global_work_offset[], num_dims dim)
 {
-    int err;
+    int err = 0;
     size_t test_values[] = { 1, 1, 1 };
     std::string kernel_names[6] = {
         "test_wg_scan_local_work_group_size",
diff --git a/test_conformance/basic/test_imagereadwrite.cpp b/test_conformance/basic/test_imagereadwrite.cpp
index c074238d..fee73ea3 100644
--- a/test_conformance/basic/test_imagereadwrite.cpp
+++ b/test_conformance/basic/test_imagereadwrite.cpp
@@ -314,6 +314,12 @@ test_imagereadwrite(cl_device_id device, cl_context context, cl_command_queue qu
                 }
                 outp = (void *)rgbafp_outptr;
                 break;
+            default:
+                log_error("ERROR Invalid j = %d\n", j);
+                elem_size = 0;
+                p = nullptr;
+                outp = nullptr;
+                break;
         }
 
         const char* update_packed_pitch_name = "";
diff --git a/test_conformance/basic/test_imagereadwrite3d.cpp b/test_conformance/basic/test_imagereadwrite3d.cpp
index 66f64e09..d52f16d0 100644
--- a/test_conformance/basic/test_imagereadwrite3d.cpp
+++ b/test_conformance/basic/test_imagereadwrite3d.cpp
@@ -320,6 +320,12 @@ test_imagereadwrite3d(cl_device_id device, cl_context context, cl_command_queue
                 }
                 outp = (void *)rgbafp_outptr;
                 break;
+            default:
+                log_error("ERROR Invalid j = %d\n", j);
+                elem_size = 0;
+                p = nullptr;
+                outp = nullptr;
+                break;
         }
 
         const char* update_packed_pitch_name = "";
diff --git a/test_conformance/buffers/test_buffer_write.cpp b/test_conformance/buffers/test_buffer_write.cpp
index e57e1c18..bf41e48c 100644
--- a/test_conformance/buffers/test_buffer_write.cpp
+++ b/test_conformance/buffers/test_buffer_write.cpp
@@ -852,8 +852,8 @@ int test_buffer_write_struct( cl_device_id deviceID, cl_context context, cl_comm
                     buffers[0] =
                         clCreateBuffer(context, flag_set[src_flag_id],
                                        ptrSizes[i] * num_elements, NULL, &err);
-                if ( err ){
-                    align_free( outptr[i] );
+                if (err)
+                {
                     print_error(err, " clCreateBuffer failed\n" );
                     free_mtdata(d);
                     return -1;
diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp
index 880a23ab..abe95af5 100644
--- a/test_conformance/contractions/contractions.cpp
+++ b/test_conformance/contractions/contractions.cpp
@@ -365,16 +365,18 @@ static int ParseArgs( int argc, const char **argv )
     int length_of_seed = 0;
 
     { // Extract the app name
-        strncpy( appName, argv[0], MAXPATHLEN );
+        strncpy(appName, argv[0], MAXPATHLEN - 1);
+        appName[MAXPATHLEN - 1] = '\0';
 
 #if (defined( __APPLE__ ) || defined(__linux__) || defined(__MINGW32__))
         char baseName[MAXPATHLEN];
         char *base = NULL;
-        strncpy( baseName, argv[0], MAXPATHLEN );
+        strncpy(baseName, argv[0], MAXPATHLEN - 1);
+        baseName[MAXPATHLEN - 1] = '\0';
         base = basename( baseName );
         if( NULL != base )
         {
-            strncpy( appName, base, sizeof( appName )  );
+            strncpy(appName, base, sizeof(appName) - 1);
             appName[ sizeof( appName ) -1 ] = '\0';
         }
 #elif defined (_WIN32)
@@ -385,7 +387,7 @@ static int ParseArgs( int argc, const char **argv )
                                    fname, _MAX_FNAME, ext, _MAX_EXT );
         if (err == 0) { // no error
             strcat (fname, ext); //just cat them, size of frame can keep both
-            strncpy (appName, fname, sizeof(appName));
+            strncpy(appName, fname, sizeof(appName) - 1);
             appName[ sizeof( appName ) -1 ] = '\0';
         }
 #endif
diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp
index 3880c820..edad671b 100644
--- a/test_conformance/conversions/basic_test_conversions.cpp
+++ b/test_conformance/conversions/basic_test_conversions.cpp
@@ -1448,7 +1448,9 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
         char inName[32];
         char outName[32];
         strncpy(inName, gTypeNames[inType], sizeof(inName));
+        inName[sizeof(inName) - 1] = '\0';
         strncpy(outName, gTypeNames[outType], sizeof(outName));
+        outName[sizeof(outName) - 1] = '\0';
         sprintf(testName, "test_implicit_%s_%s", outName, inName);
 
         source << "__kernel void " << testName << "( __global " << inName
@@ -1473,8 +1475,10 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
         switch (vectorSizetmp)
         {
             case 1:
-                strncpy(inName, gTypeNames[inType], sizeof(inName));
-                strncpy(outName, gTypeNames[outType], sizeof(outName));
+                strncpy(inName, gTypeNames[inType], sizeof(inName) - 1);
+                inName[sizeof(inName) - 1] = '\0';
+                strncpy(outName, gTypeNames[outType], sizeof(outName) - 1);
+                outName[sizeof(outName) - 1] = '\0';
                 snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
                          outName, gSaturationNames[sat],
                          gRoundingModeNames[round]);
@@ -1482,8 +1486,10 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
                 vlog("Building %s( %s ) test\n", convertString, inName);
                 break;
             case 3:
-                strncpy(inName, gTypeNames[inType], sizeof(inName));
-                strncpy(outName, gTypeNames[outType], sizeof(outName));
+                strncpy(inName, gTypeNames[inType], sizeof(inName) - 1);
+                inName[sizeof(inName) - 1] = '\0';
+                strncpy(outName, gTypeNames[outType], sizeof(outName) - 1);
+                outName[sizeof(outName) - 1] = '\0';
                 snprintf(convertString, sizeof(convertString),
                          "convert_%s3%s%s", outName, gSaturationNames[sat],
                          gRoundingModeNames[round]);
diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp
index 8225769e..c63572c3 100644
--- a/test_conformance/conversions/test_conversions.cpp
+++ b/test_conformance/conversions/test_conversions.cpp
@@ -182,11 +182,12 @@ static int ParseArgs(int argc, const char **argv)
 #if (defined(__APPLE__) || defined(__linux__) || defined(__MINGW32__))
     { // Extract the app name
         char baseName[MAXPATHLEN];
-        strncpy(baseName, argv[0], MAXPATHLEN);
+        strncpy(baseName, argv[0], MAXPATHLEN - 1);
+        baseName[sizeof(baseName) - 1] = '\0';
         char *base = basename(baseName);
         if (NULL != base)
         {
-            strncpy(appName, base, sizeof(appName));
+            strncpy(appName, base, sizeof(appName) - 1);
             appName[sizeof(appName) - 1] = '\0';
         }
     }
@@ -200,7 +201,7 @@ static int ParseArgs(int argc, const char **argv)
         if (err == 0)
         { // no error
             strcat(fname, ext); // just cat them, size of frame can keep both
-            strncpy(appName, fname, sizeof(appName));
+            strncpy(appName, fname, sizeof(appName) - 1);
             appName[sizeof(appName) - 1] = '\0';
         }
     }
diff --git a/test_conformance/events/test_event_dependencies.cpp b/test_conformance/events/test_event_dependencies.cpp
index b40a69dd..72e0f8e4 100644
--- a/test_conformance/events/test_event_dependencies.cpp
+++ b/test_conformance/events/test_event_dependencies.cpp
@@ -89,7 +89,7 @@ int test_event_enqueue_wait_for_events_run_test(
 
     // If we are to use two devices, then get them and create a context with
     // both.
-    cl_device_id *two_device_ids;
+    cl_device_id *two_device_ids = nullptr;
     if (two_devices)
     {
         two_device_ids = (cl_device_id *)malloc(sizeof(cl_device_id) * 2);
diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
index 803daf6b..c17b65d6 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
@@ -61,10 +61,9 @@ bool BasicCommandBufferTest::Skip()
                "CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR");
 
     cl_command_queue_properties queue_properties;
-    error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
-                                  sizeof(queue_properties), &queue_properties,
-                                  NULL);
-    test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
+    error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
+                            sizeof(queue_properties), &queue_properties, NULL);
+    test_error(error, "Unable to query CL_DEVICE_QUEUE_PROPERTIES");
     queue_out_of_order_support =
         queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
 
@@ -198,8 +197,6 @@ struct MultiFlagCreationTest : public BasicCommandBufferTest
     cl_int Run() override
     {
         cl_command_buffer_properties_khr flags = 0;
-        size_t num_flags_set = 0;
-        bool multi_flags_supported = true;
         cl_int error = CL_SUCCESS;
 
         // First try to find multiple flags that are supported by the driver and
@@ -207,30 +204,18 @@ struct MultiFlagCreationTest : public BasicCommandBufferTest
         if (simultaneous_use_support)
         {
             flags |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
-            num_flags_set++;
         }
 
-        if (device_side_enqueue_support)
+        if (is_extension_available(
+                device, CL_KHR_COMMAND_BUFFER_MULTI_DEVICE_EXTENSION_NAME))
         {
             flags |= CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
-            num_flags_set++;
         }
 
         if (is_extension_available(
                 device, CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME))
         {
             flags |= CL_COMMAND_BUFFER_MUTABLE_KHR;
-            num_flags_set++;
-        }
-
-        // If we can't find multiple supported flags, still set a bitfield but
-        // expect CL_INVALID_PROPERTY to be returned on creation.
-        if (num_flags_set < 2)
-        {
-            flags = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR
-                | CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
-
-            multi_flags_supported = false;
         }
 
         cl_command_buffer_properties_khr props[] = {
@@ -238,17 +223,7 @@ struct MultiFlagCreationTest : public BasicCommandBufferTest
         };
 
         command_buffer = clCreateCommandBufferKHR(1, &queue, props, &error);
-        if (multi_flags_supported)
-        {
-            test_error(error, "clCreateCommandBufferKHR failed");
-        }
-        else
-        {
-            test_failure_error_ret(
-                error, CL_INVALID_PROPERTY,
-                "clCreateCommandBufferKHR should return CL_INVALID_PROPERTY",
-                TEST_FAIL);
-        }
+        test_error(error, "clCreateCommandBufferKHR failed");
 
         return CL_SUCCESS;
     }
diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
index aa902013..f663e0a9 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
@@ -104,9 +104,9 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
         cl_version extension_version =
             get_extension_version(device, "cl_khr_command_buffer");
 
-        if (extension_version != CL_MAKE_VERSION(0, 9, 6))
+        if (extension_version != CL_MAKE_VERSION(0, 9, 7))
         {
-            log_info("cl_khr_command_buffer version 0.9.6 is required to run "
+            log_info("cl_khr_command_buffer version 0.9.7 is required to run "
                      "the test, skipping.\n ");
             return TEST_SKIPPED_ITSELF;
         }
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp
index e715ddc9..199d2261 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp
@@ -21,11 +21,75 @@
 
 namespace {
 
+#define ADD_PROF_PARAM(prop)                                                   \
+    {                                                                          \
+        prop, #prop, 0                                                         \
+    }
+
+struct ProfilingParam
+{
+    cl_profiling_info param;
+    std::string name;
+    cl_ulong value;
+};
+
+cl_int VerifyResult(const clEventWrapper& event)
+{
+    cl_int error = CL_SUCCESS;
+    cl_int status;
+    error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+                           sizeof(status), &status, NULL);
+    test_error(error, "clGetEventInfo() failed");
+
+    if (status != CL_SUCCESS)
+        test_fail("Kernel execution status %d! (%s:%d)\n", status, __FILE__,
+                  __LINE__);
+
+    std::vector<ProfilingParam> prof_params = {
+        ADD_PROF_PARAM(CL_PROFILING_COMMAND_QUEUED),
+        ADD_PROF_PARAM(CL_PROFILING_COMMAND_SUBMIT),
+        ADD_PROF_PARAM(CL_PROFILING_COMMAND_START),
+        ADD_PROF_PARAM(CL_PROFILING_COMMAND_END),
+    };
+
+    // gather profiling timestamps
+    for (auto&& p : prof_params)
+    {
+        error = clGetEventProfilingInfo(event, p.param, sizeof(p.value),
+                                        &p.value, NULL);
+        test_error(error, "clGetEventProfilingInfo() failed");
+    }
+
+    // verify the results by comparing timestamps
+    bool all_vals_0 = prof_params.front().value != 0;
+    for (size_t i = 1; i < prof_params.size(); i++)
+    {
+        all_vals_0 = (prof_params[i].value != 0) ? false : all_vals_0;
+        if (prof_params[i - 1].value > prof_params[i].value)
+        {
+            log_error("Profiling %s=0x%x should be smaller than or equal "
+                      "to %s=0x%x for "
+                      "kernels that use the on-device queue",
+                      prof_params[i - 1].name.c_str(), prof_params[i - 1].param,
+                      prof_params[i].name.c_str(), prof_params[i].param);
+            return TEST_FAIL;
+        }
+    }
+
+    if (all_vals_0)
+    {
+        log_error("All values are 0. This is exceedingly unlikely.\n");
+        return TEST_FAIL;
+    }
+
+    log_info("Profiling info for command-buffer kernel succeeded.\n");
+    return TEST_PASS;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Command-buffer profiling test cases:
 // -all commands are recorded to a single command-queue
 // -profiling a command-buffer with simultaneous use
-
 template <bool simultaneous_request>
 struct CommandBufferProfiling : public BasicCommandBufferTest
 {
@@ -133,73 +197,6 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
         return CL_SUCCESS;
     }
 
-    //--------------------------------------------------------------------------
-#define ADD_PROF_PARAM(prop)                                                   \
-    {                                                                          \
-        prop, #prop, 0                                                         \
-    }
-    struct ProfilingParam
-    {
-        cl_profiling_info param;
-        std::string name;
-        cl_ulong value;
-    };
-
-    //--------------------------------------------------------------------------
-    cl_int VerifyResult(const clEventWrapper& event)
-    {
-        cl_int error = CL_SUCCESS;
-        cl_int status;
-        error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
-                               sizeof(status), &status, NULL);
-        test_error(error, "clGetEventInfo() failed");
-
-        if (status != CL_SUCCESS)
-            test_fail("Kernel execution status %d! (%s:%d)\n", status, __FILE__,
-                      __LINE__);
-
-        std::vector<ProfilingParam> prof_params = {
-            ADD_PROF_PARAM(CL_PROFILING_COMMAND_QUEUED),
-            ADD_PROF_PARAM(CL_PROFILING_COMMAND_SUBMIT),
-            ADD_PROF_PARAM(CL_PROFILING_COMMAND_START),
-            ADD_PROF_PARAM(CL_PROFILING_COMMAND_END),
-        };
-
-        // gather profiling timestamps
-        for (auto&& p : prof_params)
-        {
-            error = clGetEventProfilingInfo(event, p.param, sizeof(p.value),
-                                            &p.value, NULL);
-            test_error(error, "clGetEventProfilingInfo() failed");
-        }
-
-        // verify the results by comparing timestamps
-        bool all_vals_0 = prof_params.front().value != 0;
-        for (size_t i = 1; i < prof_params.size(); i++)
-        {
-            all_vals_0 = (prof_params[i].value != 0) ? false : all_vals_0;
-            if (prof_params[i - 1].value > prof_params[i].value)
-            {
-                log_error("Profiling %s=0x%x should be smaller than or equal "
-                          "to %s=0x%x for "
-                          "kernels that use the on-device queue",
-                          prof_params[i - 1].name.c_str(),
-                          prof_params[i - 1].param, prof_params[i].name.c_str(),
-                          prof_params[i].param);
-                return TEST_FAIL;
-            }
-        }
-
-        if (all_vals_0)
-        {
-            log_error("All values are 0. This is exceedingly unlikely.\n");
-            return TEST_FAIL;
-        }
-
-        log_info("Profiling info for command-buffer kernel succeeded.\n");
-        return TEST_PASS;
-    }
-
     //--------------------------------------------------------------------------
     cl_int RunSingle()
     {
@@ -301,6 +298,63 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
     const cl_int pattern = 0xA;
 };
 
+// Test that we can create a command-buffer using a queue without the profiling
+// property, which is enqueued to an queue with the profiling property, and
+// the event returned can queried for profiling info.
+struct CommandBufferSubstituteQueueProfiling : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        clEventWrapper event;
+        error = clEnqueueCommandBufferKHR(1, &profiling_queue, command_buffer,
+                                          0, nullptr, &event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clFinish(profiling_queue);
+        test_error(error, "clFinish failed");
+
+        error = VerifyResult(event);
+        test_error(error, "VerifyResult failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_command_queue_properties supported_properties;
+        cl_int error = clGetDeviceInfo(
+            device, CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR,
+            sizeof(supported_properties), &supported_properties, NULL);
+        test_error(error,
+                   "Unable to query "
+                   "CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR");
+
+        // CL_QUEUE_PROFILING_ENABLE is mandated minimum property returned by
+        // CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR
+        if (!(supported_properties & CL_QUEUE_PROFILING_ENABLE))
+        {
+            return TEST_FAIL;
+        }
+
+        profiling_queue = clCreateCommandQueue(
+            context, device, CL_QUEUE_PROFILING_ENABLE, &error);
+        test_error(error, "clCreateCommandQueue failed");
+
+        return BasicCommandBufferTest::SetUp(elements);
+    }
+
+    clCommandQueueWrapper profiling_queue = nullptr;
+};
 } // anonymous namespace
 
 int test_basic_profiling(cl_device_id device, cl_context context,
@@ -316,3 +370,10 @@ int test_simultaneous_profiling(cl_device_id device, cl_context context,
     return MakeAndRunTest<CommandBufferProfiling<true>>(device, context, queue,
                                                         num_elements);
 }
+
+int test_substitute_queue_profiling(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CommandBufferSubstituteQueueProfiling>(
+        device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_queue_substitution.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_queue_substitution.cpp
index 7aa262aa..1f74485b 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_queue_substitution.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_queue_substitution.cpp
@@ -252,6 +252,148 @@ struct SubstituteQueueTest : public BasicCommandBufferTest
     clEventWrapper user_event;
 };
 
+// Command-queue substitution tests which handles below cases:
+// * Template param is true - Create a command-buffer with an in-order queue,
+//   and enqueue command-buffer to an out-of-order queue.
+// * Template param is false - Create a command-buffer with an out-of-order
+//   queue, and enqueue command-buffer to an in-order queue.
+template <bool is_ooo_test>
+struct QueueOrderTest : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    QueueOrderTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue), ooo_queue(nullptr),
+          ooo_command_buffer(this)
+    {}
+
+    cl_int RecordOutOfOrderCommandBuffer()
+    {
+        cl_sync_point_khr sync_points[2];
+        const cl_int pattern = pattern_pri;
+        cl_int error =
+            clCommandFillBufferKHR(ooo_command_buffer, nullptr, nullptr, in_mem,
+                                   &pattern, sizeof(cl_int), 0, data_size(), 0,
+                                   nullptr, &sync_points[0], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandFillBufferKHR(ooo_command_buffer, nullptr, nullptr,
+                                       out_mem, &overwritten_pattern,
+                                       sizeof(cl_int), 0, data_size(), 0,
+                                       nullptr, &sync_points[1], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandNDRangeKernelKHR(
+            ooo_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &num_elements, nullptr, 2, sync_points, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_int RecordInOrderCommandBuffer()
+    {
+        const cl_int pattern = pattern_pri;
+        cl_int error = clCommandFillBufferKHR(
+            command_buffer, nullptr, nullptr, in_mem, &pattern, sizeof(cl_int),
+            0, data_size(), 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandFillBufferKHR(
+            command_buffer, nullptr, nullptr, out_mem, &overwritten_pattern,
+            sizeof(cl_int), 0, data_size(), 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_int Run() override
+    {
+        cl_int error = CL_SUCCESS;
+        if (is_ooo_test)
+        {
+            // command-buffer created in-order, but executed on ooo queue
+            error = RecordInOrderCommandBuffer();
+            test_error(error, "RecordInOrderCommandBuffer failed");
+        }
+        else
+        {
+            // command-buffer created ooo with sync point deps, but
+            // executed on in-order queue
+            error = RecordOutOfOrderCommandBuffer();
+            test_error(error, "RecordOutOfOrderCommandBuffer failed");
+        }
+
+        clCommandBufferWrapper& test_command_buffer =
+            is_ooo_test ? command_buffer : ooo_command_buffer;
+        error = clFinalizeCommandBufferKHR(test_command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        clCommandQueueWrapper& test_queue = is_ooo_test ? ooo_queue : queue;
+        error = clEnqueueCommandBufferKHR(1, &test_queue, test_command_buffer,
+                                          0, nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clFinish(test_queue);
+        test_error(error, "clFinish failed");
+
+        // Verify output
+        std::vector<cl_int> output_buffer(num_elements);
+        error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
+                                    output_buffer.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_buffer[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        ooo_queue = clCreateCommandQueue(
+            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
+        test_error(error,
+                   "clCreateCommandQueue with "
+                   "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE failed");
+
+        ooo_command_buffer =
+            clCreateCommandBufferKHR(1, &ooo_queue, nullptr, &error);
+        test_error(error, "clCreateCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        if (BasicCommandBufferTest::Skip()) return true;
+
+        // Skip if we want to enqueue to an out-of-order command-queue,
+        // and this isn't supported.
+        bool skip = is_ooo_test ? !out_of_order_support : false;
+
+        // Skip if device doesn't support out-of-order queues, we need
+        // to create one for both instantiations of the test.
+        return skip || !queue_out_of_order_support;
+    }
+
+    clCommandQueueWrapper ooo_queue;
+    clCommandBufferWrapper ooo_command_buffer;
+
+    const cl_int overwritten_pattern = 0xACDC;
+    const cl_int pattern_pri = 42;
+};
 } // anonymous namespace
 
 int test_queue_substitution(cl_device_id device, cl_context context,
@@ -276,3 +418,17 @@ int test_simultaneous_queue_substitution(cl_device_id device,
     return MakeAndRunTest<SubstituteQueueTest<false, true>>(
         device, context, queue, num_elements);
 }
+
+int test_queue_substitute_in_order(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<QueueOrderTest<false>>(device, context, queue,
+                                                 num_elements);
+}
+
+int test_queue_substitute_out_of_order(cl_device_id device, cl_context context,
+                                       cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<QueueOrderTest<true>>(device, context, queue,
+                                                num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
index 93e1448e..941a9bb0 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/main.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
@@ -30,6 +30,7 @@ test_definition test_list[] = {
     ADD_TEST(info_context),
     ADD_TEST(basic_profiling),
     ADD_TEST(simultaneous_profiling),
+    ADD_TEST(substitute_queue_profiling),
     ADD_TEST(regular_wait_for_command_buffer),
     ADD_TEST(command_buffer_wait_for_command_buffer),
     ADD_TEST(command_buffer_wait_for_sec_command_buffer),
@@ -44,6 +45,8 @@ test_definition test_list[] = {
     ADD_TEST(queue_substitution),
     ADD_TEST(properties_queue_substitution),
     ADD_TEST(simultaneous_queue_substitution),
+    ADD_TEST(queue_substitute_in_order),
+    ADD_TEST(queue_substitute_out_of_order),
     ADD_TEST(fill_image),
     ADD_TEST(fill_buffer),
     ADD_TEST(fill_svm_buffer),
@@ -93,9 +96,6 @@ test_definition test_list[] = {
     ADD_TEST(negative_create_command_buffer_null_queues),
     ADD_TEST(negative_create_command_buffer_repeated_properties),
     ADD_TEST(negative_create_command_buffer_not_supported_properties),
-    ADD_TEST(negative_create_command_buffer_queue_without_min_properties),
-    ADD_TEST(
-        negative_create_command_buffer_device_does_not_support_out_of_order_queue),
     ADD_TEST(negative_command_ndrange_queue_not_null),
     ADD_TEST(negative_command_ndrange_kernel_with_different_context),
     ADD_TEST(negative_command_ndrange_kernel_sync_points_null_or_num_zero),
@@ -155,10 +155,12 @@ test_definition test_list[] = {
     ADD_TEST(
         negative_enqueue_command_buffer_num_queues_not_zero_different_while_buffer_creation),
     ADD_TEST(negative_enqueue_command_buffer_not_valid_queue_in_queues),
-    ADD_TEST(negative_enqueue_queue_not_compatible),
     ADD_TEST(negative_enqueue_queue_with_different_context),
     ADD_TEST(negative_enqueue_command_buffer_different_context_than_event),
     ADD_TEST(negative_enqueue_event_wait_list_null_or_events_null),
+    ADD_TEST(negative_enqueue_queue_without_reqd_properties),
+    ADD_TEST(negative_enqueue_with_unsupported_queue_property),
+    ADD_TEST(negative_enqueue_inconsistent_device),
 };
 
 int main(int argc, const char *argv[])
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp
index 72e50e66..4b4727c7 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_create.cpp
@@ -137,7 +137,8 @@ struct CreateCommandBufferRepeatedProperties : public BasicCommandBufferTest
             rep_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
             skip = false;
         }
-        else if (device_side_enqueue_support)
+        else if (is_extension_available(
+                     device, CL_KHR_COMMAND_BUFFER_MULTI_DEVICE_EXTENSION_NAME))
         {
             rep_prop = CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
             skip = false;
@@ -190,116 +191,12 @@ struct CreateCommandBufferNotSupportedProperties : public BasicCommandBufferTest
             unsupported_prop = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
             skip = false;
         }
-        else if (!device_side_enqueue_support)
-        {
-            unsupported_prop = CL_COMMAND_BUFFER_DEVICE_SIDE_SYNC_KHR;
-            skip = false;
-        }
 
         return skip;
     }
 
     cl_command_buffer_properties_khr unsupported_prop = 0;
 };
-
-// CL_INCOMPATIBLE_COMMAND_QUEUE_KHR if the properties of any command-queue in
-// queues does not contain the minimum properties specified by
-// CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR.
-struct CreateCommandBufferQueueWithoutMinProperties
-    : public BasicCommandBufferTest
-{
-    using BasicCommandBufferTest::BasicCommandBufferTest;
-
-    cl_int Run() override
-    {
-        cl_int error = CL_SUCCESS;
-
-        command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
-        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
-                               "clCreateCommandBufferKHR should return "
-                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
-                               TEST_FAIL);
-
-        return CL_SUCCESS;
-    }
-
-    bool Skip() override
-    {
-        if (BasicCommandBufferTest::Skip()) return true;
-
-        cl_command_queue_properties required_properties;
-        cl_int error = clGetDeviceInfo(
-            device, CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR,
-            sizeof(required_properties), &required_properties, NULL);
-        test_error(error,
-                   "Unable to query "
-                   "CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR");
-
-        cl_command_queue_properties queue_properties;
-        error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
-                                      sizeof(queue_properties),
-                                      &queue_properties, NULL);
-        test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
-
-        // Skip if queue properties contains those required
-        return required_properties == (required_properties & queue_properties);
-    }
-};
-
-// CL_INCOMPATIBLE_COMMAND_QUEUE_KHR if any command-queue in queues is an
-// out-of-order command-queue and the device associated with the command-queue
-// does not return CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE from
-// CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR
-struct CreateCommandBufferDeviceDoesNotSupportOutOfOderQueue
-    : public BasicCommandBufferTest
-{
-    CreateCommandBufferDeviceDoesNotSupportOutOfOderQueue(
-        cl_device_id device, cl_context context, cl_command_queue queue)
-        : BasicCommandBufferTest(device, context, queue),
-          out_of_order_queue(nullptr)
-    {}
-
-    cl_int Run() override
-    {
-        cl_int error = CL_SUCCESS;
-
-        command_buffer =
-            clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
-        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
-                               "clCreateCommandBufferKHR should return "
-                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
-                               TEST_FAIL);
-
-        return CL_SUCCESS;
-    }
-
-    cl_int SetUp(int elements) override
-    {
-        cl_int error = CL_SUCCESS;
-
-        error = BasicCommandBufferTest::SetUp(elements);
-        test_error(error, "BasicCommandBufferTest::SetUp failed");
-
-        out_of_order_queue = clCreateCommandQueue(
-            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
-        test_error(error,
-                   "clCreateCommandQueue with "
-                   "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE failed");
-
-        return CL_SUCCESS;
-    }
-
-    bool Skip() override
-    {
-        if (BasicCommandBufferTest::Skip()) return true;
-
-        // If device does not support out of order queue or if device supports
-        // out of order command buffer test should be skipped
-        return !queue_out_of_order_support || out_of_order_support;
-    }
-
-    clCommandQueueWrapper out_of_order_queue;
-};
 };
 
 int test_negative_create_command_buffer_num_queues(cl_device_id device,
@@ -335,20 +232,3 @@ int test_negative_create_command_buffer_not_supported_properties(
     return MakeAndRunTest<CreateCommandBufferNotSupportedProperties>(
         device, context, queue, num_elements);
 }
-
-int test_negative_create_command_buffer_queue_without_min_properties(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    int num_elements)
-{
-    return MakeAndRunTest<CreateCommandBufferQueueWithoutMinProperties>(
-        device, context, queue, num_elements);
-}
-
-int test_negative_create_command_buffer_device_does_not_support_out_of_order_queue(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    int num_elements)
-{
-    return MakeAndRunTest<
-        CreateCommandBufferDeviceDoesNotSupportOutOfOderQueue>(
-        device, context, queue, num_elements);
-}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp
index f13836fb..cdc9398a 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_enqueue.cpp
@@ -16,7 +16,6 @@
 #include "basic_command_buffer.h"
 #include "procs.h"
 
-
 //--------------------------------------------------------------------------
 namespace {
 
@@ -293,63 +292,6 @@ struct EnqueueCommandBufferNotValidQueueInQueues : public BasicCommandBufferTest
     }
 };
 
-// CL_INCOMPATIBLE_COMMAND_QUEUE_KHR if any element of queues is not compatible
-// with the command-queue set on command_buffer creation at the same list index.
-struct EnqueueCommandBufferQueueNotCompatible : public BasicCommandBufferTest
-{
-    EnqueueCommandBufferQueueNotCompatible(cl_device_id device,
-                                           cl_context context,
-                                           cl_command_queue queue)
-        : BasicCommandBufferTest(device, context, queue),
-          queue_not_compatible(nullptr)
-    {}
-
-    cl_int Run() override
-    {
-        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
-        test_error(error, "clFinalizeCommandBufferKHR failed");
-
-        error = clEnqueueCommandBufferKHR(1, &queue_not_compatible,
-                                          command_buffer, 0, nullptr, nullptr);
-
-        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
-                               "clEnqueueCommandBufferKHR should return "
-                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
-                               TEST_FAIL);
-
-        return CL_SUCCESS;
-    }
-
-    cl_int SetUp(int elements) override
-    {
-        cl_int error = BasicCommandBufferTest::SetUp(elements);
-        test_error(error, "BasicCommandBufferTest::SetUp failed");
-
-        queue_not_compatible = clCreateCommandQueue(
-            context, device, CL_QUEUE_PROFILING_ENABLE, &error);
-        test_error(error, "clCreateCommandQueue failed");
-
-        cl_command_queue_properties queue_properties;
-        error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
-                                      sizeof(queue_properties),
-                                      &queue_properties, NULL);
-        test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
-
-        cl_command_queue_properties queue_not_compatible_properties;
-        error = clGetCommandQueueInfo(queue_not_compatible, CL_QUEUE_PROPERTIES,
-                                      sizeof(queue_not_compatible_properties),
-                                      &queue_not_compatible_properties, NULL);
-        test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
-
-        test_assert_error(queue_properties != queue_not_compatible_properties,
-                          "Queues properties must be different");
-
-        return CL_SUCCESS;
-    }
-
-    clCommandQueueWrapper queue_not_compatible;
-};
-
 // CL_INVALID_CONTEXT if any element of queues does not have the same context as
 // the command-queue set on command_buffer creation at the same list index.
 struct EnqueueCommandBufferQueueWithDifferentContext
@@ -491,6 +433,185 @@ struct EnqueueCommandBufferEventWaitListNullOrEventsNull
         return CL_SUCCESS;
     }
 };
+
+// CL_INCOMPATIBLE_COMMAND_QUEUE_KHR if the properties of any command-queue in
+// queues does not contain the minimum properties specified by
+// CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR.
+struct EnqueueCommandBufferQueueWithoutReqdProperties
+    : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
+                               "clEnqueueCommandBufferKHR should return "
+                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
+                               TEST_FAIL);
+
+        error = clEnqueueCommandBufferKHR(1, &queue, command_buffer, 0, nullptr,
+                                          nullptr);
+        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
+                               "clEnqueueCommandBufferKHR should return "
+                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        // Omit BasicCommandBufferTest::Skip() here because it skips
+        // if we don't have required properties, which is what we want to
+        // test an error for.
+
+        cl_command_queue_properties required_properties;
+        cl_int error = clGetDeviceInfo(
+            device, CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR,
+            sizeof(required_properties), &required_properties, NULL);
+        test_error(error,
+                   "Unable to query "
+                   "CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR");
+
+        cl_command_queue_properties queue_properties;
+        error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
+                                      sizeof(queue_properties),
+                                      &queue_properties, NULL);
+        test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
+
+        // Skip if queue properties contains those required
+        return required_properties == (required_properties & queue_properties);
+    }
+};
+
+// CL_INCOMPATIBLE_COMMAND_QUEUE_KHR if any command-queue in queues is an
+// out-of-order command-queue and the device associated with the command-queue
+// does not return CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE from
+// CL_DEVICE_COMMAND_BUFFER_SUPPORTED_QUEUE_PROPERTIES_KHR
+struct EnqueueCommandBufferWithUnsupportedQueueProperty
+    : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(1, &out_of_order_queue,
+                                          command_buffer, 0, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
+                               "clEnqueueCommandBufferKHR should return "
+                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
+                               TEST_FAIL);
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        out_of_order_queue = clCreateCommandQueue(
+            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
+        test_error(error,
+                   "clCreateCommandQueue with "
+                   "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE failed");
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        if (BasicCommandBufferTest::Skip()) return true;
+
+        // If device does not support out of order queue or if device supports
+        // out of order command buffer test should be skipped
+        return !queue_out_of_order_support || out_of_order_support;
+    }
+
+    clCommandQueueWrapper out_of_order_queue = nullptr;
+};
+
+// CL_INVALID_DEVICE if any element of queues does not have the same device
+// as the command-queue set on command_buffer creation at the
+// same list index.
+struct EnqueueCommandBufferInconsistentDevice : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(1, &second_device_queue,
+                                          command_buffer, 0, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INCOMPATIBLE_COMMAND_QUEUE_KHR,
+                               "clEnqueueCommandBufferKHR should return "
+                               "CL_INCOMPATIBLE_COMMAND_QUEUE_KHR",
+                               TEST_FAIL);
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        cl_device_id second_device = nullptr;
+        for (auto query_device : devices)
+        {
+            if (query_device != device)
+            {
+                second_device = query_device;
+                break;
+            }
+        }
+
+        test_assert_error(second_device != nullptr,
+                          "Second device not found for testing");
+
+        second_device_queue =
+            clCreateCommandQueue(context, second_device, 0, &error);
+        test_error(error, "clCreateCommandQueue failed");
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        if (BasicCommandBufferTest::Skip()) return true;
+
+        size_t context_devices_size;
+        cl_int error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
+                                        &context_devices_size);
+        test_error(error, "clGetContextInfo failed");
+
+        size_t num_devices = context_devices_size / sizeof(cl_device_id);
+
+        if (num_devices < 2)
+        {
+            // We need a second device for test
+            return true;
+        }
+
+        devices.resize(num_devices);
+        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, num_devices,
+                                 devices.data(), nullptr);
+        test_error(error, "clGetContextInfo failed");
+
+        return false;
+    }
+
+    std::vector<cl_device_id> devices;
+    clCommandQueueWrapper second_device_queue = nullptr;
+};
 };
 
 int test_negative_enqueue_command_buffer_invalid_command_buffer(
@@ -544,15 +665,6 @@ int test_negative_enqueue_command_buffer_not_valid_queue_in_queues(
         device, context, queue, num_elements);
 }
 
-int test_negative_enqueue_queue_not_compatible(cl_device_id device,
-                                               cl_context context,
-                                               cl_command_queue queue,
-                                               int num_elements)
-{
-    return MakeAndRunTest<EnqueueCommandBufferQueueNotCompatible>(
-        device, context, queue, num_elements);
-}
-
 int test_negative_enqueue_queue_with_different_context(cl_device_id device,
                                                        cl_context context,
                                                        cl_command_queue queue,
@@ -577,3 +689,29 @@ int test_negative_enqueue_event_wait_list_null_or_events_null(
     return MakeAndRunTest<EnqueueCommandBufferEventWaitListNullOrEventsNull>(
         device, context, queue, num_elements);
 }
+
+int test_negative_enqueue_queue_without_reqd_properties(cl_device_id device,
+                                                        cl_context context,
+                                                        cl_command_queue queue,
+                                                        int num_elements)
+{
+    return MakeAndRunTest<EnqueueCommandBufferQueueWithoutReqdProperties>(
+        device, context, queue, num_elements);
+}
+
+int test_negative_enqueue_with_unsupported_queue_property(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    return MakeAndRunTest<EnqueueCommandBufferWithUnsupportedQueueProperty>(
+        device, context, queue, num_elements);
+}
+
+int test_negative_enqueue_inconsistent_device(cl_device_id device,
+                                              cl_context context,
+                                              cl_command_queue queue,
+                                              int num_elements)
+{
+    return MakeAndRunTest<EnqueueCommandBufferInconsistentDevice>(
+        device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h
index 2a1e199b..ba89de8e 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/procs.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h
@@ -91,6 +91,10 @@ extern int test_basic_profiling(cl_device_id device, cl_context context,
 extern int test_simultaneous_profiling(cl_device_id device, cl_context context,
                                        cl_command_queue queue,
                                        int num_elements);
+extern int test_substitute_queue_profiling(cl_device_id device,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements);
 extern int test_queue_substitution(cl_device_id device, cl_context context,
                                    cl_command_queue queue, int num_elements);
 extern int test_properties_queue_substitution(cl_device_id device,
@@ -101,6 +105,14 @@ extern int test_simultaneous_queue_substitution(cl_device_id device,
                                                 cl_context context,
                                                 cl_command_queue queue,
                                                 int num_elements);
+extern int test_queue_substitute_in_order(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
+extern int test_queue_substitute_out_of_order(cl_device_id device,
+                                              cl_context context,
+                                              cl_command_queue queue,
+                                              int num_elements);
 extern int test_fill_image(cl_device_id device, cl_context context,
                            cl_command_queue queue, int num_elements);
 extern int test_fill_buffer(cl_device_id device, cl_context context,
@@ -211,13 +223,6 @@ extern int test_negative_create_command_buffer_repeated_properties(
 extern int test_negative_create_command_buffer_not_supported_properties(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
-extern int test_negative_create_command_buffer_queue_without_min_properties(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    int num_elements);
-extern int
-test_negative_create_command_buffer_device_does_not_support_out_of_order_queue(
-    cl_device_id device, cl_context context, cl_command_queue queue,
-    int num_elements);
 extern int test_negative_command_ndrange_queue_not_null(cl_device_id device,
                                                         cl_context context,
                                                         cl_command_queue queue,
@@ -383,10 +388,6 @@ extern int test_negative_command_buffer_copy_image_mutable_handle_not_null(
 extern int test_negative_enqueue_command_buffer_not_valid_queue_in_queues(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
-extern int test_negative_enqueue_queue_not_compatible(cl_device_id device,
-                                                      cl_context context,
-                                                      cl_command_queue queue,
-                                                      int num_elements);
 extern int test_negative_enqueue_queue_with_different_context(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
@@ -396,6 +397,14 @@ extern int test_negative_enqueue_command_buffer_different_context_than_event(
 extern int test_negative_enqueue_event_wait_list_null_or_events_null(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
-
-
+extern int test_negative_enqueue_queue_without_reqd_properties(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_enqueue_with_unsupported_queue_property(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_enqueue_inconsistent_device(cl_device_id device,
+                                                     cl_context context,
+                                                     cl_command_queue queue,
+                                                     int num_elements);
 #endif // CL_KHR_COMMAND_BUFFER_PROCS_H
diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp
index e5a425b0..cf914a9a 100644
--- a/test_conformance/half/Test_vStoreHalf.cpp
+++ b/test_conformance/half/Test_vStoreHalf.cpp
@@ -341,8 +341,8 @@ int Test_vStoreHalf_private(cl_device_id device, f2h referenceFunc,
     int vectorSize, error;
     cl_program programs[kVectorSizeCount + kStrangeVectorSizeCount][3];
     cl_kernel kernels[kVectorSizeCount + kStrangeVectorSizeCount][3];
-    cl_program resetProgram;
-    cl_kernel resetKernel;
+    cl_program resetProgram = nullptr;
+    cl_kernel resetKernel = nullptr;
 
     uint64_t time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
     uint64_t min_time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
@@ -1225,8 +1225,8 @@ int Test_vStoreaHalf_private(cl_device_id device, f2h referenceFunc,
     int vectorSize, error;
     cl_program programs[kVectorSizeCount + kStrangeVectorSizeCount][3];
     cl_kernel kernels[kVectorSizeCount + kStrangeVectorSizeCount][3];
-    cl_program resetProgram;
-    cl_kernel resetKernel;
+    cl_program resetProgram = nullptr;
+    cl_kernel resetKernel = nullptr;
 
     uint64_t time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
     uint64_t min_time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp
index ee44fb2d..82b2d769 100644
--- a/test_conformance/half/main.cpp
+++ b/test_conformance/half/main.cpp
@@ -144,11 +144,12 @@ static int ParseArgs( int argc, const char **argv )
 #if (defined( __APPLE__ ) || defined(__linux__) || defined(__MINGW32__))
     { // Extract the app name
         char baseName[ MAXPATHLEN ];
-        strncpy( baseName, argv[0], MAXPATHLEN );
+        strncpy(baseName, argv[0], MAXPATHLEN - 1);
+        baseName[MAXPATHLEN - 1] = '\0';
         char *base = basename( baseName );
         if( NULL != base )
         {
-            strncpy( appName, base, sizeof( appName )  );
+            strncpy(appName, base, sizeof(appName) - 1);
             appName[ sizeof( appName ) -1 ] = '\0';
         }
     }
diff --git a/test_conformance/images/clCopyImage/test_copy_1D.cpp b/test_conformance/images/clCopyImage/test_copy_1D.cpp
index b4ae8308..7d13eaab 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D.cpp
@@ -25,7 +25,7 @@ int test_copy_image_size_1D( cl_context context, cl_command_queue queue, image_d
     size_t src_lod = 0, src_width_lod = imageInfo->width, src_row_pitch_lod;
     size_t dst_lod = 0, dst_width_lod = imageInfo->width, dst_row_pitch_lod;
     size_t width_lod = imageInfo->width;
-    size_t max_mip_level;
+    size_t max_mip_level = 0;
 
     if( gTestMipmaps )
     {
diff --git a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
index f0b610bb..d94ba8ef 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
@@ -25,7 +25,7 @@ int test_copy_image_size_1D_array( cl_context context, cl_command_queue queue, i
     size_t src_lod = 0, src_width_lod = imageInfo->width, src_row_pitch_lod;
     size_t dst_lod = 0, dst_width_lod = imageInfo->width, dst_row_pitch_lod;
     size_t width_lod = imageInfo->width;
-    size_t max_mip_level;
+    size_t max_mip_level = 0;
 
     if( gTestMipmaps )
     {
diff --git a/test_conformance/images/clCopyImage/test_copy_2D.cpp b/test_conformance/images/clCopyImage/test_copy_2D.cpp
index 448b47f0..97cca26c 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D.cpp
@@ -27,7 +27,7 @@ int test_copy_image_size_2D( cl_context context, cl_command_queue queue, image_d
     size_t dst_lod = 0, dst_width_lod = imageInfo->width, dst_row_pitch_lod;
     size_t dst_height_lod = imageInfo->height;
     size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
-    size_t max_mip_level;
+    size_t max_mip_level = 0;
 
     if( gTestMipmaps )
     {
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
index 1819d87c..9ba8718a 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
@@ -72,7 +72,7 @@ int test_copy_image_size_2D_2D_array( cl_context context, cl_command_queue queue
     size_t threeImage_lod = 0, threeImage_width_lod = threeImage->width, threeImage_row_pitch_lod, threeImage_slice_pitch_lod;
     size_t threeImage_height_lod = threeImage->height;
     size_t width_lod, height_lod;
-    size_t twoImage_max_mip_level,threeImage_max_mip_level;
+    size_t twoImage_max_mip_level = 0, threeImage_max_mip_level = 0;
 
     if( gTestMipmaps )
     {
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
index 4ab6b42a..5f522e3e 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
@@ -68,7 +68,7 @@ int test_copy_image_size_2D_3D( cl_context context, cl_command_queue queue, imag
     size_t threeImage_lod = 0, threeImage_width_lod = threeImage->width, threeImage_row_pitch_lod, threeImage_slice_pitch_lod;
     size_t threeImage_height_lod = threeImage->height, depth_lod = threeImage->depth;
     size_t width_lod, height_lod;
-    size_t twoImage_max_mip_level,threeImage_max_mip_level;
+    size_t twoImage_max_mip_level = 0, threeImage_max_mip_level = 0;
 
     if( gTestMipmaps )
     {
diff --git a/test_conformance/images/clCopyImage/test_loops.cpp b/test_conformance/images/clCopyImage/test_loops.cpp
index ea60d356..d9c54854 100644
--- a/test_conformance/images/clCopyImage/test_loops.cpp
+++ b/test_conformance/images/clCopyImage/test_loops.cpp
@@ -39,8 +39,8 @@ extern int test_copy_image_set_1D_buffer_1D(cl_device_id device,
 
 int test_image_type( cl_device_id device, cl_context context, cl_command_queue queue, MethodsToTest testMethod, cl_mem_flags flags )
 {
-    const char *name;
-    cl_mem_object_type imageType;
+    const char *name = nullptr;
+    cl_mem_object_type imageType = 0;
 
     if ( gTestMipmaps )
     {
diff --git a/test_conformance/images/clFillImage/test_fill_generic.cpp b/test_conformance/images/clFillImage/test_fill_generic.cpp
index 17b6182e..24c91813 100644
--- a/test_conformance/images/clFillImage/test_fill_generic.cpp
+++ b/test_conformance/images/clFillImage/test_fill_generic.cpp
@@ -277,6 +277,11 @@ cl_mem create_image( cl_context context, cl_command_queue queue, BufferOwningPtr
             depth = imageInfo->depth;
             imageSize = imageInfo->slicePitch * imageInfo->depth;
             break;
+        default:
+            log_error("ERROR Invalid imageInfo->type = %d\n", imageInfo->type);
+            height = 0;
+            depth = 0;
+            break;
     }
 
     size_t origin[ 3 ] = { 0, 0, 0 };
diff --git a/test_conformance/images/clGetInfo/test_2D.cpp b/test_conformance/images/clGetInfo/test_2D.cpp
index 49631bf4..76588a09 100644
--- a/test_conformance/images/clGetInfo/test_2D.cpp
+++ b/test_conformance/images/clGetInfo/test_2D.cpp
@@ -181,6 +181,10 @@ int test_get_image_info_single( cl_context context, image_descriptor *imageInfo,
     case CL_MEM_OBJECT_IMAGE3D:
       required_height = imageInfo->height;
       break;
+    default:
+        log_error("ERROR: Invalid imageInfo->type = %d\n", imageInfo->type);
+        required_height = 0;
+        break;
   }
 
     size_t outHeight;
@@ -204,6 +208,10 @@ int test_get_image_info_single( cl_context context, image_descriptor *imageInfo,
     case CL_MEM_OBJECT_IMAGE3D:
       required_depth = imageInfo->depth;
       break;
+    default:
+        log_error("ERROR: Invalid imageInfo->type = %d\n", imageInfo->type);
+        required_depth = 0;
+        break;
   }
 
   size_t outDepth;
@@ -227,6 +235,10 @@ int test_get_image_info_single( cl_context context, image_descriptor *imageInfo,
     case CL_MEM_OBJECT_IMAGE2D_ARRAY:
       required_array_size = imageInfo->arraySize;
       break;
+    default:
+        log_error("ERROR: Invalid imageInfo->type = %d\n", imageInfo->type);
+        required_array_size = 0;
+        break;
   }
 
   size_t outArraySize;
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
index 887c9dca..77784b20 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
@@ -69,7 +69,7 @@ static inline size_t get_format_size(cl_context context,
     }
 
     cl_int error = 0;
-    cl_mem buffer;
+    cl_mem buffer = nullptr;
     if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER)
     {
         buffer = clCreateBuffer(context, flags,
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
index 2dcc1827..8b82b9f9 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
@@ -769,7 +769,7 @@ int image_from_buffer_fill_positive(cl_device_id device, cl_context context,
                 err = clFinish(queue);
                 test_error(err, "Error clFinish");
 
-                cl_mem image1d_buffer;
+                cl_mem image1d_buffer = nullptr;
                 if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER)
                 {
                     image1d_buffer = clCreateBuffer(context, flag, buffer_size,
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_raw10_raw12.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_raw10_raw12.cpp
index c506528e..e3bd1e48 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_raw10_raw12.cpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_raw10_raw12.cpp
@@ -20,6 +20,7 @@
 
 extern int gTypesToTest;
 extern int gtestTypesToRun;
+extern int gNormalizedModeToUse;
 extern bool gTestImage2DFromBuffer;
 extern cl_mem_flags gMemFlagsToUse;
 
@@ -38,16 +39,20 @@ static int test_image_set(cl_device_id device, cl_context context,
     log_info("---- Supported %s %s formats for this device for "
              "cl_ext_image_raw10_raw12---- \n",
              convert_image_type_to_string(imageType), "read");
-    log_info("  %-7s %-24s %d\n", "CL_R", "CL_UNSIGNED_INT_RAW10_EXT", 0);
-    log_info("  %-7s %-24s %d\n", "CL_R", "CL_UNSIGNED_INT_RAW12_EXT", 0);
+    log_info("  %-7s %-24s %d\n", "CL_R", "CL_UNSIGNED_INT_RAW10_EXT", 1);
+    log_info("  %-7s %-24s %d\n", "CL_R", "CL_UNSIGNED_INT_RAW12_EXT", 1);
     log_info("------------------------------------------- \n");
 
     image_sampler_data imageSampler;
     ImageTestTypes test{ kTestUInt, kUInt, uintFormats, "uint" };
+
     if (gTypesToTest & test.type)
     {
         std::vector<bool> filterFlags(formatList.size(), false);
         imageSampler.filter_mode = CL_FILTER_NEAREST;
+        // `CL_UNSIGNED_INT_RAW10_EXT` and `CL_UNSIGNED_INT_RAW12_EXT` image
+        // channel data types are unnormalised
+        imageSampler.normalized_coords = false;
         ret = test_read_image_formats(device, context, queue, formatList,
                                       filterFlags, &imageSampler,
                                       test.explicitType, imageType);
@@ -60,19 +65,34 @@ int ext_image_raw10_raw12(cl_device_id device, cl_context context,
 {
     int ret = 0;
 
-    if (0 == is_extension_available(device, "cl_ext_image_raw10_raw12"))
+    if (true != gNormalizedModeToUse)
     {
-        log_info("-----------------------------------------------------\n");
-        log_info("This device does not support "
-                 "cl_ext_image_raw10_raw12.\n");
-        log_info("Skipping cl_ext_image_raw10_raw12 "
-                 "image test.\n");
-        log_info("-----------------------------------------------------\n\n");
-        return 0;
+        if (0 == is_extension_available(device, "cl_ext_image_raw10_raw12"))
+        {
+            log_info("-----------------------------------------------------\n");
+            log_info("This device does not support "
+                     "cl_ext_image_raw10_raw12.\n");
+            log_info("Skipping cl_ext_image_raw10_raw12 "
+                     "image test.\n");
+            log_info(
+                "-----------------------------------------------------\n\n");
+            ret = TEST_SKIPPED_ITSELF;
+        }
+        else
+        {
+            gtestTypesToRun = kReadTests;
+            ret +=
+                test_image_set(device, context, queue, CL_MEM_OBJECT_IMAGE2D);
+        }
+    }
+    else
+    {
+        // skip the test if it is forced to be NORMALIZED from the command line
+        // argument i.e. gNormalizedModeToUse is true
+        log_info("cl_ext_image_raw10_raw12 does not support normalized channel "
+                 "components. Skipping the test.\n");
+        ret = TEST_SKIPPED_ITSELF;
     }
-    gtestTypesToRun = kReadTests;
-
-    ret += test_image_set(device, context, queue, CL_MEM_OBJECT_IMAGE2D);
 
     return ret;
 }
diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp
index 0c87d87d..9c4e332a 100644
--- a/test_conformance/images/kernel_read_write/test_iterations.cpp
+++ b/test_conformance/images/kernel_read_write/test_iterations.cpp
@@ -1191,7 +1191,7 @@ int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel ke
 {
     int error;
     static int initHalf = 0;
-    cl_mem imageBuffer;
+    cl_mem imageBuffer = nullptr;
     cl_mem_flags    image_read_write_flags = CL_MEM_READ_ONLY;
     size_t threads[2];
 
diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp
index 32f7c22f..ab73e6e1 100644
--- a/test_conformance/images/kernel_read_write/test_write_image.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_image.cpp
@@ -223,7 +223,7 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue
         clProtectedImage protImage;
         clMemWrapper unprotImage;
         cl_mem image;
-        cl_mem imageBuffer;
+        cl_mem imageBuffer = nullptr;
 
         if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
         {
@@ -910,7 +910,7 @@ int test_write_image_formats(cl_device_id device, cl_context context,
         gTestCount++;
 
         print_write_header( &imageFormat, false );
-        int retCode;
+        int retCode = 0;
         switch (imageType)
         {
             case CL_MEM_OBJECT_IMAGE1D:
diff --git a/test_conformance/integer_ops/test_unary_ops.cpp b/test_conformance/integer_ops/test_unary_ops.cpp
index da3de6d1..1f7fe855 100644
--- a/test_conformance/integer_ops/test_unary_ops.cpp
+++ b/test_conformance/integer_ops/test_unary_ops.cpp
@@ -97,7 +97,7 @@ int test_unary_op( cl_command_queue queue, cl_context context, OpKonstants which
         get_explicit_type_size(vecType) * vecSize * TEST_SIZE, inData, &error);
     test_error( error, "Creating input data array failed" );
 
-    cl_uint bits;
+    cl_uint bits = 0;
     for( i = 0; i < TEST_SIZE; i++ )
     {
         size_t which = i & 7;
diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
index feeedc47..5510c7a2 100644
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -344,26 +344,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp
index deea1ce9..6db812e5 100644
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ b/test_conformance/math_brute_force/binary_float.cpp
@@ -349,26 +349,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_half.cpp b/test_conformance/math_brute_force/binary_half.cpp
index 70057db5..5bbcbbda 100644
--- a/test_conformance/math_brute_force/binary_half.cpp
+++ b/test_conformance/math_brute_force/binary_half.cpp
@@ -229,26 +229,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp
index a6c28557..5f563c73 100644
--- a/test_conformance/math_brute_force/binary_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_i_double.cpp
@@ -346,26 +346,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp
index dfe25efc..a9a65719 100644
--- a/test_conformance/math_brute_force/binary_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_i_float.cpp
@@ -337,26 +337,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_i_half.cpp b/test_conformance/math_brute_force/binary_i_half.cpp
index 0f167fc9..bd91be46 100644
--- a/test_conformance/math_brute_force/binary_i_half.cpp
+++ b/test_conformance/math_brute_force/binary_i_half.cpp
@@ -225,26 +225,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
index 7600ab16..4dce5052 100644
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ b/test_conformance/math_brute_force/binary_operator_double.cpp
@@ -214,6 +214,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_double *s;
     cl_double *s2;
 
+    bool reciprocal = strcmp(name, "reciprocal") == 0;
+    const double reciprocalArrayX[] = { 1.0 };
+    const double *specialValuesX =
+        reciprocal ? reciprocalArrayX : specialValues;
+    size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;
+
     Force64BitFPUPrecision();
 
     cl_event e[VECTOR_SIZE_COUNT];
@@ -242,7 +248,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
     cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
     cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
+    int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
     // Test edge cases
@@ -252,14 +258,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         cl_double *fp2 = (cl_double *)p2;
         uint32_t x, y;
 
-        x = (job_id * buffer_elements) % specialValuesCount;
+        x = (job_id * buffer_elements) % specialValuesCountX;
         y = (job_id * buffer_elements) / specialValuesCount;
 
         for (; idx < buffer_elements; idx++)
         {
-            fp[idx] = specialValues[x];
+            fp[idx] = specialValuesX[x];
             fp2[idx] = specialValues[y];
-            if (++x >= specialValuesCount)
+            ++x;
+            if (x >= specialValuesCountX)
             {
                 x = 0;
                 y++;
@@ -271,7 +278,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
-        p[idx] = genrand_int64(d);
+        p[idx] =
+            reciprocal ? ((cl_ulong *)specialValuesX)[0] : genrand_int64(d);
         p2[idx] = genrand_int64(d);
     }
 
@@ -337,26 +345,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
@@ -375,8 +372,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
     s = (cl_double *)gIn + thread_id * buffer_elements;
     s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
-    for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_ff(s[j], s2[j]);
+
+    if (reciprocal)
+        for (size_t j = 0; j < buffer_elements; j++)
+            r[j] = (float)func.f_f(s2[j]);
+    else
+        for (size_t j = 0; j < buffer_elements; j++)
+            r[j] = (cl_double)func.f_ff(s[j], s2[j]);
 
     // Read the data back -- no need to wait for the first N-1 buffers but wait
     // for the last buffer. This is an in order queue.
@@ -406,7 +408,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (t[j] != q[j])
             {
                 cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_ff(s[j], s2[j]);
+                long double correct =
+                    reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
+
                 float err = Bruteforce_Ulp_Error_Double(test, correct);
                 int fail = !(fabsf(err) <= ulps);
 
@@ -479,8 +483,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     }
                     else if (IsDoubleSubnormal(s2[j]))
                     {
-                        long double correct2 = func.f_ff(s[j], 0.0);
-                        long double correct3 = func.f_ff(s[j], -0.0);
+                        long double correct2 =
+                            reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
+                        long double correct3 =
+                            reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
+
                         float err2 =
                             Bruteforce_Ulp_Error_Double(test, correct2);
                         float err3 =
diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index 6f5a3645..c0c11c2e 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -208,6 +208,11 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_float *s2 = 0;
     RoundingMode oldRoundMode;
 
+    bool reciprocal = strcmp(name, "reciprocal") == 0;
+    const float reciprocalArrayX[] = { 1.f };
+    const float *specialValuesX = reciprocal ? reciprocalArrayX : specialValues;
+    size_t specialValuesCountX = reciprocal ? 1 : specialValuesCount;
+
     if (relaxedMode)
     {
         func = job->f->rfunc;
@@ -239,7 +244,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
     cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
     cl_uint idx = 0;
-    int totalSpecialValueCount = specialValuesCount * specialValuesCount;
+    int totalSpecialValueCount = specialValuesCountX * specialValuesCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
     if (job_id <= (cl_uint)lastSpecialJobIndex)
@@ -247,15 +252,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         // Insert special values
         uint32_t x, y;
 
-        x = (job_id * buffer_elements) % specialValuesCount;
+        x = (job_id * buffer_elements) % specialValuesCountX;
         y = (job_id * buffer_elements) / specialValuesCount;
 
         for (; idx < buffer_elements; idx++)
         {
-            p[idx] = ((cl_uint *)specialValues)[x];
+            p[idx] = ((cl_uint *)specialValuesX)[x];
             p2[idx] = ((cl_uint *)specialValues)[y];
             ++x;
-            if (x >= specialValuesCount)
+            if (x >= specialValuesCountX)
             {
                 x = 0;
                 y++;
@@ -269,13 +274,19 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                 if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
                 if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
             }
+            else if (relaxedMode && reciprocal)
+            {
+                cl_uint p2j = p2[idx] & 0x7fffffff;
+                // Replace values outside [2^-126, 2^126] with QNaN
+                if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
+            }
         }
     }
 
     // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
-        p[idx] = genrand_int32(d);
+        p[idx] = reciprocal ? ((cl_uint *)specialValuesX)[0] : genrand_int32(d);
         p2[idx] = genrand_int32(d);
 
         if (relaxedMode && strcmp(name, "divide") == 0)
@@ -286,6 +297,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (pj < 0x20800000 || pj > 0x5e800000) p[idx] = 0x7fc00000;
             if (p2j < 0x20800000 || p2j > 0x5e800000) p2[idx] = 0x7fc00000;
         }
+        else if (relaxedMode && reciprocal)
+        {
+            cl_uint p2j = p2[idx] & 0x7fffffff;
+            // Replace values outside [2^-126, 2^126] with QNaN
+            if (p2j < 0x00807d99 || p2j > 0x7e800000) p2[idx] = 0x7fc00000;
+        }
     }
 
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -350,26 +367,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
@@ -402,18 +408,31 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     s2 = (float *)gIn2 + thread_id * buffer_elements;
     if (gInfNanSupport)
     {
-        for (size_t j = 0; j < buffer_elements; j++)
-            r[j] = (float)func.f_ff(s[j], s2[j]);
+        if (reciprocal)
+            for (size_t j = 0; j < buffer_elements; j++)
+                r[j] = (float)func.f_f(s2[j]);
+        else
+            for (size_t j = 0; j < buffer_elements; j++)
+                r[j] = (float)func.f_ff(s[j], s2[j]);
     }
     else
     {
-        for (size_t j = 0; j < buffer_elements; j++)
-        {
-            feclearexcept(FE_OVERFLOW);
-            r[j] = (float)func.f_ff(s[j], s2[j]);
-            overflow[j] =
-                FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
-        }
+        if (reciprocal)
+            for (size_t j = 0; j < buffer_elements; j++)
+            {
+                feclearexcept(FE_OVERFLOW);
+                r[j] = (float)func.f_f(s2[j]);
+                overflow[j] =
+                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
+        else
+            for (size_t j = 0; j < buffer_elements; j++)
+            {
+                feclearexcept(FE_OVERFLOW);
+                r[j] = (float)func.f_ff(s[j], s2[j]);
+                overflow[j] =
+                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
     }
 
     if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
@@ -448,7 +467,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (t[j] != q[j])
             {
                 float test = ((float *)q)[j];
-                double correct = func.f_ff(s[j], s2[j]);
+                double correct =
+                    reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
 
                 // Per section 10 paragraph 6, accept any result if an input or
                 // output is a infinity or NaN or overflow
@@ -485,7 +505,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     }
 
                     // retry per section 6.5.3.3
-                    if (IsFloatSubnormal(s[j]))
+                    if (!reciprocal && IsFloatSubnormal(s[j]))
                     {
                         double correct2, correct3;
                         float err2, err3;
@@ -591,8 +611,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
                         if (!gInfNanSupport) feclearexcept(FE_OVERFLOW);
 
-                        correct2 = func.f_ff(s[j], 0.0);
-                        correct3 = func.f_ff(s[j], -0.0);
+                        correct2 =
+                            reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
+                        correct3 =
+                            reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
 
                         // Per section 10 paragraph 6, accept any result if an
                         // input or output is a infinity or NaN or overflow
@@ -625,7 +647,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     }
                 }
 
-
                 if (fabsf(err) > tinfo->maxError)
                 {
                     tinfo->maxError = fabsf(err);
diff --git a/test_conformance/math_brute_force/binary_operator_half.cpp b/test_conformance/math_brute_force/binary_operator_half.cpp
index b4abf490..3bd45857 100644
--- a/test_conformance/math_brute_force/binary_operator_half.cpp
+++ b/test_conformance/math_brute_force/binary_operator_half.cpp
@@ -120,6 +120,12 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     std::vector<float> s(0), s2(0);
     RoundingMode oldRoundMode;
 
+    bool reciprocal = strcmp(name, "reciprocal") == 0;
+    const cl_half reciprocalArrayHalfX[] = { 0x3c00 };
+    const cl_half *specialValuesHalfX =
+        reciprocal ? reciprocalArrayHalfX : specialValuesHalf;
+    size_t specialValuesHalfCountX = reciprocal ? 1 : specialValuesHalfCount;
+
     cl_event e[VECTOR_SIZE_COUNT];
     cl_half *out[VECTOR_SIZE_COUNT];
 
@@ -148,7 +154,7 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     cl_half *p2 = (cl_half *)gIn2 + thread_id * buffer_elements;
     cl_uint idx = 0;
     int totalSpecialValueCount =
-        specialValuesHalfCount * specialValuesHalfCount;
+        specialValuesHalfCountX * specialValuesHalfCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
     if (job_id <= (cl_uint)lastSpecialJobIndex)
@@ -156,14 +162,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
         // Insert special values
         uint32_t x, y;
 
-        x = (job_id * buffer_elements) % specialValuesHalfCount;
+        x = (job_id * buffer_elements) % specialValuesHalfCountX;
         y = (job_id * buffer_elements) / specialValuesHalfCount;
 
         for (; idx < buffer_elements; idx++)
         {
-            p[idx] = specialValuesHalf[x];
+            p[idx] = specialValuesHalfX[x];
             p2[idx] = specialValuesHalf[y];
-            if (++x >= specialValuesHalfCount)
+            ++x;
+            if (x >= specialValuesHalfCountX)
             {
                 x = 0;
                 y++;
@@ -175,7 +182,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
-        p[idx] = (cl_half)genrand_int32(d);
+        p[idx] = reciprocal ? ((cl_half *)specialValuesHalfX)[0]
+                            : (cl_half)genrand_int32(d);
         p2[idx] = (cl_half)genrand_int32(d);
     }
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
@@ -232,26 +240,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
@@ -283,11 +280,23 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
     s.resize(buffer_elements);
     s2.resize(buffer_elements);
 
-    for (size_t j = 0; j < buffer_elements; j++)
+    if (reciprocal)
     {
-        s[j] = HTF(p[j]);
-        s2[j] = HTF(p2[j]);
-        r[j] = HFF(func.f_ff(s[j], s2[j]));
+        for (size_t j = 0; j < buffer_elements; j++)
+        {
+            s[j] = HTF(p[j]);
+            s2[j] = HTF(p2[j]);
+            r[j] = HFF(func.f_f(s2[j]));
+        }
+    }
+    else
+    {
+        for (size_t j = 0; j < buffer_elements; j++)
+        {
+            s[j] = HTF(p[j]);
+            s2[j] = HTF(p2[j]);
+            r[j] = HFF(func.f_ff(s[j], s2[j]));
+        }
     }
 
     if (ftz) RestoreFPState(&oldMode);
@@ -320,7 +329,8 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             if (r[j] != q[j])
             {
                 float test = HTF(q[j]);
-                float correct = func.f_ff(s[j], s2[j]);
+                float correct =
+                    reciprocal ? func.f_f(s2[j]) : func.f_ff(s[j], s2[j]);
 
                 // Per section 10 paragraph 6, accept any result if an input or
                 // output is a infinity or NaN or overflow
@@ -446,9 +456,10 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
                         double correct2, correct3;
                         float err2, err3;
 
-                        correct2 = func.f_ff(s[j], 0.0);
-                        correct3 = func.f_ff(s[j], -0.0);
-
+                        correct2 =
+                            reciprocal ? func.f_f(0.0) : func.f_ff(s[j], 0.0);
+                        correct3 =
+                            reciprocal ? func.f_f(-0.0) : func.f_ff(s[j], -0.0);
 
                         // Per section 10 paragraph 6, accept any result if an
                         // input or output is a infinity or NaN or overflow
diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
index ec244c17..eca33f2f 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
@@ -185,31 +185,18 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_double) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
index 36d71898..213535a9 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
@@ -187,31 +187,18 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_float) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/binary_two_results_i_half.cpp b/test_conformance/math_brute_force/binary_two_results_i_half.cpp
index 3c3ef71a..a2379431 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_half.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_half.cpp
@@ -179,31 +179,18 @@ int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode)
             // align working group size with the bigger output type
             size_t vectorSize = sizeValues[j] * sizeof(int32_t);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
index f06921bd..14e0830a 100644
--- a/test_conformance/math_brute_force/function_list.cpp
+++ b/test_conformance/math_brute_force/function_list.cpp
@@ -29,31 +29,32 @@
 // Only use ulps information in spir test
 #ifdef FUNCTION_LIST_ULPS_ONLY
 
-#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type)            \
+#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _half_embedded_ulp,       \
+              _rmode, _type)                                                   \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,  \
-            RELAXED_OFF, _type                                                 \
+            _ulp, _ulp, _half_ulp, _half_embedded_ulp, _embedded_ulp,          \
+            INFINITY, INFINITY, _rmode, RELAXED_OFF, _type                     \
     }
-#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
-                  _type, _relaxed_embedded_ulp)                                \
+#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _half_embedded_ulp,   \
+                  _relaxed_ulp, _rmode, _type, _relaxed_embedded_ulp)          \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _half_ulp, _embedded_ulp, _relaxed_ulp,                \
-            _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type                   \
+            _ulp, _ulp, _half_ulp, _half_embedded_ulp, _embedded_ulp,          \
+            _relaxed_ulp, _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type     \
     }
 #define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
     {                                                                          \
         "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), { NULL },          \
-            { NULL }, { NULL }, _ulp, _ulp, _ulp, _embedded_ulp, INFINITY,     \
-            INFINITY, _rmode, RELAXED_OFF, _type                               \
+            { NULL }, { NULL }, _ulp, _ulp, _ulp, _ulp, _embedded_ulp,         \
+            INFINITY, INFINITY, _rmode, RELAXED_OFF, _type                     \
     }
 #define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp,       \
-                       _rmode, _type)                                          \
+                       _half_embedded_ulp, _rmode, _type)                      \
     {                                                                          \
         STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
-            _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, \
-            _type                                                              \
+            _half_ulp, _half_embedded_ulp, _embedded_ulp, INFINITY, INFINITY,  \
+            _rmode, RELAXED_OFF, _type                                         \
     }
 
 #define unaryF NULL
@@ -78,41 +79,47 @@
 #define reference_copysign NULL
 #define reference_sqrt NULL
 #define reference_sqrtl NULL
+#define reference_reciprocal NULL
+#define reference_reciprocall NULL
+#define reference_relaxed_reciprocal NULL
+
 #define reference_divide NULL
 #define reference_dividel NULL
 #define reference_relaxed_divide NULL
 
 #else // FUNCTION_LIST_ULPS_ONLY
 
-#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type)            \
+#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _half_embedded_ulp,       \
+              _rmode, _type)                                                   \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
             { (void*)reference_##_name##l }, { (void*)reference_##_name },     \
-            _ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,  \
-            RELAXED_OFF, _type                                                 \
+            _ulp, _ulp, _half_ulp, _half_embedded_ulp, _embedded_ulp,          \
+            INFINITY, INFINITY, _rmode, RELAXED_OFF, _type                     \
     }
-#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
-                  _type, _relaxed_embedded_ulp)                                \
+#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _half_embedded_ulp,   \
+                  _relaxed_ulp, _rmode, _type, _relaxed_embedded_ulp)          \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
             { (void*)reference_##_name##l },                                   \
             { (void*)reference_##relaxed_##_name }, _ulp, _ulp, _half_ulp,     \
-            _embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp, _rmode,        \
-            RELAXED_ON, _type                                                  \
+            _half_embedded_ulp, _embedded_ulp, _relaxed_ulp,                   \
+            _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type                   \
     }
 #define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
     {                                                                          \
         "half_" STRINGIFY(_name), "half_" STRINGIFY(_name),                    \
             { (void*)reference_##_name }, { NULL }, { NULL }, _ulp, _ulp,      \
-            _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF,      \
-            _type                                                              \
+            _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,             \
+            RELAXED_OFF, _type                                                 \
     }
 #define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp,       \
-                       _rmode, _type)                                          \
+                       _half_embedded_ulp, _rmode, _type)                      \
     {                                                                          \
         STRINGIFY(_name), _operator, { (void*)reference_##_name },             \
             { (void*)reference_##_name##l }, { NULL }, _ulp, _ulp, _half_ulp,  \
-            _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
+            _half_embedded_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,     \
+            RELAXED_OFF, _type                                                 \
     }
 
 static constexpr vtbl _unary = {
@@ -248,19 +255,19 @@ static constexpr vtbl _mad_tbl = {
 
 // clang-format off
 const Func functionList[] = {
-    ENTRY_EXT(acos, 4.0f, 4.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(acosh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(acospi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(asin, 4.0f, 4.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(asinh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(asinpi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(atan, 5.0f, 5.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(atanh, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(atanpi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(atan2, 6.0f, 6.0f, 2.0f, FTZ_OFF, binaryF),
-    ENTRY(atan2pi, 6.0f, 6.0f, 2.0f, FTZ_OFF, binaryF),
-    ENTRY(cbrt, 2.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
-    ENTRY(ceil, 0.0f, 0.0f, 0.f, FTZ_OFF, unaryF),
+    ENTRY_EXT(acos, 4.0f, 4.0f, 2.0f, 3.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
+    ENTRY(acosh, 4.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(acospi, 5.0f, 5.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY_EXT(asin, 4.0f, 4.0f, 2.0f, 3.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
+    ENTRY(asinh, 4.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(asinpi, 5.0f, 5.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY_EXT(atan, 5.0f, 5.0f, 2.0f, 3.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
+    ENTRY(atanh, 5.0f, 5.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(atanpi, 5.0f, 5.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(atan2, 6.0f, 6.0f, 2.0f, 3.0f, FTZ_OFF, binaryF),
+    ENTRY(atan2pi, 6.0f, 6.0f, 2.0f, 3.0f, FTZ_OFF, binaryF),
+    ENTRY(cbrt, 2.0f, 4.0f, 2.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY(ceil, 0.0f, 0.0f, 0.f, 0.f, FTZ_OFF, unaryF),
     { "copysign",
       "copysign",
       { (void*)reference_copysignf },
@@ -270,97 +277,97 @@ const Func functionList[] = {
       0.0f,
       0.0f,
       0.0f,
+      0.0f,
       INFINITY,
       INFINITY,
       FTZ_OFF,
       RELAXED_OFF,
       binaryF },
-    ENTRY_EXT(cos, 4.0f, 4.0f, 2.f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY_EXT(cos, 4.0f, 4.0f, 2.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(cosh, 4.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
-    ENTRY_EXT(cospi, 4.0f, 4.0f, 2.f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY(cosh, 4.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY_EXT(cospi, 4.0f, 4.0f, 2.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(erfc, 16.0f, 16.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(erf,  16.0f, 16.0f, 4.0f, FTZ_OFF, unaryF),
+    ENTRY(erfc, 16.0f, 16.0f, 4.0f, 4.0f, FTZ_OFF, unaryF),
+    ENTRY(erf,  16.0f, 16.0f, 4.0f, 4.0f, FTZ_OFF, unaryF),
 
     // relaxed error is overwritten in unary.c as it is 3+floor(fabs(2*x))
-    ENTRY_EXT(exp, 3.0f, 4.0f, 2.f, 3.0f, FTZ_OFF, unaryF, 4.0f),
+    ENTRY_EXT(exp, 3.0f, 4.0f, 2.0f, 3.0f, 3.0f, FTZ_OFF, unaryF, 4.0f),
 
     // relaxed error is overwritten in unary.c as it is 3+floor(fabs(2*x))
-    ENTRY_EXT(exp2, 3.0f, 4.0f, 2.f, 3.0f, FTZ_OFF, unaryF, 4.0f),
+    ENTRY_EXT(exp2, 3.0f, 4.0f, 2.0f, 3.0f, 3.0f, FTZ_OFF, unaryF, 4.0f),
 
     // relaxed error is overwritten in unary.c as it is 3+floor(fabs(2*x)) in derived mode;
     // in non-derived mode it uses the ulp error for half_exp10.
-    ENTRY_EXT(exp10, 3.0f, 4.0f, 2.f, 8192.0f, FTZ_OFF, unaryF, 8192.0f),
+    ENTRY_EXT(exp10, 3.0f, 4.0f, 2.0f, 3.0f, 8192.0f, FTZ_OFF, unaryF, 8192.0f),
 
-    ENTRY(expm1, 3.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
-    ENTRY(fabs, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(fdim, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(floor, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(fma, 0.0f, 0.0f, 0.0f, FTZ_OFF, ternaryF),
-    ENTRY(fmax, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fmin, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fmod, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fract, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
-    ENTRY(frexp, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
-    ENTRY(hypot, 4.0f, 4.0f, 2.0f, FTZ_OFF, binaryF),
-    ENTRY(ilogb, 0.0f, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
-    ENTRY(isequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isfinite, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isgreater, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isgreaterequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isinf, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isless, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(islessequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(islessgreater, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isnan, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isnormal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isnotequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isordered, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isunordered, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(ldexp, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
-    ENTRY(lgamma, INFINITY, INFINITY, INFINITY, FTZ_OFF, unaryF),
-    ENTRY(lgamma_r, INFINITY, INFINITY, INFINITY, FTZ_OFF,
+    ENTRY(expm1, 3.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(fabs, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(fdim, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(floor, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(fma, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, ternaryF),
+    ENTRY(fmax, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(fmin, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(fmod, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(fract, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
+    ENTRY(frexp, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
+    ENTRY(hypot, 4.0f, 4.0f,  2.0f, 3.0f, FTZ_OFF, binaryF),
+    ENTRY(ilogb, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
+    ENTRY(isequal, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isfinite, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isgreater, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isgreaterequal, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isinf, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isless, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(islessequal, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(islessgreater, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isnan, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isnormal, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isnotequal, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isordered, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isunordered, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(ldexp, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
+    ENTRY(lgamma, INFINITY, INFINITY, INFINITY, INFINITY, FTZ_OFF, unaryF),
+    ENTRY(lgamma_r, INFINITY, INFINITY, INFINITY, INFINITY, FTZ_OFF,
           unaryF_two_results_i),
-    ENTRY_EXT(log, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
+    ENTRY_EXT(log, 3.0f, 4.0f, 2.0f, 3.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
               4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY_EXT(log2, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
+    ENTRY_EXT(log2, 3.0f, 4.0f, 2.0f, 3.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
               4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY_EXT(log10, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
+    ENTRY_EXT(log10, 3.0f, 4.0f, 2.0f, 3.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
               4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY(log1p, 2.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(logb, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(log1p, 2.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(logb, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
 
     // In fast-relaxed-math mode it has to be either exactly rounded fma or exactly rounded a*b+c
-    ENTRY_EXT(mad, INFINITY, INFINITY, INFINITY, INFINITY, FTZ_OFF, mad_function, INFINITY),
+    ENTRY_EXT(mad, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, FTZ_OFF, mad_function, INFINITY),
 
-    ENTRY(maxmag, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(minmag, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(modf, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
-    ENTRY(nan, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
-    ENTRY(nextafter, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_nextafter),
+    ENTRY(maxmag, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(minmag, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(modf, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
+    ENTRY(nan, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
+    ENTRY(nextafter, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_nextafter),
 
     // In derived mode the ulp error is calculated as exp2(y*log2(x)).
     // In non-derived it is the same as half_pow.
-    ENTRY_EXT(pow, 16.0f, 16.0f, 4.0f, 8192.0f, FTZ_OFF, binaryF, 8192.0f),
+    ENTRY_EXT(pow, 16.0f, 16.0f, 4.0f, 5.0f, 8192.0f, FTZ_OFF, binaryF, 8192.0f),
 
-    ENTRY(pown, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
-    ENTRY(powr, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF),
-    //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
-    ENTRY(remainder, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(remquo, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
-    ENTRY(rint, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(rootn, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
-    ENTRY(round, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(rsqrt, 2.0f, 4.0f, 1.0f, FTZ_OFF, unaryF),
-    ENTRY(signbit, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY_EXT(sin, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY(pown, 16.0f, 16.0f, 4.0f, 5.0f, FTZ_OFF, binaryF_i),
+    ENTRY(powr, 16.0f, 16.0f, 4.0f, 5.0f, FTZ_OFF, binaryF),
+    ENTRY(remainder, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(remquo, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
+    ENTRY(rint, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(rootn, 16.0f, 16.0f, 4.0f, 5.0f, FTZ_OFF, binaryF_i),
+    ENTRY(round, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(rsqrt, 2.0f, 4.0f, 1.0f, 1.0f, FTZ_OFF, unaryF),
+    ENTRY(signbit, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY_EXT(sin, 4.0f, 4.0f, 2.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
-    ENTRY_EXT(sincos, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF,
+    ENTRY_EXT(sincos, 4.0f, 4.0f, 2.0f, 2.0f, 0.00048828125f, FTZ_OFF,
               unaryF_two_results,
               0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(sinh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(sinpi, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY(sinh, 4.0f, 4.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY_EXT(sinpi, 4.0f, 4.0f, 2.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
     { "sqrt",
       "sqrt",
@@ -370,6 +377,7 @@ const Func functionList[] = {
       3.0f,
       0.0f,
       0.0f,
+      1.0f,
       4.0f,
       INFINITY,
       INFINITY,
@@ -387,18 +395,19 @@ const Func functionList[] = {
       INFINITY,
       INFINITY,
       INFINITY,
+      INFINITY,
       FTZ_OFF,
       RELAXED_OFF,
       unaryOF /* only for single precision */ },
 
     // In derived mode it the ulp error is calculated as sin/cos.
     // In non-derived mode it is the same as half_tan.
-    ENTRY_EXT(tan, 5.0f, 5.0f, 2.0f, 8192.0f, FTZ_OFF, unaryF, 8192.0f),
+    ENTRY_EXT(tan, 5.0f, 5.0f, 2.0f, 3.0f, 8192.0f, FTZ_OFF, unaryF, 8192.0f),
 
-    ENTRY(tanh, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
-    ENTRY(tanpi, 6.0f, 6.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY(tanh, 5.0f, 5.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
+    ENTRY(tanpi, 6.0f, 6.0f, 2.0f, 3.0f, FTZ_OFF, unaryF),
     //ENTRY(tgamma, 16.0f, 16.0f, FTZ_OFF, unaryF), Commented this out until we can be sure this requirement is realistic
-    ENTRY(trunc, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(trunc, 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
 
     HALF_ENTRY(cos, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
     HALF_ENTRY(divide, 8192.0f, 8192.0f, FTZ_ON, binaryOF),
@@ -416,8 +425,24 @@ const Func functionList[] = {
     HALF_ENTRY(tan, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
 
     // basic operations
-    OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
+    { "reciprocal",
+      "/",
+      { (void*)reference_reciprocal },
+      { (void*)reference_reciprocall },
+      { (void*)reference_relaxed_reciprocal },
+      2.5f,
+      0.0f,
+      0.0f,
+      1.0f,
+      3.0f,
+      2.5f,
+      INFINITY,
+      FTZ_OFF,
+      RELAXED_ON,
+      binaryOperatorF },
     { "divide",
       "/",
       { (void*)reference_divide },
@@ -426,6 +451,7 @@ const Func functionList[] = {
       2.5f,
       0.0f,
       1.0f,
+      1.0f,
       3.0f,
       2.5f,
       INFINITY,
@@ -443,13 +469,14 @@ const Func functionList[] = {
       INFINITY,
       INFINITY,
       INFINITY,
+      INFINITY,
       FTZ_OFF,
       RELAXED_OFF,
       binaryOperatorOF /* only for single precision */ },
-    OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, 0.0f, FTZ_OFF,
+    OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF,
                    unaryF), // A simple copy operation
-    OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
 };
 // clang-format on
 
diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h
index 56190e33..bdcc016d 100644
--- a/test_conformance/math_brute_force/function_list.h
+++ b/test_conformance/math_brute_force/function_list.h
@@ -87,6 +87,7 @@ struct Func
     float float_ulps;
     float double_ulps;
     float half_ulps;
+    float half_embedded_ulps;
     float float_embedded_ulps;
     float relaxed_error;
     float relaxed_embedded_error;
diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
index c2c7963f..4db27a62 100644
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ b/test_conformance/math_brute_force/i_unary_double.cpp
@@ -122,18 +122,12 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
index eed76a6e..1f84b901 100644
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -121,18 +121,12 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/i_unary_half.cpp b/test_conformance/math_brute_force/i_unary_half.cpp
index 97955f93..0aad984b 100644
--- a/test_conformance/math_brute_force/i_unary_half.cpp
+++ b/test_conformance/math_brute_force/i_unary_half.cpp
@@ -118,18 +118,12 @@ int TestFunc_Int_Half(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_int);
             size_t localCount = (bufferSizeOut + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
index 51d5b64b..ab969ad6 100644
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp
@@ -327,26 +327,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp
index b00a29ff..c49346cc 100644
--- a/test_conformance/math_brute_force/macro_binary_float.cpp
+++ b/test_conformance/math_brute_force/macro_binary_float.cpp
@@ -320,26 +320,15 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/macro_binary_half.cpp b/test_conformance/math_brute_force/macro_binary_half.cpp
index a8f459a7..ec10c65e 100644
--- a/test_conformance/math_brute_force/macro_binary_half.cpp
+++ b/test_conformance/math_brute_force/macro_binary_half.cpp
@@ -210,26 +210,15 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
-                                    &tinfo->inBuf2)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
+        error =
+            clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2), &tinfo->inBuf2);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
index b747b980..c6d81ab7 100644
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ b/test_conformance/math_brute_force/macro_unary_double.cpp
@@ -168,20 +168,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp
index 34f49a5a..85be620f 100644
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ b/test_conformance/math_brute_force/macro_unary_float.cpp
@@ -169,20 +169,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/macro_unary_half.cpp b/test_conformance/math_brute_force/macro_unary_half.cpp
index a1e92118..d80dd153 100644
--- a/test_conformance/math_brute_force/macro_unary_half.cpp
+++ b/test_conformance/math_brute_force/macro_unary_half.cpp
@@ -151,20 +151,12 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp
index b6a6742f..c0442e68 100644
--- a/test_conformance/math_brute_force/mad_double.cpp
+++ b/test_conformance/math_brute_force/mad_double.cpp
@@ -131,30 +131,18 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_double) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp
index 3f237ed9..2e3a294b 100644
--- a/test_conformance/math_brute_force/mad_float.cpp
+++ b/test_conformance/math_brute_force/mad_float.cpp
@@ -132,30 +132,18 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_float) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/mad_half.cpp b/test_conformance/math_brute_force/mad_half.cpp
index 33235534..0a59f3f1 100644
--- a/test_conformance/math_brute_force/mad_half.cpp
+++ b/test_conformance/math_brute_force/mad_half.cpp
@@ -126,30 +126,18 @@ int TestFunc_mad_Half(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_half) * sizeValues[j];
             size_t localCount = (bufferSize + vectorSize - 1)
                 / vectorSize; // bufferSize / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index 38954f3f..c6a4b5d6 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -154,7 +154,7 @@ static int doTest(const char *name)
         exit(EXIT_FAILURE);
     }
 
-    if (func_data->func.p == NULL)
+    if (func_data->func.p == NULL && func_data->rfunc.p == NULL)
     {
         vlog("'%s' is missing implementation, skipping function.\n",
              func_data->name);
@@ -308,9 +308,10 @@ static test_definition test_list[] = {
     ADD_TEST(half_log),      ADD_TEST(half_log2),  ADD_TEST(half_log10),
     ADD_TEST(half_powr),     ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
     ADD_TEST(half_sin),      ADD_TEST(half_sqrt),  ADD_TEST(half_tan),
-    ADD_TEST(add),           ADD_TEST(subtract),   ADD_TEST(divide),
-    ADD_TEST(divide_cr),     ADD_TEST(multiply),   ADD_TEST(assignment),
-    ADD_TEST(not ),          ADD_TEST(erf),        ADD_TEST(erfc),
+    ADD_TEST(add),           ADD_TEST(subtract),   ADD_TEST(reciprocal),
+    ADD_TEST(divide),        ADD_TEST(divide_cr),  ADD_TEST(multiply),
+    ADD_TEST(assignment),    ADD_TEST(not ),       ADD_TEST(erf),
+    ADD_TEST(erfc),
 };
 
 #undef ADD_TEST
@@ -980,19 +981,6 @@ static void ReleaseCL(void)
     }
 }
 
-void _LogBuildError(cl_program p, int line, const char *file)
-{
-    char the_log[2048] = "";
-
-    vlog_error("%s:%d: Build Log:\n", file, line);
-    if (0
-        == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG,
-                                 sizeof(the_log), the_log, NULL))
-        vlog_error("%s", the_log);
-    else
-        vlog_error("*** Error getting build log for program %p\n", p);
-}
-
 int InitILogbConstants(void)
 {
     int error;
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index 049f2013..acde1136 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
@@ -1856,6 +1856,13 @@ double reference_logb(double x)
 
 double reference_relaxed_reciprocal(double x) { return 1.0f / ((float)x); }
 
+long double reference_reciprocall(long double y)
+{
+    double dx = 1.0;
+    double dy = y;
+    return dx / dy;
+}
+
 double reference_reciprocal(double x) { return 1.0 / x; }
 
 double reference_remainder(double x, double y)
@@ -3740,9 +3747,6 @@ long double reference_nanl(cl_ulong x)
     return (long double)u.f;
 }
 
-
-long double reference_reciprocall(long double x) { return 1.0L / x; }
-
 long double reference_remainderl(long double x, long double y)
 {
     int i;
@@ -5771,4 +5775,4 @@ long double reference_erfcl(long double x) { return erfc(x); }
 long double reference_erfl(long double x) { return erf(x); }
 
 double reference_erfc(double x) { return erfc(x); }
-double reference_erf(double x) { return erf(x); }
\ No newline at end of file
+double reference_erf(double x) { return erf(x); }
diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp
index 62adbf29..aadebc25 100644
--- a/test_conformance/math_brute_force/ternary_double.cpp
+++ b/test_conformance/math_brute_force/ternary_double.cpp
@@ -236,30 +236,18 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
             size_t vectorSize = sizeof(cl_double) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp
index efde6063..478090d4 100644
--- a/test_conformance/math_brute_force/ternary_float.cpp
+++ b/test_conformance/math_brute_force/ternary_float.cpp
@@ -258,30 +258,18 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_float) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/ternary_half.cpp b/test_conformance/math_brute_force/ternary_half.cpp
index 856fa67d..843ceaa0 100644
--- a/test_conformance/math_brute_force/ternary_half.cpp
+++ b/test_conformance/math_brute_force/ternary_half.cpp
@@ -191,30 +191,18 @@ int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_half) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer2), &gInBuffer2)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
-                                        sizeof(gInBuffer3), &gInBuffer3)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2),
+                                   &gInBuffer2);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3),
+                                   &gInBuffer3);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
index f3157fdf..4762a81d 100644
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -176,20 +176,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
index 7d1f6cda..0a2af3be 100644
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -205,20 +205,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument 0");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument 1");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/unary_half.cpp b/test_conformance/math_brute_force/unary_half.cpp
index 83cdd01f..877e1fad 100644
--- a/test_conformance/math_brute_force/unary_half.cpp
+++ b/test_conformance/math_brute_force/unary_half.cpp
@@ -154,20 +154,12 @@ cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
                                                  // own copy of the cl_kernel
-        cl_program program = job->programs[j];
 
-        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
-                                    &tinfo->outBuf[j])))
-        {
-            LogBuildError(program);
-            return error;
-        }
-        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
-                                    &tinfo->inBuf)))
-        {
-            LogBuildError(program);
-            return error;
-        }
+        error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                               &tinfo->outBuf[j]);
+        test_error(error, "Failed to set kernel argument");
+        error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf), &tinfo->inBuf);
+        test_error(error, "Failed to set kernel argument");
 
         if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
                                             &vectorCount, NULL, 0, NULL, NULL)))
diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp
index 4d660e0a..a109cd6e 100644
--- a/test_conformance/math_brute_force/unary_two_results_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_double.cpp
@@ -143,25 +143,15 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp
index 3fd16cd3..a3b52c62 100644
--- a/test_conformance/math_brute_force/unary_two_results_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_float.cpp
@@ -159,25 +159,15 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_two_results_half.cpp b/test_conformance/math_brute_force/unary_two_results_half.cpp
index 2bf35142..683e1492 100644
--- a/test_conformance/math_brute_force/unary_two_results_half.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_half.cpp
@@ -132,25 +132,15 @@ int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_half);
             size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
index 916f4a0c..dd60f43e 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
@@ -151,25 +151,15 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
index 0dbe3f77..6e01a794 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
@@ -156,25 +156,15 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_two_results_i_half.cpp b/test_conformance/math_brute_force/unary_two_results_i_half.cpp
index 7b3431ab..685611ea 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_half.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_half.cpp
@@ -145,25 +145,15 @@ int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
             // align working group size with the bigger output type
             size_t vectorSize = sizeValues[j] * sizeof(cl_int);
             size_t localCount = (bufferSizeHi + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error =
-                     clSetKernelArg(kernels[j][thread_id], 1,
-                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1,
+                                   sizeof(gOutBuffer2[j]), &gOutBuffer2[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp
index 226ef068..514b7c60 100644
--- a/test_conformance/math_brute_force/unary_u_double.cpp
+++ b/test_conformance/math_brute_force/unary_u_double.cpp
@@ -116,18 +116,12 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp
index 3eb76794..d5d30495 100644
--- a/test_conformance/math_brute_force/unary_u_float.cpp
+++ b/test_conformance/math_brute_force/unary_u_float.cpp
@@ -123,18 +123,12 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/unary_u_half.cpp b/test_conformance/math_brute_force/unary_u_half.cpp
index 2970403f..cc3fe728 100644
--- a/test_conformance/math_brute_force/unary_u_half.cpp
+++ b/test_conformance/math_brute_force/unary_u_half.cpp
@@ -111,18 +111,12 @@ int TestFunc_Half_UShort(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_half);
             size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
-                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
-            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
-                                        sizeof(gInBuffer), &gInBuffer)))
-            {
-                LogBuildError(programs[j]);
-                return error;
-            }
+            error = clSetKernelArg(kernels[j][thread_id], 0,
+                                   sizeof(gOutBuffer[j]), &gOutBuffer[j]);
+            test_error(error, "Failed to set kernel argument");
+            error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer),
+                                   &gInBuffer);
+            test_error(error, "Failed to set kernel argument");
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                 1, NULL, &localCount, NULL, 0,
diff --git a/test_conformance/math_brute_force/utility.cpp b/test_conformance/math_brute_force/utility.cpp
index 53dd928c..741b3e41 100644
--- a/test_conformance/math_brute_force/utility.cpp
+++ b/test_conformance/math_brute_force/utility.cpp
@@ -195,8 +195,14 @@ float getAllowedUlpError(const Func *f, Type t, const bool relaxed)
             // TODO: distinguish between embedded and full profile.
             return f->double_ulps;
         case khalf:
-            // TODO: distinguish between embedded and full profile.
-            return f->half_ulps;
+            if (gIsEmbedded)
+            {
+                return f->half_embedded_ulps;
+            }
+            else
+            {
+                return f->half_ulps;
+            }
         default:
             assert(false && "unsupported type in getAllowedUlpError");
             // Return a negative value which will make any test fail.
diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h
index 74cacc46..a43f3a64 100644
--- a/test_conformance/math_brute_force/utility.h
+++ b/test_conformance/math_brute_force/utility.h
@@ -114,9 +114,6 @@ inline double DoubleFromUInt32(uint32_t bits)
     return u.d;
 }
 
-void _LogBuildError(cl_program p, int line, const char *file);
-#define LogBuildError(program) _LogBuildError(program, __LINE__, __FILE__)
-
 // The spec is fairly clear that we may enforce a hard cutoff to prevent
 // premature flushing to zero.
 // However, to avoid conflict for 1.0, we are letting results at TYPE_MIN +
diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index 0d5dfa7b..380878cb 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp
@@ -317,8 +317,10 @@ cl_program makeMixedFormatPrintfProgram(cl_kernel* kernel_ptr,
         {
             const float max_range = 100000.f;
             float arg = get_random_float(-max_range, max_range, gMTdata);
-            args_str << str_sprintf("%f", arg) << "f, ";
-            ref_str << str_sprintf(format, arg) << ", ";
+            std::string arg_str = str_sprintf("%f", arg);
+            args_str << arg_str << "f, ";
+            float arg_deviceRound = std::stof(arg_str);
+            ref_str << str_sprintf(format, arg_deviceRound) << ", ";
         }
     }
     // Restore the original CPU rounding mode
@@ -1151,7 +1153,8 @@ int main(int argc, const char* argv[])
     char* pcTempFname = get_temp_filename();
     if (pcTempFname != nullptr)
     {
-        strncpy(gFileName, pcTempFname, sizeof(gFileName));
+        strncpy(gFileName, pcTempFname, sizeof(gFileName) - 1);
+        gFileName[sizeof(gFileName) - 1] = '\0';
     }
 
     free(pcTempFname);
diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp
index cd84c01a..803f13ab 100644
--- a/test_conformance/printf/util_printf.cpp
+++ b/test_conformance/printf/util_printf.cpp
@@ -1336,6 +1336,33 @@ std::vector<std::string> correctBufferVector = {
     "00512,01024,262144,1048576"
 };
 
+std::vector<std::string> correctBufferVectorRTZ = {
+
+    "1.00,2.00,3.00,4.00",
+
+    "0xfa,0xfb",
+
+    "0x1234,0x8765",
+
+    "0x12345678,0x87654321",
+
+    "12345678,98765432",
+
+    "1.00,2.00,3.00,4.00",
+
+    "1.23e+03,9.87e+05,4.99e-04",
+
+    "0x1p-2,0x1p-1,0x1p+0,0x1.8p+0",
+
+    "1,2,3,4,1.5,3.13999,2.5,3.5",
+
+    "1,2,3,4,5,6,7,10,11,0,40,100,200,400,1000,2000",
+
+    "+1,-2,+3,-4,+5,-6,+7,-8",
+
+    "00512,01024,262144,1048576"
+};
+
 //-----------------------------------------------------------
 
 //Test case for vector                                      |
@@ -1822,7 +1849,14 @@ void generateRef(const cl_device_id device)
             as they're constant and hard-coded
         */
         if (caseToTest->printFN == NULL)
+        {
+            if (caseToTest->_type == TYPE_VECTOR
+                && fpConfigSingle == CL_FP_ROUND_TO_ZERO)
+            {
+                caseToTest->_correctBuffer = correctBufferVectorRTZ;
+            }
             continue;
+        }
 
         // Make sure the reference result is empty
         assert(caseToTest->_correctBuffer.size() == 0);
diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp
index 9cf4727a..20f5bd5e 100644
--- a/test_conformance/select/test_select.cpp
+++ b/test_conformance/select/test_select.cpp
@@ -261,14 +261,18 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr,
     switch( vec_len )
     {
         case 1:
-            strncpy(stypename, type_name[srctype], sizeof(stypename));
-            strncpy(ctypename, type_name[cmptype], sizeof(ctypename));
+            strncpy(stypename, type_name[srctype], sizeof(stypename) - 1);
+            stypename[sizeof(stypename) - 1] = '\0';
+            strncpy(ctypename, type_name[cmptype], sizeof(ctypename) - 1);
+            ctypename[sizeof(ctypename) - 1] = '\0';
             snprintf(testname, sizeof(testname), "select_%s_%s", stypename, ctypename );
             log_info("Building %s(%s, %s, %s)\n", testname, stypename, stypename, ctypename);
             break;
         case 3:
-            strncpy(stypename, type_name[srctype], sizeof(stypename));
-            strncpy(ctypename, type_name[cmptype], sizeof(ctypename));
+            strncpy(stypename, type_name[srctype], sizeof(stypename) - 1);
+            stypename[sizeof(stypename) - 1] = '\0';
+            strncpy(ctypename, type_name[cmptype], sizeof(ctypename) - 1);
+            ctypename[sizeof(ctypename) - 1] = '\0';
             snprintf(testname, sizeof(testname), "select_%s3_%s3", stypename, ctypename );
             log_info("Building %s(%s3, %s3, %s3)\n", testname, stypename, stypename, ctypename);
             break;
diff --git a/test_conformance/spirv_new/CMakeLists.txt b/test_conformance/spirv_new/CMakeLists.txt
index 828d417f..c635e924 100644
--- a/test_conformance/spirv_new/CMakeLists.txt
+++ b/test_conformance/spirv_new/CMakeLists.txt
@@ -5,6 +5,7 @@ set(${MODULE_NAME}_SOURCES
   test_basic_versions.cpp
   test_cl_khr_expect_assume.cpp
   test_decorate.cpp
+  test_extinst_printf.cpp
   test_get_program_il.cpp
   test_linkage.cpp
   test_no_integer_wrap_decoration.cpp
@@ -40,6 +41,9 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang"
     add_cxx_flag_if_supported(-Wno-narrowing)
 endif()
 
-set(${MODULE_NAME}_SOURCES ${SPIRV_NEW_SOURCES} ${TEST_HARNESS_SOURCES})
+set(${MODULE_NAME}_SOURCES ${${MODULE_NAME}_SOURCES} ${TEST_HARNESS_SOURCES})
 
 include(../CMakeCommon.txt)
+
+add_subdirectory(spirv_asm)
+add_dependencies(${${MODULE_NAME}_OUT} spirv_new_binaries)
diff --git a/test_conformance/spirv_new/spirv_asm/CMakeLists.txt b/test_conformance/spirv_new/spirv_asm/CMakeLists.txt
new file mode 100644
index 00000000..71ae4a9c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/CMakeLists.txt
@@ -0,0 +1,502 @@
+set(SPRIV_TOOLS_DIR ""
+    CACHE
+    PATH
+    "Absolute path to the directory containing the SPIR-V tools to use")
+
+find_package(Python3 COMPONENTS Interpreter QUIET)
+
+find_program(SPIRV_ASSEMBLER
+    NAMES spirv-as
+    HINTS ${SPIRV_TOOLS_DIR})
+
+find_program(SPIRV_VALIDATOR
+    NAMES spirv-val
+    HINTS ${SPIRV_TOOLS_DIR})
+
+if (Python3_FOUND AND
+    NOT ${SPIRV_ASSEMBLER} STREQUAL "SPIRV_ASSEMBLER-NOTFOUND" AND
+    NOT ${SPIRV_VALIDATOR} STREQUAL "SPIRV_VALIDATOR-NOTFOUND")
+    message(STATUS "Using python3: ${Python3_EXECUTABLE}")
+    message(STATUS "Using spirv-as: ${SPIRV_ASSEMBLER}")
+    message(STATUS "Using spirv-val: ${SPIRV_VALIDATOR}")
+else()
+    message(STATUS "Skipping automatic build of SPIR-V files for spirv_new")
+    # Empty custom target
+    add_custom_target(spirv_new_binaries)
+    return()
+endif()
+
+set(spirv_sources
+    assume.spvasm32
+    assume.spvasm64
+    atomic_dec_global.spvasm32
+    atomic_dec_global.spvasm64
+    atomic_inc_global.spvasm32
+    atomic_inc_global.spvasm64
+    basic.spvasm32
+    basic.spvasm64
+    branch_conditional.spvasm32
+    branch_conditional.spvasm64
+    branch_conditional_weighted.spvasm32
+    branch_conditional_weighted.spvasm64
+    branch_simple.spvasm32
+    branch_simple.spvasm64
+    composite_construct_int4.spvasm32
+    composite_construct_int4.spvasm64
+    composite_construct_struct.spvasm32
+    composite_construct_struct.spvasm64
+    constant_char_simple.spvasm32
+    constant_char_simple.spvasm64
+    constant_double_simple.spvasm32
+    constant_double_simple.spvasm64
+    constant_false_simple.spvasm32
+    constant_false_simple.spvasm64
+    constant_float_simple.spvasm32
+    constant_float_simple.spvasm64
+    constant_half_simple.spvasm32
+    constant_half_simple.spvasm64
+    constant_int3_simple.spvasm32
+    constant_int3_simple.spvasm64
+    constant_int4_simple.spvasm32
+    constant_int4_simple.spvasm64
+    constant_int_simple.spvasm32
+    constant_int_simple.spvasm64
+    constant_long_simple.spvasm32
+    constant_long_simple.spvasm64
+    constant_short_simple.spvasm32
+    constant_short_simple.spvasm64
+    constant_struct_int_char_simple.spvasm32
+    constant_struct_int_char_simple.spvasm64
+    constant_struct_int_float_simple.spvasm32
+    constant_struct_int_float_simple.spvasm64
+    constant_struct_struct_simple.spvasm32
+    constant_struct_struct_simple.spvasm64
+    constant_true_simple.spvasm32
+    constant_true_simple.spvasm64
+    constant_uchar_simple.spvasm32
+    constant_uchar_simple.spvasm64
+    constant_uint_simple.spvasm32
+    constant_uint_simple.spvasm64
+    constant_ulong_simple.spvasm32
+    constant_ulong_simple.spvasm64
+    constant_ushort_simple.spvasm32
+    constant_ushort_simple.spvasm64
+    copy_char_simple.spvasm32
+    copy_char_simple.spvasm64
+    copy_double_simple.spvasm32
+    copy_double_simple.spvasm64
+    copy_float_simple.spvasm32
+    copy_float_simple.spvasm64
+    copy_half_simple.spvasm32
+    copy_half_simple.spvasm64
+    copy_int3_simple.spvasm32
+    copy_int3_simple.spvasm64
+    copy_int4_simple.spvasm32
+    copy_int4_simple.spvasm64
+    copy_int_simple.spvasm32
+    copy_int_simple.spvasm64
+    copy_long_simple.spvasm32
+    copy_long_simple.spvasm64
+    copy_short_simple.spvasm32
+    copy_short_simple.spvasm64
+    copy_struct_int_char_simple.spvasm32
+    copy_struct_int_char_simple.spvasm64
+    copy_struct_int_float_simple.spvasm32
+    copy_struct_int_float_simple.spvasm64
+    copy_struct_struct_simple.spvasm32
+    copy_struct_struct_simple.spvasm64
+    copy_uchar_simple.spvasm32
+    copy_uchar_simple.spvasm64
+    copy_uint_simple.spvasm32
+    copy_uint_simple.spvasm64
+    copy_ulong_simple.spvasm32
+    copy_ulong_simple.spvasm64
+    copy_ushort_simple.spvasm32
+    copy_ushort_simple.spvasm64
+    decorate_aliased.spvasm32
+    decorate_aliased.spvasm64
+    decorate_alignment.spvasm32
+    decorate_alignment.spvasm64
+    decorate_constant.spvasm32
+    decorate_constant.spvasm64
+    decorate_constant_fail.spvasm32
+    decorate_constant_fail.spvasm64
+    decorate_cpacked.spvasm32
+    decorate_cpacked.spvasm64
+    decorate_restrict.spvasm32
+    decorate_restrict.spvasm64
+    decorate_rounding_rte_double_long.spvasm32
+    decorate_rounding_rte_double_long.spvasm64
+    decorate_rounding_rte_float_int.spvasm32
+    decorate_rounding_rte_float_int.spvasm64
+    decorate_rounding_rte_half_short.spvasm32
+    decorate_rounding_rte_half_short.spvasm64
+    decorate_rounding_rtn_double_long.spvasm32
+    decorate_rounding_rtn_double_long.spvasm64
+    decorate_rounding_rtn_float_int.spvasm32
+    decorate_rounding_rtn_float_int.spvasm64
+    decorate_rounding_rtn_half_short.spvasm32
+    decorate_rounding_rtn_half_short.spvasm64
+    decorate_rounding_rtp_double_long.spvasm32
+    decorate_rounding_rtp_double_long.spvasm64
+    decorate_rounding_rtp_float_int.spvasm32
+    decorate_rounding_rtp_float_int.spvasm64
+    decorate_rounding_rtp_half_short.spvasm32
+    decorate_rounding_rtp_half_short.spvasm64
+    decorate_rounding_rtz_double_long.spvasm32
+    decorate_rounding_rtz_double_long.spvasm64
+    decorate_rounding_rtz_float_int.spvasm32
+    decorate_rounding_rtz_float_int.spvasm64
+    decorate_rounding_rtz_half_short.spvasm32
+    decorate_rounding_rtz_half_short.spvasm64
+    decorate_saturated_conversion_double_to_int.spvasm32
+    decorate_saturated_conversion_double_to_int.spvasm64
+    decorate_saturated_conversion_double_to_uint.spvasm32
+    decorate_saturated_conversion_double_to_uint.spvasm64
+    decorate_saturated_conversion_float_to_char.spvasm32
+    decorate_saturated_conversion_float_to_char.spvasm64
+    decorate_saturated_conversion_float_to_short.spvasm32
+    decorate_saturated_conversion_float_to_short.spvasm64
+    decorate_saturated_conversion_float_to_uchar.spvasm32
+    decorate_saturated_conversion_float_to_uchar.spvasm64
+    decorate_saturated_conversion_float_to_ushort.spvasm32
+    decorate_saturated_conversion_float_to_ushort.spvasm64
+    decorate_saturated_conversion_half_to_char.spvasm32
+    decorate_saturated_conversion_half_to_char.spvasm64
+    decorate_saturated_conversion_half_to_uchar.spvasm32
+    decorate_saturated_conversion_half_to_uchar.spvasm64
+    expect_bool.spvasm32
+    expect_bool.spvasm64
+    expect_char.spvasm32
+    expect_char.spvasm64
+    expect_int.spvasm32
+    expect_int.spvasm64
+    expect_long.spvasm32
+    expect_long.spvasm64
+    expect_short.spvasm32
+    expect_short.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_int.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fadd_uint.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_int.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fmul_uint.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fnegate_int.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_int.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fshiftleft_uint.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_int.spvasm64
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm32
+    ext_cl_khr_spirv_no_integer_wrap_decoration_fsub_uint.spvasm64
+    fadd_double.spvasm32
+    fadd_double.spvasm64
+    fadd_double2.spvasm32
+    fadd_double2.spvasm64
+    fadd_float.spvasm32
+    fadd_float.spvasm64
+    fadd_float4.spvasm32
+    fadd_float4.spvasm64
+    fadd_half.spvasm32
+    fadd_half.spvasm64
+    fdiv_double.spvasm32
+    fdiv_double.spvasm64
+    fdiv_double2.spvasm32
+    fdiv_double2.spvasm64
+    fdiv_float.spvasm32
+    fdiv_float.spvasm64
+    fdiv_float4.spvasm32
+    fdiv_float4.spvasm64
+    fdiv_half.spvasm32
+    fdiv_half.spvasm64
+    fmod_double.spvasm32
+    fmod_double.spvasm64
+    fmod_double2.spvasm32
+    fmod_double2.spvasm64
+    fmod_float.spvasm32
+    fmod_float.spvasm64
+    fmod_float4.spvasm32
+    fmod_float4.spvasm64
+    fmod_half.spvasm32
+    fmod_half.spvasm64
+    fmul_double.spvasm32
+    fmul_double.spvasm64
+    fmul_double2.spvasm32
+    fmul_double2.spvasm64
+    fmul_float.spvasm32
+    fmul_float.spvasm64
+    fmul_float4.spvasm32
+    fmul_float4.spvasm64
+    fmul_half.spvasm32
+    fmul_half.spvasm64
+    frem_double.spvasm32
+    frem_double.spvasm64
+    frem_double2.spvasm32
+    frem_double2.spvasm64
+    frem_float.spvasm32
+    frem_float.spvasm64
+    frem_float4.spvasm32
+    frem_float4.spvasm64
+    frem_half.spvasm32
+    frem_half.spvasm64
+    fsub_double.spvasm32
+    fsub_double.spvasm64
+    fsub_double2.spvasm32
+    fsub_double2.spvasm64
+    fsub_float.spvasm32
+    fsub_float.spvasm64
+    fsub_float4.spvasm32
+    fsub_float4.spvasm64
+    fsub_half.spvasm32
+    fsub_half.spvasm64
+    label_simple.spvasm32
+    label_simple.spvasm64
+    lifetime_simple.spvasm32
+    lifetime_simple.spvasm64
+    linkage_export.spvasm32
+    linkage_export.spvasm64
+    linkage_import.spvasm32
+    linkage_import.spvasm64
+    linkage_linkonce_odr_main.spvasm32
+    linkage_linkonce_odr_main.spvasm64
+    linkage_linkonce_odr_noa_main.spvasm32
+    linkage_linkonce_odr_noa_main.spvasm64
+    linkage_linkonce_odr_obj.spvasm32
+    linkage_linkonce_odr_obj.spvasm64
+    loop_merge_branch_conditional_dont_unroll.spvasm32
+    loop_merge_branch_conditional_dont_unroll.spvasm64
+    loop_merge_branch_conditional_none.spvasm32
+    loop_merge_branch_conditional_none.spvasm64
+    loop_merge_branch_conditional_unroll.spvasm32
+    loop_merge_branch_conditional_unroll.spvasm64
+    loop_merge_branch_dont_unroll.spvasm32
+    loop_merge_branch_dont_unroll.spvasm64
+    loop_merge_branch_none.spvasm32
+    loop_merge_branch_none.spvasm64
+    loop_merge_branch_unroll.spvasm32
+    loop_merge_branch_unroll.spvasm64
+    op_function_const.spvasm32
+    op_function_const.spvasm64
+    op_function_inline.spvasm32
+    op_function_inline.spvasm64
+    op_function_noinline.spvasm32
+    op_function_noinline.spvasm64
+    op_function_none.spvasm32
+    op_function_none.spvasm64
+    op_function_pure.spvasm32
+    op_function_pure.spvasm64
+    op_function_pure_ptr.spvasm32
+    op_function_pure_ptr.spvasm64
+    op_neg_double.spvasm32
+    op_neg_double.spvasm64
+    op_neg_float.spvasm32
+    op_neg_float.spvasm64
+    op_neg_float4.spvasm32
+    op_neg_float4.spvasm64
+    op_neg_half.spvasm32
+    op_neg_half.spvasm64
+    op_neg_int.spvasm32
+    op_neg_int.spvasm64
+    op_neg_int4.spvasm32
+    op_neg_int4.spvasm64
+    op_neg_long.spvasm32
+    op_neg_long.spvasm64
+    op_neg_short.spvasm32
+    op_neg_short.spvasm64
+    op_not_int.spvasm32
+    op_not_int.spvasm64
+    op_not_int4.spvasm32
+    op_not_int4.spvasm64
+    op_not_long.spvasm32
+    op_not_long.spvasm64
+    op_not_short.spvasm32
+    op_not_short.spvasm64
+    op_spec_constant_double_simple.spvasm32
+    op_spec_constant_double_simple.spvasm64
+    op_spec_constant_false_simple.spvasm32
+    op_spec_constant_false_simple.spvasm64
+    op_spec_constant_float_simple.spvasm32
+    op_spec_constant_float_simple.spvasm64
+    op_spec_constant_half_simple.spvasm32
+    op_spec_constant_half_simple.spvasm64
+    op_spec_constant_true_simple.spvasm32
+    op_spec_constant_true_simple.spvasm64
+    op_spec_constant_uchar_simple.spvasm32
+    op_spec_constant_uchar_simple.spvasm64
+    op_spec_constant_uint_simple.spvasm32
+    op_spec_constant_uint_simple.spvasm64
+    op_spec_constant_ulong_simple.spvasm32
+    op_spec_constant_ulong_simple.spvasm64
+    op_spec_constant_ushort_simple.spvasm32
+    op_spec_constant_ushort_simple.spvasm64
+    opaque.spvasm32
+    opaque.spvasm64
+    phi_2.spvasm32
+    phi_2.spvasm64
+    phi_3.spvasm32
+    phi_3.spvasm64
+    phi_4.spvasm32
+    phi_4.spvasm64
+    select_if_dont_flatten.spvasm32
+    select_if_dont_flatten.spvasm64
+    select_if_flatten.spvasm32
+    select_if_flatten.spvasm64
+    select_if_none.spvasm32
+    select_if_none.spvasm64
+    select_switch_dont_flatten.spvasm32
+    select_switch_dont_flatten.spvasm64
+    select_switch_flatten.spvasm32
+    select_switch_flatten.spvasm64
+    select_switch_none.spvasm32
+    select_switch_none.spvasm64
+    spv1.1/basic.spvasm32
+    spv1.1/basic.spvasm64
+    spv1.2/basic.spvasm32
+    spv1.2/basic.spvasm64
+    spv1.3/basic.spvasm32
+    spv1.3/basic.spvasm64
+    spv1.4/basic.spvasm32
+    spv1.4/basic.spvasm64
+    spv1.4/copylogical_struct.spvasm32
+    spv1.4/copylogical_struct.spvasm64
+    spv1.4/copymemory_memory_operands.spvasm32
+    spv1.4/copymemory_memory_operands.spvasm64
+    spv1.4/image_operand_signextend.spvasm32
+    spv1.4/image_operand_signextend.spvasm64
+    spv1.4/image_operand_zeroextend.spvasm32
+    spv1.4/image_operand_zeroextend.spvasm64
+    spv1.4/loop_control_iterationmultiple.spvasm32
+    spv1.4/loop_control_iterationmultiple.spvasm64
+    spv1.4/loop_control_maxiterations.spvasm32
+    spv1.4/loop_control_maxiterations.spvasm64
+    spv1.4/loop_control_miniterations.spvasm32
+    spv1.4/loop_control_miniterations.spvasm64
+    spv1.4/loop_control_partialcount.spvasm32
+    spv1.4/loop_control_partialcount.spvasm64
+    spv1.4/loop_control_peelcount.spvasm32
+    spv1.4/loop_control_peelcount.spvasm64
+    spv1.4/no_integer_wrap_decoration_fadd_int.spvasm32
+    spv1.4/no_integer_wrap_decoration_fadd_int.spvasm64
+    spv1.4/no_integer_wrap_decoration_fadd_uint.spvasm32
+    spv1.4/no_integer_wrap_decoration_fadd_uint.spvasm64
+    spv1.4/no_integer_wrap_decoration_fmul_int.spvasm32
+    spv1.4/no_integer_wrap_decoration_fmul_int.spvasm64
+    spv1.4/no_integer_wrap_decoration_fmul_uint.spvasm32
+    spv1.4/no_integer_wrap_decoration_fmul_uint.spvasm64
+    spv1.4/no_integer_wrap_decoration_fnegate_int.spvasm32
+    spv1.4/no_integer_wrap_decoration_fnegate_int.spvasm64
+    spv1.4/no_integer_wrap_decoration_fshiftleft_int.spvasm32
+    spv1.4/no_integer_wrap_decoration_fshiftleft_int.spvasm64
+    spv1.4/no_integer_wrap_decoration_fshiftleft_uint.spvasm32
+    spv1.4/no_integer_wrap_decoration_fshiftleft_uint.spvasm64
+    spv1.4/no_integer_wrap_decoration_fsub_int.spvasm32
+    spv1.4/no_integer_wrap_decoration_fsub_int.spvasm64
+    spv1.4/no_integer_wrap_decoration_fsub_uint.spvasm32
+    spv1.4/no_integer_wrap_decoration_fsub_uint.spvasm64
+    spv1.4/nonwriteable_decoration_function_storage_class.spvasm32
+    spv1.4/nonwriteable_decoration_function_storage_class.spvasm64
+    spv1.4/ptrops.spvasm32
+    spv1.4/ptrops.spvasm64
+    spv1.4/select_struct.spvasm32
+    spv1.4/select_struct.spvasm64
+    spv1.4/usersemantic_decoratestring.spvasm32
+    spv1.4/usersemantic_decoratestring.spvasm64
+    spv1.4/usersemantic_memberdecoratestring.spvasm32
+    spv1.4/usersemantic_memberdecoratestring.spvasm64
+    spv1.5/basic.spvasm32
+    spv1.5/basic.spvasm64
+    spv1.6/basic.spvasm32
+    spv1.6/basic.spvasm64
+    undef_char_simple.spvasm32
+    undef_char_simple.spvasm64
+    undef_double_simple.spvasm32
+    undef_double_simple.spvasm64
+    undef_false_simple.spvasm32
+    undef_false_simple.spvasm64
+    undef_float_simple.spvasm32
+    undef_float_simple.spvasm64
+    undef_half_simple.spvasm32
+    undef_half_simple.spvasm64
+    undef_int3_simple.spvasm32
+    undef_int3_simple.spvasm64
+    undef_int4_simple.spvasm32
+    undef_int4_simple.spvasm64
+    undef_int_simple.spvasm32
+    undef_int_simple.spvasm64
+    undef_long_simple.spvasm32
+    undef_long_simple.spvasm64
+    undef_short_simple.spvasm32
+    undef_short_simple.spvasm64
+    undef_struct_int_char_simple.spvasm32
+    undef_struct_int_char_simple.spvasm64
+    undef_struct_int_float_simple.spvasm32
+    undef_struct_int_float_simple.spvasm64
+    undef_struct_struct_simple.spvasm32
+    undef_struct_struct_simple.spvasm64
+    undef_true_simple.spvasm32
+    undef_true_simple.spvasm64
+    undef_uchar_simple.spvasm32
+    undef_uchar_simple.spvasm64
+    undef_uint_simple.spvasm32
+    undef_uint_simple.spvasm64
+    undef_ulong_simple.spvasm32
+    undef_ulong_simple.spvasm64
+    undef_ushort_simple.spvasm32
+    undef_ushort_simple.spvasm64
+    unreachable_simple.spvasm32
+    unreachable_simple.spvasm64
+    vector_char16_extract.spvasm32
+    vector_char16_extract.spvasm64
+    vector_char16_insert.spvasm32
+    vector_char16_insert.spvasm64
+    vector_double2_extract.spvasm32
+    vector_double2_extract.spvasm64
+    vector_double2_insert.spvasm32
+    vector_double2_insert.spvasm64
+    vector_float4_extract.spvasm32
+    vector_float4_extract.spvasm64
+    vector_float4_insert.spvasm32
+    vector_float4_insert.spvasm64
+    vector_half8_extract.spvasm32
+    vector_half8_extract.spvasm64
+    vector_half8_insert.spvasm32
+    vector_half8_insert.spvasm64
+    vector_int4_extract.spvasm32
+    vector_int4_extract.spvasm64
+    vector_int4_insert.spvasm32
+    vector_int4_insert.spvasm64
+    vector_long2_extract.spvasm32
+    vector_long2_extract.spvasm64
+    vector_long2_insert.spvasm32
+    vector_long2_insert.spvasm64
+    vector_times_scalar_double.spvasm32
+    vector_times_scalar_double.spvasm64
+    vector_times_scalar_float.spvasm32
+    vector_times_scalar_float.spvasm64
+    vector_times_scalar_half.spvasm32
+    vector_times_scalar_half.spvasm64)
+
+set(assembled_spirv_binaries "")
+foreach(spirv_source ${spirv_sources})
+    string(REPLACE ".spvasm" ".spv" assembled_spirv_binary ${spirv_source})
+    set(assembled_spirv_binary "${CMAKE_CURRENT_BINARY_DIR}/../spirv_bin/${assembled_spirv_binary}")
+    list(APPEND assembled_spirv_binaries ${assembled_spirv_binary})
+endforeach()
+
+add_custom_command(
+    OUTPUT ${assembled_spirv_binaries}
+    COMMENT "Generating SPIR-V binaries..."
+    COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/assemble_spirv.py
+            --source-dir "${CMAKE_CURRENT_SOURCE_DIR}"
+            --output-dir "${CMAKE_CURRENT_BINARY_DIR}/../spirv_bin"
+            --assembler "${SPIRV_ASSEMBLER}"
+            --validator "${SPIRV_VALIDATOR}"
+    DEPENDS assemble_spirv.py ${spirv_sources}
+    USES_TERMINAL
+    VERBATIM)
+
+add_custom_target(spirv_new_binaries DEPENDS ${assembled_spirv_binaries})
diff --git a/test_conformance/spirv_new/assemble_spirv.py b/test_conformance/spirv_new/spirv_asm/assemble_spirv.py
similarity index 100%
rename from test_conformance/spirv_new/assemble_spirv.py
rename to test_conformance/spirv_new/spirv_asm/assemble_spirv.py
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm32 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm32
new file mode 100644
index 00000000..58631498
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm32
@@ -0,0 +1,85 @@
+; kernel void printf_operands_scalar_fp32(float f)
+; {
+;     printf("a = %.1a\n", f);
+;     printf("A = %.1A\n", f);
+;     printf("e = %.1e\n", f);
+;     printf("E = %.1E\n", f);
+;     printf("f = %.1f\n", f);
+;     printf("F = %.1F\n", f);
+;     printf("g = %.1g\n", f);
+;     printf("G = %.1G\n", f);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_fp32"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+  %uchar_dot = OpConstant %uchar 46
+    %uchar_1 = OpConstant %uchar 49
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_A = OpConstant %uchar 65
+    %uchar_E = OpConstant %uchar 69
+    %uchar_F = OpConstant %uchar 70
+    %uchar_G = OpConstant %uchar 71
+    %uchar_a = OpConstant %uchar 97
+    %uchar_e = OpConstant %uchar 101
+    %uchar_f = OpConstant %uchar 102
+    %uchar_g = OpConstant %uchar 103
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+       %void = OpTypeVoid
+      %float = OpTypeFloat 32
+ %kernel_sig = OpTypeFunction %void %float
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_a = OpConstantComposite %string_10 %uchar_a %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_a %uchar_nl %uchar_nul ; "a = %.1a\n"
+   %string_a = OpVariable %cptr_string_10 UniformConstant %array_a
+    %array_A = OpConstantComposite %string_10 %uchar_A %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_A %uchar_nl %uchar_nul ; "A = %.1A\n"
+   %string_A = OpVariable %cptr_string_10 UniformConstant %array_A
+    %array_e = OpConstantComposite %string_10 %uchar_e %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_e %uchar_nl %uchar_nul ; "e = %.1e\n"
+   %string_e = OpVariable %cptr_string_10 UniformConstant %array_e
+    %array_E = OpConstantComposite %string_10 %uchar_E %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_E %uchar_nl %uchar_nul ; "E = %.1E\n"
+   %string_E = OpVariable %cptr_string_10 UniformConstant %array_E
+    %array_f = OpConstantComposite %string_10 %uchar_f %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_f %uchar_nl %uchar_nul ; "f = %.1f\n"
+   %string_f = OpVariable %cptr_string_10 UniformConstant %array_f
+    %array_F = OpConstantComposite %string_10 %uchar_F %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_F %uchar_nl %uchar_nul ; "F = %.1F\n"
+   %string_F = OpVariable %cptr_string_10 UniformConstant %array_F
+    %array_g = OpConstantComposite %string_10 %uchar_g %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_g %uchar_nl %uchar_nul ; "g = %.1g\n"
+   %string_g = OpVariable %cptr_string_10 UniformConstant %array_g
+    %array_G = OpConstantComposite %string_10 %uchar_G %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_G %uchar_nl %uchar_nul ; "G = %.1G\n"
+   %string_G = OpVariable %cptr_string_10 UniformConstant %array_G
+
+       %test = OpFunction %void None %kernel_sig
+          %f = OpFunctionParameter %float
+      %entry = OpLabel
+
+      %fmt_a = OpBitcast %cptr_char %string_a
+   %printf_a = OpExtInst %uint %clext printf %fmt_a %f
+      %fmt_A = OpBitcast %cptr_char %string_A
+   %printf_A = OpExtInst %uint %clext printf %fmt_A %f
+      %fmt_e = OpBitcast %cptr_char %string_e
+   %printf_e = OpExtInst %uint %clext printf %fmt_e %f
+      %fmt_E = OpBitcast %cptr_char %string_E
+   %printf_E = OpExtInst %uint %clext printf %fmt_E %f
+      %fmt_f = OpBitcast %cptr_char %string_f
+   %printf_f = OpExtInst %uint %clext printf %fmt_f %f
+      %fmt_F = OpBitcast %cptr_char %string_F
+   %printf_F = OpExtInst %uint %clext printf %fmt_F %f
+      %fmt_g = OpBitcast %cptr_char %string_g
+   %printf_g = OpExtInst %uint %clext printf %fmt_g %f
+      %fmt_G = OpBitcast %cptr_char %string_G
+   %printf_G = OpExtInst %uint %clext printf %fmt_G %f
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm64 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm64
new file mode 100644
index 00000000..ba415d09
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp32.spvasm64
@@ -0,0 +1,85 @@
+; kernel void printf_operands_scalar_fp32(float f)
+; {
+;     printf("a = %.1a\n", f);
+;     printf("A = %.1A\n", f);
+;     printf("e = %.1e\n", f);
+;     printf("E = %.1E\n", f);
+;     printf("f = %.1f\n", f);
+;     printf("F = %.1F\n", f);
+;     printf("g = %.1g\n", f);
+;     printf("G = %.1G\n", f);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_fp32"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+  %uchar_dot = OpConstant %uchar 46
+    %uchar_1 = OpConstant %uchar 49
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_A = OpConstant %uchar 65
+    %uchar_E = OpConstant %uchar 69
+    %uchar_F = OpConstant %uchar 70
+    %uchar_G = OpConstant %uchar 71
+    %uchar_a = OpConstant %uchar 97
+    %uchar_e = OpConstant %uchar 101
+    %uchar_f = OpConstant %uchar 102
+    %uchar_g = OpConstant %uchar 103
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+       %void = OpTypeVoid
+      %float = OpTypeFloat 32
+ %kernel_sig = OpTypeFunction %void %float
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_a = OpConstantComposite %string_10 %uchar_a %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_a %uchar_nl %uchar_nul ; "a = %.1a\n"
+   %string_a = OpVariable %cptr_string_10 UniformConstant %array_a
+    %array_A = OpConstantComposite %string_10 %uchar_A %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_A %uchar_nl %uchar_nul ; "A = %.1A\n"
+   %string_A = OpVariable %cptr_string_10 UniformConstant %array_A
+    %array_e = OpConstantComposite %string_10 %uchar_e %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_e %uchar_nl %uchar_nul ; "e = %.1e\n"
+   %string_e = OpVariable %cptr_string_10 UniformConstant %array_e
+    %array_E = OpConstantComposite %string_10 %uchar_E %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_E %uchar_nl %uchar_nul ; "E = %.1E\n"
+   %string_E = OpVariable %cptr_string_10 UniformConstant %array_E
+    %array_f = OpConstantComposite %string_10 %uchar_f %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_f %uchar_nl %uchar_nul ; "f = %.1f\n"
+   %string_f = OpVariable %cptr_string_10 UniformConstant %array_f
+    %array_F = OpConstantComposite %string_10 %uchar_F %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_F %uchar_nl %uchar_nul ; "F = %.1F\n"
+   %string_F = OpVariable %cptr_string_10 UniformConstant %array_F
+    %array_g = OpConstantComposite %string_10 %uchar_g %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_g %uchar_nl %uchar_nul ; "g = %.1g\n"
+   %string_g = OpVariable %cptr_string_10 UniformConstant %array_g
+    %array_G = OpConstantComposite %string_10 %uchar_G %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_G %uchar_nl %uchar_nul ; "G = %.1G\n"
+   %string_G = OpVariable %cptr_string_10 UniformConstant %array_G
+
+       %test = OpFunction %void None %kernel_sig
+          %f = OpFunctionParameter %float
+      %entry = OpLabel
+
+      %fmt_a = OpBitcast %cptr_char %string_a
+   %printf_a = OpExtInst %uint %clext printf %fmt_a %f
+      %fmt_A = OpBitcast %cptr_char %string_A
+   %printf_A = OpExtInst %uint %clext printf %fmt_A %f
+      %fmt_e = OpBitcast %cptr_char %string_e
+   %printf_e = OpExtInst %uint %clext printf %fmt_e %f
+      %fmt_E = OpBitcast %cptr_char %string_E
+   %printf_E = OpExtInst %uint %clext printf %fmt_E %f
+      %fmt_f = OpBitcast %cptr_char %string_f
+   %printf_f = OpExtInst %uint %clext printf %fmt_f %f
+      %fmt_F = OpBitcast %cptr_char %string_F
+   %printf_F = OpExtInst %uint %clext printf %fmt_F %f
+      %fmt_g = OpBitcast %cptr_char %string_g
+   %printf_g = OpExtInst %uint %clext printf %fmt_g %f
+      %fmt_G = OpBitcast %cptr_char %string_G
+   %printf_G = OpExtInst %uint %clext printf %fmt_G %f
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm32 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm32
new file mode 100644
index 00000000..1b31cf49
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm32
@@ -0,0 +1,93 @@
+; kernel void printf_operands_scalar_fp64(double d)
+; {
+;     printf("a = %.1a\n", d);
+;     printf("A = %.1A\n", d);
+;     printf("e = %.1e\n", d);
+;     printf("E = %.1E\n", d);
+;     printf("f = %.1f\n", d);
+;     printf("F = %.1F\n", d);
+;     printf("g = %.1g\n", d);
+;     printf("G = %.1G\n", d);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Float64
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_fp64"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+  %uchar_dot = OpConstant %uchar 46
+    %uchar_1 = OpConstant %uchar 49
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_A = OpConstant %uchar 65
+    %uchar_E = OpConstant %uchar 69
+    %uchar_F = OpConstant %uchar 70
+    %uchar_G = OpConstant %uchar 71
+    %uchar_X = OpConstant %uchar 88
+    %uchar_a = OpConstant %uchar 97
+    %uchar_d = OpConstant %uchar 100
+    %uchar_e = OpConstant %uchar 101
+    %uchar_f = OpConstant %uchar 102
+    %uchar_g = OpConstant %uchar 103
+    %uchar_h = OpConstant %uchar 104
+    %uchar_i = OpConstant %uchar 105
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+       %void = OpTypeVoid
+     %double = OpTypeFloat 64
+ %kernel_sig = OpTypeFunction %void %double
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_a = OpConstantComposite %string_10 %uchar_a %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_a %uchar_nl %uchar_nul ; "a = %.1a\n"
+   %string_a = OpVariable %cptr_string_10 UniformConstant %array_a
+    %array_A = OpConstantComposite %string_10 %uchar_A %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_A %uchar_nl %uchar_nul ; "A = %.1A\n"
+   %string_A = OpVariable %cptr_string_10 UniformConstant %array_A
+    %array_e = OpConstantComposite %string_10 %uchar_e %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_e %uchar_nl %uchar_nul ; "e = %.1e\n"
+   %string_e = OpVariable %cptr_string_10 UniformConstant %array_e
+    %array_E = OpConstantComposite %string_10 %uchar_E %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_E %uchar_nl %uchar_nul ; "E = %.1E\n"
+   %string_E = OpVariable %cptr_string_10 UniformConstant %array_E
+    %array_f = OpConstantComposite %string_10 %uchar_f %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_f %uchar_nl %uchar_nul ; "f = %.1f\n"
+   %string_f = OpVariable %cptr_string_10 UniformConstant %array_f
+    %array_F = OpConstantComposite %string_10 %uchar_F %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_F %uchar_nl %uchar_nul ; "F = %.1F\n"
+   %string_F = OpVariable %cptr_string_10 UniformConstant %array_F
+    %array_g = OpConstantComposite %string_10 %uchar_g %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_g %uchar_nl %uchar_nul ; "g = %.1g\n"
+   %string_g = OpVariable %cptr_string_10 UniformConstant %array_g
+    %array_G = OpConstantComposite %string_10 %uchar_G %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_G %uchar_nl %uchar_nul ; "G = %.1G\n"
+   %string_G = OpVariable %cptr_string_10 UniformConstant %array_G
+
+       %test = OpFunction %void None %kernel_sig
+          %d = OpFunctionParameter %double
+      %entry = OpLabel
+
+      %fmt_a = OpBitcast %cptr_char %string_a
+   %printf_a = OpExtInst %uint %clext printf %fmt_a %d
+      %fmt_A = OpBitcast %cptr_char %string_A
+   %printf_A = OpExtInst %uint %clext printf %fmt_A %d
+      %fmt_e = OpBitcast %cptr_char %string_e
+   %printf_e = OpExtInst %uint %clext printf %fmt_e %d
+      %fmt_E = OpBitcast %cptr_char %string_E
+   %printf_E = OpExtInst %uint %clext printf %fmt_E %d
+      %fmt_f = OpBitcast %cptr_char %string_f
+   %printf_f = OpExtInst %uint %clext printf %fmt_f %d
+      %fmt_F = OpBitcast %cptr_char %string_F
+   %printf_F = OpExtInst %uint %clext printf %fmt_F %d
+      %fmt_g = OpBitcast %cptr_char %string_g
+   %printf_g = OpExtInst %uint %clext printf %fmt_g %d
+      %fmt_G = OpBitcast %cptr_char %string_G
+   %printf_G = OpExtInst %uint %clext printf %fmt_G %d
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm64 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm64
new file mode 100644
index 00000000..a947e5ec
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_fp64.spvasm64
@@ -0,0 +1,93 @@
+; kernel void printf_operands_scalar_fp64(double d)
+; {
+;     printf("a = %.1a\n", d);
+;     printf("A = %.1A\n", d);
+;     printf("e = %.1e\n", d);
+;     printf("E = %.1E\n", d);
+;     printf("f = %.1f\n", d);
+;     printf("F = %.1F\n", d);
+;     printf("g = %.1g\n", d);
+;     printf("G = %.1G\n", d);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Float64
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_fp64"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+  %uchar_dot = OpConstant %uchar 46
+    %uchar_1 = OpConstant %uchar 49
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_A = OpConstant %uchar 65
+    %uchar_E = OpConstant %uchar 69
+    %uchar_F = OpConstant %uchar 70
+    %uchar_G = OpConstant %uchar 71
+    %uchar_X = OpConstant %uchar 88
+    %uchar_a = OpConstant %uchar 97
+    %uchar_d = OpConstant %uchar 100
+    %uchar_e = OpConstant %uchar 101
+    %uchar_f = OpConstant %uchar 102
+    %uchar_g = OpConstant %uchar 103
+    %uchar_h = OpConstant %uchar 104
+    %uchar_i = OpConstant %uchar 105
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+       %void = OpTypeVoid
+     %double = OpTypeFloat 64
+ %kernel_sig = OpTypeFunction %void %double
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_a = OpConstantComposite %string_10 %uchar_a %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_a %uchar_nl %uchar_nul ; "a = %.1a\n"
+   %string_a = OpVariable %cptr_string_10 UniformConstant %array_a
+    %array_A = OpConstantComposite %string_10 %uchar_A %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_A %uchar_nl %uchar_nul ; "A = %.1A\n"
+   %string_A = OpVariable %cptr_string_10 UniformConstant %array_A
+    %array_e = OpConstantComposite %string_10 %uchar_e %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_e %uchar_nl %uchar_nul ; "e = %.1e\n"
+   %string_e = OpVariable %cptr_string_10 UniformConstant %array_e
+    %array_E = OpConstantComposite %string_10 %uchar_E %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_E %uchar_nl %uchar_nul ; "E = %.1E\n"
+   %string_E = OpVariable %cptr_string_10 UniformConstant %array_E
+    %array_f = OpConstantComposite %string_10 %uchar_f %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_f %uchar_nl %uchar_nul ; "f = %.1f\n"
+   %string_f = OpVariable %cptr_string_10 UniformConstant %array_f
+    %array_F = OpConstantComposite %string_10 %uchar_F %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_F %uchar_nl %uchar_nul ; "F = %.1F\n"
+   %string_F = OpVariable %cptr_string_10 UniformConstant %array_F
+    %array_g = OpConstantComposite %string_10 %uchar_g %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_g %uchar_nl %uchar_nul ; "g = %.1g\n"
+   %string_g = OpVariable %cptr_string_10 UniformConstant %array_g
+    %array_G = OpConstantComposite %string_10 %uchar_G %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_dot %uchar_1 %uchar_G %uchar_nl %uchar_nul ; "G = %.1G\n"
+   %string_G = OpVariable %cptr_string_10 UniformConstant %array_G
+
+       %test = OpFunction %void None %kernel_sig
+          %d = OpFunctionParameter %double
+      %entry = OpLabel
+
+      %fmt_a = OpBitcast %cptr_char %string_a
+   %printf_a = OpExtInst %uint %clext printf %fmt_a %d
+      %fmt_A = OpBitcast %cptr_char %string_A
+   %printf_A = OpExtInst %uint %clext printf %fmt_A %d
+      %fmt_e = OpBitcast %cptr_char %string_e
+   %printf_e = OpExtInst %uint %clext printf %fmt_e %d
+      %fmt_E = OpBitcast %cptr_char %string_E
+   %printf_E = OpExtInst %uint %clext printf %fmt_E %d
+      %fmt_f = OpBitcast %cptr_char %string_f
+   %printf_f = OpExtInst %uint %clext printf %fmt_f %d
+      %fmt_F = OpBitcast %cptr_char %string_F
+   %printf_F = OpExtInst %uint %clext printf %fmt_F %d
+      %fmt_g = OpBitcast %cptr_char %string_g
+   %printf_g = OpExtInst %uint %clext printf %fmt_g %d
+      %fmt_G = OpBitcast %cptr_char %string_G
+   %printf_G = OpExtInst %uint %clext printf %fmt_G %d
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm32 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm32
new file mode 100644
index 00000000..61fb8cd0
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm32
@@ -0,0 +1,140 @@
+; kernel void printf_operands_scalar_int32(int i)
+; {
+;     printf("d = %d\n", i);
+;     printf("i = %i\n", i);
+;     printf("o = %o\n", i);
+;     printf("u = %u\n", i);
+;     printf("x = %x\n", i);
+;     printf("X = %X\n", i);
+;
+;     printf("hd = %hd\n", i);
+;     printf("hi = %hi\n", i);
+;     printf("ho = %ho\n", i);
+;     printf("hu = %hu\n", i);
+;     printf("hx = %hx\n", i);
+;     printf("hX = %hX\n", i);
+;
+;     printf("hhd = %hhd\n", i);
+;     printf("hhi = %hhi\n", i);
+;     printf("hho = %hho\n", i);
+;     printf("hhu = %hhu\n", i);
+;     printf("hhx = %hhx\n", i);
+;     printf("hhX = %hhX\n", i);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_int32"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_X = OpConstant %uchar 88
+    %uchar_d = OpConstant %uchar 100
+    %uchar_h = OpConstant %uchar 104
+    %uchar_i = OpConstant %uchar 105
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+   %string_8 = OpTypeArray %uchar %uint_8
+%cptr_string_8 = OpTypePointer UniformConstant %string_8
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+  %string_12 = OpTypeArray %uchar %uint_12
+%cptr_string_12 = OpTypePointer UniformConstant %string_12
+       %void = OpTypeVoid
+ %kernel_sig = OpTypeFunction %void %uint
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_d = OpConstantComposite %string_8 %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_d %uchar_nl %uchar_nul ; "d = %d\n"
+   %string_d = OpVariable %cptr_string_8 UniformConstant %array_d
+    %array_i = OpConstantComposite %string_8 %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_i %uchar_nl %uchar_nul ; "i = %i\n"
+   %string_i = OpVariable %cptr_string_8 UniformConstant %array_i
+    %array_o = OpConstantComposite %string_8 %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_o %uchar_nl %uchar_nul ; "o = %o\n"
+   %string_o = OpVariable %cptr_string_8 UniformConstant %array_o
+    %array_u = OpConstantComposite %string_8 %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_u %uchar_nl %uchar_nul ; "u = %u\n"
+   %string_u = OpVariable %cptr_string_8 UniformConstant %array_u
+    %array_x = OpConstantComposite %string_8 %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_x %uchar_nl %uchar_nul ; "x = %x\n"
+   %string_x = OpVariable %cptr_string_8 UniformConstant %array_x
+    %array_X = OpConstantComposite %string_8 %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_X %uchar_nl %uchar_nul ; "X = %X\n"
+   %string_X = OpVariable %cptr_string_8 UniformConstant %array_X
+
+   %array_hd = OpConstantComposite %string_10 %uchar_h %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_d %uchar_nl %uchar_nul ; "hd = %hd\n"
+  %string_hd = OpVariable %cptr_string_10 UniformConstant %array_hd
+   %array_hi = OpConstantComposite %string_10 %uchar_h %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_i %uchar_nl %uchar_nul ; "hi = %hi\n"
+  %string_hi = OpVariable %cptr_string_10 UniformConstant %array_hi
+   %array_ho = OpConstantComposite %string_10 %uchar_h %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_o %uchar_nl %uchar_nul ; "ho = %ho\n"
+  %string_ho = OpVariable %cptr_string_10 UniformConstant %array_ho
+   %array_hu = OpConstantComposite %string_10 %uchar_h %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_u %uchar_nl %uchar_nul ; "hu = %hu\n"
+  %string_hu = OpVariable %cptr_string_10 UniformConstant %array_hu
+   %array_hx = OpConstantComposite %string_10 %uchar_h %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_x %uchar_nl %uchar_nul ; "hx = %hx\n"
+  %string_hx = OpVariable %cptr_string_10 UniformConstant %array_hx
+   %array_hX = OpConstantComposite %string_10 %uchar_h %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_X %uchar_nl %uchar_nul ; "hX = %hX\n"
+  %string_hX = OpVariable %cptr_string_10 UniformConstant %array_hX
+
+  %array_hhd = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_d %uchar_nl %uchar_nul ; "hhd = %hhd\n"
+ %string_hhd = OpVariable %cptr_string_12 UniformConstant %array_hhd
+  %array_hhi = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_i %uchar_nl %uchar_nul ; "hhi = %hhi\n"
+ %string_hhi = OpVariable %cptr_string_12 UniformConstant %array_hhi
+  %array_hho = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_o %uchar_nl %uchar_nul ; "hho = %hho\n"
+ %string_hho = OpVariable %cptr_string_12 UniformConstant %array_hho
+  %array_hhu = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_u %uchar_nl %uchar_nul ; "hhu = %hhu\n"
+ %string_hhu = OpVariable %cptr_string_12 UniformConstant %array_hhu
+  %array_hhx = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_x %uchar_nl %uchar_nul ; "hhx = %hhx\n"
+ %string_hhx = OpVariable %cptr_string_12 UniformConstant %array_hhx
+  %array_hhX = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_X %uchar_nl %uchar_nul ; "hhX = %hhX\n"
+ %string_hhX = OpVariable %cptr_string_12 UniformConstant %array_hhX
+
+       %test = OpFunction %void None %kernel_sig
+          %i = OpFunctionParameter %uint
+      %entry = OpLabel
+      %fmt_d = OpBitcast %cptr_char %string_d
+   %printf_d = OpExtInst %uint %clext printf %fmt_d %i
+      %fmt_i = OpBitcast %cptr_char %string_i
+   %printf_i = OpExtInst %uint %clext printf %fmt_i %i
+      %fmt_o = OpBitcast %cptr_char %string_o
+   %printf_o = OpExtInst %uint %clext printf %fmt_o %i
+      %fmt_u = OpBitcast %cptr_char %string_u
+   %printf_u = OpExtInst %uint %clext printf %fmt_u %i
+      %fmt_x = OpBitcast %cptr_char %string_x
+   %printf_x = OpExtInst %uint %clext printf %fmt_x %i
+      %fmt_X = OpBitcast %cptr_char %string_X
+   %printf_X = OpExtInst %uint %clext printf %fmt_X %i
+
+     %fmt_hd = OpBitcast %cptr_char %string_hd
+  %printf_hd = OpExtInst %uint %clext printf %fmt_hd %i
+     %fmt_hi = OpBitcast %cptr_char %string_hi
+  %printf_hi = OpExtInst %uint %clext printf %fmt_hi %i
+     %fmt_ho = OpBitcast %cptr_char %string_ho
+  %printf_ho = OpExtInst %uint %clext printf %fmt_ho %i
+     %fmt_hu = OpBitcast %cptr_char %string_hu
+  %printf_hu = OpExtInst %uint %clext printf %fmt_hu %i
+     %fmt_hx = OpBitcast %cptr_char %string_hx
+  %printf_hx = OpExtInst %uint %clext printf %fmt_hx %i
+     %fmt_hX = OpBitcast %cptr_char %string_hX
+  %printf_hX = OpExtInst %uint %clext printf %fmt_hX %i
+
+    %fmt_hhd = OpBitcast %cptr_char %string_hhd
+ %printf_hhd = OpExtInst %uint %clext printf %fmt_hhd %i
+    %fmt_hhi = OpBitcast %cptr_char %string_hhi
+ %printf_hhi = OpExtInst %uint %clext printf %fmt_hhi %i
+    %fmt_hho = OpBitcast %cptr_char %string_hho
+ %printf_hho = OpExtInst %uint %clext printf %fmt_hho %i
+    %fmt_hhu = OpBitcast %cptr_char %string_hhu
+ %printf_hhu = OpExtInst %uint %clext printf %fmt_hhu %i
+    %fmt_hhx = OpBitcast %cptr_char %string_hhx
+ %printf_hhx = OpExtInst %uint %clext printf %fmt_hhx %i
+    %fmt_hhX = OpBitcast %cptr_char %string_hhX
+ %printf_hhX = OpExtInst %uint %clext printf %fmt_hhX %i
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm64 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm64
new file mode 100644
index 00000000..91ad8e1e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int32.spvasm64
@@ -0,0 +1,140 @@
+; kernel void printf_operands_scalar_int32(int i)
+; {
+;     printf("d = %d\n", i);
+;     printf("i = %i\n", i);
+;     printf("o = %o\n", i);
+;     printf("u = %u\n", i);
+;     printf("x = %x\n", i);
+;     printf("X = %X\n", i);
+;
+;     printf("hd = %hd\n", i);
+;     printf("hi = %hi\n", i);
+;     printf("ho = %ho\n", i);
+;     printf("hu = %hu\n", i);
+;     printf("hx = %hx\n", i);
+;     printf("hX = %hX\n", i);
+;
+;     printf("hhd = %hhd\n", i);
+;     printf("hhi = %hhi\n", i);
+;     printf("hho = %hho\n", i);
+;     printf("hhu = %hhu\n", i);
+;     printf("hhx = %hhx\n", i);
+;     printf("hhX = %hhX\n", i);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_int32"
+      %uchar = OpTypeInt 8 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+  %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_X = OpConstant %uchar 88
+    %uchar_d = OpConstant %uchar 100
+    %uchar_h = OpConstant %uchar 104
+    %uchar_i = OpConstant %uchar 105
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+   %string_8 = OpTypeArray %uchar %uint_8
+%cptr_string_8 = OpTypePointer UniformConstant %string_8
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+  %string_12 = OpTypeArray %uchar %uint_12
+%cptr_string_12 = OpTypePointer UniformConstant %string_12
+       %void = OpTypeVoid
+ %kernel_sig = OpTypeFunction %void %uint
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+    %array_d = OpConstantComposite %string_8 %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_d %uchar_nl %uchar_nul ; "d = %d\n"
+   %string_d = OpVariable %cptr_string_8 UniformConstant %array_d
+    %array_i = OpConstantComposite %string_8 %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_i %uchar_nl %uchar_nul ; "i = %i\n"
+   %string_i = OpVariable %cptr_string_8 UniformConstant %array_i
+    %array_o = OpConstantComposite %string_8 %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_o %uchar_nl %uchar_nul ; "o = %o\n"
+   %string_o = OpVariable %cptr_string_8 UniformConstant %array_o
+    %array_u = OpConstantComposite %string_8 %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_u %uchar_nl %uchar_nul ; "u = %u\n"
+   %string_u = OpVariable %cptr_string_8 UniformConstant %array_u
+    %array_x = OpConstantComposite %string_8 %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_x %uchar_nl %uchar_nul ; "x = %x\n"
+   %string_x = OpVariable %cptr_string_8 UniformConstant %array_x
+    %array_X = OpConstantComposite %string_8 %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_X %uchar_nl %uchar_nul ; "X = %X\n"
+   %string_X = OpVariable %cptr_string_8 UniformConstant %array_X
+
+   %array_hd = OpConstantComposite %string_10 %uchar_h %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_d %uchar_nl %uchar_nul ; "hd = %hd\n"
+  %string_hd = OpVariable %cptr_string_10 UniformConstant %array_hd
+   %array_hi = OpConstantComposite %string_10 %uchar_h %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_i %uchar_nl %uchar_nul ; "hi = %hi\n"
+  %string_hi = OpVariable %cptr_string_10 UniformConstant %array_hi
+   %array_ho = OpConstantComposite %string_10 %uchar_h %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_o %uchar_nl %uchar_nul ; "ho = %ho\n"
+  %string_ho = OpVariable %cptr_string_10 UniformConstant %array_ho
+   %array_hu = OpConstantComposite %string_10 %uchar_h %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_u %uchar_nl %uchar_nul ; "hu = %hu\n"
+  %string_hu = OpVariable %cptr_string_10 UniformConstant %array_hu
+   %array_hx = OpConstantComposite %string_10 %uchar_h %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_x %uchar_nl %uchar_nul ; "hx = %hx\n"
+  %string_hx = OpVariable %cptr_string_10 UniformConstant %array_hx
+   %array_hX = OpConstantComposite %string_10 %uchar_h %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_X %uchar_nl %uchar_nul ; "hX = %hX\n"
+  %string_hX = OpVariable %cptr_string_10 UniformConstant %array_hX
+
+  %array_hhd = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_d %uchar_nl %uchar_nul ; "hhd = %hhd\n"
+ %string_hhd = OpVariable %cptr_string_12 UniformConstant %array_hhd
+  %array_hhi = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_i %uchar_nl %uchar_nul ; "hhi = %hhi\n"
+ %string_hhi = OpVariable %cptr_string_12 UniformConstant %array_hhi
+  %array_hho = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_o %uchar_nl %uchar_nul ; "hho = %hho\n"
+ %string_hho = OpVariable %cptr_string_12 UniformConstant %array_hho
+  %array_hhu = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_u %uchar_nl %uchar_nul ; "hhu = %hhu\n"
+ %string_hhu = OpVariable %cptr_string_12 UniformConstant %array_hhu
+  %array_hhx = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_x %uchar_nl %uchar_nul ; "hhx = %hhx\n"
+ %string_hhx = OpVariable %cptr_string_12 UniformConstant %array_hhx
+  %array_hhX = OpConstantComposite %string_12 %uchar_h %uchar_h %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_h %uchar_h %uchar_X %uchar_nl %uchar_nul ; "hhX = %hhX\n"
+ %string_hhX = OpVariable %cptr_string_12 UniformConstant %array_hhX
+
+       %test = OpFunction %void None %kernel_sig
+          %i = OpFunctionParameter %uint
+      %entry = OpLabel
+      %fmt_d = OpBitcast %cptr_char %string_d
+   %printf_d = OpExtInst %uint %clext printf %fmt_d %i
+      %fmt_i = OpBitcast %cptr_char %string_i
+   %printf_i = OpExtInst %uint %clext printf %fmt_i %i
+      %fmt_o = OpBitcast %cptr_char %string_o
+   %printf_o = OpExtInst %uint %clext printf %fmt_o %i
+      %fmt_u = OpBitcast %cptr_char %string_u
+   %printf_u = OpExtInst %uint %clext printf %fmt_u %i
+      %fmt_x = OpBitcast %cptr_char %string_x
+   %printf_x = OpExtInst %uint %clext printf %fmt_x %i
+      %fmt_X = OpBitcast %cptr_char %string_X
+   %printf_X = OpExtInst %uint %clext printf %fmt_X %i
+
+     %fmt_hd = OpBitcast %cptr_char %string_hd
+  %printf_hd = OpExtInst %uint %clext printf %fmt_hd %i
+     %fmt_hi = OpBitcast %cptr_char %string_hi
+  %printf_hi = OpExtInst %uint %clext printf %fmt_hi %i
+     %fmt_ho = OpBitcast %cptr_char %string_ho
+  %printf_ho = OpExtInst %uint %clext printf %fmt_ho %i
+     %fmt_hu = OpBitcast %cptr_char %string_hu
+  %printf_hu = OpExtInst %uint %clext printf %fmt_hu %i
+     %fmt_hx = OpBitcast %cptr_char %string_hx
+  %printf_hx = OpExtInst %uint %clext printf %fmt_hx %i
+     %fmt_hX = OpBitcast %cptr_char %string_hX
+  %printf_hX = OpExtInst %uint %clext printf %fmt_hX %i
+
+    %fmt_hhd = OpBitcast %cptr_char %string_hhd
+ %printf_hhd = OpExtInst %uint %clext printf %fmt_hhd %i
+    %fmt_hhi = OpBitcast %cptr_char %string_hhi
+ %printf_hhi = OpExtInst %uint %clext printf %fmt_hhi %i
+    %fmt_hho = OpBitcast %cptr_char %string_hho
+ %printf_hho = OpExtInst %uint %clext printf %fmt_hho %i
+    %fmt_hhu = OpBitcast %cptr_char %string_hhu
+ %printf_hhu = OpExtInst %uint %clext printf %fmt_hhu %i
+    %fmt_hhx = OpBitcast %cptr_char %string_hhx
+ %printf_hhx = OpExtInst %uint %clext printf %fmt_hhx %i
+    %fmt_hhX = OpBitcast %cptr_char %string_hhX
+ %printf_hhX = OpExtInst %uint %clext printf %fmt_hhX %i
+
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm32 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm32
new file mode 100644
index 00000000..ec19e9f8
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm32
@@ -0,0 +1,77 @@
+; kernel void printf_operands_scalar_int64(long l)
+; {
+;     printf("ld = %ld\n", l);
+;     printf("li = %li\n", l);
+;     printf("lo = %lo\n", l);
+;     printf("lu = %lu\n", l);
+;     printf("lx = %lx\n", l);
+;     printf("lX = %lX\n", l);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_int64"
+      %uchar = OpTypeInt 8 0
+      %ulong = OpTypeInt 64 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+    %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_X = OpConstant %uchar 88
+    %uchar_d = OpConstant %uchar 100
+    %uchar_i = OpConstant %uchar 105
+    %uchar_l = OpConstant %uchar 108
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+   %string_8 = OpTypeArray %uchar %uint_8
+%cptr_string_8 = OpTypePointer UniformConstant %string_8
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+  %string_12 = OpTypeArray %uchar %uint_12
+%cptr_string_12 = OpTypePointer UniformConstant %string_12
+       %void = OpTypeVoid
+      %float = OpTypeFloat 32
+ %kernel_sig = OpTypeFunction %void %ulong
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+   %array_ld = OpConstantComposite %string_10 %uchar_l %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_d %uchar_nl %uchar_nul ; "ld = %ld\n"
+  %string_ld = OpVariable %cptr_string_10 UniformConstant %array_ld
+   %array_li = OpConstantComposite %string_10 %uchar_l %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_i %uchar_nl %uchar_nul ; "li = %li\n"
+  %string_li = OpVariable %cptr_string_10 UniformConstant %array_li
+   %array_lo = OpConstantComposite %string_10 %uchar_l %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_o %uchar_nl %uchar_nul ; "lo = %lo\n"
+  %string_lo = OpVariable %cptr_string_10 UniformConstant %array_lo
+   %array_lu = OpConstantComposite %string_10 %uchar_l %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_u %uchar_nl %uchar_nul ; "lu = %lu\n"
+  %string_lu = OpVariable %cptr_string_10 UniformConstant %array_lu
+   %array_lx = OpConstantComposite %string_10 %uchar_l %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_x %uchar_nl %uchar_nul ; "lx = %lx\n"
+  %string_lx = OpVariable %cptr_string_10 UniformConstant %array_lx
+   %array_lX = OpConstantComposite %string_10 %uchar_l %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_X %uchar_nl %uchar_nul ; "lX = %lX\n"
+  %string_lX = OpVariable %cptr_string_10 UniformConstant %array_lX
+
+       %test = OpFunction %void None %kernel_sig
+          %l = OpFunctionParameter %ulong
+      %entry = OpLabel
+
+     %fmt_ld = OpBitcast %cptr_char %string_ld
+  %printf_ld = OpExtInst %uint %clext printf %fmt_ld %l
+     %fmt_li = OpBitcast %cptr_char %string_li
+  %printf_li = OpExtInst %uint %clext printf %fmt_li %l
+     %fmt_lo = OpBitcast %cptr_char %string_lo
+  %printf_lo = OpExtInst %uint %clext printf %fmt_lo %l
+     %fmt_lu = OpBitcast %cptr_char %string_lu
+  %printf_lu = OpExtInst %uint %clext printf %fmt_lu %l
+     %fmt_lx = OpBitcast %cptr_char %string_lx
+  %printf_lx = OpExtInst %uint %clext printf %fmt_lx %l
+     %fmt_lX = OpBitcast %cptr_char %string_lX
+  %printf_lX = OpExtInst %uint %clext printf %fmt_lX %l
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm64 b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm64
new file mode 100644
index 00000000..8401d1fe
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/printf_operands_scalar_int64.spvasm64
@@ -0,0 +1,77 @@
+; kernel void printf_operands_scalar_int64(long l)
+; {
+;     printf("ld = %ld\n", l);
+;     printf("li = %li\n", l);
+;     printf("lo = %lo\n", l);
+;     printf("lu = %lu\n", l);
+;     printf("lx = %lx\n", l);
+;     printf("lX = %lX\n", l);
+; }
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Int64
+               OpCapability Int8
+        %clext = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %test "printf_operands_scalar_int64"
+      %uchar = OpTypeInt 8 0
+      %ulong = OpTypeInt 64 0
+       %uint = OpTypeInt 32 0
+     %uint_8 = OpConstant %uint 8
+    %uint_10 = OpConstant %uint 10
+    %uint_12 = OpConstant %uint 12
+    %uchar_nul = OpConstant %uchar 0
+   %uchar_nl = OpConstant %uchar 10
+   %uchar_sp = OpConstant %uchar 32
+  %uchar_pct = OpConstant %uchar 37
+   %uchar_eq = OpConstant %uchar 61
+    %uchar_X = OpConstant %uchar 88
+    %uchar_d = OpConstant %uchar 100
+    %uchar_i = OpConstant %uchar 105
+    %uchar_l = OpConstant %uchar 108
+    %uchar_o = OpConstant %uchar 111
+    %uchar_u = OpConstant %uchar 117
+    %uchar_x = OpConstant %uchar 120
+   %string_8 = OpTypeArray %uchar %uint_8
+%cptr_string_8 = OpTypePointer UniformConstant %string_8
+  %string_10 = OpTypeArray %uchar %uint_10
+%cptr_string_10 = OpTypePointer UniformConstant %string_10
+  %string_12 = OpTypeArray %uchar %uint_12
+%cptr_string_12 = OpTypePointer UniformConstant %string_12
+       %void = OpTypeVoid
+      %float = OpTypeFloat 32
+ %kernel_sig = OpTypeFunction %void %ulong
+  %cptr_char = OpTypePointer UniformConstant %uchar
+
+   %array_ld = OpConstantComposite %string_10 %uchar_l %uchar_d %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_d %uchar_nl %uchar_nul ; "ld = %ld\n"
+  %string_ld = OpVariable %cptr_string_10 UniformConstant %array_ld
+   %array_li = OpConstantComposite %string_10 %uchar_l %uchar_i %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_i %uchar_nl %uchar_nul ; "li = %li\n"
+  %string_li = OpVariable %cptr_string_10 UniformConstant %array_li
+   %array_lo = OpConstantComposite %string_10 %uchar_l %uchar_o %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_o %uchar_nl %uchar_nul ; "lo = %lo\n"
+  %string_lo = OpVariable %cptr_string_10 UniformConstant %array_lo
+   %array_lu = OpConstantComposite %string_10 %uchar_l %uchar_u %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_u %uchar_nl %uchar_nul ; "lu = %lu\n"
+  %string_lu = OpVariable %cptr_string_10 UniformConstant %array_lu
+   %array_lx = OpConstantComposite %string_10 %uchar_l %uchar_x %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_x %uchar_nl %uchar_nul ; "lx = %lx\n"
+  %string_lx = OpVariable %cptr_string_10 UniformConstant %array_lx
+   %array_lX = OpConstantComposite %string_10 %uchar_l %uchar_X %uchar_sp %uchar_eq %uchar_sp %uchar_pct %uchar_l %uchar_X %uchar_nl %uchar_nul ; "lX = %lX\n"
+  %string_lX = OpVariable %cptr_string_10 UniformConstant %array_lX
+
+       %test = OpFunction %void None %kernel_sig
+          %l = OpFunctionParameter %ulong
+      %entry = OpLabel
+
+     %fmt_ld = OpBitcast %cptr_char %string_ld
+  %printf_ld = OpExtInst %uint %clext printf %fmt_ld %l
+     %fmt_li = OpBitcast %cptr_char %string_li
+  %printf_li = OpExtInst %uint %clext printf %fmt_li %l
+     %fmt_lo = OpBitcast %cptr_char %string_lo
+  %printf_lo = OpExtInst %uint %clext printf %fmt_lo %l
+     %fmt_lu = OpBitcast %cptr_char %string_lu
+  %printf_lu = OpExtInst %uint %clext printf %fmt_lu %l
+     %fmt_lx = OpBitcast %cptr_char %string_lx
+  %printf_lx = OpExtInst %uint %clext printf %fmt_lx %l
+     %fmt_lX = OpBitcast %cptr_char %string_lX
+  %printf_lX = OpExtInst %uint %clext printf %fmt_lX %l
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/test_extinst_printf.cpp b/test_conformance/spirv_new/test_extinst_printf.cpp
new file mode 100644
index 00000000..54bb8326
--- /dev/null
+++ b/test_conformance/spirv_new/test_extinst_printf.cpp
@@ -0,0 +1,259 @@
+//
+// Copyright (c) 2025 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "harness/os_helpers.h"
+#include "testBase.h"
+
+#if defined(_WIN32)
+#include <io.h>
+#define streamDup(fd1) _dup(fd1)
+#define streamDup2(fd1, fd2) _dup2(fd1, fd2)
+#else
+#if defined(__APPLE__)
+#include <sys/sysctl.h>
+#endif
+#include <unistd.h>
+#define streamDup(fd1) dup(fd1)
+#define streamDup2(fd1, fd2) dup2(fd1, fd2)
+#endif
+
+#include <fstream>
+#include <vector>
+
+// TODO: Unify with test_printf.
+struct StreamGrabber
+{
+    StreamGrabber()
+    {
+        char* tmp = get_temp_filename();
+        tempFileName = tmp;
+        free(tmp);
+    }
+    ~StreamGrabber()
+    {
+        if (acquired)
+        {
+            release();
+        }
+    }
+
+    int acquire(void)
+    {
+        if (acquired == false)
+        {
+            old_fd = streamDup(fileno(stdout));
+            if (!freopen(tempFileName.c_str(), "w", stdout))
+            {
+                release();
+                return -1;
+            }
+            acquired = true;
+        }
+        return 0;
+    }
+
+    int release(void)
+    {
+        if (acquired == true)
+        {
+            fflush(stdout);
+            streamDup2(old_fd, fileno(stdout));
+            close(old_fd);
+            acquired = false;
+        }
+        return 0;
+    }
+
+    int get_results(std::string& results)
+    {
+        if (acquired == false)
+        {
+            std::ifstream is(tempFileName, std::ios::binary);
+            if (is.good())
+            {
+                size_t filesize = 0;
+                is.seekg(0, std::ios::end);
+                filesize = (size_t)is.tellg();
+                is.seekg(0, std::ios::beg);
+
+                results.clear();
+                results.resize(filesize);
+                is.read(&results[0], filesize);
+
+                return 0;
+            }
+        }
+        return -1;
+    }
+
+    std::string tempFileName;
+    int old_fd = 0;
+    bool acquired = false;
+};
+
+// printf callback, for cl_arm_printf
+void CL_CALLBACK printfCallBack(const char* printf_data, size_t len,
+                                size_t final, void* user_data)
+{
+    fwrite(printf_data, 1, len, stdout);
+}
+
+template <typename T>
+static int printf_operands_helper(cl_device_id device,
+                                  const char* spirvFileName,
+                                  const char* kernelName,
+                                  const char* expectedResults, T value)
+{
+    StreamGrabber grabber;
+    cl_int error;
+
+    // Create a context and a queue to test with.
+    // We cannot use the context and queue from the harness because some
+    // implementations require a printf callback to be set at context creation.
+
+    cl_context_properties printf_properties[] = {
+        CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printfCallBack,
+        CL_PRINTF_BUFFERSIZE_ARM, 256, 0
+    };
+
+    cl_context_properties* props =
+        is_extension_available(device, "cl_arm_printf") ? printf_properties
+                                                        : nullptr;
+
+    clContextWrapper context =
+        clCreateContext(props, 1, &device, notify_callback, nullptr, &error);
+    test_error(error, "Unable to create printf context");
+
+    clCommandQueueWrapper queue =
+        clCreateCommandQueue(context, device, 0, &error);
+    test_error(error, "Unable to create printf queue");
+
+    clProgramWrapper program;
+    error = get_program_with_il(program, device, context, spirvFileName);
+    test_error(error, "Unable to build SPIR-V program");
+
+    clKernelWrapper kernel = clCreateKernel(program, kernelName, &error);
+    test_error(error, "Unable to create SPIR-V kernel");
+
+    error = clSetKernelArg(kernel, 0, sizeof(value), &value);
+    test_error(error, "Unable to set kernel arguments");
+
+    size_t global = 1;
+    grabber.acquire();
+    error |= clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0,
+                                    NULL, NULL);
+    error |= clFinish(queue);
+    grabber.release();
+    test_error(error, "unable to enqueue kernel");
+
+    std::string results;
+    grabber.get_results(results);
+
+    if (results != std::string(expectedResults))
+    {
+        log_error("Results do not match.\n");
+        log_error("Expected: \n---\n%s---\n", expectedResults);
+        log_error("Got: \n---\n%s---\n", results.c_str());
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+REGISTER_TEST(extinst_printf_operands_scalar_int32)
+{
+    static const char* expected = R"(d = 1
+i = 1
+o = 1
+u = 1
+x = 1
+X = 1
+hd = 1
+hi = 1
+ho = 1
+hu = 1
+hx = 1
+hX = 1
+hhd = 1
+hhi = 1
+hho = 1
+hhu = 1
+hhx = 1
+hhX = 1
+)";
+
+    return printf_operands_helper(device, "printf_operands_scalar_int32",
+                                  "printf_operands_scalar_int32", expected, 1);
+}
+
+REGISTER_TEST(extinst_printf_operands_scalar_fp32)
+{
+    static const char* expected = R"(a = 0x1.0p+1
+A = 0X1.0P+1
+e = 2.0e+00
+E = 2.0E+00
+f = 2.0
+F = 2.0
+g = 2
+G = 2
+)";
+
+    return printf_operands_helper(device, "printf_operands_scalar_fp32",
+                                  "printf_operands_scalar_fp32", expected,
+                                  2.0f);
+}
+
+REGISTER_TEST(extinst_printf_operands_scalar_int64)
+{
+    static const char* expected = R"(ld = 4
+li = 4
+lo = 4
+lu = 4
+lx = 4
+lX = 4
+)";
+
+    if (!gHasLong)
+    {
+        log_info("Device does not support 64-bit integers. Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    return printf_operands_helper(device, "printf_operands_scalar_int64",
+                                  "printf_operands_scalar_int64", expected, 4L);
+}
+
+REGISTER_TEST(extinst_printf_operands_scalar_fp64)
+{
+    static const char* expected = R"(a = 0x1.0p+3
+A = 0X1.0P+3
+e = 8.0e+00
+E = 8.0E+00
+f = 8.0
+F = 8.0
+g = 8
+G = 8
+)";
+
+    if (!is_extension_available(device, "cl_khr_fp64"))
+    {
+        log_info("Device does not support fp64. Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    return printf_operands_helper(device, "printf_operands_scalar_fp64",
+                                  "printf_operands_scalar_fp64", expected, 8.0);
+}
diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h
index a081bd09..ab8ee797 100644
--- a/test_conformance/subgroups/subhelpers.h
+++ b/test_conformance/subgroups/subhelpers.h
@@ -1611,7 +1611,7 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct subgroup_test
         test_params.subgroup_size = subgroup_size;
         Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params);
 
-        test_status status;
+        test_status status = TEST_FAIL;
 
         if (test_params.divergence_mask_arg != -1)
         {
diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
index f3ce4a79..06b48fb0 100644
--- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp
+++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
@@ -476,29 +476,6 @@ struct ConsistencyExternalSemaphoreTest : public VulkanTestBase
             sema_props1.push_back(0);
             sema_props2.push_back(0);
 
-            // Pass NULL properties
-            clCreateSemaphoreWithPropertiesKHRptr(context, NULL, &errNum);
-            test_failure_error(
-                errNum, CL_INVALID_VALUE,
-                "Semaphore creation must fail with CL_INVALID_VALUE "
-                " when properties are passed as NULL");
-
-            // Pass invalid semaphore object to wait
-            errNum = clEnqueueWaitSemaphoresKHRptr(queue, 1, NULL, NULL, 0,
-                                                   NULL, NULL);
-            test_failure_error(errNum, CL_INVALID_SEMAPHORE_KHR,
-                               "clEnqueueWaitSemaphoresKHR fails with "
-                               "CL_INVALID_SEMAPHORE_KHR "
-                               "when invalid semaphore object is passed");
-
-            // Pass invalid semaphore object to signal
-            errNum = clEnqueueSignalSemaphoresKHRptr(queue, 1, NULL, NULL, 0,
-                                                     NULL, NULL);
-            test_failure_error(errNum, CL_INVALID_SEMAPHORE_KHR,
-                               "clEnqueueSignalSemaphoresKHR fails with "
-                               "CL_INVALID_SEMAPHORE_KHR"
-                               "when invalid semaphore object is passed");
-
             // Create two semaphore objects
             clVk2Clsemaphore = clCreateSemaphoreWithPropertiesKHRptr(
                 context, sema_props1.data(), &errNum);
@@ -512,13 +489,6 @@ struct ConsistencyExternalSemaphoreTest : public VulkanTestBase
                 errNum,
                 "Unable to create semaphore with valid semaphore properties");
 
-            // Pass invalid object to release call
-            errNum = clReleaseSemaphoreKHRptr(NULL);
-            test_failure_error(errNum, CL_INVALID_SEMAPHORE_KHR,
-                               "clReleaseSemaphoreKHRptr fails with "
-                               "CL_INVALID_SEMAPHORE_KHR when NULL semaphore "
-                               "object is passed");
-
             // Release both semaphore objects
             errNum = clReleaseSemaphoreKHRptr(clVk2Clsemaphore);
             test_error(errNum, "clReleaseSemaphoreKHRptr failed");