diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml
index 8f2a0872..ec36d73c 100644
--- a/.github/workflows/presubmit.yml
+++ b/.github/workflows/presubmit.yml
@@ -28,7 +28,7 @@ jobs:
             arch: android-aarch64
             android_arch_abi: arm64-v8a
     steps:
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
       - name: Setup Ninja
         uses: seanmiddleditch/gha-setup-ninja@master
       - name: Install Arm and AArch64 compilers
@@ -184,7 +184,7 @@ jobs:
     steps:
       - name: Install packages
         run: sudo apt install -y clang-format clang-format-14
-      - uses: actions/checkout@v5
+      - uses: actions/checkout@v6
         with:
           fetch-depth: 0
       - name: Check code format
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 30a64447..09fa4854 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,14 +81,17 @@ include(CheckFunctionExists)
 include(CheckIncludeFiles)
 include(CheckCXXCompilerFlag)
 
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
-    set(CLConform_TARGET_ARCH ARM)
-elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" OR
+   CMAKE_VS_PLATFORM_NAME MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
     set(CLConform_TARGET_ARCH ARM64)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
+    set(CLConform_TARGET_ARCH ARM)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
     set(CLConform_TARGET_ARCH x86_64)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*")
     set(CLConform_TARGET_ARCH x86)
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv.*")
+    set(CLConform_TARGET_ARCH RISCV)
 endif()
 
 if(NOT DEFINED CLConform_TARGET_ARCH)
@@ -139,8 +142,12 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang"
         add_cxx_flag_if_supported(-frounding-math)
     endif()
 else()
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
+    # Curb the inclusion of SSE headers when compiling for non x86 targets
+    if(${CLConform_TARGET_ARCH} STREQUAL "x86_64" OR ${CLConform_TARGET_ARCH}
+            STREQUAL "x86")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
+    endif()
 endif()
 
 # Set a module's COMPILE_FLAGS if using gcc or clang.
diff --git a/test_common/harness/deviceInfo.cpp b/test_common/harness/deviceInfo.cpp
index 5b1e7e03..e06d32aa 100644
--- a/test_common/harness/deviceInfo.cpp
+++ b/test_common/harness/deviceInfo.cpp
@@ -50,7 +50,7 @@ std::string get_device_info_string(cl_device_id device,
 }
 
 /* Determines if an extension is supported by a device. */
-int is_extension_available(cl_device_id device, const char *extensionName)
+bool is_extension_available(cl_device_id device, const char *extensionName)
 {
     std::string extString = get_device_extensions_string(device);
     std::istringstream ss(extString);
diff --git a/test_common/harness/deviceInfo.h b/test_common/harness/deviceInfo.h
index c3c8a512..30711b18 100644
--- a/test_common/harness/deviceInfo.h
+++ b/test_common/harness/deviceInfo.h
@@ -26,7 +26,7 @@ std::string get_device_info_string(cl_device_id device,
                                    cl_device_info param_name);
 
 /* Determines if an extension is supported by a device. */
-int is_extension_available(cl_device_id device, const char *extensionName);
+bool is_extension_available(cl_device_id device, const char *extensionName);
 
 /* Returns the version of the extension the device supports or throws an
  * exception if the extension is not supported by the device. */
diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h
index cb1a9113..7e59f915 100644
--- a/test_common/harness/errorHelpers.h
+++ b/test_common/harness/errorHelpers.h
@@ -120,6 +120,12 @@ static int vlog_win32(const char *format, ...);
             return retValue;                                                   \
         }                                                                      \
     }
+#define test_object_failure_ret(object, errCode, expectedErrCode, msg,         \
+                                retValue)                                      \
+    {                                                                          \
+        test_assert_error_ret(object == nullptr, msg, retValue);               \
+        test_failure_error_ret(errCode, expectedErrCode, msg, retValue);       \
+    }
 #define print_failure_error(errCode, expectedErrCode, msg)                     \
     log_error("ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg,            \
               IGetErrorString(errCode), IGetErrorString(expectedErrCode),      \
diff --git a/test_common/harness/extensionHelpers.h b/test_common/harness/extensionHelpers.h
index e98f67c2..abaa0ba6 100644
--- a/test_common/harness/extensionHelpers.h
+++ b/test_common/harness/extensionHelpers.h
@@ -32,5 +32,15 @@
         }                                                                      \
     } while (false)
 
+#define GET_FUNCTION_EXTENSION_ADDRESS(device, FUNC)                           \
+    FUNC =                                                                     \
+        reinterpret_cast<FUNC##_fn>(clGetExtensionFunctionAddressForPlatform(  \
+            getPlatformFromDevice(device), #FUNC));                            \
+    if (FUNC == nullptr)                                                       \
+    {                                                                          \
+        log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed"     \
+                  " with " #FUNC "\n");                                        \
+        return TEST_FAIL;                                                      \
+    }
 
 #endif // _extensionHelpers_h
diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h
index afb0f5a3..9c1e0db0 100644
--- a/test_common/harness/fpcontrol.h
+++ b/test_common/harness/fpcontrol.h
@@ -45,6 +45,9 @@ typedef int64_t FPU_mode_type;
 #elif defined(__PPC__)
 #include <fpu_control.h>
 extern __thread fpu_control_t fpu_control;
+#elif defined(__riscv)
+#define _FPU_MASK_NI 1
+static FPU_mode_type fpu_control;
 #elif defined(__mips__)
 #include "mips/m32c1.h"
 #endif
@@ -56,7 +59,7 @@ inline void ForceFTZ(FPU_mode_type *oldMode)
     || defined(_M_X64) || defined(__MINGW32__)
     *oldMode = _mm_getcsr();
     _mm_setcsr(*oldMode | 0x8040);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
     *oldMode = fpu_control;
     fpu_control |= _FPU_MASK_NI;
 #elif defined(__arm__)
@@ -89,8 +92,8 @@ inline void DisableFTZ(FPU_mode_type *oldMode)
     || defined(_M_X64) || defined(__MINGW32__)
     *oldMode = _mm_getcsr();
     _mm_setcsr(*oldMode & ~0x8040);
-#elif defined(__PPC__)
-    *mode = fpu_control;
+#elif defined(__PPC__) || defined(__riscv)
+    *oldMode = fpu_control;
     fpu_control &= ~_FPU_MASK_NI;
 #elif defined(__arm__)
     unsigned fpscr;
@@ -121,7 +124,7 @@ inline void RestoreFPState(FPU_mode_type *mode)
 #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)               \
     || defined(_M_X64) || defined(__MINGW32__)
     _mm_setcsr(*mode);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
     fpu_control = *mode;
 #elif defined(__arm__)
     __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp
index 8fa9c0a4..637bf640 100644
--- a/test_common/harness/kernelHelpers.cpp
+++ b/test_common/harness/kernelHelpers.cpp
@@ -141,7 +141,6 @@ std::string get_kernel_name(const std::string &source)
         {
             kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
             kernelsList[kernelsList.size() - 1] = '.';
-            kernelsList[kernelsList.size() - 1] = '.';
         }
         oss << kernelsList;
     }
@@ -678,17 +677,18 @@ static int create_single_kernel_helper_create_program_offline(
     return CL_SUCCESS;
 }
 
-static int create_single_kernel_helper_create_program(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *buildOptions, CompilationMode compilationMode)
+int create_single_kernel_helper_create_program(cl_context context,
+                                               cl_program *outProgram,
+                                               unsigned int numKernelLines,
+                                               const char **kernelProgram,
+                                               const char *buildOptions)
 {
     std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
 
     std::string filePrefix =
         get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
     bool shouldSaveToDisk = should_save_kernel_source_to_disk(
-        compilationMode, gCompilationCacheMode, gCompilationCachePath,
+        gCompilationMode, gCompilationCacheMode, gCompilationCachePath,
         filePrefix);
 
     if (shouldSaveToDisk)
@@ -701,7 +701,7 @@ static int create_single_kernel_helper_create_program(
             return -1;
         }
     }
-    if (compilationMode == kOnline)
+    if (gCompilationMode == kOnline)
     {
         int error = CL_SUCCESS;
 
@@ -718,42 +718,11 @@ static int create_single_kernel_helper_create_program(
     else
     {
         return create_single_kernel_helper_create_program_offline(
-            context, device, outProgram, numKernelLines, kernelProgram,
-            buildOptions, compilationMode);
+            context, nullptr, outProgram, numKernelLines, kernelProgram,
+            buildOptions, gCompilationMode);
     }
 }
 
-int create_single_kernel_helper_create_program(cl_context context,
-                                               cl_program *outProgram,
-                                               unsigned int numKernelLines,
-                                               const char **kernelProgram,
-                                               const char *buildOptions)
-{
-    return create_single_kernel_helper_create_program(
-        context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
-        gCompilationMode);
-}
-
-int create_single_kernel_helper_create_program_for_device(
-    cl_context context, cl_device_id device, cl_program *outProgram,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *buildOptions)
-{
-    return create_single_kernel_helper_create_program(
-        context, device, outProgram, numKernelLines, kernelProgram,
-        buildOptions, gCompilationMode);
-}
-
-int create_single_kernel_helper_with_build_options(
-    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
-    unsigned int numKernelLines, const char **kernelProgram,
-    const char *kernelName, const char *buildOptions)
-{
-    return create_single_kernel_helper(context, outProgram, outKernel,
-                                       numKernelLines, kernelProgram,
-                                       kernelName, buildOptions);
-}
-
 // Creates and builds OpenCL C/C++ program, and creates a kernel
 int create_single_kernel_helper(cl_context context, cl_program *outProgram,
                                 cl_kernel *outKernel,
@@ -1239,8 +1208,8 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags,
 {
     cl_image_format *list;
     cl_uint count = 0;
-    cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
-                                            NULL, &count);
+    cl_int err =
+        clGetSupportedImageFormats(context, flags, image_type, 0, NULL, &count);
     if (count == 0) return 0;
 
     list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
@@ -1276,7 +1245,6 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags,
     return (i < count) ? 1 : 0;
 }
 
-size_t get_pixel_bytes(const cl_image_format *fmt);
 size_t get_pixel_bytes(const cl_image_format *fmt)
 {
     size_t chanCount;
diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp
index 65167116..97aedb7c 100644
--- a/test_common/harness/parseParameters.cpp
+++ b/test_common/harness/parseParameters.cpp
@@ -58,6 +58,13 @@ void helpInfo()
         with a very small subset of the tests. This option should not be used
         for conformance submission (default: disabled).
 
+    --invalid-object-scenarios=<option_1>,<option_2>....
+        Specify different scenarios to use when
+        testing for object validity. Options can be:
+           nullptr                 To use a nullptr (default)
+           valid_object_wrong_type To use a valid_object which is not the correct type
+        NOTE: valid_object_wrong_type option is not required for OpenCL conformance.
+
 For offline compilation (binary and spir-v modes) only:
     --compilation-cache-mode <cache-mode>
         Specify a compilation caching mode:
@@ -104,6 +111,7 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
         }
 
         delArg = 0;
+        size_t i_object_length = strlen("--invalid-object-scenarios=");
 
         if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0)
         {
@@ -264,6 +272,32 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
                 return -1;
             }
         }
+        else if (!strncmp(argv[i],
+                          "--invalid-object-scenarios=", i_object_length))
+        {
+            if (strlen(argv[i]) > i_object_length)
+            {
+                delArg++;
+                gInvalidObject = 0;
+                std::string invalid_objects(argv[i]);
+
+                if (invalid_objects.find("nullptr") != std::string::npos)
+                {
+                    gInvalidObject |= InvalidObject::Nullptr;
+                }
+                if (invalid_objects.find("valid_object_wrong_type")
+                    != std::string::npos)
+                {
+                    gInvalidObject |= InvalidObject::ValidObjectWrongType;
+                }
+            }
+            else
+            {
+                log_error("Program argument for --invalid-object-scenarios was "
+                          "not specified.\n");
+                return -1;
+            }
+        }
 
         // cleaning parameters from argv tab
         for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg];
diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp
index 5aeb86f1..31b18f75 100644
--- a/test_common/harness/rounding_mode.cpp
+++ b/test_common/harness/rounding_mode.cpp
@@ -201,6 +201,7 @@ RoundingMode get_round(void)
 #elif defined(__mips__)
 #include "mips/m32c1.h"
 #endif
+
 void *FlushToZero(void)
 {
 #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
@@ -231,6 +232,8 @@ void *FlushToZero(void)
 #elif defined(__mips__)
     fpa_bissr(FPA_CSR_FS);
     return NULL;
+#elif defined(__riscv)
+    return NULL;
 #else
 #error Unknown arch
 #endif
@@ -266,6 +269,8 @@ void UnFlushToZero(void *p)
     _FPU_SETCW(flags);
 #elif defined(__mips__)
     fpa_bicsr(FPA_CSR_FS);
+#elif defined(__riscv)
+    return;
 #else
 #error Unknown arch
 #endif
diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index c745a639..92729c49 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 #include "testHarness.h"
+#include "stringHelpers.h"
 #include "compat.h"
 #include <algorithm>
 #include <stdio.h>
@@ -21,6 +22,7 @@
 #include <string.h>
 #include <cassert>
 #include <deque>
+#include <filesystem>
 #include <mutex>
 #include <set>
 #include <stdexcept>
@@ -33,6 +35,8 @@
 #include "imageHelpers.h"
 #include "parseParameters.h"
 
+namespace fs = std::filesystem;
+
 #if !defined(_WIN32)
 #include <sys/utsname.h>
 #include <unistd.h>
@@ -60,6 +64,7 @@ int gInfNanSupport = 1;
 int gIsEmbedded = 0;
 int gHasLong = 1;
 bool gCoreILProgram = true;
+int gInvalidObject = InvalidObject::Nullptr;
 
 #define DEFAULT_NUM_ELEMENTS 0x4000
 
@@ -94,11 +99,25 @@ static int saveResultsToJson(const char *suiteName, test_definition testList[],
         return EXIT_SUCCESS;
     }
 
-    FILE *file = fopen(fileName, "w");
+    fs::path file_path(fileName);
+
+    // When running under Bazel test, prepend the Bazel output directory to
+    // the provided path
+    if (nullptr != getenv("BAZEL_TEST"))
+    {
+        char *bazel_output_dir = getenv("TEST_UNDECLARED_OUTPUTS_DIR");
+        if (nullptr != bazel_output_dir)
+        {
+            file_path = fs::path(bazel_output_dir) / file_path;
+        }
+    }
+
+    auto file_path_str = to_string(file_path.u8string());
+    FILE *file = fopen(file_path_str.c_str(), "w");
     if (NULL == file)
     {
         log_error("ERROR: Failed to open '%s' for writing results.\n",
-                  fileName);
+                  file_path_str.c_str());
         return EXIT_FAILURE;
     }
 
@@ -127,7 +146,8 @@ static int saveResultsToJson(const char *suiteName, test_definition testList[],
 
     int ret = fclose(file) ? EXIT_FAILURE : EXIT_SUCCESS;
 
-    log_info("Saving results to %s: %s!\n", fileName, save_map[ret]);
+    log_info("Saving results to %s: %s!\n", file_path_str.c_str(),
+             save_map[ret]);
 
     return ret;
 }
@@ -308,6 +328,8 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
                  "CL_CONFORMANCE_RESULTS_FILENAME (currently '%s')\n",
                  fileName != NULL ? fileName : "<undefined>");
         log_info("\t      to save results to JSON file.\n");
+        log_info("\t      When running in Bazel test this is relative to "
+                 "$TEST_UNDECLARED_OUTPUTS_DIR.\n");
 
         log_info("\n");
         log_info("Test names:\n");
@@ -1408,6 +1430,8 @@ void PrintArch(void)
     vlog("ARCH:\tWindows\n");
 #elif defined(__mips__)
     vlog("ARCH:\tmips\n");
+#elif defined(__riscv)
+    vlog("ARCH:\tRISC-V\n");
 #else
 #error unknown arch
 #endif
diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h
index cc9d8212..76fda76f 100644
--- a/test_common/harness/testHarness.h
+++ b/test_common/harness/testHarness.h
@@ -22,6 +22,7 @@
 
 #include <string>
 #include <vector>
+#include <type_traits>
 
 class Version {
 public:
@@ -257,6 +258,37 @@ extern std::string get_platform_info_string(cl_platform_id platform,
                                             cl_platform_info param_name);
 extern bool is_platform_extension_available(cl_platform_id platform,
                                             const char *extensionName);
+enum InvalidObject
+{
+    Nullptr = 1 << 0,
+    ValidObjectWrongType = 1 << 1,
+};
+
+extern int gInvalidObject;
+
+
+template <typename T> std::vector<T> get_invalid_objects(cl_device_id device)
+{
+    std::vector<T> ret;
+    if ((gInvalidObject & InvalidObject::Nullptr)
+        && !(std::is_same<T, cl_platform_id>::value))
+    {
+        ret.push_back(nullptr);
+    }
+    if (gInvalidObject & InvalidObject::ValidObjectWrongType)
+    {
+        if (std::is_same<T, cl_device_id>::value)
+        {
+            cl_platform_id platform = getPlatformFromDevice(device);
+            ret.push_back(reinterpret_cast<T>(platform));
+        }
+        else
+        {
+            ret.push_back(reinterpret_cast<T>(device));
+        }
+    }
+    return ret;
+}
 
 #if !defined(__APPLE__)
 void memset_pattern4(void *, const void *, size_t);
diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt
index b781e49b..fed0aace 100644
--- a/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/api/CMakeLists.txt
@@ -6,7 +6,10 @@ set(${MODULE_NAME}_SOURCES
          main.cpp
          negative_platform.cpp
          negative_queue.cpp
+         negative_context.cpp
+         negative_enqueue_marker.cpp
          negative_enqueue_map_image.cpp
+         negative_device.cpp
          test_api_consistency.cpp
          test_bool.cpp
          test_retain.cpp
diff --git a/test_conformance/api/negative_context.cpp b/test_conformance/api/negative_context.cpp
new file mode 100644
index 00000000..23ace048
--- /dev/null
+++ b/test_conformance/api/negative_context.cpp
@@ -0,0 +1,290 @@
+//
+// Copyright (c) 2025 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+
+/* Negative Tests for clCreateContext */
+REGISTER_TEST(negative_create_context)
+{
+    cl_context_properties props[3] = {
+        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(nullptr), 0
+    };
+    cl_int err = 0;
+    cl_context ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_PLATFORM,
+        "clCreateContext should return CL_INVALID_PLATFORM when:\"an invalid "
+        "platform object is used with the CL_CONTEXT_PLATFORM property\" using "
+        "a nullptr",
+        TEST_FAIL);
+
+    props[0] = reinterpret_cast<cl_context_properties>("INVALID_PROPERTY");
+
+    props[1] = reinterpret_cast<cl_context_properties>(nullptr);
+    ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_PROPERTY,
+        "clCreateContext should return CL_INVALID_PROPERTY when: \"context "
+        "property name in properties is not a supported property name\"",
+        TEST_FAIL);
+
+    if (get_device_cl_version(device) >= Version(1, 2))
+    {
+        cl_context_properties invalid_value{ -1 };
+        props[0] = CL_CONTEXT_INTEROP_USER_SYNC;
+        props[1] = invalid_value;
+        ctx = clCreateContext(props, 1, &device, nullptr, nullptr, &err);
+        test_object_failure_ret(
+            ctx, err, CL_INVALID_PROPERTY,
+            "clCreateContext should return CL_INVALID_PROPERTY when: \"the "
+            "value specified for a supported property name is not valid\"",
+            TEST_FAIL);
+
+        cl_bool property_value = CL_FALSE;
+        cl_context_properties duplicated_property[5] = {
+            CL_CONTEXT_INTEROP_USER_SYNC,
+            static_cast<cl_context_properties>(property_value),
+            CL_CONTEXT_INTEROP_USER_SYNC,
+            static_cast<cl_context_properties>(property_value), 0
+        };
+        ctx = clCreateContext(duplicated_property, 1, &device, nullptr, nullptr,
+                              &err);
+        test_object_failure_ret(
+            ctx, err, CL_INVALID_PROPERTY,
+            "clCreateContext should return CL_INVALID_PROPERTY when: \"the "
+            "same property name is specified more than once\"",
+            TEST_FAIL);
+    }
+
+    ctx = clCreateContext(nullptr, 1, nullptr, nullptr, nullptr, &err);
+    test_object_failure_ret(ctx, err, CL_INVALID_VALUE,
+                            "clCreateContext should return CL_INVALID_VALUE "
+                            "when: \"devices is NULL\"",
+                            TEST_FAIL);
+
+    ctx = clCreateContext(nullptr, 0, &device, nullptr, nullptr, &err);
+    test_object_failure_ret(ctx, err, CL_INVALID_VALUE,
+                            "clCreateContext should return CL_INVALID_VALUE "
+                            "when: \"num_devices is equal to zero\"",
+                            TEST_FAIL);
+
+    int user_data = 1; // Arbitrary non-NULL value
+    ctx = clCreateContext(nullptr, 1, &device, nullptr, &user_data, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_VALUE,
+        "clCreateContext should return CL_INVALID_VALUE when: \"pfn_notify is "
+        "NULL but user_data is not NULL\"",
+        TEST_FAIL);
+
+    cl_device_id invalid_device = nullptr;
+    ctx = clCreateContext(nullptr, 1, &invalid_device, nullptr, nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_DEVICE,
+        "clCreateContext should return CL_INVALID_DEVICE when: \"any device in "
+        "devices is not a valid device\" using a device set to nullptr",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clCreateContextFromType */
+REGISTER_TEST(negative_create_context_from_type)
+{
+    cl_platform_id platform = getPlatformFromDevice(device);
+
+    cl_context_properties props[5] = {
+        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(nullptr),
+        0, 0, 0
+    };
+    cl_int err = 0;
+    cl_context ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT,
+                                             nullptr, nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_PLATFORM,
+        "clCreateContextFromType should return CL_INVALID_PLATFORM when: \"an "
+        "invalid platform object is used with the CL_CONTEXT_PLATFORM "
+        "property\" using a nullptr",
+        TEST_FAIL);
+
+    ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
+                                  nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_PLATFORM,
+        "clCreateContextFromType should return CL_INVALID_PLATFORM when: \"an "
+        "invalid platform object is used with the CL_CONTEXT_PLATFORM "
+        "property\" using a valid object that is NOT a platform",
+        TEST_FAIL);
+
+    props[1] = reinterpret_cast<cl_context_properties>(platform);
+    props[2] = reinterpret_cast<cl_context_properties>("INVALID_PROPERTY");
+    props[3] = reinterpret_cast<cl_context_properties>(nullptr);
+
+    ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
+                                  nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_PROPERTY,
+        "clCreateContextFromType should return CL_INVALID_PROPERTY when: "
+        "\"context property name in properties is not a supported property "
+        "name\"",
+        TEST_FAIL);
+
+    if (get_device_cl_version(device) >= Version(1, 2))
+    {
+        cl_context_properties invalid_value{ -1 };
+        props[2] = CL_CONTEXT_INTEROP_USER_SYNC;
+        props[3] = invalid_value;
+        ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
+                                      nullptr, &err);
+        test_object_failure_ret(
+            ctx, err, CL_INVALID_PROPERTY,
+            "clCreateContextFromType should return CL_INVALID_PROPERTY when: "
+            "\"the value specified for a supported property name is not "
+            "valid\"",
+            TEST_FAIL);
+
+        props[2] = CL_CONTEXT_PLATFORM;
+        props[3] = reinterpret_cast<cl_context_properties>(platform);
+        ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_DEFAULT, nullptr,
+                                      nullptr, &err);
+        test_object_failure_ret(
+            ctx, err, CL_INVALID_PROPERTY,
+            "clCreateContextFromType should return CL_INVALID_PROPERTY when: "
+            "\"the same property name is specified more than once\"",
+            TEST_FAIL);
+    }
+
+    int user_data = 1; // Arbitrary non-NULL value
+    ctx = clCreateContextFromType(nullptr, CL_DEVICE_TYPE_DEFAULT, nullptr,
+                                  &user_data, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_VALUE,
+        "clCreateContextFromType should return CL_INVALID_VALUE when: "
+        "\"pfn_notify is NULL but user_data is not NULL\"",
+        TEST_FAIL);
+
+    cl_device_type INVALID_DEVICE_TYPE = 0;
+    ctx = clCreateContextFromType(nullptr, INVALID_DEVICE_TYPE, nullptr,
+                                  nullptr, &err);
+    test_object_failure_ret(
+        ctx, err, CL_INVALID_DEVICE_TYPE,
+        "clCreateContextFromType should return CL_INVALID_DEVICE_TYPE when: "
+        "\"device_type is not a valid value\"",
+        TEST_FAIL);
+
+    std::vector<cl_device_type> device_types = { CL_DEVICE_TYPE_CPU,
+                                                 CL_DEVICE_TYPE_GPU,
+                                                 CL_DEVICE_TYPE_ACCELERATOR };
+    if (get_device_cl_version(device) >= Version(1, 2))
+    {
+        device_types.push_back(CL_DEVICE_TYPE_CUSTOM);
+    }
+    for (auto type : device_types)
+    {
+        clContextWrapper tmp_context =
+            clCreateContextFromType(nullptr, type, nullptr, nullptr, &err);
+        if (err != CL_SUCCESS)
+        {
+            test_object_failure_ret(
+                tmp_context, err, CL_DEVICE_NOT_FOUND,
+                "clCreateContextFromType should return CL_DEVICE_NOT_FOUND "
+                "when: \"no devices that match device_type and property values "
+                "specified in properties are currently available\"",
+                TEST_FAIL);
+            break;
+        }
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clRetainContext */
+REGISTER_TEST(negative_retain_context)
+{
+    cl_int err = clRetainContext(nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_CONTEXT,
+        "clRetainContext should return CL_INVALID_CONTEXT when: \"context is "
+        "not a valid OpenCL context\" using a nullptr",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clReleaseContext */
+REGISTER_TEST(negative_release_context)
+{
+    cl_int err = clReleaseContext(nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_CONTEXT,
+        "clReleaseContext should return CL_INVALID_CONTEXT when: \"context is "
+        "not a valid OpenCL context\" using a nullptr",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clGetContextInfo */
+REGISTER_TEST(negative_get_context_info)
+{
+
+    cl_uint param_value = 0;
+    cl_int err = clGetContextInfo(nullptr, CL_CONTEXT_REFERENCE_COUNT,
+                                  sizeof(param_value), &param_value, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_CONTEXT,
+        "clGetContextInfo should return CL_INVALID_CONTEXT when: \"context is "
+        "not a valid context\" using a nullptr",
+        TEST_FAIL);
+
+    cl_context_info INVALID_PARAM_VALUE = 0;
+    err = clGetContextInfo(context, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clGetContextInfo should return CL_INVALID_VALUE when: \"param_name is "
+        "not one of the supported values\"",
+        TEST_FAIL);
+
+    err = clGetContextInfo(context, CL_CONTEXT_REFERENCE_COUNT,
+                           sizeof(param_value) - 1, &param_value, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clGetContextInfo should return CL_INVALID_VALUE when: \"size in bytes "
+        "specified by param_value_size is < size of return type and "
+        "param_value is not a NULL value\"",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clSetContextDestructorCallback */
+static void CL_CALLBACK callback(cl_context context, void* user_data) {}
+
+REGISTER_TEST_VERSION(negative_set_context_destructor_callback, Version(3, 0))
+{
+    cl_int err = clSetContextDestructorCallback(nullptr, callback, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_CONTEXT,
+        "clSetContextDestructorCallback should return CL_INVALID_CONTEXT when: "
+        "\"context is not a valid context\" using a nullptr",
+        TEST_FAIL);
+
+    err = clSetContextDestructorCallback(context, nullptr, nullptr);
+    test_failure_error_ret(err, CL_INVALID_VALUE,
+                           "clSetContextDestructorCallback should return "
+                           "CL_INVALID_VALUE when: \"pfn_notify is NULL\"",
+                           TEST_FAIL);
+
+    return TEST_PASS;
+}
diff --git a/test_conformance/api/negative_device.cpp b/test_conformance/api/negative_device.cpp
new file mode 100644
index 00000000..d51c685c
--- /dev/null
+++ b/test_conformance/api/negative_device.cpp
@@ -0,0 +1,526 @@
+//
+// Copyright (c) 2021 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "testBase.h"
+#include "harness/testHarness.h"
+#include <vector>
+
+/* Negative Tests for clGetDeviceInfo */
+REGISTER_TEST(negative_get_device_info)
+{
+
+    cl_device_type device_type = 0;
+    cl_int err(CL_SUCCESS);
+    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
+    {
+        err = clGetDeviceInfo(invalid_device, CL_DEVICE_TYPE,
+                              sizeof(device_type), &device_type, nullptr);
+        test_failure_error_ret(err, CL_INVALID_DEVICE,
+                               "clGetDeviceInfo should return "
+                               "CL_INVALID_DEVICE when: \"device is not "
+                               "a valid device\"",
+                               TEST_FAIL);
+    }
+
+    constexpr cl_device_info INVALID_PARAM_VALUE = 0;
+    err = clGetDeviceInfo(device, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clGetDeviceInfo should return CL_INVALID_VALUE when: \"param_name is "
+        "not one of the supported values\"",
+        TEST_FAIL);
+
+    err = clGetDeviceInfo(device, CL_DEVICE_TYPE, 0, &device_type, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clGetDeviceInfo should return CL_INVALID_VALUE when: \"size in bytes "
+        "specified by param_value_size is < size of return type and "
+        "param_value is not a NULL value\"",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clGetDeviceIDs */
+REGISTER_TEST(negative_get_device_ids)
+{
+    cl_platform_id platform = getPlatformFromDevice(device);
+
+    cl_device_id devices = nullptr;
+
+    cl_int err(CL_SUCCESS);
+    for (auto invalid_platform : get_invalid_objects<cl_platform_id>(device))
+    {
+        err = clGetDeviceIDs(invalid_platform, CL_DEVICE_TYPE_DEFAULT, 1,
+                             &devices, nullptr);
+        test_failure_error_ret(err, CL_INVALID_PLATFORM,
+                               "clGetDeviceIDs should return "
+                               "CL_INVALID_PLATFORM when: \"platform is "
+                               "not a valid platform\"",
+                               TEST_FAIL);
+    }
+
+    cl_device_type INVALID_DEVICE_TYPE = 0;
+    err = clGetDeviceIDs(platform, INVALID_DEVICE_TYPE, 1, &devices, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_DEVICE_TYPE,
+        "clGetDeviceIDs should return CL_INVALID_DEVICE_TYPE when: "
+        "\"device_type is not a valid value\"",
+        TEST_FAIL);
+
+    err =
+        clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 0, &devices, nullptr);
+    test_failure_error_ret(err, CL_INVALID_VALUE,
+                           "clGetDeviceIDs should return when: \"num_entries "
+                           "is equal to zero and devices is not NULL\"",
+                           TEST_FAIL);
+
+    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, nullptr, nullptr);
+    test_failure_error_ret(err, CL_INVALID_VALUE,
+                           "clGetDeviceIDs should return CL_INVALID_VALUE "
+                           "when: \"both num_devices and devices are NULL\"",
+                           TEST_FAIL);
+
+    devices = nullptr;
+    std::vector<cl_device_type> device_types{ CL_DEVICE_TYPE_CPU,
+                                              CL_DEVICE_TYPE_GPU,
+                                              CL_DEVICE_TYPE_ACCELERATOR };
+    if (get_device_cl_version(device) >= Version(1, 2))
+    {
+        device_types.push_back(CL_DEVICE_TYPE_CUSTOM);
+    }
+
+    bool platform_supports_all_device_types = true;
+    for (auto device_type : device_types)
+    {
+        err = clGetDeviceIDs(platform, device_type, 1, &devices, nullptr);
+        if (err == CL_SUCCESS)
+        {
+            continue;
+        }
+        platform_supports_all_device_types = false;
+        break;
+    }
+    if (platform_supports_all_device_types)
+    {
+        log_info("Platform has every Device Type... Skipping Test\n");
+    }
+    else
+    {
+        test_failure_error_ret(
+            err, CL_DEVICE_NOT_FOUND,
+            "clGetDeviceIDs should return CL_DEVICE_NOT_FOUND when: \"no "
+            "OpenCL devices that matched device_type were found\"",
+            TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clGetDeviceAndHostTimer */
+REGISTER_TEST_VERSION(negative_get_device_and_host_timer, Version(2, 1))
+{
+    cl_ulong *device_timestamp = nullptr, *host_timestamp = nullptr;
+    cl_int err = CL_SUCCESS;
+    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
+    {
+        err = clGetDeviceAndHostTimer(invalid_device, device_timestamp,
+                                      host_timestamp);
+        test_failure_error_ret(
+            err, CL_INVALID_DEVICE,
+            "clGetDeviceAndHostTimer should return CL_INVALID_DEVICE when: "
+            "\"device is not a valid device\"",
+            TEST_FAIL);
+    }
+
+    cl_platform_id platform = getPlatformFromDevice(device);
+
+    // Initialise timer_resolution to a Non-0 value as CL2.1/2 devices must
+    // support timer synchronisation
+    cl_ulong timer_resolution = 1;
+    auto device_version = get_device_cl_version(device);
+    err =
+        clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
+                          sizeof(timer_resolution), &timer_resolution, nullptr);
+    test_error(err, "clGetPlatformInfo failed");
+    if (timer_resolution == 0
+        && (device_version == Version(2, 1) || device_version == Version(2, 2)))
+    {
+        log_error("Support for device and host timer synchronization is "
+                  "required for platforms supporting OpenCL 2.1 or 2.2.");
+        return TEST_FAIL;
+    }
+
+    if (timer_resolution != 0)
+    {
+        log_info("Platform Supports Timers\n");
+        log_info("Skipping CL_INVALID_OPERATION tests\n");
+
+        err = clGetDeviceAndHostTimer(device, nullptr, host_timestamp);
+        test_failure_error_ret(
+            err, CL_INVALID_VALUE,
+            "clGetDeviceAndHostTimer should return CL_INVALID_VALUE when: "
+            "\"host_timestamp or device_timestamp is NULL\" using nullptr for "
+            "device_timestamp ",
+            TEST_FAIL);
+
+        err = clGetDeviceAndHostTimer(device, device_timestamp, nullptr);
+        test_failure_error_ret(
+            err, CL_INVALID_VALUE,
+            "clGetDeviceAndHostTimer should return CL_INVALID_VALUE when: "
+            "\"host_timestamp or device_timestamp is NULL\" using nullptr for "
+            "host_timestamp ",
+            TEST_FAIL);
+    }
+    else
+    {
+        log_info("Platform does not Support Timers\n");
+        log_info("Skipping CL_INVALID_VALUE tests\n");
+
+        err = clGetDeviceAndHostTimer(device, device_timestamp, host_timestamp);
+        test_failure_error_ret(
+            err, CL_INVALID_OPERATION,
+            "clGetDeviceAndHostTimer should return CL_INVALID_OPERATION when: "
+            "\"the platform associated with device does not support device and "
+            "host timer synchronization\"",
+            TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clGetHostTimer */
+REGISTER_TEST_VERSION(negative_get_host_timer, Version(2, 1))
+{
+    cl_ulong host_timestamp = 0;
+    cl_int err = CL_SUCCESS;
+    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
+    {
+        err = clGetHostTimer(invalid_device, &host_timestamp);
+        test_failure_error_ret(err, CL_INVALID_DEVICE,
+                               "clGetHostTimer should return CL_INVALID_DEVICE "
+                               "when: \"device is not "
+                               "a valid device\"",
+                               TEST_FAIL);
+    }
+
+    cl_platform_id platform = getPlatformFromDevice(device);
+    // Initialise timer_resolution to a Non-0 value as CL2.1/2 devices must
+    // support timer synchronisation
+    cl_ulong timer_resolution = 1;
+    auto device_version = get_device_cl_version(device);
+    err =
+        clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION,
+                          sizeof(timer_resolution), &timer_resolution, nullptr);
+    test_error(err, "clGetPlatformInfo failed");
+    if (timer_resolution == 0
+        && (device_version == Version(2, 1) || device_version == Version(2, 2)))
+    {
+        log_error("Support for device and host timer synchronization is "
+                  "required for platforms supporting OpenCL 2.1 or 2.2.");
+        return TEST_FAIL;
+    }
+
+    if (timer_resolution != 0)
+    {
+        log_info("Platform Supports Timers\n");
+        log_info("Skipping CL_INVALID_OPERATION tests\n");
+
+        err = clGetHostTimer(device, nullptr);
+        test_failure_error_ret(err, CL_INVALID_VALUE,
+                               "clGetHostTimer should return CL_INVALID_VALUE "
+                               "when: \"host_timestamp is NULL\"",
+                               TEST_FAIL);
+    }
+    else
+    {
+        log_info("Platform does not Support Timers\n");
+        log_info("Skipping CL_INVALID_VALUE tests\n");
+
+        err = clGetHostTimer(device, &host_timestamp);
+        test_failure_error_ret(
+            err, CL_INVALID_OPERATION,
+            "clGetHostTimer should return CL_INVALID_OPERATION when: \"the "
+            "platform associated with device does not support device and host "
+            "timer synchronization\"",
+            TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clCreateSubDevices */
+enum SupportedPartitionSchemes
+{
+    None = 0,
+    Equally = 1 << 0,
+    Counts = 1 << 1,
+    Affinity = 1 << 2,
+    All_Schemes = Affinity | Counts | Equally,
+};
+
+static int get_supported_properties(cl_device_id device)
+{
+    size_t number_of_properties = 0;
+    int err = clGetDeviceInfo(device, CL_DEVICE_PARTITION_PROPERTIES, 0,
+                              nullptr, &number_of_properties);
+    test_error(err, "clGetDeviceInfo");
+    std::vector<cl_device_partition_property> supported_properties(
+        number_of_properties / sizeof(cl_device_partition_property));
+    err = clGetDeviceInfo(device, CL_DEVICE_PARTITION_PROPERTIES,
+                          number_of_properties, &supported_properties.front(),
+                          nullptr);
+    test_error(err, "clGetDeviceInfo");
+    int ret = SupportedPartitionSchemes::None;
+    for (auto property : supported_properties)
+    {
+        switch (property)
+        {
+            case CL_DEVICE_PARTITION_EQUALLY:
+                ret |= SupportedPartitionSchemes::Equally;
+                break;
+            case CL_DEVICE_PARTITION_BY_COUNTS:
+                ret |= SupportedPartitionSchemes::Counts;
+                break;
+            case CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN:
+                ret |= SupportedPartitionSchemes::Affinity;
+                break;
+            default: break;
+        }
+    }
+    return ret;
+}
+
+static std::vector<cl_device_partition_property>
+get_invalid_properties(int unsupported_properties)
+{
+    if (unsupported_properties & SupportedPartitionSchemes::Equally)
+    {
+        return { CL_DEVICE_PARTITION_EQUALLY, 1, 0 };
+    }
+    else if (unsupported_properties & SupportedPartitionSchemes::Counts)
+    {
+        return { CL_DEVICE_PARTITION_BY_COUNTS, 1,
+                 CL_DEVICE_PARTITION_BY_COUNTS_LIST_END };
+    }
+    else if (unsupported_properties & SupportedPartitionSchemes::Affinity)
+    {
+        return { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN,
+                 CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, 0 };
+    }
+    else
+    {
+        return {};
+    }
+}
+
+static cl_uint get_uint_device_info(const cl_device_id device,
+                                    const cl_device_info param_name)
+{
+    cl_uint ret = 0;
+    cl_int err =
+        clGetDeviceInfo(device, param_name, sizeof(ret), &ret, nullptr);
+    test_error(err, "clGetDeviceInfo");
+    return ret;
+}
+
+REGISTER_TEST_VERSION(negative_create_sub_devices, Version(1, 2))
+{
+    int supported_properties = get_supported_properties(device);
+    if (supported_properties == SupportedPartitionSchemes::None)
+    {
+        printf("Device does not support creating subdevices... Skipping\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    cl_device_partition_property properties[4] = {};
+    cl_uint max_compute_units =
+        get_uint_device_info(device, CL_DEVICE_MAX_COMPUTE_UNITS);
+    cl_uint max_sub_devices =
+        get_uint_device_info(device, CL_DEVICE_PARTITION_MAX_SUB_DEVICES);
+    std::vector<cl_device_id> out_devices;
+    cl_uint max_for_partition = 0;
+    if (supported_properties & SupportedPartitionSchemes::Equally)
+    {
+        properties[0] = CL_DEVICE_PARTITION_EQUALLY;
+        properties[1] = 1;
+        properties[2] = 0;
+        out_devices.resize(static_cast<size_t>(max_compute_units));
+        max_for_partition = max_compute_units;
+    }
+    else if (supported_properties & SupportedPartitionSchemes::Counts)
+    {
+        properties[0] = CL_DEVICE_PARTITION_BY_COUNTS;
+        properties[1] = 1;
+        properties[2] = CL_DEVICE_PARTITION_BY_COUNTS_LIST_END;
+        out_devices.resize(static_cast<size_t>(max_sub_devices));
+        max_for_partition = max_sub_devices;
+    }
+    else
+    {
+        properties[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
+        properties[1] = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE;
+        properties[2] = 0;
+    }
+
+    properties[3] = 0;
+
+    cl_int err(CL_SUCCESS);
+    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
+    {
+        err = clCreateSubDevices(invalid_device, properties, out_devices.size(),
+                                 out_devices.data(), nullptr);
+        test_failure_error_ret(err, CL_INVALID_DEVICE,
+                               "clCreateSubDevices should return "
+                               "CL_INVALID_DEVICE when: \"in_device "
+                               "is not a valid device\"",
+                               TEST_FAIL);
+    }
+
+    err = clCreateSubDevices(device, nullptr, out_devices.size(),
+                             out_devices.data(), nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
+        "specified in properties are not valid\" using a nullptr",
+        TEST_FAIL);
+
+    err =
+        clCreateSubDevices(device, properties, 0, out_devices.data(), nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clCreateSubDevices should return CL_INVALID_VALUE when: \"out_devices "
+        "is not NULL and num_devices is less than the number of sub-devices "
+        "created by the partition scheme\"",
+        TEST_FAIL);
+
+    if (supported_properties != SupportedPartitionSchemes::All_Schemes)
+    {
+        std::vector<cl_device_partition_property> invalid_properties =
+            get_invalid_properties(supported_properties
+                                   ^ SupportedPartitionSchemes::All_Schemes);
+        err =
+            clCreateSubDevices(device, invalid_properties.data(),
+                               out_devices.size(), out_devices.data(), nullptr);
+        test_failure_error_ret(
+            err, CL_INVALID_VALUE,
+            "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
+            "specified in properties are valid but not supported by the "
+            "device\"",
+            TEST_FAIL);
+    }
+
+    if (supported_properties & SupportedPartitionSchemes::Equally)
+    {
+        properties[1] = max_compute_units;
+        err = clCreateSubDevices(device, properties, max_for_partition,
+                                 out_devices.data(), nullptr);
+        test_failure_error_ret(
+            err, CL_DEVICE_PARTITION_FAILED,
+            "clCreateSubDevices should return CL_DEVICE_PARTITION_FAILED when: "
+            "\"the partition name is supported by the implementation but "
+            "in_device could not be further partitioned\"",
+            TEST_FAIL);
+    }
+
+    constexpr cl_device_partition_property INVALID_PARTITION_PROPERTY =
+        -1; // Aribitrary Invalid number
+    properties[0] = INVALID_PARTITION_PROPERTY;
+    err = clCreateSubDevices(device, properties, out_devices.size(),
+                             out_devices.data(), nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_VALUE,
+        "clCreateSubDevices should return CL_INVALID_VALUE when: \"values "
+        "specified in properties are not valid\" using an invalid property",
+        TEST_FAIL);
+
+    if (supported_properties & SupportedPartitionSchemes::Counts)
+    {
+        properties[0] = CL_DEVICE_PARTITION_BY_COUNTS;
+        properties[1] = max_sub_devices + 1;
+        err = clCreateSubDevices(device, properties, max_sub_devices + 1,
+                                 out_devices.data(), nullptr);
+        test_failure_error_ret(
+            err, CL_INVALID_DEVICE_PARTITION_COUNT,
+            "clCreateSubDevices should return "
+            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the partition name "
+            "specified in properties is CL_DEVICE_ PARTITION_BY_COUNTS and the "
+            "number of sub-devices requested exceeds "
+            "CL_DEVICE_PARTITION_MAX_SUB_DEVICES\"",
+            TEST_FAIL);
+
+        properties[1] = -1;
+        err = clCreateSubDevices(device, properties, out_devices.size(),
+                                 out_devices.data(), nullptr);
+        test_failure_error_ret(
+            err, CL_INVALID_DEVICE_PARTITION_COUNT,
+            "clCreateSubDevices should return "
+            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the number of compute "
+            "units requested for one or more sub-devices is less than zero\"",
+            TEST_FAIL);
+    }
+
+    if (supported_properties & SupportedPartitionSchemes::Equally)
+    {
+        properties[0] = CL_DEVICE_PARTITION_EQUALLY;
+        properties[1] = max_compute_units + 1;
+        err = clCreateSubDevices(device, properties, max_compute_units + 1,
+                                 out_devices.data(), nullptr);
+        test_failure_error_ret(
+            err, CL_INVALID_DEVICE_PARTITION_COUNT,
+            "clCreateSubDevices should return "
+            "CL_INVALID_DEVICE_PARTITION_COUNT when: \"the total number of "
+            "compute units requested exceeds CL_DEVICE_MAX_COMPUTE_UNITS for "
+            "in_device\"",
+            TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clRetainDevice */
+REGISTER_TEST_VERSION(negative_retain_device, Version(1, 2))
+{
+    cl_int err(CL_SUCCESS);
+    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
+    {
+        err = clRetainDevice(invalid_device);
+        test_failure_error_ret(err, CL_INVALID_DEVICE,
+                               "clRetainDevice should return CL_INVALID_DEVICE "
+                               "when: \"device is not "
+                               "a valid device\"",
+                               TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
+
+/* Negative Tests for clReleaseDevice */
+REGISTER_TEST_VERSION(negative_release_device, Version(1, 2))
+{
+    cl_int err(CL_SUCCESS);
+    for (auto invalid_device : get_invalid_objects<cl_device_id>(device))
+    {
+        err = clReleaseDevice(invalid_device);
+        test_failure_error_ret(err, CL_INVALID_DEVICE,
+                               "clReleaseDevice should return "
+                               "CL_INVALID_DEVICE when: \"device is not "
+                               "a valid device\"",
+                               TEST_FAIL);
+    }
+
+    return TEST_PASS;
+}
diff --git a/test_conformance/api/negative_enqueue_marker.cpp b/test_conformance/api/negative_enqueue_marker.cpp
new file mode 100644
index 00000000..182b3dbf
--- /dev/null
+++ b/test_conformance/api/negative_enqueue_marker.cpp
@@ -0,0 +1,97 @@
+//
+// Copyright (c) 2025 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "testBase.h"
+#include "harness/typeWrappers.h"
+
+REGISTER_TEST(negative_enqueue_marker_with_wait_list)
+{
+    cl_platform_id platform = getPlatformFromDevice(device);
+    cl_context_properties props[3] = {
+        CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platform),
+        0
+    };
+
+    cl_int err = CL_SUCCESS;
+    clContextWrapper ctx =
+        clCreateContext(props, 1, &device, nullptr, nullptr, &err);
+    test_error(err, "clCreateContext failed");
+
+    cl_event ret_event = nullptr;
+
+    err = clEnqueueMarkerWithWaitList(nullptr, 0, nullptr, &ret_event);
+    test_failure_error_ret(err, CL_INVALID_COMMAND_QUEUE,
+                           "clEnqueueMarkerWithWaitList should return "
+                           "CL_INVALID_COMMAND_QUEUE when: \"command_queue is "
+                           "not a valid host command-queue\" using a nullptr",
+                           TEST_FAIL);
+    test_assert_error(ret_event == nullptr,
+                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
+                      "should be created");
+
+    clEventWrapper different_ctx_event = clCreateUserEvent(ctx, &err);
+    test_error(err, "clCreateUserEvent failed");
+
+    err =
+        clEnqueueMarkerWithWaitList(queue, 1, &different_ctx_event, &ret_event);
+    test_failure_error_ret(
+        err, CL_INVALID_CONTEXT,
+        "clEnqueueMarkerWithWaitList should return CL_INVALID_CONTEXT when: "
+        "\"The context of both the command queue and the events in ret_event "
+        "wait list are not the same\"",
+        TEST_FAIL);
+    test_assert_error(ret_event == nullptr,
+                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
+                      "should be created");
+
+    err = clEnqueueMarkerWithWaitList(queue, 1, nullptr, &ret_event);
+    test_failure_error_ret(
+        err, CL_INVALID_EVENT_WAIT_LIST,
+        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
+        "when: \"num_events_in_wait_list > 0 but event_wait_list is NULL\"",
+        TEST_FAIL);
+    test_assert_error(ret_event == nullptr,
+                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
+                      "should be created");
+
+
+    clEventWrapper event = clCreateUserEvent(context, &err);
+    test_error(err, "clCreateUserEvent failed");
+
+    err = clEnqueueMarkerWithWaitList(queue, 0, &event, &ret_event);
+    test_failure_error_ret(
+        err, CL_INVALID_EVENT_WAIT_LIST,
+        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
+        "when: \"num_events_in_wait_list is 0 but event_wait_list is not "
+        "NULL\"",
+        TEST_FAIL);
+    test_assert_error(ret_event == nullptr,
+                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
+                      "should be created");
+
+    cl_event invalid_event_wait_list[] = { nullptr };
+    err = clEnqueueMarkerWithWaitList(queue, 1, invalid_event_wait_list,
+                                      &ret_event);
+    test_failure_error_ret(
+        err, CL_INVALID_EVENT_WAIT_LIST,
+        "clEnqueueMarkerWithWaitList should return CL_INVALID_EVENT_WAIT_LIST "
+        "when: \"event objects in event_wait_list are not valid events\"",
+        TEST_FAIL);
+    test_assert_error(ret_event == nullptr,
+                      "if clEnqueueMarkerWithWaitList failed, no ret_event "
+                      "should be created");
+
+    return TEST_PASS;
+}
diff --git a/test_conformance/api/negative_platform.cpp b/test_conformance/api/negative_platform.cpp
index f98ec13a..6c5922c5 100644
--- a/test_conformance/api/negative_platform.cpp
+++ b/test_conformance/api/negative_platform.cpp
@@ -40,9 +40,20 @@ REGISTER_TEST(negative_get_platform_info)
 {
     cl_platform_id platform = getPlatformFromDevice(device);
 
+    cl_int err(CL_SUCCESS);
+    for (auto invalid_platform : get_invalid_objects<cl_platform_id>(device))
+    {
+        err = clGetPlatformInfo(invalid_platform, CL_PLATFORM_VERSION,
+                                sizeof(char*), nullptr, nullptr);
+        test_failure_error_ret(err, CL_INVALID_PLATFORM,
+                               "clGetPlatformInfo should return "
+                               "CL_INVALID_PLATFORM  when: \"platform "
+                               "is not a valid platform\"",
+                               TEST_FAIL);
+    }
+
     constexpr cl_platform_info INVALID_PARAM_VALUE = 0;
-    cl_int err =
-        clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
+    err = clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr);
     test_failure_error_ret(
         err, CL_INVALID_VALUE,
         "clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name "
diff --git a/test_conformance/api/negative_queue.cpp b/test_conformance/api/negative_queue.cpp
index 7da68b32..8959ca0b 100644
--- a/test_conformance/api/negative_queue.cpp
+++ b/test_conformance/api/negative_queue.cpp
@@ -349,24 +349,34 @@ REGISTER_TEST_VERSION(negative_set_default_device_command_queue, Version(2, 1))
     }
 
     {
-        constexpr cl_queue_properties props[] = {
-            CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
-        };
-        clCommandQueueWrapper not_on_device_queue =
-            clCreateCommandQueueWithProperties(context, device, props, &err);
-        test_error_fail(err, "clCreateCommandQueueWithProperties failed");
-        err = clSetDefaultDeviceCommandQueue(context, device,
-                                             not_on_device_queue);
-        if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
+        cl_command_queue_properties queue_properties;
+        err =
+            clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
+                            sizeof(queue_properties), &queue_properties, NULL);
+        test_error(err, "Unable to query CL_DEVICE_QUEUE_PROPERTIES");
+        if (queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
         {
-            log_error("ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
-                      "clSetDefaultDeviceCommandQueue should return "
-                      "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: "
-                      "\"command_queue is not a valid command-queue for "
-                      "device\" using a command queue that is not on device",
-                      IGetErrorString(err),
-                      "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE",
-                      __FILE__, __LINE__);
+            constexpr cl_queue_properties props[] = {
+                CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
+            };
+            clCommandQueueWrapper not_on_device_queue =
+                clCreateCommandQueueWithProperties(context, device, props,
+                                                   &err);
+            test_error_fail(err, "clCreateCommandQueueWithProperties failed");
+            err = clSetDefaultDeviceCommandQueue(context, device,
+                                                 not_on_device_queue);
+            if (err != CL_INVALID_OPERATION && err != CL_INVALID_COMMAND_QUEUE)
+            {
+                log_error(
+                    "ERROR: %s! (Got %s, expected (%s) from %s:%d)\n",
+                    "clSetDefaultDeviceCommandQueue should return "
+                    "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE when: "
+                    "\"command_queue is not a valid command-queue for "
+                    "device\" using a command queue that is not on device",
+                    IGetErrorString(err),
+                    "CL_INVALID_OPERATION or CL_INVALID_COMMAND_QUEUE",
+                    __FILE__, __LINE__);
+            }
         }
     }
 
diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp
index 29677623..fd8c3ee5 100644
--- a/test_conformance/api/test_api_min_max.cpp
+++ b/test_conformance/api/test_api_min_max.cpp
@@ -1526,7 +1526,7 @@ REGISTER_TEST(min_max_constant_buffer_size)
     size_t threads[1], localThreads[1];
     cl_int *constantData, *resultData;
     cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize;
-    int i;
+    size_t i;
     cl_event event;
     cl_int event_status;
     MTdata d;
@@ -1556,6 +1556,8 @@ REGISTER_TEST(min_max_constant_buffer_size)
 
     maxAllocSize = get_device_info_max_mem_alloc_size(
         device, MAX_DEVICE_MEMORY_SIZE_DIVISOR);
+    log_info("Reported max alloc size of %" PRIu64 " bytes.\n",
+             (uint64_t)maxAllocSize);
 
     if (maxSize > maxAllocSize) maxSize = maxAllocSize;
 
@@ -1590,7 +1592,7 @@ REGISTER_TEST(min_max_constant_buffer_size)
             return EXIT_FAILURE;
         }
 
-        for (i = 0; i < (int)(numberOfInts); i++)
+        for (i = 0; i < numberOfInts; i++)
             constantData[i] = (int)genrand_int32(d);
 
         clMemWrapper streams[3];
@@ -1678,11 +1680,11 @@ REGISTER_TEST(min_max_constant_buffer_size)
                                     sizeToAllocate, resultData, 0, NULL, NULL);
         test_error(error, "clEnqueueReadBuffer failed");
 
-        for (i = 0; i < (int)(numberOfInts); i++)
+        for (i = 0; i < numberOfInts; i++)
             if (constantData[i] != resultData[i])
             {
-                log_error("Data failed to verify: constantData[%d]=%d != "
-                          "resultData[%d]=%d\n",
+                log_error("Data failed to verify: constantData[%zu]=%d != "
+                          "resultData[%zu]=%d\n",
                           i, constantData[i], i, resultData[i]);
                 free(constantData);
                 free(resultData);
diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp
index 90b302b2..47b151b0 100644
--- a/test_conformance/api/test_kernel_arg_info.cpp
+++ b/test_conformance/api/test_kernel_arg_info.cpp
@@ -487,10 +487,10 @@ compare_kernel_with_expected(cl_context context, cl_device_id device,
     int failed_tests = 0;
     clKernelWrapper kernel;
     clProgramWrapper program;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(device).c_str());
-    test_error(err, "create_single_kernel_helper_with_build_options");
+    cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                             &kernel_src, "get_kernel_arg_info",
+                                             get_build_options(device).c_str());
+    test_error(err, "create_single_kernel_helper");
     for (size_t i = 0; i < expected_args.size(); ++i)
     {
         KernelArgInfo actual;
@@ -874,11 +874,10 @@ static int test_null_param(cl_context context, cl_device_id device,
 {
     clProgramWrapper program;
     clKernelWrapper kernel;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(device).c_str());
-    test_error_ret(err, "create_single_kernel_helper_with_build_options",
-                   TEST_FAIL);
+    cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                             &kernel_src, "get_kernel_arg_info",
+                                             get_build_options(device).c_str());
+    test_error_ret(err, "create_single_kernel_helper", TEST_FAIL);
 
     err = clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER,
                              CL_KERNEL_ARG_ADDRESS_QUALIFIER, 0, nullptr,
@@ -916,12 +915,11 @@ static int test_arg_name_size(cl_context context, cl_device_id device,
     char arg_return[sizeof(KERNEL_ARGUMENT_NAME) + 1];
     clProgramWrapper program;
     clKernelWrapper kernel;
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
-        get_build_options(device).c_str());
+    cl_int err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                             &kernel_src, "get_kernel_arg_info",
+                                             get_build_options(device).c_str());
 
-    test_error_ret(err, "create_single_kernel_helper_with_build_options",
-                   TEST_FAIL);
+    test_error_ret(err, "create_single_kernel_helper", TEST_FAIL);
 
     err =
         clGetKernelArgInfo(kernel, SINGLE_KERNEL_ARG_NUMBER, CL_KERNEL_ARG_NAME,
diff --git a/test_conformance/api/test_kernel_attributes.cpp b/test_conformance/api/test_kernel_attributes.cpp
index 86b3595c..fadae00a 100644
--- a/test_conformance/api/test_kernel_attributes.cpp
+++ b/test_conformance/api/test_kernel_attributes.cpp
@@ -382,17 +382,18 @@ REGISTER_TEST(null_required_work_group_size)
     clMemWrapper dst;
     dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int),
                          nullptr, &error);
+    test_error(error, "clCreateBuffer failed");
 
     struct KernelAttribInfo
     {
-        std::string str;
-        cl_uint max_dim;
+        cl_int wgs[3];
+        cl_uint min_dim;
     };
 
     std::vector<KernelAttribInfo> attribs;
-    attribs.push_back({ "__attribute__((reqd_work_group_size(2,1,1)))", 1 });
-    attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,1)))", 2 });
-    attribs.push_back({ "__attribute__((reqd_work_group_size(2,3,4)))", 3 });
+    attribs.push_back({ { 2, 1, 1 }, 1 });
+    attribs.push_back({ { 2, 3, 1 }, 2 });
+    attribs.push_back({ { 2, 3, 4 }, 3 });
 
     const std::string body_str = R"(
         __kernel void wg_size(__global int* dst)
@@ -409,7 +410,11 @@ REGISTER_TEST(null_required_work_group_size)
 
     for (auto& attrib : attribs)
     {
-        const std::string source_str = attrib.str + body_str;
+        const std::string attrib_str = "__attribute__((reqd_work_group_size("
+            + std::to_string(attrib.wgs[0]) + ","
+            + std::to_string(attrib.wgs[1]) + ","
+            + std::to_string(attrib.wgs[2]) + ")))";
+        const std::string source_str = attrib_str + body_str;
         const char* source = source_str.c_str();
 
         clProgramWrapper program;
@@ -421,21 +426,19 @@ REGISTER_TEST(null_required_work_group_size)
         error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
         test_error(error, "clSetKernelArg failed");
 
-        for (cl_uint work_dim = 1; work_dim <= attrib.max_dim; work_dim++)
+        for (cl_uint work_dim = attrib.min_dim; work_dim <= 3; work_dim++)
         {
-            const cl_int expected[3] = { 2, work_dim >= 2 ? 3 : 1,
-                                         work_dim >= 3 ? 4 : 1 };
             const size_t test_work_group_size =
-                expected[0] * expected[1] * expected[2];
-            if ((size_t)expected[0] > device_max_work_item_sizes[0]
-                || (size_t)expected[1] > device_max_work_item_sizes[1]
-                || (size_t)expected[2] > device_max_work_item_sizes[2]
+                attrib.wgs[0] * attrib.wgs[1] * attrib.wgs[2];
+            if ((size_t)attrib.wgs[0] > device_max_work_item_sizes[0]
+                || (size_t)attrib.wgs[1] > device_max_work_item_sizes[1]
+                || (size_t)attrib.wgs[2] > device_max_work_item_sizes[2]
                 || test_work_group_size > device_max_work_group_size)
             {
                 log_info("Skipping test for work_dim = %u: required work group "
                          "size (%i, %i, %i) (total %zu) exceeds device max "
                          "work group size (%zu, %zu, %zu) (total %zu)\n",
-                         work_dim, expected[0], expected[1], expected[2],
+                         work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
                          test_work_group_size, device_max_work_item_sizes[0],
                          device_max_work_item_sizes[1],
                          device_max_work_item_sizes[2],
@@ -444,8 +447,10 @@ REGISTER_TEST(null_required_work_group_size)
             }
 
             const cl_int zero = 0;
-            error = clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0,
-                                        sizeof(expected), 0, nullptr, nullptr);
+            error =
+                clEnqueueFillBuffer(queue, dst, &zero, sizeof(zero), 0,
+                                    sizeof(attrib.wgs), 0, nullptr, nullptr);
+            test_error(error, "clEnqueueFillBuffer failed");
 
             const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
             error = clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr,
@@ -458,12 +463,12 @@ REGISTER_TEST(null_required_work_group_size)
                                         results, 0, nullptr, nullptr);
             test_error(error, "clEnqueueReadBuffer failed");
 
-            if (results[0] != expected[0] || results[1] != expected[1]
-                || results[2] != expected[2])
+            if (results[0] != attrib.wgs[0] || results[1] != attrib.wgs[1]
+                || results[2] != attrib.wgs[2])
             {
                 log_error("Executed local size mismatch with work_dim = %u: "
                           "Expected (%d,%d,%d) got (%d,%d,%d)\n",
-                          work_dim, expected[0], expected[1], expected[2],
+                          work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
                           results[0], results[1], results[2]);
                 return TEST_FAIL;
             }
@@ -477,15 +482,15 @@ REGISTER_TEST(null_required_work_group_size)
                 test_error(error,
                            "clGetKernelSuggestedLocalWorkSizeKHR failed");
 
-                if ((cl_int)suggested[0] != expected[0]
-                    || (cl_int)suggested[1] != expected[1]
-                    || (cl_int)suggested[2] != expected[2])
+                if (suggested[0] != (size_t)attrib.wgs[0]
+                    || suggested[1] != (size_t)attrib.wgs[1]
+                    || suggested[2] != (size_t)attrib.wgs[2])
                 {
                     log_error("Suggested local size mismatch with work_dim = "
-                              "%u: Expected (%d,%d,%d) got (%d,%d,%d)\n",
-                              work_dim, expected[0], expected[1], expected[2],
-                              (cl_int)suggested[0], (cl_int)suggested[1],
-                              (cl_int)suggested[2]);
+                              "%u: Expected (%d,%d,%d) got (%zu,%zu,%zu)\n",
+                              work_dim, attrib.wgs[0], attrib.wgs[1],
+                              attrib.wgs[2], suggested[0], suggested[1],
+                              suggested[2]);
                     return TEST_FAIL;
                 }
             }
diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp
index 47100b7a..d0dc116f 100644
--- a/test_conformance/api/test_kernels.cpp
+++ b/test_conformance/api/test_kernels.cpp
@@ -982,6 +982,12 @@ REGISTER_TEST(negative_invalid_arg_index)
 
 REGISTER_TEST(negative_invalid_arg_size_local)
 {
+    if (true)
+    {
+        log_info("Disabling this test temporarily, see internal issue 374.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
     cl_int error = CL_SUCCESS;
     clProgramWrapper program;
     clKernelWrapper local_arg_kernel;
diff --git a/test_conformance/api/test_queue_hint.cpp b/test_conformance/api/test_queue_hint.cpp
index 89769d7e..21df4341 100644
--- a/test_conformance/api/test_queue_hint.cpp
+++ b/test_conformance/api/test_queue_hint.cpp
@@ -86,7 +86,8 @@ REGISTER_TEST(queue_hint)
     clProgramWrapper program;
     clKernelWrapper kernel;
 
-    err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, queue_hint_test_kernel, "vec_cpy", NULL);
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      queue_hint_test_kernel, "vec_cpy");
     if (err != 0)
     {
         return err;
diff --git a/test_conformance/api/test_spirv_queries.cpp b/test_conformance/api/test_spirv_queries.cpp
index 720f73b1..d1536e0b 100644
--- a/test_conformance/api/test_spirv_queries.cpp
+++ b/test_conformance/api/test_spirv_queries.cpp
@@ -757,7 +757,7 @@ REGISTER_TEST(spirv_query_dependencies)
         }
         for (const auto& extension_dep : it->second.extensions)
         {
-            log_error("Checked for SPIR-V extension %s.n",
+            log_error("Checked for SPIR-V extension %s.\n",
                       extension_dep.c_str());
         }
         return TEST_FAIL;
diff --git a/test_conformance/basic/test_arrayimagecopy.cpp b/test_conformance/basic/test_arrayimagecopy.cpp
index bb44abff..16b24390 100644
--- a/test_conformance/basic/test_arrayimagecopy.cpp
+++ b/test_conformance/basic/test_arrayimagecopy.cpp
@@ -188,9 +188,17 @@ REGISTER_TEST(arrayimagecopy)
 {
     PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
 
-    return test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
-                                 CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
-                                 test_arrayimagecopy_single_format);
+    int error = test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
+                                      CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
+                                      test_arrayimagecopy_single_format);
+    if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
+    {
+        error |= test_arrayimagecommon(
+            device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+            CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
+            test_arrayimagecopy_single_format);
+    }
+    return error;
 }
 
 
@@ -198,7 +206,15 @@ REGISTER_TEST(arrayimagecopy3d)
 {
     PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
 
-    return test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
-                                 CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D,
-                                 test_arrayimagecopy_single_format);
+    int error = test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
+                                      CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D,
+                                      test_arrayimagecopy_single_format);
+    if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
+    {
+        error |= test_arrayimagecommon(
+            device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+            CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D,
+            test_arrayimagecopy_single_format);
+    }
+    return error;
 }
diff --git a/test_conformance/basic/test_arrayreadwrite.cpp b/test_conformance/basic/test_arrayreadwrite.cpp
index fe4bb995..4b0555fd 100644
--- a/test_conformance/basic/test_arrayreadwrite.cpp
+++ b/test_conformance/basic/test_arrayreadwrite.cpp
@@ -72,16 +72,36 @@ static int test_arrayreadwrite_impl(cl_device_id device, cl_context context,
         err = clEnqueueWriteBuffer(
             queue, buffer, CL_TRUE, offset * sizeof(cl_uint),
             sizeof(cl_uint) * cb, &reference_vals[offset], 0, nullptr, nullptr);
-        test_error(err, "clEnqueueWriteBuffer failed");
+        if (flags & CL_MEM_IMMUTABLE_EXT)
+        {
+            test_failure_error_ret(err, CL_INVALID_OPERATION,
+                                   "clEnqueueWriteBuffer is expected to fail "
+                                   "with CL_INVALID_OPERATION when the buffer "
+                                   "is created with CL_MEM_IMMUTABLE_EXT",
+                                   TEST_FAIL);
+        }
+        else
+        {
+            test_error(err, "clEnqueueWriteBuffer failed");
+        }
 
         err = clEnqueueReadBuffer(
             queue, buffer, CL_TRUE, offset * sizeof(cl_uint),
             cb * sizeof(cl_uint), &outptr[offset], 0, nullptr, nullptr);
         test_error(err, "clEnqueueReadBuffer failed");
 
+        const cl_uint* expected_buffer_values = nullptr;
+        if (flags & CL_MEM_IMMUTABLE_EXT)
+        {
+            expected_buffer_values = inptr.data();
+        }
+        else
+        {
+            expected_buffer_values = reference_vals.data();
+        }
         for (int j = offset; j < offset + cb; j++)
         {
-            if (reference_vals[j] != outptr[j])
+            if (expected_buffer_values[j] != outptr[j])
             {
                 log_error("ARRAY read, write test failed\n");
                 err = -1;
@@ -105,3 +125,11 @@ REGISTER_TEST(arrayreadwrite)
     return test_arrayreadwrite_impl(device, context, queue, num_elements,
                                     CL_MEM_READ_WRITE);
 }
+
+REGISTER_TEST(immutable_arrayreadwrite)
+{
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+
+    return test_arrayreadwrite_impl(device, context, queue, num_elements,
+                                    CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR);
+}
diff --git a/test_conformance/basic/test_bufferreadwriterect.cpp b/test_conformance/basic/test_bufferreadwriterect.cpp
index 883bff7c..03ba2706 100644
--- a/test_conformance/basic/test_bufferreadwriterect.cpp
+++ b/test_conformance/basic/test_bufferreadwriterect.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 #include "harness/compat.h"
+#include "errorHelpers.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -194,6 +195,43 @@ int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, si
     return 0;
 }
 
+int immutable_copy_region(size_t src, size_t soffset[3], size_t sregion[3],
+                          size_t dst, size_t doffset[3], size_t dregion[3])
+{
+
+    // Copy between cl buffers.
+    size_t src_slice_pitch =
+        (width[src] * height[src] != 1) ? width[src] * height[src] : 0;
+    size_t dst_slice_pitch =
+        (width[dst] * height[dst] != 1) ? width[dst] * height[dst] : 0;
+    size_t src_row_pitch = width[src];
+
+    cl_int err;
+    if (check_overlap_rect(soffset, doffset, sregion, src_row_pitch,
+                           src_slice_pitch))
+    {
+        log_info("Copy overlap reported, skipping copy buffer rect\n");
+        return CL_SUCCESS;
+    }
+    else
+    {
+        err = clEnqueueCopyBufferRect(gQueue, buffer[src], buffer[dst], soffset,
+                                      doffset, sregion, /*dregion,*/
+                                      width[src], src_slice_pitch, width[dst],
+                                      dst_slice_pitch, 0, nullptr, nullptr);
+        if (err != CL_INVALID_OPERATION)
+        {
+            log_error(
+                "clEnqueueCopyBufferRect should return "
+                "CL_INVALID_OPERATION but returned %s between %zu and %zu",
+                IGetErrorString(err), src, dst);
+            return TEST_FAIL;
+        }
+    }
+
+    return TEST_PASS;
+}
+
 // This function compares the destination region in the buffer pointed
 // to by device, to the source region of the specified verify buffer.
 int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) {
@@ -337,6 +375,32 @@ int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, s
     return 0;
 }
 
+int immutable_write_region(size_t src, size_t soffset[3], size_t sregion[3],
+                           size_t dst, size_t doffset[3], size_t dregion[3])
+{
+    initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt);
+
+    size_t src_slice_pitch =
+        (width[src] * height[src] != 1) ? width[src] * height[src] : 0;
+    size_t dst_slice_pitch =
+        (width[dst] * height[dst] != 1) ? width[dst] * height[dst] : 0;
+
+    cl_int error = clEnqueueWriteBufferRect(
+        gQueue, buffer[dst], CL_TRUE, doffset, soffset, dregion, width[dst],
+        dst_slice_pitch, width[src], src_slice_pitch, tmp_buffer, 0, nullptr,
+        nullptr);
+
+    if (error != CL_INVALID_OPERATION)
+    {
+        log_error("clEnqueueWriteBufferRect should return CL_INVALID_OPERATION "
+                  "but retured %s between %zu and %zu",
+                  IGetErrorString(error), src, dst);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
 void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
 {
     free( data );
@@ -591,3 +655,16 @@ REGISTER_TEST(bufferreadwriterect)
         device, context, queue, num_elements,
         CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, test_functions);
 }
+
+REGISTER_TEST(immutable_bufferreadwriterect)
+{
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+
+    TestFunctions test_functions;
+    test_functions.copy = immutable_copy_region;
+    test_functions.read = read_verify_region;
+    test_functions.write = immutable_write_region;
+    return test_bufferreadwriterect_impl(
+        device, context, queue, num_elements,
+        CL_MEM_USE_HOST_PTR | CL_MEM_IMMUTABLE_EXT, test_functions);
+}
diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp
index 47123120..685982d1 100644
--- a/test_conformance/basic/test_enqueued_local_size.cpp
+++ b/test_conformance/basic/test_enqueued_local_size.cpp
@@ -103,11 +103,11 @@ REGISTER_TEST_VERSION(enqueued_local_size, Version(2, 0))
 
     std::string cl_std = "-cl-std=CL";
     cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
-    err = create_single_kernel_helper_with_build_options(
+    err = create_single_kernel_helper(
         context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
         "test_enqueued_local_size_1d", cl_std.c_str());
     test_error(err, "create_single_kernel_helper failed");
-    err = create_single_kernel_helper_with_build_options(
+    err = create_single_kernel_helper(
         context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
         "test_enqueued_local_size_2d", cl_std.c_str());
     test_error(err, "create_single_kernel_helper failed");
diff --git a/test_conformance/basic/test_imagearraycopy.cpp b/test_conformance/basic/test_imagearraycopy.cpp
index a400c460..d0ce67a1 100644
--- a/test_conformance/basic/test_imagearraycopy.cpp
+++ b/test_conformance/basic/test_imagearraycopy.cpp
@@ -27,6 +27,82 @@ using test_function_t = int (*)(cl_device_id, cl_context, cl_command_queue,
                                 cl_mem_flags, cl_mem_flags, cl_mem_object_type,
                                 const cl_image_format *);
 
+static int test_negative_imagearraycopy_single_format(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    cl_mem_flags image_flags, cl_mem_flags buffer_flags,
+    cl_mem_object_type image_type, const cl_image_format *format)
+{
+    std::unique_ptr<cl_uchar, decltype(&free)> bufptr{ nullptr, free },
+        imgptr{ nullptr, free };
+    clMemWrapper image;
+    clMemWrapper buffer;
+    const size_t img_width = 512;
+    const size_t img_height = 512;
+    const size_t img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
+    size_t elem_size;
+    size_t buffer_size;
+    cl_int err;
+    RandomSeed seed(gRandomSeed);
+
+    const size_t origin[3] = { 0, 0, 0 },
+                 region[3] = { img_width, img_height, img_depth };
+
+    log_info("Testing %s %s\n",
+             GetChannelOrderName(format->image_channel_order),
+             GetChannelTypeName(format->image_channel_data_type));
+
+    elem_size = get_pixel_size(format);
+    buffer_size =
+        sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
+
+    if (image_flags & CL_MEM_USE_HOST_PTR || image_flags & CL_MEM_COPY_HOST_PTR)
+    {
+        imgptr.reset(static_cast<cl_uchar *>(
+            create_random_data(kUChar, seed, buffer_size)));
+    }
+
+    bufptr.reset(
+        static_cast<cl_uchar *>(create_random_data(kUChar, seed, buffer_size)));
+
+    if (CL_MEM_OBJECT_IMAGE2D == image_type)
+    {
+        image = create_image_2d(context, image_flags, format, img_width,
+                                img_height, 0, imgptr.get(), &err);
+    }
+    else
+    {
+        image =
+            create_image_3d(context, image_flags, format, img_width, img_height,
+                            img_depth, 0, 0, imgptr.get(), &err);
+    }
+    test_error(err, "create_image_xd failed");
+
+    if (!(image_flags & CL_MEM_USE_HOST_PTR
+          || image_flags & CL_MEM_COPY_HOST_PTR))
+    {
+        imgptr.reset(static_cast<cl_uchar *>(
+            create_random_data(kUChar, seed, buffer_size)));
+
+        err = clEnqueueWriteImage(queue, image, CL_TRUE, origin, region, 0, 0,
+                                  imgptr.get(), 0, nullptr, nullptr);
+        test_error(err, "clEnqueueWriteImage failed");
+    }
+
+    buffer =
+        clCreateBuffer(context, buffer_flags, buffer_size, bufptr.get(), &err);
+    test_error(err, "clCreateBuffer failed");
+
+    err = clEnqueueCopyImageToBuffer(queue, image, buffer, origin, region, 0, 0,
+                                     nullptr, nullptr);
+    test_failure_error_ret(
+        err, CL_INVALID_OPERATION,
+        "clEnqueueCopyImageToBuffer should return CL_INVALID_OPERATION when: "
+        "\" dst_buffer is created with CL_MEM_IMMUTABLE_EXT flag\"",
+        TEST_FAIL);
+
+    return TEST_PASS;
+}
+
 static int test_imagearraycopy_single_format(
     cl_device_id device, cl_context context, cl_command_queue queue,
     cl_mem_flags image_flags, cl_mem_flags buffer_flags,
@@ -188,9 +264,18 @@ REGISTER_TEST(imagearraycopy)
 {
     PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
 
-    return test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
-                                 CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
-                                 test_imagearraycopy_single_format);
+    int error = test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
+                                      CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
+                                      test_imagearraycopy_single_format);
+
+    if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
+    {
+        error |= test_imagearraycommon(
+            device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+            CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D,
+            test_imagearraycopy_single_format);
+    }
+    return error;
 }
 
 
@@ -198,7 +283,38 @@ REGISTER_TEST(imagearraycopy3d)
 {
     PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
 
-    return test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
-                                 CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D,
-                                 test_imagearraycopy_single_format);
+    int error = test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
+                                      CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D,
+                                      test_imagearraycopy_single_format);
+
+    if (is_extension_available(device, "cl_ext_immutable_memory_objects"))
+    {
+        error |= test_imagearraycommon(
+            device, context, queue, CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+            CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D,
+            test_imagearraycopy_single_format);
+    }
+    return error;
+}
+
+REGISTER_TEST(negative_imagearraycopy)
+{
+    PASSIVE_REQUIRE_IMAGE_SUPPORT(device);
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+
+    return test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
+                                 CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+                                 CL_MEM_OBJECT_IMAGE2D,
+                                 test_negative_imagearraycopy_single_format);
+}
+
+REGISTER_TEST(negative_imagearraycopy3d)
+{
+    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device);
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+
+    return test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
+                                 CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+                                 CL_MEM_OBJECT_IMAGE3D,
+                                 test_negative_imagearraycopy_single_format);
 }
diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp
index ac0a84f9..ecd2d9bf 100644
--- a/test_conformance/basic/test_sizeof.cpp
+++ b/test_conformance/basic/test_sizeof.cpp
@@ -50,8 +50,8 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty
     {
         sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
     }
-    cl_int err = create_single_kernel_helper_with_build_options(
-        context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr);
+    cl_int err = create_single_kernel_helper(context, &p, &k, 4,
+                                             sizeof_kernel_code, "test_sizeof");
     test_error(err, "Failed to build kernel/program.");
 
     m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err );
diff --git a/test_conformance/buffers/main.cpp b/test_conformance/buffers/main.cpp
index f2a8c2a3..496d3b7d 100644
--- a/test_conformance/buffers/main.cpp
+++ b/test_conformance/buffers/main.cpp
@@ -19,19 +19,24 @@
 
 #include "testBase.h"
 
-const cl_mem_flags flag_set[] = {
-    CL_MEM_ALLOC_HOST_PTR,
-    CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
-    CL_MEM_USE_HOST_PTR,
-    CL_MEM_COPY_HOST_PTR,
-    0
-};
+const cl_mem_flags flag_set[] = { CL_MEM_ALLOC_HOST_PTR,
+                                  CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+                                  CL_MEM_USE_HOST_PTR,
+                                  CL_MEM_COPY_HOST_PTR,
+                                  0,
+                                  CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR,
+                                  CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR,
+                                  CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR
+                                      | CL_MEM_ALLOC_HOST_PTR };
 const char* flag_set_names[] = {
     "CL_MEM_ALLOC_HOST_PTR",
     "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
     "CL_MEM_USE_HOST_PTR",
     "CL_MEM_COPY_HOST_PTR",
-    "0"
+    "0",
+    "CL_MEM_IMMUTABLE_EXT | CL_MEM_USE_HOST_PTR",
+    "CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR",
+    "CL_MEM_IMMUTABLE_EXT | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR",
 };
 
 int main( int argc, const char *argv[] )
diff --git a/test_conformance/buffers/testBase.h b/test_conformance/buffers/testBase.h
index 8c5bb0e4..4cd17155 100644
--- a/test_conformance/buffers/testBase.h
+++ b/test_conformance/buffers/testBase.h
@@ -25,6 +25,6 @@
 extern const cl_mem_flags flag_set[];
 extern const char* flag_set_names[];
 
-#define NUM_FLAGS 5
+#define NUM_FLAGS 8
 
 #endif // _testBase_h
diff --git a/test_conformance/buffers/test_buffer_copy.cpp b/test_conformance/buffers/test_buffer_copy.cpp
index 81dbd5cf..cba2c626 100644
--- a/test_conformance/buffers/test_buffer_copy.cpp
+++ b/test_conformance/buffers/test_buffer_copy.cpp
@@ -39,7 +39,8 @@ static int verify_copy_buffer(int *inptr, int *outptr, int n)
 
 using alignedOwningPtr = std::unique_ptr<cl_int[], decltype(&align_free)>;
 
-static int test_copy( cl_command_queue queue, cl_context context, int num_elements, MTdata d )
+static int test_copy(cl_device_id device, cl_command_queue queue,
+                     cl_context context, int num_elements, MTdata d)
 {
     clMemWrapper buffers[2];
     cl_int err = CL_SUCCESS;
@@ -76,10 +77,19 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
         return TEST_FAIL;
     }
 
+    const bool has_immutable_memory_extension =
+        is_extension_available(device, "cl_ext_immutable_memory_objects");
+
     for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
     {
         for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
         {
+            if (((flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+                 || (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
+                && !has_immutable_memory_extension)
+            {
+                continue;
+            }
             log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
 
             for (int i = 0; i < num_elements; i++)
@@ -89,7 +99,6 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
                 reference_ptr[i] = (int)genrand_int32(d);
             }
 
-
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
                 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],
                                             sizeof(cl_int) * num_elements,
@@ -116,7 +125,9 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
                 return TEST_FAIL;
             }
 
-            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
+            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)
+                && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
+            {
                 err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
                                            sizeof(cl_int) * num_elements,
                                            reference_ptr.get(), 0, nullptr,
@@ -130,11 +141,44 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
             err = clEnqueueCopyBuffer(queue, buffers[0], buffers[1], 0, 0,
                                       sizeof(cl_int) * num_elements, 0, nullptr,
                                       nullptr);
-            if ( err != CL_SUCCESS ){
+            if ((flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
+            {
+                if (err != CL_INVALID_OPERATION)
+                {
+                    test_failure_error_ret(err, CL_INVALID_OPERATION,
+                                           "clEnqueueCopyBuffer should return "
+                                           "CL_INVALID_OPERATION when: "
+                                           "\"dst_buffer is created with "
+                                           "CL_MEM_IMMUTABLE_EXT flag\"",
+                                           TEST_FAIL);
+                    return TEST_FAIL;
+                }
+            }
+            else if (err != CL_SUCCESS)
+            {
                 print_error(err, "clCopyArray failed\n");
                 return TEST_FAIL;
             }
 
+            err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
+                                      sizeof(int) * num_elements, out_ptr.get(),
+                                      0, nullptr, nullptr);
+            if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(),
+                                   num_elements))
+            {
+                log_error("test failed\n");
+                return TEST_FAIL;
+            }
+            else
+            {
+                log_info("test passed\n");
+            }
+
+            // Reset out_ptr
+            for (int i = 0; i < num_elements; i++)
+            {
+                out_ptr[i] = (int)0xdeadbeef; // seed with incorrect data
+            }
             err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
                                       sizeof(int) * num_elements, out_ptr.get(),
                                       0, nullptr, nullptr);
@@ -143,14 +187,20 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
                 return TEST_FAIL;
             }
 
-            if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(),
-                                   num_elements))
+            int *target_buffer = reference_ptr.get();
+            if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
             {
-                log_error( " test failed\n" );
+                target_buffer = invalid_ptr.get();
+            }
+
+            if (verify_copy_buffer(target_buffer, out_ptr.get(), num_elements))
+            {
+                log_error("test failed\n");
                 return TEST_FAIL;
             }
-            else{
-                log_info( " test passed\n" );
+            else
+            {
+                log_info("test passed\n");
             }
         } // dst flags
     } // src flags
@@ -160,7 +210,10 @@ static int test_copy( cl_command_queue queue, cl_context context, int num_elemen
 }   // end test_copy()
 
 
-static int testPartialCopy( cl_command_queue queue, cl_context context, int num_elements, cl_uint srcStart, cl_uint dstStart, int size, MTdata d )
+static int testPartialCopy(cl_device_id device, cl_command_queue queue,
+                           cl_context context, int num_elements,
+                           cl_uint srcStart, cl_uint dstStart, int size,
+                           MTdata d)
 {
     clMemWrapper buffers[2];
     cl_int err = CL_SUCCESS;
@@ -197,10 +250,19 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
         return TEST_FAIL;
     }
 
+    const bool has_immutable_memory_extension =
+        is_extension_available(device, "cl_ext_immutable_memory_objects");
+
     for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
     {
         for (int dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
         {
+            if (((flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+                 || (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
+                && !has_immutable_memory_extension)
+            {
+                continue;
+            }
             log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
 
             for (int i = 0; i < num_elements; i++)
@@ -236,7 +298,9 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
                 return TEST_FAIL;
             }
 
-            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)){
+            if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR)
+                && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
+            {
                 err = clEnqueueWriteBuffer(queue, buffers[0], CL_TRUE, 0,
                                            sizeof(cl_int) * num_elements,
                                            reference_ptr.get(), 0, nullptr,
@@ -251,27 +315,72 @@ static int testPartialCopy( cl_command_queue queue, cl_context context, int num_
                 queue, buffers[0], buffers[1], srcStart * sizeof(cl_int),
                 dstStart * sizeof(cl_int), sizeof(cl_int) * size, 0, nullptr,
                 nullptr);
-            if ( err != CL_SUCCESS){
-                print_error(err, "clEnqueueCopyBuffer failed\n");
+            if ((flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT))
+            {
+                if (err != CL_INVALID_OPERATION)
+                {
+                    test_failure_error_ret(err, CL_INVALID_OPERATION,
+                                           "clEnqueueCopyBuffer should return "
+                                           "CL_INVALID_OPERATION when: "
+                                           "\"dst_buffer is created with "
+                                           "CL_MEM_IMMUTABLE_EXT flag\"",
+                                           TEST_FAIL);
+                }
+            }
+            else if (err != CL_SUCCESS)
+            {
+                print_error(err, "clCopyArray failed\n");
                 return TEST_FAIL;
             }
 
+            err = clEnqueueReadBuffer(queue, buffers[0], true, 0,
+                                      sizeof(int) * num_elements, out_ptr.get(),
+                                      0, nullptr, nullptr);
+            if (err != CL_SUCCESS)
+            {
+                print_error(err, "clEnqueueReadBuffer failed\n");
+                return TEST_FAIL;
+            }
+            if (verify_copy_buffer(reference_ptr.get(), out_ptr.get(),
+                                   num_elements))
+            {
+                log_error("test failed\n");
+                return TEST_FAIL;
+            }
+            else
+            {
+                log_info("test passed\n");
+            }
+
+            // Reset out_ptr
+            for (int i = 0; i < num_elements; i++)
+            {
+                out_ptr[i] = (int)0xdeadbeef; // seed with incorrect data
+            }
             err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
                                       sizeof(int) * num_elements, out_ptr.get(),
                                       0, nullptr, nullptr);
-            if ( err != CL_SUCCESS){
+            if (err != CL_SUCCESS)
+            {
                 print_error(err, "clEnqueueReadBuffer failed\n");
                 return TEST_FAIL;
             }
 
-            if (verify_copy_buffer(reference_ptr.get() + srcStart,
-                                   out_ptr.get() + dstStart, size))
+            cl_int *target_buffer = reference_ptr.get() + srcStart;
+            if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
             {
-                log_error("buffer_COPY test failed\n");
+                target_buffer = invalid_ptr.get();
+            }
+
+            if (verify_copy_buffer(target_buffer, out_ptr.get() + dstStart,
+                                   size))
+            {
+                log_error("test failed\n");
                 return TEST_FAIL;
             }
-            else{
-                log_info("buffer_COPY test passed\n");
+            else
+            {
+                log_info("test passed\n");
             }
         } // dst mem flags
     } // src mem flags
@@ -289,7 +398,7 @@ REGISTER_TEST(buffer_copy)
 
     // test the preset size
     log_info( "set size: %d: ", num_elements );
-    if (test_copy(queue, context, num_elements, d) != TEST_PASS)
+    if (test_copy(device, queue, context, num_elements, d) != TEST_PASS)
     {
         err++;
     }
@@ -298,7 +407,7 @@ REGISTER_TEST(buffer_copy)
     for ( i = 0; i < 8; i++ ){
         size = (int)get_random_float(2.f,131072.f, d);
         log_info( "random size: %d: ", size );
-        if (test_copy(queue, context, size, d) != TEST_PASS)
+        if (test_copy(device, queue, context, size, d) != TEST_PASS)
         {
             err++;
         }
@@ -324,8 +433,8 @@ REGISTER_TEST(buffer_partial_copy)
         size = (int)get_random_float( 8.f, (float)(num_elements - srcStart), d );
         dstStart = (cl_uint)get_random_float( 0.f, (float)(num_elements - size), d );
         log_info( "random partial copy from %d to %d, size: %d: ", (int)srcStart, (int)dstStart, size );
-        if (testPartialCopy(queue, context, num_elements, srcStart, dstStart,
-                            size, d)
+        if (testPartialCopy(device, queue, context, num_elements, srcStart,
+                            dstStart, size, d)
             != TEST_PASS)
         {
             err++;
diff --git a/test_conformance/buffers/test_buffer_fill.cpp b/test_conformance/buffers/test_buffer_fill.cpp
index 2e7a22de..d8fa7654 100644
--- a/test_conformance/buffers/test_buffer_fill.cpp
+++ b/test_conformance/buffers/test_buffer_fill.cpp
@@ -598,6 +598,12 @@ static int test_buffer_fill(cl_device_id deviceID, cl_context context,
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
+
             clEventWrapper event[2];
             clMemWrapper buffers[2];
             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
@@ -721,6 +727,12 @@ REGISTER_TEST(buffer_fill_struct)
 
     for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
     {
+        // Skip immutable memory flags
+        if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+        {
+            continue;
+        }
+
         clProgramWrapper program;
         clKernelWrapper kernel;
         log_info("Testing with cl_mem_flags: %s\n",
diff --git a/test_conformance/buffers/test_buffer_map.cpp b/test_conformance/buffers/test_buffer_map.cpp
index 5cac90ab..3299902c 100644
--- a/test_conformance/buffers/test_buffer_map.cpp
+++ b/test_conformance/buffers/test_buffer_map.cpp
@@ -592,6 +592,12 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
+
             clMemWrapper buffer;
             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
@@ -671,6 +677,101 @@ static int test_buffer_map_read( cl_device_id deviceID, cl_context context, cl_c
 
 }   // end test_buffer_map_read()
 
+int test_immutable_buffer_map(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int num_elements,
+                              size_t size, const char *type, int loops)
+{
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+
+    void *outptr[5];
+    cl_int err;
+    int i;
+    size_t ptrSizes[5];
+    int total_errors = 0;
+    MTdataHolder mtdata(gRandomSeed);
+
+    size_t min_alignment = get_min_alignment(context);
+
+    ptrSizes[0] = size;
+    ptrSizes[1] = ptrSizes[0] << 1;
+    ptrSizes[2] = ptrSizes[1] << 1;
+    ptrSizes[3] = ptrSizes[2] << 1;
+    ptrSizes[4] = ptrSizes[3] << 1;
+
+    // embedded devices don't support long/ulong so skip over
+    if (!gHasLong && strstr(type, "long")) return TEST_SKIPPED_ITSELF;
+
+    for (i = 0; i < loops; i++)
+    {
+        for (int src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
+        {
+            // Testing writing from immutable flags
+            if (!(flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT))
+            {
+                continue;
+            }
+
+            clMemWrapper buffer;
+            outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
+            if (!outptr[i])
+            {
+                log_error(" unable to allocate %d bytes of memory\n",
+                          (int)ptrSizes[i] * num_elements);
+                return TEST_FAIL;
+            }
+            generate_random_data(kUChar, ptrSizes[i] * num_elements, mtdata,
+                                 outptr[i]);
+
+            buffer =
+                clCreateBuffer(context, flag_set[src_flag_id],
+                               ptrSizes[i] * num_elements, outptr[i], &err);
+
+            if (nullptr == buffer || CL_SUCCESS != err)
+            {
+                print_error(err, "clCreateBuffer failed\n");
+                align_free(outptr[i]);
+                return TEST_FAIL;
+            }
+
+            void *mappedPtr = clEnqueueMapBuffer(
+                queue, buffer, CL_TRUE, CL_MAP_READ, 0,
+                ptrSizes[i] * num_elements, 0, nullptr, nullptr, &err);
+            if (err != CL_SUCCESS)
+            {
+                print_error(err, "clEnqueueMapBuffer failed");
+                align_free(outptr[i]);
+                return TEST_FAIL;
+            }
+
+            if (memcmp(mappedPtr, outptr[i], ptrSizes[i] * num_elements) != 0)
+            {
+                log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
+                          1 << i, flag_set_names[src_flag_id]);
+                total_errors++;
+            }
+            else
+            {
+                log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
+                         1 << i, flag_set_names[src_flag_id]);
+            }
+
+            err = clEnqueueUnmapMemObject(queue, buffer, mappedPtr, 0, nullptr,
+                                          nullptr);
+            test_error(err, "clEnqueueUnmapMemObject failed");
+
+            // If we are using the outptr[i] as backing via USE_HOST_PTR we need
+            // to make sure we are done before freeing.
+            if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR))
+            {
+                err = clFinish(queue);
+                test_error(err, "clFinish failed");
+            }
+            align_free(outptr[i]);
+        }
+    } // cl_mem_flags
+
+    return total_errors > 0 ? TEST_FAIL : TEST_PASS;
+}
 
 #define DECLARE_LOCK_TEST(type, realType)                                      \
     REGISTER_TEST(buffer_map_read_##type)                                      \
@@ -691,6 +792,28 @@ DECLARE_LOCK_TEST(char, cl_char)
 DECLARE_LOCK_TEST(uchar, cl_uchar)
 DECLARE_LOCK_TEST(float, cl_float)
 
+#undef DECLARE_LOCK_TEST
+
+#define DECLARE_LOCK_TEST(type, realType)                                      \
+    REGISTER_TEST(immutable_buffer_map_##type)                                 \
+    {                                                                          \
+        return test_immutable_buffer_map(device, context, queue, num_elements, \
+                                         sizeof(realType), #type, 5);          \
+    }
+
+DECLARE_LOCK_TEST(int, cl_int)
+DECLARE_LOCK_TEST(uint, cl_uint)
+DECLARE_LOCK_TEST(long, cl_long)
+DECLARE_LOCK_TEST(ulong, cl_ulong)
+DECLARE_LOCK_TEST(short, cl_short)
+DECLARE_LOCK_TEST(ushort, cl_ushort)
+DECLARE_LOCK_TEST(char, cl_char)
+DECLARE_LOCK_TEST(uchar, cl_uchar)
+DECLARE_LOCK_TEST(float, cl_float)
+
+#undef DECLARE_LOCK_TEST
+
+
 REGISTER_TEST(buffer_map_read_struct)
 {
     int (*foo)(void *,int);
diff --git a/test_conformance/buffers/test_buffer_read.cpp b/test_conformance/buffers/test_buffer_read.cpp
index dbf39ab4..814dee45 100644
--- a/test_conformance/buffers/test_buffer_read.cpp
+++ b/test_conformance/buffers/test_buffer_read.cpp
@@ -666,6 +666,12 @@ static int test_buffer_read(cl_device_id deviceID, cl_context context,
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
+
             clMemWrapper buffer;
             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
             if ( ! outptr[i] ){
@@ -809,6 +815,12 @@ static int test_buffer_read_async(cl_device_id deviceID, cl_context context,
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
+
             clMemWrapper buffer;
             clEventWrapper event;
             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
@@ -946,6 +958,12 @@ static int test_buffer_read_array_barrier(
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
+
             clMemWrapper buffer;
             clEventWrapper event;
             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
diff --git a/test_conformance/buffers/test_buffer_write.cpp b/test_conformance/buffers/test_buffer_write.cpp
index 36dcc963..7c92dfd9 100644
--- a/test_conformance/buffers/test_buffer_write.cpp
+++ b/test_conformance/buffers/test_buffer_write.cpp
@@ -660,8 +660,18 @@ static int test_buffer_write(cl_device_id deviceID, cl_context context,
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
             for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
             {
+                // Skip immutable memory flags
+                if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
+                {
+                    continue;
+                }
                 clMemWrapper buffers[2];
 
                 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
@@ -834,8 +844,19 @@ REGISTER_TEST(buffer_write_struct)
 
         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
         {
+            // Skip immutable memory flags
+            if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
             for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
             {
+                // Skip immutable memory flags
+                if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
+                {
+                    continue;
+                }
+
                 clMemWrapper buffers[2];
 
                 inptr[i] = (TestStruct *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
@@ -996,7 +1017,17 @@ static int test_buffer_write_array_async(
     ptrSizes[4] = ptrSizes[3] << 1;
 
     for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
+        // Skip immutable memory flags
+        if (flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT)
+        {
+            continue;
+        }
         for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
+            // Skip immutable memory flags
+            if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
+            {
+                continue;
+            }
             log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
 
             loops = ( loops < 5 ? loops : 5 );
@@ -1974,3 +2005,256 @@ REGISTER_TEST(buffer_write_async_ulong)
 
 }   // end test_buffer_ulong_write_array_async()
 
+
+int immutable_test_buffer_write(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements,
+                                size_t size, const char *type, int loops,
+                                void *inptr[5], const char *kernelCode[],
+                                const char *kernelName[],
+                                int (*fn)(void *, void *, int), MTdataHolder &d)
+{
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+
+    void *outptr[5];
+    clProgramWrapper program[5];
+    clKernelWrapper kernel[5];
+    size_t ptrSizes[5];
+    size_t global_work_size[3];
+    cl_int err;
+    int i;
+    int src_flag_id, dst_flag_id;
+    int total_errors = 0;
+
+    size_t min_alignment = get_min_alignment(context);
+
+    global_work_size[0] = (size_t)num_elements;
+
+    ptrSizes[0] = size;
+    ptrSizes[1] = ptrSizes[0] << 1;
+    ptrSizes[2] = ptrSizes[1] << 1;
+    ptrSizes[3] = ptrSizes[2] << 1;
+    ptrSizes[4] = ptrSizes[3] << 1;
+
+    loops = (loops < 5 ? loops : 5);
+    for (i = 0; i < loops; i++)
+    {
+        err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
+                                          &kernelCode[i], kernelName[i]);
+        if (err)
+        {
+            log_error(" Error creating program for %s\n", type);
+            return TEST_FAIL;
+        }
+
+        for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
+        {
+            // Testing writing from immutable flags
+            if (!(flag_set[src_flag_id] & CL_MEM_IMMUTABLE_EXT))
+            {
+                continue;
+            }
+            for (dst_flag_id = 0; dst_flag_id < NUM_FLAGS; dst_flag_id++)
+            {
+                // Skip immutable memory flags
+                if (flag_set[dst_flag_id] & CL_MEM_IMMUTABLE_EXT)
+                {
+                    continue;
+                }
+
+                cl_mem_flags src_mem_flags = flag_set[src_flag_id];
+                cl_mem_flags dst_mem_flags = flag_set[dst_flag_id];
+                clMemWrapper buffers[2];
+
+                buffers[0] =
+                    clCreateBuffer(context, src_mem_flags,
+                                   ptrSizes[i] * num_elements, inptr[i], &err);
+
+                if (nullptr == buffers[0] || CL_SUCCESS != err)
+                {
+                    align_free(outptr[i]);
+                    print_error(err, " clCreateBuffer failed\n");
+                    return TEST_FAIL;
+                }
+                if (!strcmp(type, "half"))
+                {
+                    outptr[i] = align_malloc(ptrSizes[i] * (num_elements * 2),
+                                             min_alignment);
+                    buffers[1] = clCreateBuffer(context, dst_mem_flags,
+                                                ptrSizes[i] * 2 * num_elements,
+                                                outptr[i], &err);
+                }
+                else
+                {
+                    outptr[i] =
+                        align_malloc(ptrSizes[i] * num_elements, min_alignment);
+                    if ((dst_mem_flags & CL_MEM_USE_HOST_PTR)
+                        || (dst_mem_flags & CL_MEM_COPY_HOST_PTR))
+                        buffers[1] = clCreateBuffer(context, dst_mem_flags,
+                                                    ptrSizes[i] * num_elements,
+                                                    outptr[i], &err);
+                    else
+                        buffers[1] = clCreateBuffer(context, dst_mem_flags,
+                                                    ptrSizes[i] * num_elements,
+                                                    nullptr, &err);
+                }
+                if (err)
+                {
+                    align_free(outptr[i]);
+                    print_error(err, " clCreateBuffer failed\n");
+                    return TEST_FAIL;
+                }
+
+                err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem),
+                                     (void *)&buffers[0]);
+                err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem),
+                                      (void *)&buffers[1]);
+                if (err != CL_SUCCESS)
+                {
+                    align_free(outptr[i]);
+                    print_error(err, " clSetKernelArg failed");
+                    return TEST_FAIL;
+                }
+
+                err = clEnqueueNDRangeKernel(queue, kernel[i], 1, nullptr,
+                                             global_work_size, nullptr, 0,
+                                             nullptr, nullptr);
+                if (err != CL_SUCCESS)
+                {
+                    print_error(err, " clEnqueueNDRangeKernel failed");
+                    align_free(outptr[i]);
+                    return TEST_FAIL;
+                }
+
+                err = clEnqueueReadBuffer(queue, buffers[1], true, 0,
+                                          ptrSizes[i] * num_elements, outptr[i],
+                                          0, nullptr, nullptr);
+
+                if (err != CL_SUCCESS)
+                {
+                    align_free(outptr[i]);
+                    print_error(err, " clEnqueueReadBuffer failed");
+                    return TEST_FAIL;
+                }
+
+                if (fn(inptr[i], outptr[i],
+                       (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0])))
+                {
+                    log_error(
+                        " %s%d test failed. cl_mem_flags src: %s, dst: %s\n",
+                        type, 1 << i, flag_set_names[src_flag_id],
+                        flag_set_names[dst_flag_id]);
+                    total_errors++;
+                }
+                else
+                {
+                    log_info(
+                        " %s%d test passed. cl_mem_flags src: %s, dst: %s\n",
+                        type, 1 << i, flag_set_names[src_flag_id],
+                        flag_set_names[dst_flag_id]);
+                }
+                // cleanup
+                align_free(outptr[i]);
+            }
+        } // dst cl_mem_flag
+    } // src cl_mem_flag
+
+    return total_errors;
+
+} // end test_buffer_write()
+
+REGISTER_TEST(write_from_immutable_buffer_to_buffer)
+{
+    REQUIRE_EXTENSION("cl_ext_immutable_memory_objects");
+
+    static const char *immutable_buffer_write_int_kernel_code[] = {
+        R"(
+      __kernel void test_buffer_write_int(constant int *src, __global int *dst)
+      {
+        int  tid = get_global_id(0);
+
+        dst[tid] = src[tid];
+      })",
+
+        R"(
+      __kernel void test_buffer_write_int2(constant int2 *src, __global int2 *dst)
+      {
+        int  tid = get_global_id(0);
+
+        dst[tid] = src[tid];
+      })",
+
+        R"(
+      __kernel void test_buffer_write_int4(constant int4 *src, __global int4 *dst)
+      {
+        int  tid = get_global_id(0);
+
+        dst[tid] = src[tid];
+      })",
+
+        R"(
+      __kernel void test_buffer_write_int8(constant int8 *src, __global int8 *dst)
+      {
+        int  tid = get_global_id(0);
+
+        dst[tid] = src[tid];
+      })",
+
+        R"(
+      __kernel void test_buffer_write_int16(constant int16 *src, __global int16 *dst)
+      {
+        int  tid = get_global_id(0);
+
+        dst[tid] = src[tid];
+      })"
+    };
+
+    static const char *immutable_int_kernel_name[] = {
+        "test_buffer_write_int", "test_buffer_write_int2",
+        "test_buffer_write_int4", "test_buffer_write_int8",
+        "test_buffer_write_int16"
+    };
+
+    if (gTestMap)
+    {
+        log_error("Immutable buffers cannot be mapped with CL_MEM_WRITE\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    int *inptr[5];
+    size_t ptrSizes[5];
+    int i, err;
+    cl_uint j;
+    int (*foo)(void *, void *, int);
+    MTdataHolder d(gRandomSeed);
+
+    size_t min_alignment = get_min_alignment(context);
+
+    foo = verify_write_int;
+
+    ptrSizes[0] = sizeof(cl_int);
+    ptrSizes[1] = ptrSizes[0] << 1;
+    ptrSizes[2] = ptrSizes[1] << 1;
+    ptrSizes[3] = ptrSizes[2] << 1;
+    ptrSizes[4] = ptrSizes[3] << 1;
+
+    for (i = 0; i < 5; i++)
+    {
+        inptr[i] =
+            (int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
+
+        for (j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++)
+            inptr[i][j] = (int)genrand_int32(d);
+    }
+
+    err = immutable_test_buffer_write(device, context, queue, num_elements,
+                                      sizeof(cl_int), "int", 5, (void **)inptr,
+                                      immutable_buffer_write_int_kernel_code,
+                                      immutable_int_kernel_name, foo, d);
+
+    for (i = 0; i < 5; i++)
+    {
+        align_free((void *)inptr[i]);
+    }
+
+    return err;
+}
diff --git a/test_conformance/c11_atomics/common.cpp b/test_conformance/c11_atomics/common.cpp
index 3be3fbc1..7bb2da76 100644
--- a/test_conformance/c11_atomics/common.cpp
+++ b/test_conformance/c11_atomics/common.cpp
@@ -194,9 +194,9 @@ template<> cl_int AtomicTypeExtendedInfo<cl_int>::MinValue() {return CL_INT_MIN;
 template<> cl_uint AtomicTypeExtendedInfo<cl_uint>::MinValue() {return 0;}
 template<> cl_long AtomicTypeExtendedInfo<cl_long>::MinValue() {return CL_LONG_MIN;}
 template <> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MinValue() { return 0; }
-template <> cl_half AtomicTypeExtendedInfo<cl_half>::MinValue()
+template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MinValue()
 {
-    return cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode);
+    return -CL_HALF_MAX;
 }
 template <> cl_float AtomicTypeExtendedInfo<cl_float>::MinValue()
 {
@@ -217,9 +217,9 @@ template <> cl_uint AtomicTypeExtendedInfo<cl_uint>::MaxValue()
 }
 template<> cl_long AtomicTypeExtendedInfo<cl_long>::MaxValue() {return CL_LONG_MAX;}
 template<> cl_ulong AtomicTypeExtendedInfo<cl_ulong>::MaxValue() {return CL_ULONG_MAX;}
-template <> cl_half AtomicTypeExtendedInfo<cl_half>::MaxValue()
+template <> HostHalf AtomicTypeExtendedInfo<HostHalf>::MaxValue()
 {
-    return cl_half_from_float(CL_HALF_MAX, gHalfRoundingMode);
+    return CL_HALF_MAX;
 }
 template <> cl_float AtomicTypeExtendedInfo<cl_float>::MaxValue()
 {
diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index aee5173d..5d12b2ab 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -22,12 +22,14 @@
 
 #include "host_atomics.h"
 
-#include "CL/cl_half.h"
-
+#include <algorithm>
 #include <iomanip>
+#include <limits>
 #include <sstream>
 #include <vector>
 
+#include "CL/cl_half.h"
+
 #define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads)
 #define MAX_HOST_THREADS GetThreadCount()
 
@@ -76,6 +78,10 @@ extern int
 extern cl_device_atomic_capabilities gAtomicMemCap,
     gAtomicFenceCap; // atomic memory and fence capabilities for this device
 
+extern cl_device_fp_config gDoubleFPConfig;
+extern cl_device_fp_config gFloatFPConfig;
+extern cl_device_fp_config gHalfFPConfig;
+
 extern cl_half_rounding_mode gHalfRoundingMode;
 extern bool gFloatAtomicsSupported;
 extern cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps;
@@ -91,6 +97,37 @@ extern cl_int getSupportedMemoryOrdersAndScopes(
     cl_device_id device, std::vector<TExplicitMemoryOrderType> &memoryOrders,
     std::vector<TExplicitMemoryScopeType> &memoryScopes);
 
+union FloatIntUnion {
+    float f;
+    uint32_t i;
+};
+
+template <typename HostDataType> bool is_qnan(const HostDataType &value)
+{
+    if constexpr (std::is_same_v<HostDataType, float>)
+    {
+        FloatIntUnion u;
+        u.f = value;
+        if ((u.i & 0x7F800000) != 0x7F800000) return false;
+        return (u.i & 0x00400000) != 0;
+    }
+    else
+        return std::isnan(value);
+}
+
+template <typename HostDataType> bool is_snan(const HostDataType &value)
+{
+    if constexpr (std::is_same_v<HostDataType, float>)
+    {
+        FloatIntUnion u;
+        u.f = value;
+        if ((u.i & 0x7F800000) != 0x7F800000) return false;
+        return (u.i & 0x00400000) == 0;
+    }
+    else
+        return std::isnan(value);
+}
+
 class AtomicTypeInfo {
 public:
     TExplicitAtomicType _type;
@@ -154,12 +191,12 @@ public:
         return 0;
     }
     CBasicTest(TExplicitAtomicType dataType, bool useSVM)
-        : CTest(), _maxDeviceThreads(MAX_DEVICE_THREADS), _dataType(dataType),
-          _useSVM(useSVM), _startValue(255), _localMemory(false),
-          _declaredInProgram(false), _usedInFunction(false),
-          _genericAddrSpace(false), _oldValueCheck(true),
-          _localRefValues(false), _maxGroupSize(0), _passCount(0),
-          _iterations(gInternalIterations)
+        : CTest(), _dataType(dataType), _useSVM(useSVM), _startValue(255),
+          _localMemory(false), _declaredInProgram(false),
+          _usedInFunction(false), _genericAddrSpace(false),
+          _oldValueCheck(true), _localRefValues(false), _maxGroupSize(0),
+          _passCount(0), _iterations(gInternalIterations),
+          _maxDeviceThreads(MAX_DEVICE_THREADS), _deviceThreads(0)
     {}
     virtual ~CBasicTest()
     {
@@ -178,12 +215,15 @@ public:
     {
         return false;
     }
+
     virtual bool
     IsTestNotAsExpected(const HostDataType &expected,
                         const std::vector<HostAtomicType> &testValues,
+                        const std::vector<HostDataType> &startRefValues,
                         cl_uint whichDestValue)
     {
-        return expected != testValues[whichDestValue];
+        return expected
+            != static_cast<HostDataType>(testValues[whichDestValue]);
     }
     virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                               MTdata d)
@@ -239,12 +279,12 @@ public:
                                       cl_command_queue queue)
     {
         int error = 0;
-        DeclaredInProgram(false);
+        SetDeclaredInProgram(false);
         EXECUTE_TEST(error,
                      ExecuteForEachPointerType(deviceID, context, queue));
         if (!UseSVM())
         {
-            DeclaredInProgram(true);
+            SetDeclaredInProgram(true);
             EXECUTE_TEST(error,
                          ExecuteForEachPointerType(deviceID, context, queue));
         }
@@ -255,13 +295,13 @@ public:
                                            cl_command_queue queue)
     {
         int error = 0;
-        if (_maxDeviceThreads > 0 && !UseSVM())
+        if (_deviceThreads > 0 && !UseSVM())
         {
             SetLocalMemory(true);
             EXECUTE_TEST(
                 error, ExecuteForEachDeclarationType(deviceID, context, queue));
         }
-        if (_maxDeviceThreads + MaxHostThreads() > 0)
+        if (_deviceThreads + MaxHostThreads() > 0)
         {
             SetLocalMemory(false);
             EXECUTE_TEST(
@@ -270,7 +310,7 @@ public:
         return error;
     }
     virtual int Execute(cl_device_id deviceID, cl_context context,
-                        cl_command_queue queue, int num_elements)
+                        cl_command_queue queue, int num_elements) override
     {
         if (sizeof(HostAtomicType) != DataType().Size(deviceID))
         {
@@ -310,7 +350,12 @@ public:
             if (UseSVM()) return 0;
             _maxDeviceThreads = 0;
         }
-        if (_maxDeviceThreads + MaxHostThreads() == 0) return 0;
+
+        _deviceThreads = (num_elements > 0)
+            ? std::min(cl_uint(num_elements), _maxDeviceThreads)
+            : _maxDeviceThreads;
+
+        if (_deviceThreads + MaxHostThreads() == 0) return 0;
         return ExecuteForEachParameterSet(deviceID, context, queue);
     }
     virtual void HostFunction(cl_uint tid, cl_uint threadCount,
@@ -323,7 +368,7 @@ public:
     {
         return AtomicTypeExtendedInfo<HostDataType>(_dataType);
     }
-    cl_uint _maxDeviceThreads;
+
     virtual cl_uint MaxHostThreads()
     {
         if (UseSVM() || gHost)
@@ -420,7 +465,7 @@ public:
     HostDataType StartValue() { return _startValue; }
     void SetLocalMemory(bool local) { _localMemory = local; }
     bool LocalMemory() { return _localMemory; }
-    void DeclaredInProgram(bool declaredInProgram)
+    void SetDeclaredInProgram(bool declaredInProgram)
     {
         _declaredInProgram = declaredInProgram;
     }
@@ -477,6 +522,8 @@ private:
     cl_uint _currentGroupSize;
     cl_uint _passCount;
     const cl_int _iterations;
+    cl_uint _maxDeviceThreads;
+    cl_uint _deviceThreads;
 };
 
 template <typename HostAtomicType, typename HostDataType>
@@ -702,6 +749,28 @@ public:
                                            cl_context context,
                                            cl_command_queue queue)
     {
+        // Comparator for orders and scopes.
+        const auto checkValidity = [](TExplicitMemoryOrderType success,
+                                      TExplicitMemoryOrderType failure,
+                                      TExplicitMemoryScopeType scope) {
+            // Both memory order arguments must be set (or neither).
+            if ((success == MEMORY_ORDER_EMPTY || failure == MEMORY_ORDER_EMPTY)
+                && success != failure)
+                return false;
+
+            // Memory scope without memory order is disallowed.
+            if (success == MEMORY_ORDER_EMPTY && scope != MEMORY_SCOPE_EMPTY)
+                return false;
+
+            // Failure must not be release or acq_rel.
+            if (failure == MEMORY_ORDER_RELEASE
+                || failure == MEMORY_ORDER_ACQ_REL)
+                return false;
+
+            // Failure must not be stronger than success.
+            return failure <= success;
+        };
+
         // repeat test for each reasonable memory order/scope combination
         std::vector<TExplicitMemoryOrderType> memoryOrder;
         std::vector<TExplicitMemoryScopeType> memoryScope;
@@ -719,16 +788,10 @@ public:
             {
                 for (unsigned si = 0; si < memoryScope.size(); si++)
                 {
-                    if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY
-                         || memoryOrder[o2i] == MEMORY_ORDER_EMPTY)
-                        && memoryOrder[oi] != memoryOrder[o2i])
-                        continue; // both memory order arguments must be set (or
-                                  // none)
-                    if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY
-                         || memoryOrder[o2i] == MEMORY_ORDER_EMPTY)
-                        && memoryScope[si] != MEMORY_SCOPE_EMPTY)
-                        continue; // memory scope without memory order is not
-                                  // allowed
+                    if (!checkValidity(memoryOrder[oi], memoryOrder[o2i],
+                                       memoryScope[si]))
+                        continue;
+
                     MemoryOrder(memoryOrder[oi]);
                     MemoryOrder2(memoryOrder[o2i]);
                     MemoryScope(memoryScope[si]);
@@ -895,12 +958,25 @@ CBasicTest<HostAtomicType, HostDataType>::ProgramHeader(cl_uint maxNumDestItems)
             + ss.str() + "] = {\n";
         ss.str("");
 
-        if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
-            == TYPE_ATOMIC_FLOAT)
-            ss << std::setprecision(10) << _startValue;
-        else if (CBasicTest<HostAtomicType, HostDataType>::DataType()._type
-                 == TYPE_ATOMIC_HALF)
-            ss << cl_half_to_float(static_cast<cl_half>(_startValue));
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            if (std::isinf(_startValue))
+                ss << (_startValue < 0 ? "-" : "") << "INFINITY";
+            else if (std::isnan(_startValue))
+                ss << "0.0 / 0.0";
+            else
+                ss << std::setprecision(
+                    std::numeric_limits<HostDataType>::max_digits10)
+                   << _startValue;
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            ss << std::setprecision(std::numeric_limits<float>::max_digits10)
+               << cl_half_to_float(_startValue);
+        }
         else
             ss << _startValue;
 
@@ -1137,7 +1213,7 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
     MTdata d;
     size_t typeSize = DataType().Size(deviceID);
 
-    deviceThreadCount = _maxDeviceThreads;
+    deviceThreadCount = _deviceThreads;
     hostThreadCount = MaxHostThreads();
     threadCount = deviceThreadCount + hostThreadCount;
 
@@ -1202,9 +1278,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
             programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems)
                 + FunctionCode() + KernelCode(numDestItems);
             programLine = programSource.c_str();
-            if (create_single_kernel_helper_with_build_options(
-                    context, &program, &kernel, 1, &programLine,
-                    "test_atomic_kernel", gOldAPI ? "" : nullptr))
+            if (create_single_kernel_helper(context, &program, &kernel, 1,
+                                            &programLine, "test_atomic_kernel"))
             {
                 return -1;
             }
@@ -1289,7 +1364,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
     numDestItems = NumResults(threadCount, deviceID);
 
     destItems.resize(numDestItems);
-    for (cl_uint i = 0; i < numDestItems; i++) destItems[i] = _startValue;
+    for (cl_uint i = 0; i < numDestItems; i++)
+        destItems[i] = static_cast<HostAtomicType>(_startValue);
 
     // Create main buffer with atomic variables (array size dependent on
     // particular test)
@@ -1462,12 +1538,13 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
                            startRefValues.size() ? &startRefValues[0] : 0, i))
             break; // no expected value function provided
 
-        if (IsTestNotAsExpected(expected, destItems, i))
+        if (IsTestNotAsExpected(expected, destItems, startRefValues, i))
         {
             std::stringstream logLine;
             logLine << "ERROR: Result " << i
                     << " from kernel does not validate! (should be " << expected
-                    << ", was " << destItems[i] << ")\n";
+                    << ", was " << static_cast<HostDataType>(destItems[i])
+                    << ")\n";
             log_error("%s", logLine.str().c_str());
             for (i = 0; i < threadCount; i++)
             {
@@ -1534,7 +1611,8 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
                                  // clEnqueueNDRangeKernel
     {
         /* Re-write the starting value */
-        for (size_t i = 0; i < numDestItems; i++) destItems[i] = _startValue;
+        for (size_t i = 0; i < numDestItems; i++)
+            destItems[i] = static_cast<HostAtomicType>(_startValue);
         refValues[0] = 0;
         if (deviceThreadCount > 0)
         {
diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h
index 9a33f26c..4a65d8da 100644
--- a/test_conformance/c11_atomics/host_atomics.h
+++ b/test_conformance/c11_atomics/host_atomics.h
@@ -18,12 +18,15 @@
 
 #include "harness/testHarness.h"
 #include <mutex>
+
 #include "CL/cl_half.h"
 
 #ifdef WIN32
 #include "Windows.h"
 #endif
 
+extern cl_half_rounding_mode gHalfRoundingMode;
+
 //flag for test verification (good test should discover non-atomic functions and fail)
 //#define NON_ATOMIC_FUNCTIONS
 
@@ -37,6 +40,93 @@ enum TExplicitMemoryOrderType
   MEMORY_ORDER_SEQ_CST
 };
 
+// Wrapper class for half-precision
+class HostHalf {
+public:
+    // Convert from semantic values
+    HostHalf(cl_uint value = 0)
+        : value(
+            cl_half_from_float(static_cast<float>(value), gHalfRoundingMode))
+    {}
+    HostHalf(int value): HostHalf(static_cast<cl_uint>(value)) {}
+    HostHalf(float value): value(cl_half_from_float(value, gHalfRoundingMode))
+    {}
+    HostHalf(double value): HostHalf(static_cast<float>(value)) {}
+
+    // Convert to semantic values
+    operator cl_uint() const
+    {
+        return static_cast<cl_uint>(cl_half_to_float(value));
+    }
+    operator float() const { return cl_half_to_float(value); }
+    operator double() const
+    {
+        return static_cast<double>(cl_half_to_float(value));
+    }
+
+    // Construct from bit representation
+    HostHalf(cl_half value): value(value) {}
+
+    // Get the underlying bit representation
+    operator cl_half() const { return value; }
+
+    HostHalf operator-() const
+    {
+        return HostHalf(
+            cl_half_from_float(-cl_half_to_float(value), gHalfRoundingMode));
+    }
+
+#define GENERIC_OP(RetType, op)                                                \
+    RetType operator op(const HostHalf &other) const                           \
+    {                                                                          \
+        return RetType(cl_half_to_float(value)                                 \
+                           op cl_half_to_float(other.value));                  \
+    }
+
+    GENERIC_OP(bool, ==)
+    GENERIC_OP(bool, !=)
+    GENERIC_OP(bool, <)
+    GENERIC_OP(bool, <=)
+    GENERIC_OP(bool, >)
+    GENERIC_OP(bool, >=)
+    GENERIC_OP(HostHalf, +)
+    GENERIC_OP(HostHalf, -)
+    GENERIC_OP(HostHalf, *)
+    GENERIC_OP(HostHalf, /)
+#undef GENERIC_OP
+
+#define INPLACE_OP(op)                                                         \
+    HostHalf &operator op##=(const HostHalf &other)                            \
+    {                                                                          \
+        value = cl_half_from_float(cl_half_to_float(value)                     \
+                                       op cl_half_to_float(other.value),       \
+                                   gHalfRoundingMode);                         \
+        return *this;                                                          \
+    }
+    INPLACE_OP(+)
+    INPLACE_OP(-)
+    INPLACE_OP(*)
+    INPLACE_OP(/)
+#undef INPLACE_OP
+
+    friend std::ostream &operator<<(std::ostream &os, const HostHalf &hh)
+    {
+        float f = cl_half_to_float(hh.value);
+        os << f;
+        return os;
+    }
+
+private:
+    cl_half value;
+};
+
+namespace std {
+inline HostHalf abs(const HostHalf &value)
+{
+    return value < HostHalf(0) ? -value : value;
+}
+} // namespace std
+
 // host atomic types (applicable for atomic functions supported on host OS)
 #ifdef WIN32
 #define HOST_ATOMIC_INT         unsigned long
@@ -73,7 +163,7 @@ enum TExplicitMemoryOrderType
 #define HOST_UINT               cl_uint
 #define HOST_LONG               cl_long
 #define HOST_ULONG              cl_ulong
-#define HOST_HALF cl_half
+#define HOST_HALF HostHalf
 #define HOST_FLOAT              cl_float
 #define HOST_DOUBLE             cl_double
 
@@ -91,6 +181,18 @@ enum TExplicitMemoryOrderType
 
 extern cl_half_rounding_mode gHalfRoundingMode;
 
+template <typename HostAtomicType>
+constexpr bool is_host_atomic_fp_v =
+    std::disjunction_v<std::is_same<HostAtomicType, HOST_ATOMIC_HALF>,
+                       std::is_same<HostAtomicType, HOST_ATOMIC_FLOAT>,
+                       std::is_same<HostAtomicType, HOST_ATOMIC_DOUBLE>>;
+
+template <typename HostDataType>
+constexpr bool is_host_fp_v =
+    std::disjunction_v<std::is_same<HostDataType, HOST_HALF>,
+                       std::is_same<HostDataType, HOST_FLOAT>,
+                       std::is_same<HostDataType, HOST_DOUBLE>>;
+
 // host atomic functions
 void host_atomic_thread_fence(TExplicitMemoryOrderType order);
 
@@ -98,24 +200,13 @@ template <typename AtomicType, typename CorrespondingType>
 CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c,
                                         TExplicitMemoryOrderType order)
 {
-    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
+    if constexpr (is_host_atomic_fp_v<AtomicType>)
     {
         static std::mutex mx;
         std::lock_guard<std::mutex> lock(mx);
         CorrespondingType old_value = *a;
-        *a = cl_half_from_float((cl_half_to_float(*a) + cl_half_to_float(c)),
-                                gHalfRoundingMode);
-        return old_value;
-    }
-    else if constexpr (
-        std::is_same_v<
-            AtomicType,
-            HOST_ATOMIC_FLOAT> || std::is_same_v<AtomicType, HOST_ATOMIC_DOUBLE>)
-    {
-        static std::mutex mx;
-        std::lock_guard<std::mutex> lock(mx);
-        CorrespondingType old_value = *a;
-        *a += c;
+        CorrespondingType new_value = old_value + c;
+        *a = static_cast<AtomicType>(new_value);
         return old_value;
     }
     else
@@ -135,21 +226,13 @@ template <typename AtomicType, typename CorrespondingType>
 CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c,
                                         TExplicitMemoryOrderType order)
 {
-    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
+    if constexpr (is_host_atomic_fp_v<AtomicType>)
     {
         static std::mutex mx;
         std::lock_guard<std::mutex> lock(mx);
         CorrespondingType old_value = *a;
-        *a -= c;
-        return old_value;
-    }
-    else if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
-    {
-        static std::mutex mx;
-        std::lock_guard<std::mutex> lock(mx);
-        CorrespondingType old_value = *a;
-        *a = cl_half_from_float((cl_half_to_float(*a) - cl_half_to_float(c)),
-                                gHalfRoundingMode);
+        CorrespondingType new_value = old_value - c;
+        *a = static_cast<AtomicType>(new_value);
         return old_value;
     }
     else
@@ -170,12 +253,14 @@ CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType
                                        TExplicitMemoryOrderType order)
 {
 #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
-    if (sizeof(CorrespondingType) == 2)
-        return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a), c);
+    if constexpr (sizeof(CorrespondingType) == 2)
+        return InterlockedExchange16(reinterpret_cast<volatile SHORT *>(a),
+                                     *reinterpret_cast<SHORT *>(&c));
     else
-        return InterlockedExchange(reinterpret_cast<volatile LONG *>(a), c);
+        return InterlockedExchange(reinterpret_cast<volatile LONG *>(a),
+                                   *reinterpret_cast<LONG *>(&c));
 #elif defined(__GNUC__)
-    return __sync_lock_test_and_set(a, c);
+    return __sync_lock_test_and_set(a, *reinterpret_cast<AtomicType *>(&c));
 #else
   log_info("Host function not implemented: atomic_exchange\n");
   return 0;
@@ -192,30 +277,14 @@ bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *exp
                                   TExplicitMemoryOrderType order_failure)
 {
     CorrespondingType tmp;
-    if constexpr (std::is_same_v<AtomicType, HOST_ATOMIC_HALF>)
+    if constexpr (is_host_atomic_fp_v<AtomicType>)
     {
         static std::mutex mtx;
         std::lock_guard<std::mutex> lock(mtx);
-        tmp = *reinterpret_cast<volatile cl_half *>(a);
-
-        if (cl_half_to_float(tmp) == cl_half_to_float(*expected))
-        {
-            *reinterpret_cast<volatile cl_half *>(a) = desired;
-            return true;
-        }
-        *expected = tmp;
-    }
-    else if constexpr (
-        std::is_same_v<
-            AtomicType,
-            HOST_ATOMIC_DOUBLE> || std::is_same_v<AtomicType, HOST_ATOMIC_FLOAT>)
-    {
-        static std::mutex mtx;
-        std::lock_guard<std::mutex> lock(mtx);
-        tmp = *reinterpret_cast<volatile float *>(a);
+        tmp = static_cast<CorrespondingType>(*a);
         if (tmp == *expected)
         {
-            *a = desired;
+            *a = static_cast<AtomicType>(desired);
             return true;
         }
         *expected = tmp;
@@ -241,8 +310,8 @@ CorrespondingType host_atomic_load(volatile AtomicType *a,
                                    TExplicitMemoryOrderType order)
 {
 #if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
-    if (sizeof(CorrespondingType) == 2)
-        auto prev = InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0);
+    if constexpr (sizeof(CorrespondingType) == 2)
+        return InterlockedOr16(reinterpret_cast<volatile SHORT *>(a), 0);
     else
         return InterlockedExchangeAdd(reinterpret_cast<volatile LONG *>(a), 0);
 #elif defined(__GNUC__)
diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp
index e2f1888f..ee83dfdb 100644
--- a/test_conformance/c11_atomics/main.cpp
+++ b/test_conformance/c11_atomics/main.cpp
@@ -31,11 +31,14 @@ int gInternalIterations = 10000; // internal test iterations for atomic operatio
 int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device
 cl_device_atomic_capabilities gAtomicMemCap,
     gAtomicFenceCap; // atomic memory and fence capabilities for this device
+cl_device_fp_config gDoubleFPConfig = 0;
+cl_device_fp_config gFloatFPConfig = 0;
 cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
 bool gFloatAtomicsSupported = false;
 cl_device_fp_atomic_capabilities_ext gHalfAtomicCaps = 0;
 cl_device_fp_atomic_capabilities_ext gDoubleAtomicCaps = 0;
 cl_device_fp_atomic_capabilities_ext gFloatAtomicCaps = 0;
+cl_device_fp_config gHalfFPConfig = 0;
 
 test_status InitCL(cl_device_id device) {
     auto version = get_device_cl_version(device);
@@ -134,13 +137,17 @@ test_status InitCL(cl_device_id device) {
     if (is_extension_available(device, "cl_ext_float_atomics"))
     {
         gFloatAtomicsSupported = true;
-
         if (is_extension_available(device, "cl_khr_fp64"))
         {
             cl_int error = clGetDeviceInfo(
                 device, CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
                 sizeof(gDoubleAtomicCaps), &gDoubleAtomicCaps, nullptr);
             test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
+
+            error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
+                                    sizeof(gDoubleFPConfig), &gDoubleFPConfig,
+                                    NULL);
+            test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
         }
 
         cl_int error = clGetDeviceInfo(
@@ -148,6 +155,13 @@ test_status InitCL(cl_device_id device) {
             sizeof(gFloatAtomicCaps), &gFloatAtomicCaps, nullptr);
         test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
 
+        error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
+                                sizeof(gFloatFPConfig), &gFloatFPConfig, NULL);
+        test_error_ret(
+            error,
+            "Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)",
+            TEST_FAIL);
+
         if (is_extension_available(device, "cl_khr_fp16"))
         {
             cl_int error = clGetDeviceInfo(
@@ -170,6 +184,11 @@ test_status InitCL(cl_device_id device) {
                 log_error("Error while acquiring half rounding mode\n");
                 return TEST_FAIL;
             }
+
+            error =
+                clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG,
+                                sizeof(gHalfFPConfig), &gHalfFPConfig, NULL);
+            test_error_ret(error, "clGetDeviceInfo failed!", TEST_FAIL);
         }
     }
 
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index f46520ca..d3dd3703 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -94,13 +94,7 @@ public:
                                HostDataType *startRefValues,
                                cl_uint whichDestValue)
     {
-        if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
-                ._type
-            != TYPE_ATOMIC_HALF)
-            expected = (HostDataType)whichDestValue;
-        else
-            expected = cl_half_from_float(static_cast<float>(whichDestValue),
-                                          gHalfRoundingMode);
+        expected = static_cast<HostDataType>(whichDestValue);
         return true;
     }
 };
@@ -401,13 +395,7 @@ public:
                                HostDataType *startRefValues,
                                cl_uint whichDestValue)
     {
-        if (CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType()
-                ._type
-            != TYPE_ATOMIC_HALF)
-            expected = (HostDataType)whichDestValue;
-        else
-            expected = cl_half_from_float(static_cast<float>(whichDestValue),
-                                          gHalfRoundingMode);
+        expected = static_cast<HostDataType>(whichDestValue);
         return true;
     }
     virtual bool VerifyRefs(bool &correct, cl_uint threadCount,
@@ -417,25 +405,11 @@ public:
         correct = true;
         for (cl_uint i = 0; i < threadCount; i++)
         {
-            if constexpr (std::is_same_v<HostDataType, cl_half>)
+            if (refValues[i] != (HostDataType)i)
             {
-                HostDataType test = cl_half_from_float(static_cast<float>(i),
-                                                       gHalfRoundingMode);
-                if (refValues[i] != test)
-                {
-                    log_error("Invalid value for thread %u\n", (cl_uint)i);
-                    correct = false;
-                    return true;
-                }
-            }
-            else
-            {
-                if (refValues[i] != (HostDataType)i)
-                {
-                    log_error("Invalid value for thread %u\n", (cl_uint)i);
-                    correct = false;
-                    return true;
-                }
+                log_error("Invalid value for thread %u\n", (cl_uint)i);
+                correct = false;
+                return true;
             }
         }
         return true;
@@ -553,11 +527,7 @@ public:
         : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
                                                                 useSVM)
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-            StartValue(cl_half_from_float(static_cast<float>(1234),
-                                          gHalfRoundingMode));
-        else
-            StartValue(123456);
+        StartValue(1234);
     }
     virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                                   cl_command_queue queue)
@@ -619,32 +589,19 @@ public:
         /* Any repeated value is treated as an error */
         std::vector<bool> tidFound(threadCount);
         bool startValueFound = false;
-        cl_uint startVal = StartValue();
-
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-            startVal = static_cast<cl_uint>(
-                cl_half_to_float(static_cast<cl_half>(StartValue())));
+        cl_uint startVal = static_cast<cl_uint>(StartValue());
 
         for (cl_uint i = 0; i <= threadCount; i++)
         {
             cl_uint value = 0;
             if (i == threadCount)
             {
-                if constexpr (!std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-                    value =
-                        (cl_uint)finalValues[0]; // additional value from atomic
-                                                 // variable (last written)
-                else
-                    value =
-                        cl_half_to_float(static_cast<cl_half>(finalValues[0]));
+                value = static_cast<cl_uint>(
+                    static_cast<HostDataType>(finalValues[0]));
             }
             else
             {
-                if constexpr (!std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
-                    value = (cl_uint)refValues[i];
-                else
-                    value =
-                        cl_half_to_float(static_cast<cl_half>(refValues[i]));
+                value = static_cast<cl_uint>(refValues[i]);
             }
 
             if (value == startVal)
@@ -1201,85 +1158,24 @@ public:
                                                                 useSVM),
           min_range(-999.0), max_range(999.0), max_error(0.0)
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             StartValue((HostDataType)0.0);
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
+
+            // Narrow down range for half to avoid overflow to infinity
+            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+            {
+                min_range = -50.0;
+                max_range = 50.0;
+            }
         }
     }
-    template <typename Iterator> float accum_halfs(Iterator begin, Iterator end)
-    {
-        cl_half sum = 0;
-        for (auto it = begin; it != end; ++it)
-        {
-            sum = cl_half_from_float(cl_half_to_float(sum)
-                                         + cl_half_to_float(*it),
-                                     gHalfRoundingMode);
-        }
-        return cl_half_to_float(sum);
-    }
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (threadCount > ref_vals.size())
-            {
-                ref_vals.resize(threadCount);
-
-                for (cl_uint i = 0; i < threadCount; i++)
-                    ref_vals[i] = cl_half_from_float(
-                        get_random_float(min_range, max_range, d),
-                        gHalfRoundingMode);
-
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * ref_vals.size());
-
-                // Estimate highest possible summation error for given set.
-                std::vector<float> sums;
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return cl_half_to_float(a) < cl_half_to_float(b);
-                          });
-
-                sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
-
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return std::abs(cl_half_to_float(a))
-                                  < std::abs(cl_half_to_float(b));
-                          });
-
-                float precise = 0.f;
-                for (auto elem : ref_vals) precise += cl_half_to_float(elem);
-                sums.push_back(precise);
-
-                sums.push_back(accum_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(accum_halfs(ref_vals.rbegin(), ref_vals.rend()));
-
-                std::sort(sums.begin(), sums.end());
-                max_error = std::abs(sums.front() - sums.back());
-
-                // restore unsorted order
-                memcpy(ref_vals.data(), startRefValues,
-                       sizeof(HostDataType) * ref_vals.size());
-            }
-            else
-            {
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * threadCount);
-            }
-            return true;
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (threadCount > ref_vals.size())
             {
@@ -1299,11 +1195,12 @@ public:
                 std::vector<HostDataType> sums;
                 std::sort(ref_vals.begin(), ref_vals.end());
 
-                sums.push_back(
-                    std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
+                                               static_cast<HostDataType>(0.f)));
 
-                sums.push_back(
-                    std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.rbegin(),
+                                               ref_vals.rend(),
+                                               static_cast<HostDataType>(0.f)));
 
                 std::sort(ref_vals.begin(), ref_vals.end(),
                           [](HostDataType a, HostDataType b) {
@@ -1318,15 +1215,25 @@ public:
 
                 sums.push_back(precise);
 
-                sums.push_back(
-                    std::accumulate(ref_vals.begin(), ref_vals.end(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.begin(), ref_vals.end(),
+                                               static_cast<HostDataType>(0.f)));
 
-                sums.push_back(
-                    std::accumulate(ref_vals.rbegin(), ref_vals.rend(), 0.f));
+                sums.push_back(std::accumulate(ref_vals.rbegin(),
+                                               ref_vals.rend(),
+                                               static_cast<HostDataType>(0.f)));
 
                 std::sort(sums.begin(), sums.end());
+                assert(std::all_of(sums.begin(), sums.end(),
+                                   [](const HostDataType &val) {
+                                       return std::isfinite(
+                                           static_cast<double>(val));
+                                   })
+                       && "Infinite summation value detected!");
                 max_error = std::abs(sums.front() - sums.back());
 
+                log_info("Max allowed error for %u elements: %.10f\n",
+                         threadCount, max_error);
+
                 // restore unsorted order
                 memcpy(ref_vals.data(), startRefValues,
                        sizeof(HostDataType) * ref_vals.size());
@@ -1345,10 +1252,7 @@ public:
         std::string memoryOrderScope = MemoryOrderScopeStr();
         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
 
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return "  atomic_fetch_add" + postfix + "(&destMemory[0], ("
                 + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1382,10 +1286,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_add(&destMemory[0], (HostDataType)oldValues[tid],
                                   MemoryOrder());
@@ -1411,23 +1312,7 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    expected = cl_half_from_float(
-                        cl_half_to_float(expected)
-                            + cl_half_to_float(startRefValues[i]),
-                        gHalfRoundingMode);
-                }
-            }
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
@@ -1444,37 +1329,28 @@ public:
     }
     bool IsTestNotAsExpected(const HostDataType &expected,
                              const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
                              cl_uint whichDestValue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
-                return std::abs(cl_half_to_float(expected)
-                                - cl_half_to_float(testValues[whichDestValue]))
-                    > max_error;
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
-        {
-            if (whichDestValue == 0)
-                return std::abs((HostDataType)expected
-                                - testValues[whichDestValue])
+                return std::abs(
+                           static_cast<double>(expected
+                                               - static_cast<HostDataType>(
+                                                   testValues[whichDestValue])))
                     > max_error;
         }
         return CBasicTestMemOrderScope<
             HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                                testValues,
+                                                               startRefValues,
                                                                whichDestValue);
     }
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                     HostAtomicType *finalValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             correct = true;
             for (cl_uint i = 1; i < threadCount; i++)
@@ -1497,7 +1373,7 @@ public:
     int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                           cl_command_queue queue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_ATOMIC_HALF>)
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
         {
             if (LocalMemory()
                 && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1533,6 +1409,338 @@ public:
                                                              queue);
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
+    {
+        if constexpr (is_host_fp_v<HostDataType>)
+        {
+            return threadCount;
+        }
+        return CBasicTestMemOrderScope<HostAtomicType,
+                                       HostDataType>::NumResults(threadCount,
+                                                                 deviceID);
+    }
+};
+
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchAddSpecialFloats
+    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
+
+    std::vector<HostDataType> ref_vals;
+
+public:
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+    using CBasicTestMemOrderScope<HostAtomicType,
+                                  HostDataType>::MemoryOrderScopeStr;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
+    using CBasicTestMemOrderScope<HostAtomicType,
+                                  HostDataType>::DeclaredInProgram;
+    CBasicTestFetchAddSpecialFloats(TExplicitAtomicType dataType, bool useSVM)
+        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+                                                                useSVM)
+    {
+        // StartValue is used as an index divisor in the following test
+        // logic. It is set to the number of special values, which allows
+        // threads to be mapped deterministically onto the input data array.
+        // This enables repeated add operations arranged so that every
+        // special value is added to every other one (“all-to-all”).
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            auto spec_vals = GetSpecialValues();
+            StartValue(spec_vals.size());
+            CBasicTestMemOrderScope<HostAtomicType,
+                                    HostDataType>::OldValueCheck(false);
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            auto spec_vals = GetSpecialValues();
+            StartValue(cl_half_from_float(spec_vals.size(), gHalfRoundingMode));
+            CBasicTestMemOrderScope<HostAtomicType,
+                                    HostDataType>::OldValueCheck(false);
+        }
+    }
+
+    static std::vector<HostDataType> &GetSpecialValues()
+    {
+        static std::vector<HostDataType> special_values;
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            const HostDataType test_value_zero =
+                static_cast<HostDataType>(0.0f);
+            const HostDataType test_value_minus_zero =
+                static_cast<HostDataType>(-0.0f);
+            const HostDataType test_value_without_fraction =
+                static_cast<HostDataType>(2.0f);
+            const HostDataType test_value_with_fraction =
+                static_cast<HostDataType>(2.2f);
+
+            if (special_values.empty())
+            {
+                special_values = {
+                    static_cast<HostDataType>(test_value_minus_zero),
+                    static_cast<HostDataType>(test_value_zero),
+                    static_cast<HostDataType>(test_value_without_fraction),
+                    static_cast<HostDataType>(test_value_with_fraction),
+                    std::numeric_limits<HostDataType>::infinity(),
+                    std::numeric_limits<HostDataType>::quiet_NaN(),
+                    std::numeric_limits<HostDataType>::signaling_NaN(),
+                    -std::numeric_limits<HostDataType>::infinity(),
+                    -std::numeric_limits<HostDataType>::quiet_NaN(),
+                    -std::numeric_limits<HostDataType>::signaling_NaN(),
+                    std::numeric_limits<HostDataType>::lowest(),
+                    std::numeric_limits<HostDataType>::min(),
+                    std::numeric_limits<HostDataType>::max(),
+                };
+
+                if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
+                {
+                    if (0 != (CL_FP_DENORM & gDoubleFPConfig))
+                    {
+                        special_values.push_back(
+                            std::numeric_limits<HostDataType>::denorm_min());
+                    }
+                }
+                else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+                {
+                    if (0 != (CL_FP_DENORM & gFloatFPConfig))
+                    {
+                        special_values.push_back(
+                            std::numeric_limits<HostDataType>::denorm_min());
+                    }
+                }
+            }
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (special_values.empty())
+            {
+                special_values = {
+                    0xffff, 0x0000, 0x7c00, /*INFINITY*/
+                    0xfc00, /*-INFINITY*/
+                    0x8000, /*-0*/
+                    0x7bff, /*HALF_MAX*/
+                    0x0400, /*HALF_MIN*/
+                    0x3c00, /* 1 */
+                    0xbc00, /* -1 */
+                    0x3555, /*nearest value to 1/3*/
+                    0x3bff, /*largest number less than one*/
+                    0xc000, /* -2 */
+                    0xfbff, /* -HALF_MAX */
+                    0x8400, /* -HALF_MIN */
+                    0x4248, /* M_PI_H */
+                    0xc248, /* -M_PI_H */
+                    0xbbff, /* Largest negative fraction */
+                };
+
+                if (0 != (CL_FP_DENORM & gHalfFPConfig))
+                {
+                    special_values.push_back(0x0001 /* Smallest denormal */);
+                    special_values.push_back(0x03ff /* Largest denormal */);
+                }
+            }
+        }
+        return special_values;
+    }
+
+    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
+                      MTdata d) override
+    {
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            if (threadCount > ref_vals.size())
+            {
+                ref_vals.assign(threadCount, 0);
+                auto spec_vals = GetSpecialValues();
+
+                cl_uint total_cnt = 0;
+                while (total_cnt < threadCount)
+                {
+                    cl_uint block_cnt =
+                        std::min((cl_int)(threadCount - total_cnt),
+                                 (cl_int)spec_vals.size());
+                    memcpy(&ref_vals.at(total_cnt), spec_vals.data(),
+                           sizeof(HostDataType) * block_cnt);
+                    total_cnt += block_cnt;
+                }
+            }
+
+            memcpy(startRefValues, ref_vals.data(),
+                   sizeof(HostDataType) * threadCount);
+
+            return true;
+        }
+        return false;
+    }
+    std::string ProgramCore() override
+    {
+        std::string memoryOrderScope = MemoryOrderScopeStr();
+        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            // The start_value variable (set by StartValue) is used
+            // as a divisor of the thread index when selecting the operand for
+            // atomic_fetch_add. This groups threads into blocks corresponding
+            // to the number of special values and implements an “all-to-all”
+            // addition pattern. As a result, each destination element is
+            // updated using different combinations of input values, enabling
+            // consistent comparison between host and device execution.
+
+            return std::string(DataType().AddSubOperandTypeName())
+                + " start_value = atomic_load_explicit(destMemory+tid, "
+                  "memory_order_relaxed, memory_scope_work_group);\n"
+                  "  atomic_store_explicit(destMemory+tid, oldValues[tid], "
+                  "memory_order_relaxed, memory_scope_work_group);\n"
+                  "  atomic_fetch_add"
+                + postfix + "(&destMemory[tid], ("
+                + DataType().AddSubOperandTypeName()
+                + ")oldValues[tid/(int)start_value]" + memoryOrderScope
+                + ");\n";
+        }
+    }
+    void HostFunction(cl_uint tid, cl_uint threadCount,
+                      volatile HostAtomicType *destMemory,
+                      HostDataType *oldValues) override
+    {
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_HALF> || std::is_same_v<HostDataType, HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            auto spec_vals = GetSpecialValues();
+            host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
+                              MEMORY_ORDER_SEQ_CST);
+            host_atomic_fetch_add(
+                &destMemory[tid],
+                (HostDataType)oldValues[tid / spec_vals.size()], MemoryOrder());
+        }
+    }
+    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+                       HostDataType *startRefValues,
+                       cl_uint whichDestValue) override
+    {
+        expected = StartValue();
+        if constexpr (
+            std::is_same_v<
+                HostDataType,
+                HOST_DOUBLE> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            auto spec_vals = GetSpecialValues();
+            expected = startRefValues[whichDestValue]
+                + startRefValues[whichDestValue / spec_vals.size()];
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            auto spec_vals = GetSpecialValues();
+            expected = cl_half_from_float(
+                cl_half_to_float(startRefValues[whichDestValue])
+                    + cl_half_to_float(
+                        startRefValues[whichDestValue / spec_vals.size()]),
+                gHalfRoundingMode);
+        }
+
+        return true;
+    }
+    bool IsTestNotAsExpected(const HostDataType &expected,
+                             const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
+                             cl_uint whichDestValue) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            return static_cast<cl_half>(expected) != testValues[whichDestValue];
+        }
+        else
+        {
+            if (std::isnan(testValues[whichDestValue]) && std::isnan(expected))
+                return false;
+            else
+                return expected != testValues[whichDestValue];
+        }
+
+        return CBasicTestMemOrderScope<
+            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
+                                                               testValues,
+                                                               startRefValues,
+                                                               whichDestValue);
+    }
+    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+                          cl_command_queue queue) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
+        {
+            if (LocalMemory()
+                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
+                    == 0)
+                return 0;
+
+            if (!CBasicTestMemOrderScope<HostAtomicType,
+                                         HostDataType>::LocalMemory()
+                && CBasicTestMemOrderScope<HostAtomicType,
+                                           HostDataType>::DeclaredInProgram())
+            {
+                if ((gDoubleFPConfig & CL_FP_INF_NAN) == 0) return 0;
+            }
+        }
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            if (LocalMemory()
+                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0;
+
+            if (!CBasicTestMemOrderScope<HostAtomicType,
+                                         HostDataType>::LocalMemory()
+                && CBasicTestMemOrderScope<HostAtomicType,
+                                           HostDataType>::DeclaredInProgram())
+            {
+                if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0;
+            }
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        {
+            if (DeclaredInProgram()) return 0; // skip test - not applicable
+
+            if (LocalMemory()
+                && (gHalfAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gHalfAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0;
+
+            if (!CBasicTestMemOrderScope<HostAtomicType,
+                                         HostDataType>::LocalMemory()
+                && CBasicTestMemOrderScope<HostAtomicType,
+                                           HostDataType>::DeclaredInProgram())
+            {
+                if ((gHalfFPConfig & CL_FP_INF_NAN) == 0) return 0;
+            }
+        }
+        return CBasicTestMemOrderScope<
+            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
+                                                             queue);
+    }
+    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
         if constexpr (
             std::is_same_v<
@@ -1572,6 +1780,24 @@ static int test_atomic_fetch_add_generic(cl_device_id deviceID,
 
     if (gFloatAtomicsSupported)
     {
+        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_DOUBLE, HOST_DOUBLE>
+            test_spec_double(TYPE_ATOMIC_DOUBLE, useSVM);
+        EXECUTE_TEST(
+            error,
+            test_spec_double.Execute(deviceID, context, queue, num_elements));
+
+        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_FLOAT, HOST_FLOAT>
+            test_spec_float(TYPE_ATOMIC_FLOAT, useSVM);
+        EXECUTE_TEST(
+            error,
+            test_spec_float.Execute(deviceID, context, queue, num_elements));
+
+        CBasicTestFetchAddSpecialFloats<HOST_ATOMIC_HALF, HOST_HALF>
+            test_spec_half(TYPE_ATOMIC_HALF, useSVM);
+        EXECUTE_TEST(
+            error,
+            test_spec_half.Execute(deviceID, context, queue, num_elements));
+
         CBasicTestFetchAdd<HOST_ATOMIC_HALF, HOST_HALF> test_half(
             TYPE_ATOMIC_HALF, useSVM);
         EXECUTE_TEST(error,
@@ -1647,6 +1873,23 @@ REGISTER_TEST(svm_atomic_fetch_add)
                                          true);
 }
 
+template <typename T> double kahan_sub(const std::vector<T> &nums)
+{
+    return 0.0;
+}
+template <> double kahan_sub<double>(const std::vector<double> &nums)
+{
+    double sum = 0.0;
+    double compensation = 0.0;
+    for (double num : nums)
+    {
+        double y = -num - compensation;
+        double t = sum + y;
+        compensation = (t - sum) - y;
+        sum = t;
+    }
+    return sum;
+}
 template <typename HostAtomicType, typename HostDataType>
 class CBasicTestFetchSub
     : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
@@ -1668,14 +1911,18 @@ public:
                                                                 useSVM),
           min_range(-999.0), max_range(999.0), max_error(0.0)
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_FLOAT> || std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             StartValue(0);
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
+
+            // Narrow down range for half to avoid overflow to infinity
+            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+            {
+                min_range = -50.0;
+                max_range = 50.0;
+            }
         }
     }
     template <typename Iterator>
@@ -1685,28 +1932,18 @@ public:
         for (auto it = begin; it != end; ++it) res = res - *it;
         return res;
     }
-    template <typename Iterator>
-    float subtract_halfs(Iterator begin, Iterator end)
-    {
-        cl_half res = 0;
-        for (auto it = begin; it != end; ++it)
-        {
-            res = cl_half_from_float(cl_half_to_float(res)
-                                         - cl_half_to_float(*it),
-                                     gHalfRoundingMode);
-        }
-        return cl_half_to_float(res);
-    }
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (threadCount > ref_vals.size())
             {
                 ref_vals.resize(threadCount);
+
                 for (cl_uint i = 0; i < threadCount; i++)
-                    ref_vals[i] = get_random_float(min_range, max_range, d);
+                    ref_vals[i] = (HostDataType)get_random_double(min_range,
+                                                                  max_range, d);
 
                 memcpy(startRefValues, ref_vals.data(),
                        sizeof(HostDataType) * ref_vals.size());
@@ -1717,72 +1954,33 @@ public:
                 sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
                 sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
 
-                std::sort(
-                    ref_vals.begin(), ref_vals.end(),
-                    [](float a, float b) { return std::abs(a) < std::abs(b); });
+                std::sort(ref_vals.begin(), ref_vals.end(),
+                          [](HostDataType a, HostDataType b) {
+                              return std::abs(a) < std::abs(b);
+                          });
 
                 double precise = 0.0;
-                for (auto elem : ref_vals) precise += double(elem);
+                if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
+                    precise = kahan_sub(ref_vals);
+                else
+                    for (auto elem : ref_vals) precise += double(elem);
                 sums.push_back(precise);
+
                 sums.push_back(subtract(ref_vals.begin(), ref_vals.end()));
                 sums.push_back(subtract(ref_vals.rbegin(), ref_vals.rend()));
 
                 std::sort(sums.begin(), sums.end());
-                max_error =
-                    std::abs((HOST_ATOMIC_FLOAT)sums.front() - sums.back());
-
-                // restore unsorted order
-                memcpy(ref_vals.data(), startRefValues,
-                       sizeof(HostDataType) * ref_vals.size());
-            }
-            else
-            {
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * threadCount);
-            }
-            return true;
-        }
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (threadCount > ref_vals.size())
-            {
-                ref_vals.resize(threadCount);
-                for (cl_uint i = 0; i < threadCount; i++)
-                    ref_vals[i] = cl_half_from_float(
-                        get_random_float(min_range, max_range, d),
-                        gHalfRoundingMode);
-
-                memcpy(startRefValues, ref_vals.data(),
-                       sizeof(HostDataType) * ref_vals.size());
-
-                // Estimate highest possible summation error for given set.
-                std::vector<float> sums;
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return cl_half_to_float(a) < cl_half_to_float(b);
-                          });
-
-                sums.push_back(
-                    subtract_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(
-                    subtract_halfs(ref_vals.rbegin(), ref_vals.rend()));
-
-                std::sort(ref_vals.begin(), ref_vals.end(),
-                          [](cl_half a, cl_half b) {
-                              return std::abs(cl_half_to_float(a))
-                                  < std::abs(cl_half_to_float(b));
-                          });
-
-                float precise = 0.f;
-                for (auto elem : ref_vals) precise -= cl_half_to_float(elem);
-                sums.push_back(precise);
-                sums.push_back(
-                    subtract_halfs(ref_vals.begin(), ref_vals.end()));
-                sums.push_back(
-                    subtract_halfs(ref_vals.rbegin(), ref_vals.rend()));
-
-                std::sort(sums.begin(), sums.end());
+                assert(std::all_of(sums.begin(), sums.end(),
+                                   [](const HostDataType &val) {
+                                       return std::isfinite(
+                                           static_cast<double>(val));
+                                   })
+                       && "Infinite subtraction value detected!");
                 max_error = std::abs(sums.front() - sums.back());
+
+                log_info("Max allowed error for %u elements: %.10f\n",
+                         threadCount, max_error);
+
                 // restore unsorted order
                 memcpy(ref_vals.data(), startRefValues,
                        sizeof(HostDataType) * ref_vals.size());
@@ -1801,10 +1999,7 @@ public:
         std::string memoryOrderScope = MemoryOrderScopeStr();
         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
 
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return "  atomic_fetch_sub" + postfix + "(&destMemory[0], ("
                 + DataType().AddSubOperandTypeName() + ")oldValues[tid]"
@@ -1826,10 +2021,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_sub(&destMemory[0], (HostDataType)oldValues[tid],
                                   MemoryOrder());
@@ -1851,25 +2043,12 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
                     expected -= startRefValues[i];
         }
-        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    expected = cl_half_from_float(
-                        cl_half_to_float(expected)
-                            - cl_half_to_float(startRefValues[i]),
-                        gHalfRoundingMode);
-                }
-            }
-        }
         else
         {
             for (cl_uint i = 0; i < threadCount; i++)
@@ -1880,25 +2059,22 @@ public:
     }
     bool IsTestNotAsExpected(const HostDataType &expected,
                              const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
                              cl_uint whichDestValue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
-                return std::abs((HOST_ATOMIC_FLOAT)expected
-                                - testValues[whichDestValue])
-                    > max_error;
-        }
-        else if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-                return std::abs(cl_half_to_float(expected)
-                                - cl_half_to_float(testValues[whichDestValue]))
+                return std::abs(
+                           static_cast<double>(expected
+                                               - static_cast<HostDataType>(
+                                                   testValues[whichDestValue])))
                     > max_error;
         }
         return CBasicTestMemOrderScope<
             HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                                testValues,
+                                                               startRefValues,
                                                                whichDestValue);
     }
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
@@ -1927,7 +2103,18 @@ public:
     int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
                           cl_command_queue queue) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (std::is_same_v<HostDataType, HOST_DOUBLE>)
+        {
+            if (LocalMemory()
+                && (gDoubleAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gDoubleAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT)
+                    == 0)
+                return 0;
+        }
+        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             if (LocalMemory()
                 && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT) == 0)
@@ -1953,10 +2140,7 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return threadCount;
         }
@@ -1991,6 +2175,11 @@ static int test_atomic_fetch_sub_generic(cl_device_id deviceID,
 
     if (gFloatAtomicsSupported)
     {
+        CBasicTestFetchSub<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
+            TYPE_ATOMIC_DOUBLE, useSVM);
+        EXECUTE_TEST(
+            error, test_double.Execute(deviceID, context, queue, num_elements));
+
         CBasicTestFetchSub<HOST_ATOMIC_FLOAT, HOST_FLOAT> test_float(
             TYPE_ATOMIC_FLOAT, useSVM);
         EXECUTE_TEST(
@@ -2872,10 +3061,7 @@ public:
           min_range(-999.0), max_range(999.0)
     {
         StartValue(DataType().MaxValue());
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
@@ -2885,10 +3071,7 @@ public:
     {
         std::string memoryOrderScope = MemoryOrderScopeStr();
         std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return "  atomic_fetch_min" + postfix
                 + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"
@@ -2907,10 +3090,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_min(&destMemory[0], oldValues[tid],
                                   MemoryOrder());
@@ -2965,19 +3145,7 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    if (cl_half_to_float(startRefValues[i])
-                        < cl_half_to_float(expected))
-                        expected = startRefValues[i];
-                }
-            }
-        }
-        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
@@ -2995,36 +3163,36 @@ public:
     }
     bool IsTestNotAsExpected(const HostDataType &expected,
                              const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
                              cl_uint whichDestValue) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
-                    IsTestNotAsExpected(expected, testValues, whichDestValue);
+                    IsTestNotAsExpected(expected, testValues, startRefValues,
+                                        whichDestValue);
             return false; // ignore all but 0 which stores final result
         }
         return CBasicTestMemOrderScope<
             HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                                testValues,
+                                                               startRefValues,
                                                                whichDestValue);
     }
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                     HostAtomicType *finalValues) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same<HostDataType, HOST_FLOAT>::value)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             correct = true;
             for (cl_uint i = 1; i < threadCount; i++)
             {
                 if (refValues[i] != StartValue())
                 {
-                    log_error("Thread %d found %d mismatch(es)\n", i,
-                              (cl_uint)refValues[i]);
+                    log_error(
+                        "Thread %d found %lf mismatch(es), start value=%lf\n",
+                        i, (double)refValues[i], (double)StartValue());
                     correct = false;
                 }
             }
@@ -3081,10 +3249,252 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
+        {
+            return threadCount;
+        }
+        return CBasicTestMemOrderScope<HostAtomicType,
+                                       HostDataType>::NumResults(threadCount,
+                                                                 deviceID);
+    }
+};
+
+template <typename HostAtomicType, typename HostDataType>
+class CBasicTestFetchMinSpecialFloats
+    : public CBasicTestMemOrderScope<HostAtomicType, HostDataType> {
+
+    std::vector<HostDataType> ref_vals;
+
+public:
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::StartValue;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::DataType;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::MemoryOrder;
+    using CBasicTestMemOrderScope<HostAtomicType,
+                                  HostDataType>::MemoryOrderScopeStr;
+    using CBasicTestMemOrderScope<HostAtomicType, HostDataType>::LocalMemory;
+    CBasicTestFetchMinSpecialFloats(TExplicitAtomicType dataType, bool useSVM)
+        : CBasicTestMemOrderScope<HostAtomicType, HostDataType>(dataType,
+                                                                useSVM)
+    {
+        // StartValue is used as an index divisor in the following test
+        // logic. It is set to the number of special values, which allows
+        // threads to be mapped deterministically onto the input data array.
+        // This enables repeated add operations arranged so that every
+        // special value is added to every other one (“all-to-all”).
+
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            auto spec_vals = GetSpecialValues();
+            StartValue(spec_vals.size());
+            CBasicTestMemOrderScope<HostAtomicType,
+                                    HostDataType>::OldValueCheck(false);
+        }
+    }
+
+    static std::vector<HostDataType> &GetSpecialValues()
+    {
+        static std::vector<HostDataType> special_values;
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            const HostDataType test_value_zero =
+                static_cast<HostDataType>(0.0f);
+            const HostDataType test_value_minus_zero =
+                static_cast<HostDataType>(-0.0f);
+            const HostDataType test_value_without_fraction =
+                static_cast<HostDataType>(2.0f);
+            const HostDataType test_value_with_fraction =
+                static_cast<HostDataType>(2.2f);
+
+            if (special_values.empty())
+            {
+                special_values = {
+                    static_cast<HostDataType>(test_value_minus_zero),
+                    static_cast<HostDataType>(test_value_zero),
+                    static_cast<HostDataType>(test_value_without_fraction),
+                    static_cast<HostDataType>(test_value_with_fraction),
+                    std::numeric_limits<HostDataType>::infinity(),
+                    std::numeric_limits<HostDataType>::quiet_NaN(),
+                    std::numeric_limits<HostDataType>::signaling_NaN(),
+                    -std::numeric_limits<HostDataType>::infinity(),
+                    -std::numeric_limits<HostDataType>::quiet_NaN(),
+                    -std::numeric_limits<HostDataType>::signaling_NaN(),
+                    std::numeric_limits<HostDataType>::lowest(),
+                    std::numeric_limits<HostDataType>::min(),
+                    std::numeric_limits<HostDataType>::max(),
+                };
+
+                if (0 != (CL_FP_DENORM & gFloatFPConfig))
+                {
+                    special_values.push_back(
+                        std::numeric_limits<HostDataType>::denorm_min());
+                }
+            }
+        }
+
+        return special_values;
+    }
+
+    bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
+                      MTdata d) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            if (threadCount > ref_vals.size())
+            {
+                ref_vals.assign(threadCount, 0);
+                auto spec_vals = GetSpecialValues();
+
+                cl_uint total_cnt = 0;
+                while (total_cnt < threadCount)
+                {
+                    cl_uint block_cnt =
+                        std::min((cl_int)(threadCount - total_cnt),
+                                 (cl_int)spec_vals.size());
+                    memcpy(&ref_vals.at(total_cnt), spec_vals.data(),
+                           sizeof(HostDataType) * block_cnt);
+                    total_cnt += block_cnt;
+                }
+            }
+
+            memcpy(startRefValues, ref_vals.data(),
+                   sizeof(HostDataType) * threadCount);
+
+            return true;
+        }
+        return false;
+    }
+    std::string ProgramCore() override
+    {
+        // The start_value variable (set by StartValue) is used
+        // as a divisor of the thread index when selecting the operand for
+        // atomic_fetch_add. This groups threads into blocks corresponding
+        // to the number of special values and implements an “all-to-all”
+        // addition pattern. As a result, each destination element is
+        // updated using different combinations of input values, enabling
+        // consistent comparison between host and device execution.
+
+        std::string memoryOrderScope = MemoryOrderScopeStr();
+        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
+        return std::string(DataType().AddSubOperandTypeName())
+            + "  start_value = atomic_load_explicit(destMemory+tid, "
+              "memory_order_relaxed, memory_scope_work_group);\n"
+              "  atomic_store_explicit(destMemory+tid, oldValues[tid], "
+              "memory_order_relaxed, memory_scope_work_group);\n"
+              "  atomic_fetch_min"
+            + postfix + "(&destMemory[tid], ("
+            + DataType().AddSubOperandTypeName()
+            + ")oldValues[tid/(int)start_value]" + memoryOrderScope + ");\n";
+    }
+    void HostFunction(cl_uint tid, cl_uint threadCount,
+                      volatile HostAtomicType *destMemory,
+                      HostDataType *oldValues) override
+    {
+        auto spec_vals = GetSpecialValues();
+        host_atomic_store(&destMemory[tid], (HostDataType)oldValues[tid],
+                          MEMORY_ORDER_SEQ_CST);
+        host_atomic_fetch_min(&destMemory[tid],
+                              (HostDataType)oldValues[tid / spec_vals.size()],
+                              MemoryOrder());
+    }
+    bool ExpectedValue(HostDataType &expected, cl_uint threadCount,
+                       HostDataType *startRefValues,
+                       cl_uint whichDestValue) override
+    {
+        expected = StartValue();
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            auto spec_vals = GetSpecialValues();
+            expected =
+                std::min(startRefValues[whichDestValue],
+                         startRefValues[whichDestValue / spec_vals.size()]);
+        }
+        return true;
+    }
+    bool IsTestNotAsExpected(const HostDataType &expected,
+                             const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
+                             cl_uint whichDestValue) override
+    {
+        if (testValues[whichDestValue] != expected)
+        {
+            auto spec_vals = GetSpecialValues();
+            // special cases
+            // min(-0, +0) = min(+0, -0) = +0 or -0,
+            if (((startRefValues[whichDestValue] == -0.f)
+                 && (startRefValues[whichDestValue / spec_vals.size()] == 0.f))
+                || ((startRefValues[whichDestValue] == 0.f)
+                    && (startRefValues[whichDestValue / spec_vals.size()]
+                        == -0.f)))
+                return false;
+            else if (is_qnan(startRefValues[whichDestValue / spec_vals.size()])
+                     || is_qnan(startRefValues[whichDestValue]))
+            {
+                // min(x, qNaN) = min(qNaN, x) = x,
+                // min(qNaN, qNaN) = qNaN,
+                if (is_qnan(startRefValues[whichDestValue / spec_vals.size()])
+                    && is_qnan(startRefValues[whichDestValue]))
+                    return !is_qnan(testValues[whichDestValue]);
+                else if (is_qnan(
+                             startRefValues[whichDestValue / spec_vals.size()]))
+                    return !std::isnan(testValues[whichDestValue])
+                        && testValues[whichDestValue]
+                        != startRefValues[whichDestValue]; // NaN != NaN always
+                                                           // true
+                else
+                    return !std::isnan(testValues[whichDestValue])
+                        && testValues[whichDestValue]
+                        != startRefValues[whichDestValue / spec_vals.size()];
+            }
+            else if (is_snan(startRefValues[whichDestValue / spec_vals.size()])
+                     || is_snan(startRefValues[whichDestValue]))
+            {
+                // min(x, sNaN) = min(sNaN, x) = NaN or x, and
+                // min(NaN, sNaN) = min(sNaN, NaN) = NaN
+                if (std::isnan(testValues[whichDestValue])
+                    || testValues[whichDestValue]
+                        == startRefValues[whichDestValue]
+                    || testValues[whichDestValue]
+                        == startRefValues[whichDestValue / spec_vals.size()])
+                    return false;
+            }
+        }
+
+        return CBasicTestMemOrderScope<
+            HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
+                                                               testValues,
+                                                               startRefValues,
+                                                               whichDestValue);
+    }
+    int ExecuteSingleTest(cl_device_id deviceID, cl_context context,
+                          cl_command_queue queue) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        {
+            if (LocalMemory()
+                && (gFloatAtomicCaps & CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
+                    == 0)
+                return 0; // skip test - not applicable
+
+            if (!LocalMemory()
+                && (gFloatAtomicCaps & CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT)
+                    == 0)
+                return 0;
+
+            if (!CBasicTestMemOrderScope<HostAtomicType,
+                                         HostDataType>::LocalMemory()
+                && CBasicTestMemOrderScope<HostAtomicType,
+                                           HostDataType>::DeclaredInProgram())
+            {
+                if ((gFloatFPConfig & CL_FP_INF_NAN) == 0) return 0;
+            }
+        }
+        return CBasicTestMemOrderScope<
+            HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context,
+                                                             queue);
+    }
+    cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
+    {
+        if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
         {
             return threadCount;
         }
@@ -3119,6 +3529,12 @@ static int test_atomic_fetch_min_generic(cl_device_id deviceID,
 
     if (gFloatAtomicsSupported)
     {
+        CBasicTestFetchMinSpecialFloats<HOST_ATOMIC_FLOAT, HOST_FLOAT>
+            test_spec_float(TYPE_ATOMIC_FLOAT, useSVM);
+        EXECUTE_TEST(
+            error,
+            test_spec_float.Execute(deviceID, context, queue, num_elements));
+
         CBasicTestFetchMin<HOST_ATOMIC_DOUBLE, HOST_DOUBLE> test_double(
             TYPE_ATOMIC_DOUBLE, useSVM);
         EXECUTE_TEST(
@@ -3212,21 +3628,11 @@ public:
                                                                 useSVM),
           min_range(-999.0), max_range(999.0)
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT>)
+        StartValue(DataType().MinValue());
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             CBasicTestMemOrderScope<HostAtomicType,
                                     HostDataType>::OldValueCheck(false);
-            if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-                StartValue(cl_half_from_float(-CL_HALF_MAX, gHalfRoundingMode));
-            else
-                StartValue(-DataType().MaxValue());
-        }
-        else
-        {
-            StartValue(DataType().MinValue());
         }
     }
     std::string ProgramCore() override
@@ -3255,10 +3661,7 @@ public:
                       volatile HostAtomicType *destMemory,
                       HostDataType *oldValues) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             host_atomic_fetch_max(&destMemory[0], oldValues[tid],
                                   MemoryOrder());
@@ -3274,23 +3677,12 @@ public:
     bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues,
                       MTdata d) override
     {
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             for (cl_uint i = 0; i < threadCount; i++)
             {
-                startRefValues[i] = cl_half_from_float(
-                    get_random_float(min_range, max_range, d),
-                    gHalfRoundingMode);
-            }
-        }
-        else if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
-        {
-            for (cl_uint i = 0; i < threadCount; i++)
-            {
-                startRefValues[i] = get_random_float(min_range, max_range, d);
+                startRefValues[i] = static_cast<HostDataType>(
+                    get_random_float(min_range, max_range, d));
             }
         }
         else
@@ -3313,19 +3705,7 @@ public:
                        cl_uint whichDestValue) override
     {
         expected = StartValue();
-        if constexpr (std::is_same_v<HostDataType, HOST_HALF>)
-        {
-            if (whichDestValue == 0)
-            {
-                for (cl_uint i = 0; i < threadCount; i++)
-                {
-                    if (cl_half_to_float(startRefValues[i])
-                        > cl_half_to_float(expected))
-                        expected = startRefValues[i];
-                }
-            }
-        }
-        else if constexpr (std::is_same_v<HostDataType, HOST_FLOAT>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 for (cl_uint i = 0; i < threadCount; i++)
@@ -3343,36 +3723,36 @@ public:
     }
     bool IsTestNotAsExpected(const HostDataType &expected,
                              const std::vector<HostAtomicType> &testValues,
+                             const std::vector<HostDataType> &startRefValues,
                              cl_uint whichDestValue) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             if (whichDestValue == 0)
                 return CBasicTestMemOrderScope<HostAtomicType, HostDataType>::
-                    IsTestNotAsExpected(expected, testValues, whichDestValue);
+                    IsTestNotAsExpected(expected, testValues, startRefValues,
+                                        whichDestValue);
             return false; // ignore all but 0 which stores final result
         }
         return CBasicTestMemOrderScope<
             HostAtomicType, HostDataType>::IsTestNotAsExpected(expected,
                                                                testValues,
+                                                               startRefValues,
                                                                whichDestValue);
     }
     bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues,
                     HostAtomicType *finalValues) override
     {
-        if (std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             correct = true;
             for (cl_uint i = 1; i < threadCount; i++)
             {
                 if (refValues[i] != StartValue())
                 {
-                    log_error("Thread %d found %d mismatch(es)\n", i,
-                              (cl_uint)refValues[i]);
+                    log_error(
+                        "Thread %d found %lf mismatch(es), start value=%lf\n",
+                        i, (double)refValues[i], (double)StartValue());
                     correct = false;
                 }
             }
@@ -3429,10 +3809,7 @@ public:
     }
     cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) override
     {
-        if constexpr (
-            std::is_same_v<
-                HostDataType,
-                HOST_HALF> || std::is_same_v<HostDataType, HOST_FLOAT> || std::is_same_v<HostDataType, HOST_DOUBLE>)
+        if constexpr (is_host_fp_v<HostDataType>)
         {
             return threadCount;
         }
diff --git a/test_conformance/common/directx_wrapper/directx_wrapper.hpp b/test_conformance/common/directx_wrapper/directx_wrapper.hpp
index dec85b78..fb758880 100644
--- a/test_conformance/common/directx_wrapper/directx_wrapper.hpp
+++ b/test_conformance/common/directx_wrapper/directx_wrapper.hpp
@@ -26,9 +26,9 @@ class DirectXWrapper {
 public:
     DirectXWrapper();
 
-    ID3D12Device* getDXDevice() const;
-    ID3D12CommandQueue* getDXCommandQueue() const;
-    ID3D12CommandAllocator* getDXCommandAllocator() const;
+    [[nodiscard]] ID3D12Device* getDXDevice() const;
+    [[nodiscard]] ID3D12CommandQueue* getDXCommandQueue() const;
+    [[nodiscard]] ID3D12CommandAllocator* getDXCommandAllocator() const;
 
 protected:
     ComPtr<ID3D12Device> dx_device = nullptr;
@@ -39,7 +39,7 @@ protected:
 class DirectXFenceWrapper {
 public:
     DirectXFenceWrapper(ID3D12Device* dx_device);
-    ID3D12Fence* operator*() const { return dx_fence.Get(); }
+    [[nodiscard]] ID3D12Fence* get() const { return dx_fence.Get(); }
 
 private:
     ComPtr<ID3D12Fence> dx_fence = nullptr;
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
index a474102d..b1f1b8c9 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
@@ -104,7 +104,8 @@
     VK_FUNC_DECL(vkGetImageSubresourceLayout)                                  \
     VK_FUNC_DECL(vkCreateDebugUtilsMessengerEXT)                               \
     VK_FUNC_DECL(vkDestroyDebugUtilsMessengerEXT)                              \
-    VK_FUNC_DECL(vkGetPhysicalDeviceExternalBufferProperties)
+    VK_FUNC_DECL(vkGetPhysicalDeviceExternalBufferProperties)                  \
+    VK_FUNC_DECL(vkGetPhysicalDeviceFeatures2)
 #define VK_WINDOWS_FUNC_LIST                                                   \
     VK_FUNC_DECL(vkGetMemoryWin32HandleKHR)                                    \
     VK_FUNC_DECL(vkGetSemaphoreWin32HandleKHR)                                 \
@@ -209,5 +210,6 @@
 #define vkDestroyDebugUtilsMessengerEXT _vkDestroyDebugUtilsMessengerEXT
 #define vkGetPhysicalDeviceExternalBufferProperties                            \
     _vkGetPhysicalDeviceExternalBufferProperties
+#define vkGetPhysicalDeviceFeatures2 _vkGetPhysicalDeviceFeatures2
 
 #endif //_vulkan_api_list_hpp_
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
index 7254742d..08506d1c 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
@@ -147,6 +147,7 @@ VulkanInstance::VulkanInstance(bool useValidationLayers)
         // return WAIVED;
     }
 
+    VK_GET_NULL_INSTANCE_PROC_ADDR(vkGetPhysicalDeviceFeatures2);
     VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceVersion);
     VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceLayerProperties);
     VK_GET_NULL_INSTANCE_PROC_ADDR(vkCreateInstance);
@@ -612,7 +613,8 @@ VulkanDevice::VulkanDevice(const VulkanDevice &device)
 
 VulkanDevice::VulkanDevice(
     const VulkanPhysicalDevice &physicalDevice,
-    const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap)
+    const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap,
+    bool useShaderInt8)
     : m_physicalDevice(physicalDevice), m_vkDevice(NULL)
 {
     uint32_t maxQueueCount = 0;
@@ -676,7 +678,55 @@ VulkanDevice::VulkanDevice(
         enabledExtensionNameList.data();
     vkDeviceCreateInfo.pEnabledFeatures = NULL;
 
-    vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice);
+    if (useShaderInt8)
+    {
+        VkPhysicalDeviceShaderFloat16Int8Features int8Features{};
+        int8Features.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
+
+        VkPhysicalDevice8BitStorageFeatures storage8Features{};
+        storage8Features.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;
+
+        int8Features.pNext = &storage8Features;
+
+        VkPhysicalDeviceFeatures2 features2{};
+        features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+        features2.pNext = &int8Features;
+
+        vkGetPhysicalDeviceFeatures2(physicalDevice, &features2);
+
+        if (!int8Features.shaderInt8
+            || !storage8Features.storageBuffer8BitAccess)
+        {
+            throw std::runtime_error("shaderInt8 not supported!\n");
+        }
+
+        VkPhysicalDevice8BitStorageFeatures storage8Enable{};
+        storage8Enable.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES;
+        storage8Enable.storageBuffer8BitAccess = VK_TRUE;
+
+        VkPhysicalDeviceShaderFloat16Int8Features int8Enable{};
+        int8Enable.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
+        int8Enable.shaderInt8 = VK_TRUE;
+        int8Enable.pNext = &storage8Enable;
+
+        vkDeviceCreateInfo.pNext = &int8Enable;
+
+        enabledExtensionNameList.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
+        vkDeviceCreateInfo.ppEnabledExtensionNames =
+            enabledExtensionNameList.data();
+        vkDeviceCreateInfo.enabledExtensionCount =
+            (uint32_t)enabledExtensionNameList.size();
+
+        vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice);
+    }
+    else
+    {
+        vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice);
+    }
 
     for (uint32_t qfIdx = 0;
          qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size();
@@ -1071,7 +1121,8 @@ VulkanComputePipeline::VulkanComputePipeline(
 
 VulkanComputePipeline::VulkanComputePipeline(
     const VulkanDevice &device, const VulkanPipelineLayout &pipelineLayout,
-    const VulkanShaderModule &shaderModule, const std::string &entryFuncName)
+    const VulkanShaderModule &shaderModule, const std::string &entryFuncName,
+    const VkSpecializationInfo *spec)
     : VulkanPipeline(device)
 {
     VkPipelineShaderStageCreateInfo vkPipelineShaderStageCreateInfo = {};
@@ -1084,6 +1135,8 @@ VulkanComputePipeline::VulkanComputePipeline(
     vkPipelineShaderStageCreateInfo.pName = entryFuncName.c_str();
     vkPipelineShaderStageCreateInfo.pSpecializationInfo = NULL;
 
+    if (spec) vkPipelineShaderStageCreateInfo.pSpecializationInfo = spec;
+
     VkComputePipelineCreateInfo vkComputePipelineCreateInfo = {};
     vkComputePipelineCreateInfo.sType =
         VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
index 9cb031f5..25e1d409 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
@@ -148,7 +148,8 @@ public:
     VulkanDevice(
         const VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice(),
         const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap =
-            getDefaultVulkanQueueFamilyToQueueCountMap());
+            getDefaultVulkanQueueFamilyToQueueCountMap(),
+        bool useShaderInt8 = false);
     virtual ~VulkanDevice();
     const VulkanPhysicalDevice &getPhysicalDevice() const;
     VulkanQueue &
@@ -296,7 +297,8 @@ public:
     VulkanComputePipeline(const VulkanDevice &device,
                           const VulkanPipelineLayout &pipelineLayout,
                           const VulkanShaderModule &shaderModule,
-                          const std::string &entryFuncName = "main");
+                          const std::string &entryFuncName = "main",
+                          const VkSpecializationInfo *spec = nullptr);
     virtual ~VulkanComputePipeline();
     VulkanPipelineBindPoint getPipelineBindPoint() const;
 };
diff --git a/test_conformance/compiler/test_build_helpers.cpp b/test_conformance/compiler/test_build_helpers.cpp
index e8ef3ee8..46a54efd 100644
--- a/test_conformance/compiler/test_build_helpers.cpp
+++ b/test_conformance/compiler/test_build_helpers.cpp
@@ -16,6 +16,7 @@
 #include "testBase.h"
 #include "harness/testHarness.h"
 #include "harness/parseParameters.h"
+#include "harness/stringHelpers.h"
 
 #include <array>
 #include <memory>
@@ -1009,3 +1010,113 @@ REGISTER_TEST(get_program_build_info)
 
     return 0;
 }
+
+cl_int test_kernel_name_len(cl_context context, cl_device_id device,
+                            const cl_uint length)
+{
+    cl_int error = CL_SUCCESS;
+
+    std::string buf = { "abcdefghijklmnopqrstuvwxyz" };
+    std::string name;
+    name.reserve(length);
+
+    for (cl_uint i = 0; i < length; ++i) name += buf[i % buf.size()];
+
+    const char *sample_name_size_test_kernel = R"(
+        __kernel void %s(int src, __global int *dst)
+        {
+            dst[0]=src;
+        }
+    )";
+    std::string program_source =
+        str_sprintf(std::string(sample_name_size_test_kernel), name.c_str());
+    const char *ptr = program_source.c_str();
+
+    {
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+
+        error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
+                                            name.c_str());
+        if (error != CL_SUCCESS)
+        {
+            log_error("ERROR: Unable to create program with length of "
+                      "kernel name "
+                      "%d : %s! (%s from %s:%d)\n",
+                      length, name.c_str(), IGetErrorString(error), __FILE__,
+                      __LINE__);
+            return TEST_FAIL;
+        }
+
+        // query kernel name
+        size_t kernel_name_size = 0;
+        error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, 0, nullptr,
+                                &kernel_name_size);
+        test_error(error, "clGetKernelInfo (size) failed");
+
+        std::vector<char> kernel_name(kernel_name_size);
+        error = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME,
+                                kernel_name_size, kernel_name.data(), nullptr);
+        test_error(error, "clGetKernelInfo (name) failed");
+
+        if (name != std::string(kernel_name.data()))
+        {
+            log_error("Kernel name mismatch! expected=%s got=%s\n",
+                      name.c_str(), kernel_name.data());
+            return TEST_FAIL;
+        }
+    }
+
+    if (gCompilationMode == kOnline)
+    {
+        clProgramWrapper programObj =
+            clCreateProgramWithSource(context, 1, &ptr, nullptr, &error);
+        test_error(error, "clCreateProgramWithSource failed (compile)");
+
+        error = clCompileProgram(programObj, 0, nullptr, nullptr, 0, nullptr,
+                                 nullptr, nullptr, nullptr);
+        if (error != CL_SUCCESS)
+        {
+            log_error("ERROR: Unable to compile program with length of "
+                      "kernel name "
+                      "%d : %s! (%s from %s:%d)\n",
+                      length, name.c_str(), IGetErrorString(error), __FILE__,
+                      __LINE__);
+            return TEST_FAIL;
+        }
+
+        clProgramWrapper linkedProgram =
+            clLinkProgram(context, 0, nullptr, nullptr, 1, &programObj, nullptr,
+                          nullptr, &error);
+        if (error != CL_SUCCESS)
+        {
+            log_error("ERROR: Unable to link program with length of "
+                      "kernel name "
+                      "%d : %s! (%s from %s:%d)\n",
+                      length, name.c_str(), IGetErrorString(error), __FILE__,
+                      __LINE__);
+            return TEST_FAIL;
+        }
+
+        clKernelWrapper kernel =
+            clCreateKernel(linkedProgram, name.c_str(), &error);
+        test_error(error, "clCreateKernel after link failed");
+    }
+
+    return TEST_PASS;
+}
+
+REGISTER_TEST(kernel_name_size)
+{
+    for (cl_uint len = 32; len <= 2048; len *= 2)
+    {
+        cl_int status = test_kernel_name_len(context, device, len);
+        if (status == TEST_FAIL)
+        {
+            log_error("ERROR: test_kernel_name_len failed with length %d\n",
+                      len);
+            return TEST_FAIL;
+        }
+    }
+    return TEST_PASS;
+}
diff --git a/test_conformance/compiler/test_build_options.cpp b/test_conformance/compiler/test_build_options.cpp
index daba1b0e..15348e06 100644
--- a/test_conformance/compiler/test_build_options.cpp
+++ b/test_conformance/compiler/test_build_options.cpp
@@ -14,7 +14,11 @@
 // limitations under the License.
 //
 #include "testBase.h"
+#include "harness/kernelHelpers.h"
 #include "harness/os_helpers.h"
+#include "harness/testHarness.h"
+
+#include <array>
 
 const char *preprocessor_test_kernel[] = {
 "__kernel void sample_test(__global int *dst)\n"
@@ -42,26 +46,22 @@ const char *include_test_kernel[] = {
 "\n"
 "}\n" };
 
-const char *options_test_kernel[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    size_t tid = get_global_id(0);\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "}\n"
-};
+const char *options_test_kernel[] = { "__kernel void sample_test() {}\n" };
 
-const char *optimization_options[] = {
-    "-cl-single-precision-constant",
-    "-cl-denorms-are-zero",
-    "-cl-opt-disable",
-    "-cl-mad-enable",
-    "-cl-no-signed-zeros",
-    "-cl-unsafe-math-optimizations",
-    "-cl-finite-math-only",
-    "-cl-fast-relaxed-math",
-    "-w",
-    "-Werror",
-    };
+std::array optimization_options{
+    std::pair{ "-cl-single-precision-constant", Version(1, 0) },
+    std::pair{ "-cl-denorms-are-zero", Version(1, 0) },
+    std::pair{ "-cl-opt-disable", Version(1, 0) },
+    std::pair{ "-cl-mad-enable", Version(1, 0) },
+    std::pair{ "-cl-no-signed-zeros", Version(1, 0) },
+    std::pair{ "-cl-unsafe-math-optimizations", Version(1, 0) },
+    std::pair{ "-cl-finite-math-only", Version(1, 0) },
+    std::pair{ "-cl-fast-relaxed-math", Version(1, 0) },
+    std::pair{ "-w", Version(1, 0) },
+    std::pair{ "-Werror", Version(1, 0) },
+    std::pair{ "-cl-uniform-work-group-size", Version(2, 0) },
+    std::pair{ "-cl-no-subgroup-ifp", Version(2, 1) },
+};
 
 cl_int get_result_from_program( cl_context context, cl_command_queue queue, cl_program program, cl_int *outValue )
 {
@@ -93,31 +93,44 @@ REGISTER_TEST(options_build_optimizations)
     int error;
     cl_build_status status;
 
-    for(size_t i = 0; i < sizeof(optimization_options) / (sizeof(char*)); i++) {
+    Version version = get_device_cl_version(device);
 
-        clProgramWrapper program;
-        error = create_single_kernel_helper_create_program(context, &program, 1, options_test_kernel, optimization_options[i]);
-        if( program == NULL || error != CL_SUCCESS )
+    for (const auto &optimization_option : optimization_options)
+    {
+        if (version < optimization_option.second)
         {
-            log_error( "ERROR: Unable to create reference program!\n" );
+            continue;
+        }
+
+        auto build_options = std::string("-cl-std=CL")
+            + get_max_OpenCL_C_for_context(context).to_string() + " "
+            + optimization_option.first;
+        const char *option = build_options.c_str();
+        clProgramWrapper program;
+        error = create_single_kernel_helper_create_program(
+            context, &program, 1, options_test_kernel, option);
+        if (program == NULL || error != CL_SUCCESS)
+        {
+            log_error("ERROR: Unable to create reference program!\n");
             return -1;
         }
 
         /* Build with the macro defined */
-        log_info("Testing optimization option '%s'\n", optimization_options[i]);
-        error = clBuildProgram(program, 1, &device, optimization_options[i],
-                               NULL, NULL);
-        test_error( error, "Test program did not properly build" );
+        log_info("Testing optimization option '%s'\n", option);
+        error = clBuildProgram(program, 1, &device, option, NULL, NULL);
+        test_error(error, "Test program did not properly build");
 
         error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS,
                                       sizeof(status), &status, NULL);
-        test_error( error, "Unable to get program build status" );
+        test_error(error, "Unable to get program build status");
 
-        if( (int)status != CL_BUILD_SUCCESS )
+        if ((int)status != CL_BUILD_SUCCESS)
         {
-            log_info("Building with optimization option '%s' failed to compile!\n", optimization_options[i]);
-            print_error( error, "Failed to build with optimization defined")
-            return -1;
+            log_info(
+                "Building with optimization option '%s' failed to compile!\n",
+                option);
+            print_error(error,
+                        "Failed to build with optimization defined") return -1;
         }
     }
     return 0;
@@ -415,3 +428,53 @@ REGISTER_TEST(options_denorm_cache)
 
     return 0;
 }
+
+REGISTER_TEST(options_uniform_work_group_size)
+{
+    if (get_device_cl_version(device) < Version(2, 0))
+    {
+        return TEST_SKIPPED_ITSELF;
+    }
+    std::string build_options = "-cl-std=CL"
+        + get_max_OpenCL_C_for_context(context).to_string()
+        + " -cl-uniform-work-group-size";
+    const char *options = build_options.c_str();
+    clProgramWrapper program;
+    int error = create_single_kernel_helper_create_program(
+        context, &program, 1, options_test_kernel, options);
+    if (program == NULL || error != CL_SUCCESS)
+    {
+        log_error("Error: Unable to create reference program!\n");
+        return TEST_FAIL;
+    }
+    error = clBuildProgram(program, 1, &device, options, NULL, NULL);
+    test_error(error, "Test program did not properly build");
+
+    clKernelWrapper kernel = clCreateKernel(program, "sample_test", &error);
+    test_error(error, "Unable to create kernel");
+
+    size_t global_work_size = 4;
+    size_t uniform_local_work_size = 2;
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size,
+                                   &uniform_local_work_size, 0, NULL, NULL);
+    test_error(error,
+               "Unable to enqueue NDRange kernel with uniform work group size");
+    error = clFinish(queue);
+    test_error(error, "Unable to finish");
+
+    size_t non_uniform_local_work_size = 3;
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_work_size,
+                                   &non_uniform_local_work_size, 0, NULL, NULL);
+
+    if (error != CL_INVALID_WORK_GROUP_SIZE)
+    {
+        log_error(
+            "Error: expected error 'CL_INVALID_WORK_GROUP_SIZE' (got '%s') "
+            "trying to enqueue kernel compiled with '%s' with non-uniform work "
+            "group size\n",
+            IGetErrorString(error), options);
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
diff --git a/test_conformance/compiler/test_compile.cpp b/test_conformance/compiler/test_compile.cpp
index b5a134d1..672291ff 100644
--- a/test_conformance/compiler/test_compile.cpp
+++ b/test_conformance/compiler/test_compile.cpp
@@ -3983,6 +3983,9 @@ REGISTER_TEST(multiple_build_program)
         error = clEnqueueNDRangeKernel(queue, kernel0, 1, NULL, &num_threads,
                                        NULL, 0, NULL, NULL);
         test_error(error, "clEnqueueNDRangeKernel failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
     }
 
     {
@@ -4001,10 +4004,10 @@ REGISTER_TEST(multiple_build_program)
         error = clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, &num_threads,
                                        NULL, 0, NULL, NULL);
         test_error(error, "clEnqueueNDRangeKernel failed");
-    }
 
-    error = clFinish(queue);
-    test_error(error, "clFinish failed");
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+    }
 
     std::vector<cl_int> test_values(num_threads, 0);
     error = clEnqueueReadBuffer(queue, out_stream_0, true, 0,
diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
index b9cfb608..04abde9a 100644
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
@@ -56,6 +56,7 @@ const char *known_extensions[] = {
     "cl_khr_integer_dot_product",
     "cl_khr_subgroup_rotate",
     "cl_khr_kernel_clock",
+    "cl_khr_icd_unloadable",
     // API-only extensions after this point.  If you add above here, modify
     // first_API_extension below.
     "cl_khr_icd",
diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp
index 0c868764..b3f1098d 100644
--- a/test_conformance/contractions/contractions.cpp
+++ b/test_conformance/contractions/contractions.cpp
@@ -191,7 +191,7 @@ double sse_mul_sd(double x, double y)
 }
 #endif
 
-#ifdef __PPC__
+#if defined(__PPC__) || defined(__riscv)
 float ppc_mul(float a, float b)
 {
     float p;
@@ -630,9 +630,11 @@ test_status InitCL( cl_device_id device )
             // turn that off
             f3[i] = sse_mul(q, q2);
             f4[i] = sse_mul(-q, q2);
-#elif defined(__PPC__)
-            // None of the current generation PPC processors support HW
-            // FTZ, emulate it in sw.
+#elif (defined(__PPC__) || defined(__riscv))
+            // RISC-V CPUs with default 'f' fp32 extension do not support
+            // enabling/disabling FTZ mode, subnormals are always handled
+            // without FTZ. None of the current generation PPC processors
+            // support HW FTZ, emulate it in sw.
             f3[i] = ppc_mul(q, q2);
             f4[i] = ppc_mul(-q, q2);
 #else
@@ -721,9 +723,10 @@ test_status InitCL( cl_device_id device )
                 skipTest[j][i] = (bufSkip[i] ||
                                   (gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)))));
 
-#if defined(__PPC__)
-                // Since the current Power processors don't emulate flush to zero in HW,
-                // it must be emulated in SW instead.
+#if defined(__PPC__) || defined(__riscv)
+                // Since the current Power processors don't emulate flush to
+                // zero in HW, it must be emulated in SW instead. (same for
+                // RISC-V CPUs with 'f' extension)
                 if (gForceFTZ)
                 {
                     if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f))
@@ -760,7 +763,6 @@ test_status InitCL( cl_device_id device )
                 }
             }
 
-
             double *f  = (double*) buf1;
             double *f2 = (double*) buf2;
             double *f3 = (double*) buf3_double;
diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h
index 6846f780..496ea730 100644
--- a/test_conformance/conversions/basic_test_conversions.h
+++ b/test_conformance/conversions/basic_test_conversions.h
@@ -120,8 +120,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);
 uint64_t GetTime(void);
 
 void WriteInputBufferComplete(void *);
-void *FlushToZero(void);
-void UnFlushToZero(void *);
 }
 
 struct CalcRefValsBase
diff --git a/test_conformance/device_execution/enqueue_block.cpp b/test_conformance/device_execution/enqueue_block.cpp
index 10d7b86d..eb0ea40f 100644
--- a/test_conformance/device_execution/enqueue_block.cpp
+++ b/test_conformance/device_execution/enqueue_block.cpp
@@ -340,7 +340,7 @@ static const char* enqueue_block_capture_event_profiling_info_before_execution[]
 
         set_user_event_status(user_evt, CL_COMPLETE);
 
-        void (^checkBlock) (void)  = ^{ check_res(tid, &value, res);      };
+        void (^checkBlock) (void)  = ^{ check_res(tid, value, res);      };
 
         enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);
         if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }
diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp
index 8f71ac4e..4307058a 100644
--- a/test_conformance/device_execution/enqueue_ndrange.cpp
+++ b/test_conformance/device_execution/enqueue_ndrange.cpp
@@ -129,8 +129,8 @@ static const char *helper_ndrange_2d_glo[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -156,8 +156,8 @@ static const char *helper_ndrange_2d_loc[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -193,8 +193,8 @@ static const char *helper_ndrange_2d_ofs[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -233,8 +233,8 @@ static const char *helper_ndrange_3d_glo[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -266,8 +266,8 @@ static const char *helper_ndrange_3d_loc[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
@@ -306,8 +306,8 @@ static const char *helper_ndrange_3d_ofs[] = {
     "}" NL,
     "" NL,
     "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, "
-    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global int* "
-    "val,  __global uint* ofs_arr)" NL,
+    "__global uint* glob_size_arr, __global uint* loc_size_arr, __global "
+    "atomic_uint* val,  __global uint* ofs_arr)" NL,
     "{" NL,
     "  size_t tid = get_global_id(0);" NL,
     "  void (^kernelBlock)(void) = ^{ block_fn(len, val); };" NL,
diff --git a/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp b/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp
index 97872613..8e8ced30 100644
--- a/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp
+++ b/test_conformance/extensions/cl_ext_buffer_device_address/buffer_device_address.cpp
@@ -14,7 +14,10 @@
 //
 
 #include "harness/typeWrappers.h"
+#include "harness/extensionHelpers.h"
 #include <cinttypes>
+#include <vector>
+#include <string>
 
 #define BUF_SIZE 1024
 #define BUF_SIZE_STR "1024"
@@ -310,8 +313,8 @@ private:
             // A basic buffer used to pass the other buffer's address.
             error = clEnqueueWriteBuffer(queue, buffer_in_long,
                                          CL_TRUE, // block
-                                         0, sizeof(cl_long), &DeviceAddrFromAPI,
-                                         0, NULL, NULL);
+                                         0, sizeof(DeviceAddrFromAPI),
+                                         &DeviceAddrFromAPI, 0, NULL, NULL);
         test_error_fail(error,
                         "clEnqueueWriteBuffer of dev_addr_buffer failed\n");
 
@@ -322,9 +325,9 @@ private:
                                &buffer_out_int);
         test_error_fail(error, "clSetKernelArg 1 failed\n");
 
-        error = clSetKernelExecInfo(ind_access_kernel,
-                                    CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT,
-                                    sizeof(void *), &DeviceAddrFromAPI);
+        error = clSetKernelExecInfo(
+            ind_access_kernel, CL_KERNEL_EXEC_INFO_DEVICE_PTRS_EXT,
+            sizeof(DeviceAddrFromAPI), &DeviceAddrFromAPI);
         test_error_fail(error,
                         "Setting indirect access for "
                         "device ptrs failed!\n");
@@ -421,6 +424,8 @@ private:
 
 REGISTER_TEST(private_address)
 {
+    REQUIRE_EXTENSION("cl_ext_buffer_device_address");
+
     BufferDeviceAddressTest test_fixture = BufferDeviceAddressTest(
         device, context, queue, CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT);
 
@@ -435,3 +440,180 @@ REGISTER_TEST(private_address)
 
     return TEST_PASS;
 }
+
+REGISTER_TEST(private_address_multi_device)
+{
+    REQUIRE_EXTENSION("cl_ext_buffer_device_address");
+
+    cl_platform_id platform = 0;
+    cl_int error = CL_SUCCESS;
+    cl_uint numDevices = 0;
+
+    error = clGetPlatformIDs(1, &platform, NULL);
+    test_error_ret(error, "Unable to get platform\n", TEST_FAIL);
+
+    /* Get some devices */
+    error =
+        clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &numDevices);
+    test_error_ret(error, "Unable to get multiple devices\n", TEST_FAIL);
+
+    if (numDevices < 2)
+    {
+        log_info(
+            "WARNING: multi device test unable to get multiple devices via "
+            "CL_DEVICE_TYPE_ALL (got %u devices). Skipping test...\n",
+            numDevices);
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    std::vector<cl_device_id> devices(numDevices);
+    error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, numDevices,
+                           devices.data(), &numDevices);
+    test_error_ret(error, "Unable to get multiple devices\n", TEST_FAIL);
+
+    GET_PFN(devices[0], clSetKernelArgDevicePointerEXT);
+
+    cl_context_properties properties[] = { CL_CONTEXT_PLATFORM,
+                                           (cl_context_properties)platform, 0 };
+    clContextWrapper ctx = clCreateContext(
+        properties, numDevices, devices.data(), nullptr, nullptr, &error);
+    test_error_ret(error, "Unable to create context\n", TEST_FAIL);
+
+    /* Create buffer */
+    cl_mem_properties props[] = { CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT, CL_TRUE,
+                                  0 };
+    clMemWrapper buffer = clCreateBufferWithProperties(
+        ctx, props, CL_MEM_READ_WRITE, 16, nullptr, &error);
+    std::vector<cl_mem_device_address_ext> addresses(numDevices);
+    error =
+        clGetMemObjectInfo(buffer, CL_MEM_DEVICE_ADDRESS_EXT,
+                           sizeof(cl_mem_device_address_ext) * addresses.size(),
+                           addresses.data(), nullptr);
+    test_error_ret(error, "clGetMemObjectInfo failed\n", TEST_FAIL);
+
+    std::vector<clCommandQueueWrapper> queues(numDevices);
+    for (cl_uint i = 0; i < numDevices; ++i)
+    {
+        queues[i] = clCreateCommandQueue(ctx, devices[i], 0, &error);
+        test_error_ret(error, "Unable to create command queue\n", TEST_FAIL);
+    }
+    static std::string source = R"(
+        void kernel test_device_address(
+            global ulong* ptr,
+            ulong value)
+        {
+            *ptr = value;
+        })";
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    const char *source_ptr = source.data();
+    error = create_single_kernel_helper(ctx, &program, &kernel, 1, &source_ptr,
+                                        "test_device_address");
+    test_error(error, "Unable to create test kernel");
+    for (cl_uint i = 0; i < numDevices; ++i)
+    {
+        cl_command_queue queue = queues[i];
+
+        error = clSetKernelArgDevicePointerEXT(kernel, 0, 0);
+        test_error_fail(error,
+                        "clSetKernelArgDevicePointerEXT failed with NULL "
+                        "pointer argument\n");
+
+        error = clSetKernelArgDevicePointerEXT(kernel, 0, addresses[i] + 8);
+        test_error_ret(error, "Unable to set kernel arg\n", TEST_FAIL);
+
+        const cl_ulong pattern = 0xAABBCCDDEEFF0011 + i;
+        error = clSetKernelArg(kernel, 1, sizeof(pattern), &pattern);
+        test_error_ret(error, "Unable to set kernel arg\n", TEST_FAIL);
+
+        size_t gwo = 0;
+        size_t gws = 1;
+        size_t lws = 1;
+        error = clEnqueueNDRangeKernel(queue, kernel, 1, &gwo, &gws, &lws, 0,
+                                       nullptr, nullptr);
+        test_error_ret(error, "Unable to enqueue kernel\n", TEST_FAIL);
+
+        error = clFinish(queue);
+        test_error_ret(error, "clFinish failed\n", TEST_FAIL);
+
+        std::vector<cl_ulong> results(2, 0);
+        error = clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0,
+                                    results.size() * sizeof(cl_ulong),
+                                    results.data(), 0, nullptr, nullptr);
+        test_error_ret(error, "clEnqueueReadBuffer failed\n", TEST_FAIL);
+
+        if (results[1] != pattern)
+            test_fail("Test value doesn't match expected value\n");
+    }
+    return TEST_PASS;
+}
+
+REGISTER_TEST(negative_private_address)
+{
+    REQUIRE_EXTENSION("cl_ext_buffer_device_address");
+
+    cl_int error = CL_SUCCESS;
+
+    GET_PFN(device, clSetKernelArgDevicePointerEXT);
+
+    /* Create buffer */
+    clMemWrapper buffer = clCreateBufferWithProperties(
+        context, nullptr, CL_MEM_READ_WRITE, 16, nullptr, &error);
+    cl_mem_device_address_ext address;
+    error = clGetMemObjectInfo(buffer, CL_MEM_DEVICE_ADDRESS_EXT,
+                               sizeof(cl_mem_device_address_ext), &address,
+                               nullptr);
+    test_failure_error_ret(
+        error, CL_INVALID_OPERATION,
+        "clGetMemObjectInfo should return CL_INVALID_OPERATION when: "
+        "\"the buffer was not created with CL_MEM_DEVICE_PRIVATE_ADDRESS_EXT\"",
+        TEST_FAIL);
+
+    static std::string source = R"(
+        void kernel test_device_address(
+            global ulong* ptr,
+            local ulong* ptr2,
+            ulong value)
+        {
+            *ptr = value;
+        })";
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    const char *source_ptr = source.data();
+    error = create_single_kernel_helper(context, &program, &kernel, 1,
+                                        &source_ptr, "test_device_address");
+    test_error(error, "Unable to create test kernel");
+
+    error = clSetKernelArgDevicePointerEXT(nullptr, 0, 0);
+    test_failure_error_ret(
+        error, CL_INVALID_KERNEL,
+        "clSetKernelArgDevicePointerEXT should return CL_INVALID_KERNEL when: "
+        "\"kernel is not a valid kernel object\"",
+        TEST_FAIL);
+
+    error = clSetKernelArgDevicePointerEXT(kernel, 1, 0x15465);
+    test_failure_error_ret(
+        error, CL_INVALID_ARG_INDEX,
+        "clSetKernelArgDevicePointerEXT should return "
+        "CL_INVALID_ARG_INDEX when: "
+        "\"the expected kernel argument is not a pointer to global memory\"",
+        TEST_FAIL);
+
+    error = clSetKernelArgDevicePointerEXT(kernel, 2, 0x15465);
+    test_failure_error_ret(error, CL_INVALID_ARG_INDEX,
+                           "clSetKernelArgDevicePointerEXT should return "
+                           "CL_INVALID_ARG_INDEX when: "
+                           "\"the expected kernel argument is not a pointer\"",
+                           TEST_FAIL);
+
+    error = clSetKernelArgDevicePointerEXT(kernel, 3, 0x15465);
+    test_failure_error_ret(error, CL_INVALID_ARG_INDEX,
+                           "clSetKernelArgDevicePointerEXT should return "
+                           "CL_INVALID_ARG_INDEX when: "
+                           "\"arg_index is not a valid argument index\"",
+                           TEST_FAIL);
+
+    return TEST_PASS;
+}
diff --git a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp
index aee287fd..56be5c65 100644
--- a/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp
+++ b/test_conformance/extensions/cl_ext_cxx_for_opencl/cxx_for_opencl_ext.cpp
@@ -42,8 +42,8 @@ int test_cxx_for_opencl(cl_device_id device, cl_context context,
             execute(*p, x);
         })";
 
-    error = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel1, 1, &kernel_sstr, "k1", "-cl-std=CLC++");
+    error = create_single_kernel_helper(context, &program, &kernel1, 1,
+                                        &kernel_sstr, "k1", "-cl-std=CLC++");
     test_error(error, "Failed to create k1 kernel");
 
     kernel2 = clCreateKernel(program, "k2", &error);
diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
index 9e54fecc..1a50db93 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
@@ -18,6 +18,7 @@ set(${MODULE_NAME}_SOURCES
     command_buffer_test_event_info.cpp
     command_buffer_finalize.cpp
     command_buffer_pipelined_enqueue.cpp
+    command_buffer_kernel_attributes.cpp
     negative_command_buffer_finalize.cpp
     negative_command_buffer_svm_mem.cpp
     negative_command_buffer_copy_image.cpp
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
index aed183ff..bb085035 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
@@ -16,6 +16,7 @@ set(${MODULE_NAME}_SOURCES
     mutable_command_work_groups.cpp
     mutable_command_work_dim.cpp
     mutable_command_update_state.cpp
+    mutable_command_defer_arguments.cpp
     ../basic_command_buffer.cpp
 )
 
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
index 59f07dd7..917183c8 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
@@ -76,12 +76,10 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
 
     bool Skip() override
     {
-        bool extension_avaliable =
-            is_extension_available(device,
-                                   "cl_khr_command_buffer_mutable_dispatch")
-            == true;
+        bool extension_available = is_extension_available(
+            device, "cl_khr_command_buffer_mutable_dispatch");
 
-        if (extension_avaliable)
+        if (extension_available)
         {
             Version device_version = get_device_cl_version(device);
             if ((device_version >= Version(3, 0))
@@ -91,12 +89,12 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
                 cl_version extension_version = get_extension_version(
                     device, "cl_khr_command_buffer_mutable_dispatch");
 
-                if (extension_version != CL_MAKE_VERSION(0, 9, 4))
+                if (extension_version != CL_MAKE_VERSION(0, 9, 5))
                 {
                     log_info("cl_khr_command_buffer_mutable_dispatch version "
-                             "0.9.4 is "
+                             "0.9.5 is "
                              "required to run the test, skipping.\n ");
-                    extension_avaliable = false;
+                    extension_available = false;
                 }
             }
         }
@@ -109,7 +107,7 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
                 sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
             && mutable_capabilities != 0;
 
-        return !mutable_support || !extension_avaliable
+        return !mutable_support || !extension_available
             || BasicCommandBufferTest::Skip();
     }
 
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_defer_arguments.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_defer_arguments.cpp
new file mode 100644
index 00000000..2e10cbf8
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_defer_arguments.cpp
@@ -0,0 +1,219 @@
+//
+// Copyright (c) 2025 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "testHarness.h"
+#include "mutable_command_basic.h"
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// Mutable dispatch test which handles the case where all the arguments of a
+// kernel aren't set when a kernel is initially added to a mutable
+// command-buffer, but deferred until an update is made to the command to set
+// them before command-buffer enqueue.
+struct MutableDispatchDeferArguments : public BasicMutableCommandBufferTest
+{
+    MutableDispatchDeferArguments(cl_device_id device, cl_context context,
+                                  cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    bool Skip() override
+    {
+        if (BasicMutableCommandBufferTest::Skip()) return true;
+        cl_mutable_dispatch_fields_khr mutable_capabilities;
+        bool mutable_support =
+            !clGetDeviceInfo(
+                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
+            && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
+
+        // require mutable arguments capability
+        return !mutable_support;
+    }
+
+    cl_int SetUpKernel() override
+    {
+        // Create kernel
+        const char *defer_args_kernel =
+            R"(
+            __kernel void defer_args_test(__constant int *src, __global int *dst)
+            {
+                size_t tid = get_global_id(0);
+                dst[tid] = src[tid];
+            })";
+
+        cl_int error =
+            create_single_kernel_helper(context, &program, &kernel, 1,
+                                        &defer_args_kernel, "defer_args_test");
+        test_error(error, "Creating kernel failed");
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUpKernelArgs() override
+    {
+        // Create and initialize buffers
+        MTdataHolder d(gRandomSeed);
+
+        src_data.resize(num_elements);
+        for (size_t i = 0; i < num_elements; i++)
+            src_data[i] = (cl_int)genrand_int32(d);
+
+        cl_int error = CL_SUCCESS;
+        in_mem = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                                num_elements * sizeof(cl_int), src_data.data(),
+                                &error);
+        test_error(error, "Creating src buffer");
+
+        out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                 num_elements * sizeof(cl_int), NULL, &error);
+        test_error(error, "Creating initial dst buffer failed");
+
+        // Only set a single kernel argument, leaving argument at index 1 unset
+        error = clSetKernelArg(kernel, 0, sizeof(in_mem), &in_mem);
+        test_error(error, "Unable to set src kernel arguments");
+
+        return CL_SUCCESS;
+    }
+
+    bool verify_state(cl_command_buffer_state_khr expected)
+    {
+        cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0);
+        cl_int error = clGetCommandBufferInfoKHR(
+            command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state), &state,
+            nullptr);
+        if (error != CL_SUCCESS)
+        {
+            log_error("clGetCommandBufferInfoKHR failed: %d", error);
+            return false;
+        }
+
+        if (state != expected)
+        {
+            log_error("Unexpected result of CL_COMMAND_BUFFER_STATE_KHR query. "
+                      "Expected %u, but was %u\n",
+                      expected, state);
+            return false;
+        }
+        return true;
+    }
+
+    bool verify_result(const cl_mem &buffer)
+    {
+        std::vector<cl_int> data(num_elements);
+        cl_int error =
+            clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size(),
+                                data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            if (data[i] != src_data[i])
+            {
+                log_error("Modified verification failed at index %zu: Got %d, "
+                          "wanted %d\n",
+                          i, data[i], src_data[i]);
+                return false;
+            }
+        }
+        return true;
+    }
+
+    cl_int Run() override
+    {
+        // Create command while the kernel still has the second argument unset.
+        // Passing 'CL_MUTABLE_DISPATCH_ARGUMENTS_KHR' as a property means this
+        // shouldn't be an error.
+        cl_command_properties_khr props[] = {
+            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
+        };
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        // Finalizing the command buffer shouldn't be an error, but result in
+        // the command-buffer entering the CL_COMMAND_BUFFER_STATE_FINALIZED
+        // state.
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+        if (!verify_state(CL_COMMAND_BUFFER_STATE_FINALIZED_KHR))
+        {
+            return TEST_FAIL;
+        }
+
+        // Check that trying to enqueue the command-buffer in this state is an
+        // error, as it needs to be in the executable state.
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_failure_error_ret(error, CL_INVALID_OPERATION,
+                               "clEnqueueCommandBufferKHR should return "
+                               "CL_INVALID_OPERATION",
+                               TEST_FAIL);
+
+        // Update the kernel command to set the missing argument.
+        cl_mutable_dispatch_arg_khr arg{ 1, sizeof(out_mem), &out_mem };
+        cl_mutable_dispatch_config_khr dispatch_config{
+            command,
+            1 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            &arg /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            nullptr /* global_work_offset */,
+            nullptr /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+
+        cl_uint num_configs = 1;
+        cl_command_buffer_update_type_khr config_types[1] = {
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR
+        };
+        const void *configs[1] = { &dispatch_config };
+        error = clUpdateMutableCommandsKHR(command_buffer, num_configs,
+                                           config_types, configs);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        // Now that all the arguments have been set, verify the
+        // command-buffer has entered the executable state.
+        if (!verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR))
+        {
+            return TEST_FAIL;
+        }
+
+        // Execute command-buffer and verify results are expected
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+        if (!verify_result(out_mem)) return TEST_FAIL;
+
+        return TEST_PASS;
+    }
+
+    cl_mutable_command_khr command;
+    std::vector<cl_int> src_data;
+};
+
+} // anonymous namespace
+
+REGISTER_TEST(mutable_dispatch_defer_arguments)
+{
+    return MakeAndRunTest<MutableDispatchDeferArguments>(device, context, queue,
+                                                         num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp
index b5c8ecd9..8919d5c2 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_full_dispatch.cpp
@@ -20,6 +20,7 @@
 #include <CL/cl.h>
 #include <CL/cl_ext.h>
 
+#include <algorithm>
 #include <vector>
 
 namespace {
@@ -126,7 +127,20 @@ struct MutableCommandFullDispatch : InfoMutableCommandBufferTest
             &workgroupinfo_size, NULL);
         test_error(error, "clGetKernelWorkGroupInfo failed");
 
-        group_size = std::min(num_elements, workgroupinfo_size);
+        cl_uint max_work_dimension = 0;
+        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
+                                sizeof(max_work_dimension), &max_work_dimension,
+                                NULL);
+        test_error(error, "clGetDeviceInfo failed");
+
+        std::vector<size_t> max_work_item_sizes(max_work_dimension, 0);
+        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+                                sizeof(size_t) * max_work_item_sizes.size(),
+                                max_work_item_sizes.data(), NULL);
+        test_error(error, "clGetDeviceInfo failed");
+
+        group_size = std::min(
+            { num_elements, workgroupinfo_size, max_work_item_sizes[0] });
         const size_t size_to_allocate_src = group_size * sizeof(cl_int);
 
         // create and initialize source buffer
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp
new file mode 100644
index 00000000..4eddd1d8
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_kernel_attributes.cpp
@@ -0,0 +1,235 @@
+//
+// Copyright (c) 2025 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "basic_command_buffer.h"
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// Tests for cl_khr_command_buffer while enqueueing a kernel with a
+// reqd_work_group_size with a NULL local_work_size.
+
+struct KernelAttributesReqGroupSizeTest : public BasicCommandBufferTest
+{
+    inline static const std::string body_str = R"(
+        __kernel void wg_size(__global int* dst)
+        {
+            if (get_global_id(0) == 0 &&
+                get_global_id(1) == 0 &&
+                get_global_id(2) == 0) {
+                dst[0] = get_local_size(0);
+                dst[1] = get_local_size(1);
+                dst[2] = get_local_size(2);
+            }
+        }
+    )";
+
+    KernelAttributesReqGroupSizeTest(cl_device_id device, cl_context context,
+                                     cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue), dst(nullptr),
+          clGetKernelSuggestedLocalWorkSizeKHR(nullptr),
+          device_max_work_group_size(0)
+    {}
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        if (is_extension_available(device, "cl_khr_suggested_local_work_size"))
+        {
+            cl_platform_id platform = nullptr;
+            error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
+                                    sizeof(platform), &platform, NULL);
+            test_error(error, "clGetDeviceInfo for platform failed");
+
+            clGetKernelSuggestedLocalWorkSizeKHR =
+                (clGetKernelSuggestedLocalWorkSizeKHR_fn)
+                    clGetExtensionFunctionAddressForPlatform(
+                        platform, "clGetKernelSuggestedLocalWorkSizeKHR");
+            test_assert_error(clGetKernelSuggestedLocalWorkSizeKHR != nullptr,
+                              "Couldn't get function pointer for "
+                              "clGetKernelSuggestedLocalWorkSizeKHR");
+        }
+
+        dst = clCreateBuffer(context, CL_MEM_READ_WRITE, 3 * sizeof(cl_int),
+                             nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+
+        cl_uint device_max_dim = 0;
+        error =
+            clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
+                            sizeof(device_max_dim), &device_max_dim, nullptr);
+        test_error(
+            error,
+            "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
+        test_assert_error(
+            device_max_dim >= 3,
+            "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS must be at least 3!");
+
+        device_max_work_item_sizes.resize(device_max_dim);
+        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+                                sizeof(size_t) * device_max_dim,
+                                device_max_work_item_sizes.data(), nullptr);
+
+        error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+                                sizeof(device_max_work_group_size),
+                                &device_max_work_group_size, nullptr);
+        test_error(error,
+                   "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_int Run() override
+    {
+        cl_int error = CL_SUCCESS;
+        struct KernelAttribInfo
+        {
+            cl_int wgs[3];
+            cl_uint min_dim;
+        };
+
+        std::vector<KernelAttribInfo> attribs = { { { 2, 1, 1 }, 1 },
+                                                  { { 2, 3, 1 }, 2 },
+                                                  { { 2, 3, 4 }, 3 } };
+
+        for (auto& attrib : attribs)
+        {
+            const std::string attrib_str =
+                "__attribute__((reqd_work_group_size("
+                + std::to_string(attrib.wgs[0]) + ","
+                + std::to_string(attrib.wgs[1]) + ","
+                + std::to_string(attrib.wgs[2]) + ")))";
+            const std::string source_str = attrib_str + body_str;
+            const char* source = source_str.c_str();
+
+            clProgramWrapper program;
+            clKernelWrapper kernel;
+            error = create_single_kernel_helper(context, &program, &kernel, 1,
+                                                &source, "wg_size");
+            test_error(error, "Unable to create test kernel");
+
+            error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dst);
+            test_error(error, "clSetKernelArg failed");
+
+            for (cl_uint work_dim = attrib.min_dim; work_dim <= 3; work_dim++)
+            {
+                const size_t test_work_group_size =
+                    attrib.wgs[0] * attrib.wgs[1] * attrib.wgs[2];
+                if ((size_t)attrib.wgs[0] > device_max_work_item_sizes[0]
+                    || (size_t)attrib.wgs[1] > device_max_work_item_sizes[1]
+                    || (size_t)attrib.wgs[2] > device_max_work_item_sizes[2]
+                    || test_work_group_size > device_max_work_group_size)
+                {
+                    log_info(
+                        "Skipping test for work_dim = %u: required work group "
+                        "size (%i, %i, %i) (total %zu) exceeds device max "
+                        "work group size (%zu, %zu, %zu) (total %zu)\n",
+                        work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
+                        test_work_group_size, device_max_work_item_sizes[0],
+                        device_max_work_item_sizes[1],
+                        device_max_work_item_sizes[2],
+                        device_max_work_group_size);
+                    continue;
+                }
+
+                const cl_int zero = 0;
+                error = clCommandFillBufferKHR(
+                    command_buffer, nullptr, nullptr, dst, &zero, sizeof(zero),
+                    0, sizeof(attrib.wgs), 0, nullptr, nullptr, nullptr);
+                test_error(error, "clCommandFillBufferKHR failed");
+
+                const size_t global_work_size[3] = { 2 * 32, 3 * 32, 4 * 32 };
+                error = clCommandNDRangeKernelKHR(
+                    command_buffer, nullptr, nullptr, kernel, work_dim, nullptr,
+                    global_work_size, nullptr, 0, nullptr, nullptr, nullptr);
+                test_error(error, "clCommandNDRangeKernelKHR failed");
+
+                error = clFinalizeCommandBufferKHR(command_buffer);
+                test_error(error, "clFinalizeCommandBufferKHR failed");
+
+                error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                                  nullptr, nullptr);
+                test_error(error, "clEnqueueCommandBufferKHR failed");
+
+                cl_int results[3] = { -1, -1, -1 };
+                error =
+                    clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(results),
+                                        results, 0, nullptr, nullptr);
+                test_error(error, "clEnqueueReadBuffer failed");
+
+                // Verify the result
+                if (results[0] != attrib.wgs[0] || results[1] != attrib.wgs[1]
+                    || results[2] != attrib.wgs[2])
+                {
+                    log_error(
+                        "Executed local size mismatch with work_dim = %u: "
+                        "Expected (%d,%d,%d) got (%d,%d,%d)\n",
+                        work_dim, attrib.wgs[0], attrib.wgs[1], attrib.wgs[2],
+                        results[0], results[1], results[2]);
+                    return TEST_FAIL;
+                }
+
+                if (clGetKernelSuggestedLocalWorkSizeKHR != nullptr)
+                {
+                    size_t suggested[3] = { 1, 1, 1 };
+                    error = clGetKernelSuggestedLocalWorkSizeKHR(
+                        queue, kernel, work_dim, nullptr, global_work_size,
+                        suggested);
+                    test_error(error,
+                               "clGetKernelSuggestedLocalWorkSizeKHR failed");
+
+                    if (suggested[0] != (size_t)attrib.wgs[0]
+                        || suggested[1] != (size_t)attrib.wgs[1]
+                        || suggested[2] != (size_t)attrib.wgs[2])
+                    {
+                        log_error(
+                            "Suggested local size mismatch with work_dim = "
+                            "%u: Expected (%d,%d,%d) got (%zu,%zu,%zu)\n",
+                            work_dim, attrib.wgs[0], attrib.wgs[1],
+                            attrib.wgs[2], suggested[0], suggested[1],
+                            suggested[2]);
+                        return TEST_FAIL;
+                    }
+                }
+
+                // create new command buffer
+                command_buffer =
+                    clCreateCommandBufferKHR(1, &queue, nullptr, &error);
+                test_error(error, "clCreateCommandBufferKHR failed");
+            }
+        }
+
+        return CL_SUCCESS;
+    }
+
+    clMemWrapper dst;
+    clGetKernelSuggestedLocalWorkSizeKHR_fn
+        clGetKernelSuggestedLocalWorkSizeKHR;
+
+    size_t device_max_work_group_size;
+    std::vector<size_t> device_max_work_item_sizes;
+};
+
+} // anonymous namespace
+
+REGISTER_TEST(command_null_required_work_group_size)
+{
+    return MakeAndRunTest<KernelAttributesReqGroupSizeTest>(
+        device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp
index 8900e6ff..c1079a4d 100644
--- a/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp
+++ b/test_conformance/extensions/cl_khr_external_memory_ahb/main.cpp
@@ -20,4 +20,4 @@ int main(int argc, const char *argv[])
 {
     return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
                           test_registry::getInstance().definitions(), false, 0);
-}
\ No newline at end of file
+}
diff --git a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
index 5151a668..6220a87e 100644
--- a/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
+++ b/test_conformance/extensions/cl_khr_external_memory_ahb/test_ahb.cpp
@@ -23,9 +23,32 @@
 #include <android/hardware_buffer.h>
 #include "debug_ahb.h"
 
-static bool isAHBUsageReadable(const AHardwareBuffer_UsageFlags usage)
+static bool isAHBUsageReadableHost(AHardwareBuffer_UsageFlags usage)
 {
-    return (AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE & usage) != 0;
+    return (AHARDWAREBUFFER_USAGE_CPU_READ_MASK & usage) != 0;
+}
+
+static bool isAHBUsageWritableHost(AHardwareBuffer_UsageFlags usage)
+{
+    return (AHARDWAREBUFFER_USAGE_CPU_WRITE_MASK & usage) != 0;
+}
+
+static bool isAHBUsageReadableDevice(const AHardwareBuffer_UsageFlags usage)
+{
+    return ((AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE
+             | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER
+             | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA)
+            & usage)
+        != 0;
+}
+
+static cl_ulong getMaxAllocSize(cl_device_id device)
+{
+    cl_ulong ret;
+    cl_int err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+                                 sizeof(cl_ulong), &ret, nullptr);
+    test_error(err, "clGetDeviceInfo failed");
+    return ret;
 }
 
 struct ahb_format_table
@@ -50,6 +73,32 @@ ahb_image_size_table test_sizes[] = {
     { 64, 64 }, { 128, 128 }, { 256, 256 }, { 512, 512 }
 };
 
+uint32_t test_buffer_sizes[] = { 2, 8, 32, 128, 512, 2048, 16384, 65536 };
+
+ahb_usage_table test_buffer_usages[] = {
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
+        | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY
+        | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) },
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
+        | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY
+        | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA) },
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
+        | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
+        | AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) },
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
+        | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
+        | AHARDWAREBUFFER_USAGE_SENSOR_DIRECT_DATA) },
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
+        | AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY) },
+    { static_cast<AHardwareBuffer_UsageFlags>(
+        AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN) }
+};
+
 ahb_usage_table test_usages[] = {
     { static_cast<AHardwareBuffer_UsageFlags>(
         AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN
@@ -96,6 +145,23 @@ static const char *diff_images_kernel_source = {
         })"
 };
 
+static const char *lifetime_kernel_source = {
+    R"(
+            __kernel void increment_buffer(global uchar* buffer)
+            {
+                int tid = get_global_id(0);
+                buffer[tid] ++;
+            }
+
+            __kernel void set_image_color(write_only image2d_t ahb_image, float4 set_color)
+            {
+                int tidX = get_global_id(0);
+                int tidY = get_global_id(1);
+
+                write_imagef(ahb_image, (int2)( tidX, tidY ), set_color);
+            })"
+};
+
 // Checks that the inferred image format is correct
 REGISTER_TEST(images)
 {
@@ -217,8 +283,9 @@ REGISTER_TEST(images_read)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageWritableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -512,8 +579,9 @@ REGISTER_TEST(enqueue_read_image)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageWritableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -690,8 +758,9 @@ REGISTER_TEST(enqueue_copy_image)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageWritableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -993,8 +1062,9 @@ REGISTER_TEST(enqueue_copy_image_to_buffer)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageWritableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -1181,8 +1251,8 @@ REGISTER_TEST(enqueue_copy_buffer_to_image)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -1222,8 +1292,8 @@ REGISTER_TEST(enqueue_copy_buffer_to_image)
                 imageInfo.type = format.clMemObjectType;
                 imageInfo.width = resolution.width;
                 imageInfo.height = resolution.height;
-                imageInfo.rowPitch = resolution.width * resolution.height
-                    * pixelSize; // data is tightly packed in buffer
+                // data is tightly packed in buffer
+                imageInfo.rowPitch = resolution.width * pixelSize;
                 test_assert_error(imageInfo.rowPitch
                                       >= pixelSize * imageInfo.width,
                                   "Row pitch is smaller than width");
@@ -1376,8 +1446,8 @@ REGISTER_TEST(enqueue_write_image)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -1429,8 +1499,8 @@ REGISTER_TEST(enqueue_write_image)
                 imageInfo.type = format.clMemObjectType;
                 imageInfo.width = resolution.width;
                 imageInfo.height = resolution.height;
-                imageInfo.rowPitch = resolution.width * resolution.height
-                    * pixelSize; // Data is tightly packed
+                // Data is tightly packed
+                imageInfo.rowPitch = resolution.width * pixelSize;
                 test_assert_error(imageInfo.rowPitch
                                       >= pixelSize * imageInfo.width,
                                   "Row pitch is smaller than width");
@@ -1568,8 +1638,8 @@ REGISTER_TEST(enqueue_fill_image)
         aHardwareBufferDesc.format = format.aHardwareBufferFormat;
         for (auto usage : test_usages)
         {
-            // Filter out usage flags that are not readable on device
-            if (!isAHBUsageReadable(usage.usageFlags))
+            if (!(isAHBUsageReadableHost(usage.usageFlags)
+                  && isAHBUsageReadableDevice(usage.usageFlags)))
             {
                 continue;
             }
@@ -1620,8 +1690,8 @@ REGISTER_TEST(enqueue_fill_image)
                 imageInfo.type = format.clMemObjectType;
                 imageInfo.width = resolution.width;
                 imageInfo.height = resolution.height;
-                imageInfo.rowPitch = resolution.width * resolution.height
-                    * pixelSize; // Data is tightly packed
+                imageInfo.rowPitch = resolution.width * pixelSize;
+                // Data is tightly packed
                 test_assert_error(imageInfo.rowPitch
                                       >= pixelSize * imageInfo.width,
                                   "Row pitch is smaller than width");
@@ -1857,3 +1927,412 @@ REGISTER_TEST(blob)
 
     return TEST_PASS;
 }
+
+/*
+ * For cl buffer and cl image
+ *  Create a AHB
+ *  Create a mem object from the AHB
+ *  Release the AHB
+ *  Read and write using the mem object
+ *  Verify reads and writes
+ */
+REGISTER_TEST(lifetime_buffer)
+{
+    REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer");
+
+    cl_int err;
+    constexpr cl_uint buffer_size = 4096;
+    std::vector<uint8_t> host_buffer(buffer_size, 1);
+    clMemWrapper imported_buffer;
+
+    {
+        // Check if AHB descriptors for buffers and images are supported
+        AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
+        aHardwareBufferDesc.width = buffer_size;
+        aHardwareBufferDesc.height = 1;
+        aHardwareBufferDesc.layers = 1;
+        aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
+        aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
+            | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN;
+
+        if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
+        {
+            log_unsupported_ahb_format(aHardwareBufferDesc);
+            return TEST_SKIPPED_ITSELF;
+        }
+
+        log_info("Testing buffer lifetime\n");
+
+        AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
+
+        const cl_mem_properties props[] = {
+            CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
+            aHardwareBuffer.get_props(),
+            0,
+        };
+
+        imported_buffer = clCreateBufferWithProperties(
+            context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
+        test_error(err, "Failed to create CL buffer from AHardwareBuffer");
+
+        // Fill AHB buffer
+        void *data_ptr = nullptr;
+        int ahb_result = AHardwareBuffer_lock(
+            aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1, nullptr,
+            &data_ptr);
+        if (ahb_result != 0)
+        {
+            log_error("AHardwareBuffer_lock failed with code %d\n", ahb_result);
+            return TEST_FAIL;
+        }
+
+        memcpy(data_ptr, host_buffer.data(), buffer_size);
+
+        ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
+        if (ahb_result != 0)
+        {
+            log_error("AHardwareBuffer_unlock failed with code %d\n",
+                      ahb_result);
+            return TEST_FAIL;
+        }
+    } // Release test scope reference to AHB
+
+
+    // Verify buffer read by comparing to host buffer
+    std::vector<uint8_t> read_buffer(buffer_size);
+    err = clEnqueueReadBuffer(queue, imported_buffer, true, 0, buffer_size,
+                              read_buffer.data(), 0, nullptr, nullptr);
+    test_error(err, "failed clEnqueueReadBuffer");
+
+    for (size_t i = 0; i < buffer_size; i++)
+    {
+        if (read_buffer[i] != host_buffer[i])
+        {
+            log_error("At position %zu expected value: %u but got value: %u\n",
+                      i, host_buffer[i], read_buffer[i]);
+            return TEST_FAIL;
+        }
+    }
+
+    // Attempt buffer write
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &lifetime_kernel_source,
+                                      "increment_buffer");
+    test_error(err, "kernel creation failed");
+
+    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_buffer);
+    test_error(err, "clSetKernelArg failed");
+
+    size_t gws[1] = { buffer_size };
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, gws, nullptr, 0,
+                                 nullptr, nullptr);
+    test_error(err, "Failed clEnqueueNDRangeKernel");
+
+    // Verify write
+    err = clEnqueueReadBuffer(queue, imported_buffer, true, 0, buffer_size,
+                              read_buffer.data(), 0, nullptr, nullptr);
+    test_error(err, "failed clEnqueueReadBuffer");
+
+    for (size_t i = 0; i < buffer_size; i++)
+    {
+        if (read_buffer[i]
+            != host_buffer[i] + 1) // Kernel incremented each index by 1
+        {
+            log_error("At position %zu expected value: %u but got value: %u\n",
+                      i, host_buffer[i], read_buffer[i]);
+            return TEST_FAIL;
+        }
+    }
+
+    return TEST_PASS;
+}
+
+
+REGISTER_TEST(lifetime_image)
+{
+    REQUIRE_EXTENSION("cl_khr_external_memory_android_hardware_buffer");
+
+    int err;
+    const AHardwareBuffer_Format aHardwareBufferFormat =
+        AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM;
+    const cl_image_format clImageFormat = { CL_RGBA, CL_UNORM_INT8 };
+    const size_t pixel_size = get_pixel_size(&clImageFormat);
+
+    for (auto resolution : test_sizes)
+    {
+        const size_t image_size =
+            resolution.width * resolution.height * pixel_size;
+
+        std::vector<uint8_t> host_image_data(image_size, 1);
+        clMemWrapper imported_image;
+        {
+            // Check if AHB descriptors for buffers and images are supported
+            AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
+            aHardwareBufferDesc.width = resolution.width;
+            aHardwareBufferDesc.height = resolution.height;
+            aHardwareBufferDesc.layers = 1;
+            aHardwareBufferDesc.format = aHardwareBufferFormat;
+            aHardwareBufferDesc.usage = AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN
+                | AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN;
+
+            if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
+            {
+                log_unsupported_ahb_format(aHardwareBufferDesc);
+                continue;
+            }
+
+            log_info("Testing image lifetime\n");
+
+            AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
+
+            const cl_mem_properties props_image[] = {
+                CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
+                aHardwareBuffer.get_props(),
+                0,
+            };
+
+            imported_image = clCreateImageWithProperties(
+                context, props_image, CL_MEM_READ_WRITE, nullptr, nullptr,
+                nullptr, &err);
+            test_error(err, "Failed to create CL image from AHardwareBuffer");
+
+            void *data_ptr = nullptr;
+            int ahb_result = AHardwareBuffer_lock(
+                aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN, -1,
+                nullptr, &data_ptr);
+            if (ahb_result != 0)
+            {
+                log_error("AHardwareBuffer_lock failed with code %d\n",
+                          ahb_result);
+                return TEST_FAIL;
+            }
+
+            memcpy(data_ptr, host_image_data.data(), image_size);
+
+            ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
+            if (ahb_result != 0)
+            {
+                log_error("AHardwareBuffer_unlock failed with code %d\n",
+                          ahb_result);
+                return TEST_FAIL;
+            }
+        } // Release test scope reference to AHB
+
+
+        // Verify image read using host data
+        size_t origin[3] = { 0, 0, 0 };
+        size_t region[3] = { resolution.width, resolution.height, 1 };
+        size_t row_pitch;
+        uint8_t *mapped_image_ptr = static_cast<uint8_t *>(clEnqueueMapImage(
+            queue, imported_image, true, CL_MAP_READ, origin, region,
+            &row_pitch, nullptr, 0, nullptr, nullptr, &err));
+        test_error(err, "clEnqueueMapImage failed");
+
+        for (size_t row = 0; row < resolution.height; ++row)
+        {
+            for (size_t col = 0; col < resolution.width; ++col)
+            {
+                size_t mapped_image_idx = row * row_pitch + col;
+                size_t host_image_idx = row * resolution.width + col;
+                if (mapped_image_ptr[mapped_image_idx]
+                    != host_image_data[host_image_idx])
+                {
+                    log_error(
+                        "At position (%zu, %zu) expected value: %u but got "
+                        "value: %u\n",
+                        row, col, host_image_data[host_image_idx],
+                        mapped_image_ptr[mapped_image_idx]);
+                    return TEST_FAIL;
+                }
+            }
+        }
+
+        err = clEnqueueUnmapMemObject(queue, imported_image, mapped_image_ptr,
+                                      0, nullptr, nullptr);
+        test_error(err, "clEnqueueUnmapMemObject failed");
+
+        err = clFinish(queue);
+        test_error(err, "clFinish failed");
+
+
+        // Attempt image write
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+        err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                          &lifetime_kernel_source,
+                                          "set_image_color");
+        test_error(err, "kernel creation failed");
+
+        err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imported_image);
+        test_error(err, "clSetKernelArg failed");
+
+        cl_float4 color = { { 0.5f, 0.5f, 0.5f, 0.5f } };
+        err = clSetKernelArg(kernel, 1, sizeof(cl_float4), &color);
+        test_error(err, "clSetKernelArg failed");
+
+        std::vector<size_t> gws = { resolution.width, resolution.height };
+        err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, gws.data(),
+                                     nullptr, 0, nullptr, nullptr);
+        test_error(err, "Failed clEnqueueNDRangeKernel");
+
+        err = clFinish(queue);
+        test_error(err, "clFinish failed");
+
+
+        // Verify image write
+        mapped_image_ptr = static_cast<uint8_t *>(clEnqueueMapImage(
+            queue, imported_image, true, CL_MAP_READ, origin, region,
+            &row_pitch, nullptr, 0, nullptr, nullptr, &err));
+        test_error(err, "clEnqueueMapImage failed");
+
+        for (size_t row = 0; row < resolution.height; ++row)
+        {
+            for (size_t col = 0; col < resolution.width; ++col)
+            {
+                size_t mapped_image_idx = row * row_pitch + col;
+                if (128 != mapped_image_ptr[mapped_image_idx])
+                {
+                    log_error(
+                        "At position (%zu, %zu) expected value: %u but got "
+                        "value: %u\n",
+                        row, col, 128, mapped_image_ptr[mapped_image_idx]);
+                    return TEST_FAIL;
+                }
+            }
+        }
+
+        err = clEnqueueUnmapMemObject(queue, imported_image, mapped_image_ptr,
+                                      0, nullptr, nullptr);
+        test_error(err, "clEnqueueUnmapMemObject failed");
+
+        err = clFinish(queue);
+        test_error(err, "clFinish failed");
+    }
+    return TEST_PASS;
+}
+
+
+/* Testing clCreateSubBuffer
+ *  Create AHB
+ *  Write to AHB
+ *  Create CL buffer from AHB
+ *  Create a sub buffer into half of the buffer
+ *  Read & verify sub buffer
+ */
+REGISTER_TEST(sub_buffer)
+{
+    cl_int err;
+    RandomSeed seed(gRandomSeed);
+
+    if (!is_extension_available(
+            device, "cl_khr_external_memory_android_hardware_buffer"))
+    {
+        log_info("cl_khr_external_memory_android_hardware_buffer is not "
+                 "supported on this platform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    AHardwareBuffer_Desc aHardwareBufferDesc = { 0 };
+    aHardwareBufferDesc.format = AHARDWAREBUFFER_FORMAT_BLOB;
+    for (auto usage : test_buffer_usages)
+    {
+        if (!(isAHBUsageReadableHost(usage.usageFlags)
+              && isAHBUsageWritableHost(usage.usageFlags)
+              && isAHBUsageReadableDevice(usage.usageFlags)))
+        {
+            continue;
+        }
+
+        aHardwareBufferDesc.usage = usage.usageFlags;
+        for (uint32_t buffer_size : test_buffer_sizes)
+        {
+            if (buffer_size > getMaxAllocSize(device))
+            {
+                continue;
+            }
+
+            aHardwareBufferDesc.width = buffer_size;
+            aHardwareBufferDesc.height = 1;
+            aHardwareBufferDesc.layers = 1;
+            if (!AHardwareBuffer_isSupported(&aHardwareBufferDesc))
+            {
+                log_unsupported_ahb_format(aHardwareBufferDesc);
+                continue;
+            }
+
+            AHardwareBufferWrapper aHardwareBuffer(&aHardwareBufferDesc);
+
+            log_info("Testing usage: %s, buffer size: %u\n",
+                     ahardwareBufferDecodeUsageFlagsToString(usage.usageFlags)
+                         .c_str(),
+                     buffer_size);
+
+            void *hardware_buffer_data = nullptr;
+            int ahb_result = AHardwareBuffer_lock(
+                aHardwareBuffer, AHARDWAREBUFFER_USAGE_CPU_WRITE_RARELY, -1,
+                nullptr, &hardware_buffer_data);
+            if (ahb_result != 0)
+            {
+                log_error("AHardwareBuffer_lock failed with code %d\n",
+                          ahb_result);
+                return TEST_FAIL;
+            }
+
+            std::vector<uint8_t> host_buffer(buffer_size);
+
+            generate_random_data(ExplicitType::kUnsignedChar, buffer_size, seed,
+                                 host_buffer.data());
+
+            memcpy(hardware_buffer_data, host_buffer.data(), buffer_size);
+
+            ahb_result = AHardwareBuffer_unlock(aHardwareBuffer, nullptr);
+            if (ahb_result != 0)
+            {
+                log_error("AHardwareBuffer_unlock failed with code %d\n",
+                          ahb_result);
+                return TEST_FAIL;
+            }
+
+            cl_mem_properties props[] = {
+                CL_EXTERNAL_MEMORY_HANDLE_ANDROID_HARDWARE_BUFFER_KHR,
+                aHardwareBuffer.get_props(), 0
+            };
+
+            clMemWrapper buffer = clCreateBufferWithProperties(
+                context, props, CL_MEM_READ_WRITE, 0, nullptr, &err);
+            test_error(err, "Failed to create CL buffer from AHardwareBuffer");
+
+            cl_uint sub_buffer_size = buffer_size / 2;
+            cl_buffer_region region = { 0 };
+            region.origin = 0;
+            region.size = sub_buffer_size;
+
+            clMemWrapper sub_buffer =
+                clCreateSubBuffer(buffer, CL_MEM_READ_WRITE,
+                                  CL_BUFFER_CREATE_TYPE_REGION, &region, &err);
+            test_error(err, "clCreateSubBuffer failed");
+
+            std::vector<uint8_t> host_sub_buffer(sub_buffer_size);
+            err = clEnqueueReadBuffer(queue, sub_buffer, true, 0,
+                                      sub_buffer_size, host_sub_buffer.data(),
+                                      0, nullptr, nullptr);
+            test_error(err, "clEnqueueReadBuffer failed");
+
+            for (size_t i = 0; i < sub_buffer_size; ++i)
+            {
+                if (host_buffer[i] != host_sub_buffer[i])
+                {
+                    log_error(
+                        "At position i=%zu expected value %u but got %u\n", i,
+                        host_buffer[i], host_sub_buffer[i]);
+                    return TEST_FAIL;
+                }
+            }
+        }
+    }
+    return TEST_PASS;
+}
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
index 1161e519..34d9034b 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
@@ -3,7 +3,6 @@ set(MODULE_NAME CL_KHR_EXTERNAL_SEMAPHORE)
 set(${MODULE_NAME}_SOURCES
     main.cpp
     test_external_semaphore.cpp
-    test_external_semaphore_sync_fd.cpp
 )
 
 set (CLConform_VULKAN_LIBRARIES_DIR "${VULKAN_LIB_DIR}")
@@ -18,7 +17,6 @@ include_directories (${CLConform_INCLUDE_DIR})
 
 list(APPEND CLConform_LIBRARIES vulkan_wrapper)
 set(CMAKE_COMPILE_WARNING_AS_ERROR OFF)
-set(CMAKE_CXX_FLAGS "-fpermissive")
 
 include_directories("../../common/vulkan_wrapper")
 
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
index 198bf046..1e9fd74a 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
@@ -197,8 +197,11 @@ REGISTER_TEST_VERSION(external_semaphores_queries, Version(1, 2))
     return TEST_PASS;
 }
 
-REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
+cl_int doTestImportExport(cl_device_id device, cl_context contexts[2],
+                          cl_command_queue queues[2])
 {
+    cl_int err = CL_SUCCESS;
+
     REQUIRE_EXTENSION("cl_khr_external_semaphore");
 
     GET_PFN(device, clEnqueueSignalSemaphoresKHR);
@@ -210,7 +213,6 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
     std::vector<cl_external_semaphore_handle_type_khr> import_handle_types;
     std::vector<cl_external_semaphore_handle_type_khr> export_handle_types;
 
-    cl_int err = CL_SUCCESS;
     err = get_device_semaphore_handle_types(
         device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
         import_handle_types);
@@ -237,17 +239,9 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
         export_handle_types.begin(), export_handle_types.end(),
         std::back_inserter(import_export_handle_types));
 
-    cl_context context2 =
-        clCreateContext(NULL, 1, &device, notify_callback, NULL, &err);
-    test_error(err, "Failed to create context2");
-
-    clCommandQueueWrapper queue1 =
-        clCreateCommandQueue(context, device, 0, &err);
-    test_error(err, "Could not create command queue");
-
-    clCommandQueueWrapper queue2 =
-        clCreateCommandQueue(context2, device, 0, &err);
-    test_error(err, "Could not create command queue");
+    cl_context& context2 = contexts[1];
+    cl_command_queue& queue1 = queues[0];
+    cl_command_queue& queue2 = queues[1];
 
     if (import_export_handle_types.empty())
     {
@@ -270,7 +264,7 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
 
         // Signal semaphore on context1
         cl_semaphore_khr exportable_semaphore =
-            clCreateSemaphoreWithPropertiesKHR(context, export_props, &err);
+            clCreateSemaphoreWithPropertiesKHR(contexts[0], export_props, &err);
         test_error(err, "Failed to create exportable semaphore");
 
         err = clEnqueueSignalSemaphoresKHR(queue1, 1, &exportable_semaphore,
@@ -313,12 +307,77 @@ REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
         test_error(err, "Failed to release semaphore");
     }
 
-    err = clReleaseContext(context2);
-    test_error(err, "Failed to release context2");
-
     return TEST_PASS;
 }
 
+REGISTER_TEST_VERSION(external_semaphores_cross_context, Version(1, 2))
+{
+    cl_int err = CL_SUCCESS;
+
+    clContextWrapper context_sec =
+        clCreateContext(NULL, 1, &device, notify_callback, NULL, &err);
+    test_error(err, "Failed to create context2");
+    cl_context contexts[2] = { context, context_sec };
+
+    clCommandQueueWrapper queue0 =
+        clCreateCommandQueue(context, device, 0, &err);
+    test_error(err, "Could not create command queue");
+
+    clCommandQueueWrapper queue1 =
+        clCreateCommandQueue(contexts[1], device, 0, &err);
+    test_error(err, "Could not create command queue");
+    cl_command_queue queues[2] = { queue0, queue1 };
+
+    return doTestImportExport(device, contexts, queues);
+}
+
+REGISTER_TEST_VERSION(external_semaphores_import_export, Version(1, 2))
+{
+    cl_int err = CL_SUCCESS;
+    cl_int total_status = TEST_PASS;
+
+    // test external semaphores with out-of-order queue
+    {
+        cl_command_queue_properties device_props = 0;
+        err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
+                              sizeof(device_props), &device_props, NULL);
+        test_error(err,
+                   "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
+
+        if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0)
+        {
+            // Create ooo queue
+            clCommandQueueWrapper test_queue = clCreateCommandQueue(
+                context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+            test_error(err, "Could not create command queue");
+
+            cl_command_queue queues[2] = { test_queue, test_queue };
+            cl_context contexts[2] = { context, context };
+
+            cl_int status = doTestImportExport(device, contexts, queues);
+            if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF)
+            {
+                total_status = TEST_FAIL;
+            }
+        }
+    }
+
+    // test external semaphore sync fd with in-order harness queue
+    {
+        cl_command_queue queues[2] = { queue, queue };
+        cl_context contexts[2] = { context, context };
+
+        cl_int status = doTestImportExport(device, contexts, queues);
+        if (status != TEST_PASS && status != TEST_SKIPPED_ITSELF)
+        {
+            total_status = TEST_FAIL;
+        }
+    }
+
+    return total_status;
+}
+
+
 // Confirm that a signal followed by a wait will complete successfully
 REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
 {
@@ -366,9 +425,14 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
 
         // Signal semaphore
         clEventWrapper signal_event;
-        err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
-                                           nullptr, 0, nullptr, &signal_event);
-        test_error(err, "Could not signal semaphore");
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            err = clEnqueueSignalSemaphoresKHR(
+                queue, 1, &sema_ext.getCLSemaphore(), nullptr, 0, nullptr,
+                &signal_event);
+            test_error(err, "Could not signal semaphore");
+        }
 
         // Wait semaphore
         clEventWrapper wait_event;
@@ -381,7 +445,11 @@ REGISTER_TEST_VERSION(external_semaphores_simple_1, Version(1, 2))
         test_error(err, "Could not finish queue");
 
         // Ensure all events are completed
-        test_assert_event_complete(signal_event);
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            test_assert_event_complete(signal_event);
+        }
         test_assert_event_complete(wait_event);
     }
 
@@ -405,6 +473,7 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
     // Obtain pointers to semaphore's API
     GET_PFN(device, clEnqueueSignalSemaphoresKHR);
     GET_PFN(device, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(device, clReImportSemaphoreSyncFdKHR);
 
     std::vector<VulkanExternalSemaphoreHandleType>
         vkExternalSemaphoreHandleTypeList =
@@ -448,11 +517,15 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
         err = clEnqueueTask(queue, kernel, 0, nullptr, &task_events[0]);
         test_error(err, "Unable to enqueue task_1");
 
-        // Signal semaphore (dependency on task_1)
-        err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
-                                           nullptr, 1, &task_events[0],
-                                           &signal_events[0]);
-        test_error(err, "Could not signal semaphore");
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            // Signal semaphore (dependency on task_1)
+            err = clEnqueueSignalSemaphoresKHR(
+                queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1,
+                &task_events[0], &signal_events[0]);
+            test_error(err, "Could not signal semaphore");
+        }
 
         // In a loop
         size_t loop;
@@ -473,11 +546,21 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
             err = clWaitForEvents(1, &wait_events[loop - 1]);
             test_error(err, "Unable to wait for wait semaphore to complete");
 
-            // Signal semaphore (dependency on task_loop)
-            err = clEnqueueSignalSemaphoresKHR(
-                queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1,
-                &task_events[loop], &signal_events[loop]);
-            test_error(err, "Could not signal semaphore");
+            if (vkExternalSemaphoreHandleType
+                == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+            {
+                err = clReImportSemaphoreSyncFdKHR(sema_ext.getCLSemaphore(),
+                                                   nullptr, -1);
+                test_error(err, "Could not reimport semaphore sync fd");
+            }
+            else
+            {
+                // Signal semaphore (dependency on task_loop)
+                err = clEnqueueSignalSemaphoresKHR(
+                    queue, 1, &sema_ext.getCLSemaphore(), nullptr, 1,
+                    &task_events[loop], &signal_events[loop]);
+                test_error(err, "Could not signal semaphore");
+            }
         }
 
         // Wait semaphore
@@ -494,7 +577,11 @@ REGISTER_TEST_VERSION(external_semaphores_reuse, Version(1, 2))
         for (loop = 0; loop < loop_count; ++loop)
         {
             test_assert_event_complete(wait_events[loop]);
-            test_assert_event_complete(signal_events[loop]);
+            if (vkExternalSemaphoreHandleType
+                != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+            {
+                test_assert_event_complete(signal_events[loop]);
+            }
             test_assert_event_complete(task_events[loop]);
         }
     }
@@ -536,6 +623,19 @@ static int external_semaphore_cross_queue_helper(cl_device_id device,
     for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
          vkExternalSemaphoreHandleTypeList)
     {
+        if (vkExternalSemaphoreHandleType
+            == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            std::stringstream log_message;
+            log_message
+                << "Skipping semaphore type: \""
+                << vkExternalSemaphoreHandleType
+                << "\"; it cannot be signaled from OpenCL when imported."
+                << std::endl;
+            log_info("%s", log_message.str().c_str());
+            continue;
+        }
+
         log_info_semaphore_type(vkExternalSemaphoreHandleType);
         VulkanSemaphore vkVk2CLSemaphore(vkDevice,
                                          vkExternalSemaphoreHandleType);
@@ -668,10 +768,14 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
 
         // Signal semaphore 1
         clEventWrapper signal_1_event;
-        err = clEnqueueSignalSemaphoresKHR(
-            queue1, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr,
-            &signal_1_event);
-        test_error(err, "Could not signal semaphore");
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            err = clEnqueueSignalSemaphoresKHR(
+                queue1, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr,
+                &signal_1_event);
+            test_error(err, "Could not signal semaphore");
+        }
 
         // Wait semaphore 1
         clEventWrapper wait_1_event;
@@ -682,10 +786,14 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
 
         // Signal semaphore 2
         clEventWrapper signal_2_event;
-        err = clEnqueueSignalSemaphoresKHR(
-            queue2, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr,
-            &signal_2_event);
-        test_error(err, "Could not signal semaphore");
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            err = clEnqueueSignalSemaphoresKHR(
+                queue2, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr,
+                &signal_2_event);
+            test_error(err, "Could not signal semaphore");
+        }
 
         // Wait semaphore 2
         clEventWrapper wait_2_event;
@@ -702,8 +810,12 @@ REGISTER_TEST_VERSION(external_semaphores_cross_queues_io2, Version(1, 2))
         test_error(err, "Could not finish queue");
 
         // Ensure all events are completed
-        test_assert_event_complete(signal_1_event);
-        test_assert_event_complete(signal_2_event);
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            test_assert_event_complete(signal_1_event);
+            test_assert_event_complete(signal_2_event);
+        }
         test_assert_event_complete(wait_1_event);
         test_assert_event_complete(wait_2_event);
     }
@@ -741,6 +853,19 @@ REGISTER_TEST_VERSION(external_semaphores_multi_signal, Version(1, 2))
     for (VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType :
          vkExternalSemaphoreHandleTypeList)
     {
+        if (vkExternalSemaphoreHandleType
+            == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            std::stringstream log_message;
+            log_message
+                << "Skipping semaphore type: \""
+                << vkExternalSemaphoreHandleType
+                << "\"; it cannot be signaled from OpenCL when imported."
+                << std::endl;
+            log_info("%s", log_message.str().c_str());
+            continue;
+        }
+
         log_info_semaphore_type(vkExternalSemaphoreHandleType);
         VulkanSemaphore vkVk2CLSemaphore1(vkDevice,
                                           vkExternalSemaphoreHandleType);
@@ -842,19 +967,23 @@ REGISTER_TEST_VERSION(external_semaphores_multi_wait, Version(1, 2))
             context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
         test_error(err, "Could not create command queue");
 
-        // Signal semaphore 1
         clEventWrapper signal_1_event;
-        err =
-            clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(),
-                                         nullptr, 0, nullptr, &signal_1_event);
-        test_error(err, "Could not signal semaphore");
-
-        // Signal semaphore 2
         clEventWrapper signal_2_event;
-        err =
-            clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_2.getCLSemaphore(),
-                                         nullptr, 0, nullptr, &signal_2_event);
-        test_error(err, "Could not signal semaphore");
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            // Signal semaphore 1
+            err = clEnqueueSignalSemaphoresKHR(
+                queue, 1, &sema_ext_1.getCLSemaphore(), nullptr, 0, nullptr,
+                &signal_1_event);
+            test_error(err, "Could not signal semaphore");
+
+            // Signal semaphore 2
+            err = clEnqueueSignalSemaphoresKHR(
+                queue, 1, &sema_ext_2.getCLSemaphore(), nullptr, 0, nullptr,
+                &signal_2_event);
+            test_error(err, "Could not signal semaphore");
+        }
 
         // Wait semaphore 1 and 2
         clEventWrapper wait_event;
@@ -869,8 +998,12 @@ REGISTER_TEST_VERSION(external_semaphores_multi_wait, Version(1, 2))
         test_error(err, "Could not finish queue");
 
         // Ensure all events are completed
-        test_assert_event_complete(signal_1_event);
-        test_assert_event_complete(signal_2_event);
+        if (vkExternalSemaphoreHandleType
+            != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD)
+        {
+            test_assert_event_complete(signal_1_event);
+            test_assert_event_complete(signal_2_event);
+        }
         test_assert_event_complete(wait_event);
     }
 
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp
deleted file mode 100644
index 2fcf4f3f..00000000
--- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore_sync_fd.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-//
-// Copyright (c) 2024 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "harness/typeWrappers.h"
-#include "harness/extensionHelpers.h"
-#include "harness/errorHelpers.h"
-
-// Test it is possible to export a semaphore to a sync fd and import the same
-// sync fd to a new semaphore
-REGISTER_TEST_VERSION(external_semaphores_import_export_fd, Version(1, 2))
-{
-    cl_int err = CL_SUCCESS;
-
-    if (!is_extension_available(device, "cl_khr_external_semaphore"))
-    {
-        log_info(
-            "cl_khr_external_semaphore is not supported on this platoform. "
-            "Skipping test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    if (!is_extension_available(device, "cl_khr_external_semaphore_sync_fd"))
-    {
-        log_info("cl_khr_external_semaphore_sync_fd is not supported on this "
-                 "platoform. Skipping test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    cl_command_queue_properties device_props = 0;
-    err = clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES,
-                          sizeof(device_props), &device_props, NULL);
-    test_error(err, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
-
-    if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0)
-    {
-        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not "
-                 "supported. Skipping test.\n");
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-
-    // Create ooo queue
-    clCommandQueueWrapper test_queue = clCreateCommandQueue(
-        context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
-    test_error(err, "Could not create command queue");
-
-    // Create semaphore
-    cl_semaphore_properties_khr sema_1_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_HANDLE_SYNC_FD_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
-        0
-    };
-    cl_semaphore_khr sema_1 =
-        clCreateSemaphoreWithPropertiesKHR(context, sema_1_props, &err);
-    test_error(err, "Could not create semaphore");
-
-    // Signal semaphore
-    clEventWrapper signal_event;
-    err = clEnqueueSignalSemaphoresKHR(test_queue, 1, &sema_1, nullptr, 0,
-                                       nullptr, &signal_event);
-    test_error(err, "Could not signal semaphore");
-
-    // Extract sync fd
-    int handle = -1;
-    size_t handle_size;
-    err = clGetSemaphoreHandleForTypeKHR(sema_1, device,
-                                         CL_SEMAPHORE_HANDLE_SYNC_FD_KHR,
-                                         sizeof(handle), &handle, &handle_size);
-    test_error(err, "Could not extract semaphore handle");
-    test_assert_error(sizeof(handle) == handle_size, "Invalid handle size");
-    test_assert_error(handle >= 0, "Invalid handle");
-
-    // Create semaphore from sync fd
-    cl_semaphore_properties_khr sema_2_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        CL_SEMAPHORE_HANDLE_SYNC_FD_KHR,
-        static_cast<cl_semaphore_properties_khr>(handle), 0
-    };
-
-    cl_semaphore_khr sema_2 =
-        clCreateSemaphoreWithPropertiesKHR(context, sema_2_props, &err);
-    test_error(err, "Could not create semaphore");
-
-    // Wait semaphore
-    clEventWrapper wait_event;
-    err = clEnqueueWaitSemaphoresKHR(test_queue, 1, &sema_2, nullptr, 0,
-                                     nullptr, &wait_event);
-    test_error(err, "Could not wait semaphore");
-
-    // Finish
-    err = clFinish(test_queue);
-    test_error(err, "Could not finish queue");
-
-    // Check all events are completed
-    test_assert_event_complete(signal_event);
-    test_assert_event_complete(wait_event);
-
-    // Release semaphore
-    err = clReleaseSemaphoreKHR(sema_1);
-    test_error(err, "Could not release semaphore");
-
-    err = clReleaseSemaphoreKHR(sema_2);
-    test_error(err, "Could not release semaphore");
-    return TEST_PASS;
-}
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp
index 85c8fc7f..8a0de351 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/main.cpp
@@ -17,6 +17,7 @@
 
 int main(int argc, const char *argv[])
 {
-    return runTestHarness(argc, argv, test_registry::getInstance().num_tests(),
-                          test_registry::getInstance().definitions(), false, 0);
+    return runTestHarness(
+        argc, argv, static_cast<int>(test_registry::getInstance().num_tests()),
+        test_registry::getInstance().definitions(), false, 0);
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h
index f8ccb570..31488d9d 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/semaphore_dx_fence_base.h
@@ -21,23 +21,118 @@
 #include "harness/errorHelpers.h"
 #include "directx_wrapper.hpp"
 
-class CLDXSemaphoreWrapper {
-public:
-    CLDXSemaphoreWrapper(cl_device_id device, cl_context context,
-                         ID3D12Device* dx_device)
-        : device(device), context(context), dx_device(dx_device){};
-
-    int createSemaphoreFromFence(ID3D12Fence* fence)
+struct DXFenceTestBase
+{
+    DXFenceTestBase(cl_device_id device, cl_context context,
+                    cl_command_queue queue, cl_int num_elems)
+        : device(device), context(context), queue(queue), num_elems(num_elems)
+    {}
+    virtual ~DXFenceTestBase()
     {
-        cl_int errcode = CL_SUCCESS;
+        if (fence_handle)
+        {
+            CloseHandle(fence_handle);
+            fence_handle = nullptr;
+        }
+        if (fence_wrapper)
+        {
+            delete fence_wrapper;
+            fence_wrapper = nullptr;
+        }
+        if (semaphore)
+        {
+            clReleaseSemaphoreKHR(semaphore);
+            semaphore = nullptr;
+        }
+    };
 
-        GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
+    virtual int SetUp()
+    {
+        REQUIRE_EXTENSION("cl_khr_external_semaphore");
+        REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
 
-        const HRESULT hr = dx_device->CreateSharedHandle(
-            fence, nullptr, GENERIC_ALL, nullptr, &fence_handle);
-        test_error(FAILED(hr), "Failed to get shared handle from D3D12 fence");
+        // Obtain pointers to semaphore's API
+        GET_FUNCTION_EXTENSION_ADDRESS(device,
+                                       clCreateSemaphoreWithPropertiesKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clReleaseSemaphoreKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueSignalSemaphoresKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueWaitSemaphoresKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreHandleForTypeKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clRetainSemaphoreKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreInfoKHR);
 
-        cl_semaphore_properties_khr sem_props[] = {
+        test_error(
+            !is_import_handle_available(CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
+            "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
+            "supported import types");
+
+        // Import D3D12 fence into OpenCL
+        fence_wrapper = new DirectXFenceWrapper(dx_wrapper.getDXDevice());
+        semaphore = createSemaphoreFromFence(fence_wrapper->get());
+        test_assert_error(!!semaphore, "Could not create semaphore");
+
+        return TEST_PASS;
+    }
+
+    virtual cl_int Run() = 0;
+
+protected:
+    int errcode = CL_SUCCESS;
+
+    cl_device_id device = nullptr;
+    cl_context context = nullptr;
+    cl_command_queue queue = nullptr;
+    cl_int num_elems = 0;
+    DirectXWrapper dx_wrapper;
+
+    cl_semaphore_payload_khr semaphore_payload = 1;
+    cl_semaphore_khr semaphore = nullptr;
+    HANDLE fence_handle = nullptr;
+    DirectXFenceWrapper *fence_wrapper = nullptr;
+
+    clCreateSemaphoreWithPropertiesKHR_fn clCreateSemaphoreWithPropertiesKHR =
+        nullptr;
+    clEnqueueSignalSemaphoresKHR_fn clEnqueueSignalSemaphoresKHR = nullptr;
+    clEnqueueWaitSemaphoresKHR_fn clEnqueueWaitSemaphoresKHR = nullptr;
+    clReleaseSemaphoreKHR_fn clReleaseSemaphoreKHR = nullptr;
+    clGetSemaphoreInfoKHR_fn clGetSemaphoreInfoKHR = nullptr;
+    clRetainSemaphoreKHR_fn clRetainSemaphoreKHR = nullptr;
+    clGetSemaphoreHandleForTypeKHR_fn clGetSemaphoreHandleForTypeKHR = nullptr;
+
+    [[nodiscard]] bool is_import_handle_available(
+        const cl_external_memory_handle_type_khr handle_type)
+    {
+        size_t import_types_size = 0;
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
+                            0, nullptr, &import_types_size);
+        if (errcode != CL_SUCCESS)
+        {
+            log_error("Could not query import semaphore handle types");
+            return false;
+        }
+        std::vector<cl_external_semaphore_handle_type_khr> import_types(
+            import_types_size / sizeof(cl_external_semaphore_handle_type_khr));
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
+                            import_types_size, import_types.data(), nullptr);
+        if (errcode != CL_SUCCESS)
+        {
+            log_error("Could not query import semaphore handle types");
+            return false;
+        }
+
+        return std::find(import_types.begin(), import_types.end(), handle_type)
+            != import_types.end();
+    }
+
+    cl_semaphore_khr createSemaphoreFromFence(ID3D12Fence *src_fence)
+    {
+        const HRESULT hr = dx_wrapper.getDXDevice()->CreateSharedHandle(
+            src_fence, nullptr, GENERIC_ALL, nullptr, &fence_handle);
+        if (FAILED(hr)) return nullptr;
+
+        const cl_semaphore_properties_khr sem_props[] = {
             static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
             static_cast<cl_semaphore_properties_khr>(
                 CL_SEMAPHORE_TYPE_BINARY_KHR),
@@ -45,73 +140,30 @@ public:
                 CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
             reinterpret_cast<cl_semaphore_properties_khr>(fence_handle), 0
         };
-        semaphore =
+        cl_semaphore_khr tmp_semaphore =
             clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
-        test_error(errcode, "Could not create semaphore");
+        if (errcode != CL_SUCCESS) return nullptr;
 
-        return CL_SUCCESS;
-    }
-
-    ~CLDXSemaphoreWrapper()
-    {
-        releaseSemaphore();
-        if (fence_handle)
-        {
-            CloseHandle(fence_handle);
-        }
-    };
-
-    const cl_semaphore_khr* operator&() const { return &semaphore; };
-    cl_semaphore_khr operator*() const { return semaphore; };
-
-    HANDLE getHandle() const { return fence_handle; };
-
-private:
-    cl_semaphore_khr semaphore;
-    ComPtr<ID3D12Fence> fence;
-    HANDLE fence_handle;
-    cl_device_id device;
-    cl_context context;
-    ComPtr<ID3D12Device> dx_device;
-
-    int releaseSemaphore() const
-    {
-        GET_PFN(device, clReleaseSemaphoreKHR);
-
-        if (semaphore)
-        {
-            clReleaseSemaphoreKHR(semaphore);
-        }
-
-        return CL_SUCCESS;
+        return tmp_semaphore;
     }
 };
 
-static bool
-is_import_handle_available(cl_device_id device,
-                           const cl_external_memory_handle_type_khr handle_type)
+template <class T>
+int MakeAndRunTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue, cl_int nelems)
 {
-    int errcode = CL_SUCCESS;
-    size_t import_types_size = 0;
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, 0,
-                        nullptr, &import_types_size);
-    if (errcode != CL_SUCCESS)
+    cl_int status = TEST_PASS;
+    try
     {
-        log_error("Could not query import semaphore handle types");
-        return false;
-    }
-    std::vector<cl_external_semaphore_handle_type_khr> import_types(
-        import_types_size / sizeof(cl_external_semaphore_handle_type_khr));
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
-                        import_types_size, import_types.data(), nullptr);
-    if (errcode != CL_SUCCESS)
+        auto test_fixture = T(device, context, queue, nelems);
+        status = test_fixture.SetUp();
+        if (status != TEST_PASS) return status;
+        status = test_fixture.Run();
+    } catch (const std::runtime_error &e)
     {
-        log_error("Could not query import semaphore handle types");
-        return false;
+        log_error("%s", e.what());
+        return TEST_FAIL;
     }
 
-    return std::find(import_types.begin(), import_types.end(), handle_type)
-        != import_types.end();
+    return status;
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp
index 569fb204..db303bf6 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence.cpp
@@ -16,309 +16,279 @@
 
 #include "semaphore_dx_fence_base.h"
 
+struct SignalWait final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    cl_int Run() override
+    {
+        log_info("Calling clEnqueueSignalSemaphoresKHR\n");
+        clEventWrapper signal_event;
+        errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore,
+                                               &semaphore_payload, 0, nullptr,
+                                               &signal_event);
+        test_error(errcode, "Failed to signal semaphore");
+
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(
+            queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
+        test_error(errcode, "Failed to wait semaphore");
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        // Verify that the events completed.
+        test_assert_event_complete(signal_event);
+        test_assert_event_complete(wait_event);
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a signal followed by a wait in OpenCL will complete successfully
 REGISTER_TEST(test_external_semaphores_signal_wait)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    clEventWrapper signal_event;
-    errcode = clEnqueueSignalSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
-    test_error(errcode, "Failed to signal semaphore");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
-    test_error(errcode, "Failed to wait semaphore");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    // Verify that the events completed.
-    test_assert_event_complete(signal_event);
-    test_assert_event_complete(wait_event);
-
-    return TEST_PASS;
+    return MakeAndRunTest<SignalWait>(device, context, queue, num_elements);
 }
 
+struct SignalDXCPU final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    cl_int Run() override
+    {
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(
+            queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
+        test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
+
+        log_info("Calling d3d12_fence->Signal()\n");
+        const HRESULT hr = fence_wrapper->get()->Signal(semaphore_payload);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence");
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(wait_event);
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a wait in OpenCL followed by a CPU signal in DX12 will complete
 // successfully
 REGISTER_TEST(test_external_semaphores_signal_dx_cpu)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
-    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
-
-    log_info("Calling d3d12_fence->Signal()\n");
-    const HRESULT hr = (*fence)->Signal(semaphore_payload);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(wait_event);
-
-    return TEST_PASS;
+    return MakeAndRunTest<SignalDXCPU>(device, context, queue, num_elements);
 }
 
+struct SignalDXGPU final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    cl_int Run() override
+    {
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(
+            queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
+        test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
+
+        log_info("Calling d3d12_command_queue->Signal()\n");
+        const HRESULT hr = dx_wrapper.getDXCommandQueue()->Signal(
+            fence_wrapper->get(), semaphore_payload);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence");
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(wait_event);
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a wait in OpenCL followed by a GPU signal in DX12 will complete
 // successfully
 REGISTER_TEST(test_external_semaphores_signal_dx_gpu)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
-    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
-
-    log_info("Calling d3d12_command_queue->Signal()\n");
-    const HRESULT hr =
-        dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(wait_event);
-
-    return TEST_PASS;
+    return MakeAndRunTest<SignalDXGPU>(device, context, queue, num_elements);
 }
 
+struct CLDXInterlock final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    cl_int Run() override
+    {
+        log_info("Calling d3d12_command_queue->Wait(1)\n");
+        HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper->get(),
+                                                          semaphore_payload);
+        test_error(FAILED(hr), "Failed to wait on D3D12 fence");
+
+        log_info("Calling d3d12_command_queue->Signal(2)\n");
+        hr = dx_wrapper.getDXCommandQueue()->Signal(fence_wrapper->get(),
+                                                    semaphore_payload + 1);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence");
+
+        log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n");
+        clEventWrapper signal_event;
+        errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore,
+                                               &semaphore_payload, 0, nullptr,
+                                               &signal_event);
+        test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR");
+
+        log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n");
+        semaphore_payload += 1;
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(
+            queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
+        test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(wait_event);
+        test_assert_event_complete(signal_event);
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that interlocking waits between OpenCL and DX12 will complete
 // successfully
 REGISTER_TEST(test_external_semaphores_cl_dx_interlock)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling d3d12_command_queue->Wait(1)\n");
-    cl_semaphore_payload_khr semaphore_payload = 1;
-    HRESULT hr =
-        dx_wrapper.getDXCommandQueue()->Wait(*fence, semaphore_payload);
-    test_error(FAILED(hr), "Failed to wait on D3D12 fence");
-
-    log_info("Calling d3d12_command_queue->Signal(2)\n");
-    hr = dx_wrapper.getDXCommandQueue()->Signal(*fence, semaphore_payload + 1);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence");
-
-    log_info("Calling clEnqueueSignalSemaphoresKHR(1)\n");
-    clEventWrapper signal_event;
-    errcode = clEnqueueSignalSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
-    test_error(errcode, "Failed to call clEnqueueSignalSemaphoresKHR");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR(2)\n");
-    semaphore_payload += 1;
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
-    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(wait_event);
-    test_assert_event_complete(signal_event);
-
-    return TEST_PASS;
+    return MakeAndRunTest<CLDXInterlock>(device, context, queue, num_elements);
 }
 
+struct MultipleWaitSignal final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    ~MultipleWaitSignal() override
+    {
+        if (fence_handle_2)
+        {
+            CloseHandle(fence_handle_2);
+            fence_handle_2 = nullptr;
+        }
+        if (fence_wrapper_2)
+        {
+            delete fence_wrapper_2;
+            fence_wrapper_2 = nullptr;
+        }
+        if (semaphore_2)
+        {
+            clReleaseSemaphoreKHR(semaphore_2);
+            semaphore_2 = nullptr;
+        }
+        DXFenceTestBase::~DXFenceTestBase();
+    };
+
+    int SetUp() override
+    {
+        DXFenceTestBase::SetUp();
+        fence_wrapper_2 = new DirectXFenceWrapper(dx_wrapper.getDXDevice());
+        semaphore_2 = createSemaphoreFromFence(fence_wrapper_2->get());
+        test_assert_error(!!semaphore_2, "Could not create semaphore");
+
+        return TEST_PASS;
+    }
+
+    cl_int Run() override
+    {
+        const cl_semaphore_khr semaphore_list[] = { semaphore, semaphore_2 };
+        cl_semaphore_payload_khr semaphore_payload_list[] = {
+            semaphore_payload, semaphore_payload + 1
+        };
+
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(queue, 2, semaphore_list,
+                                             semaphore_payload_list, 0, nullptr,
+                                             &wait_event);
+        test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
+
+        log_info("Calling d3d12_command_queue->Signal()\n");
+        HRESULT hr = dx_wrapper.getDXCommandQueue()->Signal(
+            fence_wrapper_2->get(), semaphore_payload + 1);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence 2");
+        hr = dx_wrapper.getDXCommandQueue()->Signal(fence_wrapper->get(),
+                                                    semaphore_payload);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence 1");
+
+        log_info(
+            "Calling d3d12_command_queue->Wait() with different payloads\n");
+        hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper->get(),
+                                                  semaphore_payload + 3);
+        test_error(FAILED(hr), "Failed to wait on D3D12 fence 1");
+        hr = dx_wrapper.getDXCommandQueue()->Wait(fence_wrapper_2->get(),
+                                                  semaphore_payload + 2);
+        test_error(FAILED(hr), "Failed to wait on D3D12 fence 2");
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(wait_event);
+
+        semaphore_payload_list[0] = semaphore_payload + 3;
+        semaphore_payload_list[1] = semaphore_payload + 2;
+
+        log_info("Calling clEnqueueSignalSemaphoresKHR\n");
+        clEventWrapper signal_event;
+        errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list,
+                                               semaphore_payload_list, 0,
+                                               nullptr, &signal_event);
+        test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR");
+
+        // Wait until the GPU has completed commands up to this fence point.
+        log_info("Waiting for D3D12 command queue completion\n");
+        if (fence_wrapper->get()->GetCompletedValue()
+            < semaphore_payload_list[0])
+        {
+            const HANDLE event_handle =
+                CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
+            hr = fence_wrapper->get()->SetEventOnCompletion(
+                semaphore_payload_list[0], event_handle);
+            test_error(FAILED(hr),
+                       "Failed to set D3D12 fence 1 event on completion");
+            WaitForSingleObject(event_handle, INFINITE);
+            CloseHandle(event_handle);
+        }
+        if (fence_wrapper_2->get()->GetCompletedValue()
+            < semaphore_payload_list[1])
+        {
+            const HANDLE event_handle =
+                CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
+            hr = fence_wrapper_2->get()->SetEventOnCompletion(
+                semaphore_payload_list[1], event_handle);
+            test_error(FAILED(hr),
+                       "Failed to set D3D12 fence 2 event on completion");
+            WaitForSingleObject(event_handle, INFINITE);
+            CloseHandle(event_handle);
+        }
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(signal_event);
+
+        return TEST_PASS;
+    }
+
+protected:
+    cl_semaphore_khr semaphore_2 = nullptr;
+    HANDLE fence_handle_2 = nullptr;
+    DirectXFenceWrapper *fence_wrapper_2 = nullptr;
+};
+
 // Confirm that multiple waits in OpenCL followed by signals in DX12 and waits
 // in DX12 followed by signals in OpenCL complete successfully
 REGISTER_TEST(test_external_semaphores_multiple_wait_signal)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence_1(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore_1(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore_1.createSemaphoreFromFence(*fence_1),
-               "Could not create semaphore");
-
-    const DirectXFenceWrapper fence_2(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore_2(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore_2.createSemaphoreFromFence(*fence_2),
-               "Could not create semaphore");
-
-    const cl_semaphore_khr semaphore_list[] = { *semaphore_1, *semaphore_2 };
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    cl_semaphore_payload_khr semaphore_payload_list[] = {
-        semaphore_payload, semaphore_payload + 1
-    };
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(queue, 2, semaphore_list,
-                                         semaphore_payload_list, 0, nullptr,
-                                         &wait_event);
-    test_error(errcode, "Failed to call clEnqueueWaitSemaphoresKHR");
-
-    log_info("Calling d3d12_command_queue->Signal()\n");
-    HRESULT hr =
-        dx_wrapper.getDXCommandQueue()->Signal(*fence_2, semaphore_payload + 1);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence 2");
-    hr = dx_wrapper.getDXCommandQueue()->Signal(*fence_1, semaphore_payload);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence 1");
-
-    log_info("Calling d3d12_command_queue->Wait() with different payloads\n");
-    hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_1, semaphore_payload + 3);
-    test_error(FAILED(hr), "Failed to wait on D3D12 fence 1");
-    hr = dx_wrapper.getDXCommandQueue()->Wait(*fence_2, semaphore_payload + 2);
-    test_error(FAILED(hr), "Failed to wait on D3D12 fence 2");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(wait_event);
-
-    semaphore_payload_list[0] = semaphore_payload + 3;
-    semaphore_payload_list[1] = semaphore_payload + 2;
-
-    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
-    clEventWrapper signal_event;
-    errcode = clEnqueueSignalSemaphoresKHR(queue, 2, semaphore_list,
-                                           semaphore_payload_list, 0, nullptr,
-                                           &signal_event);
-    test_error(errcode, "Could not call clEnqueueSignalSemaphoresKHR");
-
-    // Wait until the GPU has completed commands up to this fence point.
-    log_info("Waiting for D3D12 command queue completion\n");
-    if ((*fence_1)->GetCompletedValue() < semaphore_payload_list[0])
-    {
-        const HANDLE event_handle =
-            CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
-        hr = (*fence_1)->SetEventOnCompletion(semaphore_payload_list[0],
-                                              event_handle);
-        test_error(FAILED(hr),
-                   "Failed to set D3D12 fence 1 event on completion");
-        WaitForSingleObject(event_handle, INFINITE);
-        CloseHandle(event_handle);
-    }
-    if ((*fence_2)->GetCompletedValue() < semaphore_payload_list[1])
-    {
-        const HANDLE event_handle =
-            CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
-        hr = (*fence_2)->SetEventOnCompletion(semaphore_payload_list[1],
-                                              event_handle);
-        test_error(FAILED(hr),
-                   "Failed to set D3D12 fence 2 event on completion");
-        WaitForSingleObject(event_handle, INFINITE);
-        CloseHandle(event_handle);
-    }
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(signal_event);
-
-    return TEST_PASS;
+    return MakeAndRunTest<MultipleWaitSignal>(device, context, queue,
+                                              num_elements);
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp
index c54cf61b..ce7db56f 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_export.cpp
@@ -16,205 +16,205 @@
 
 #include "semaphore_dx_fence_base.h"
 
+struct ExportDXSignal final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    int Run() override
+    {
+        size_t export_types_size = 0;
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
+                            0, nullptr, &export_types_size);
+        test_error(errcode, "Could not query export semaphore handle types");
+        std::vector<cl_external_semaphore_handle_type_khr> export_types(
+            export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
+                            export_types_size, export_types.data(), nullptr);
+        test_error(errcode, "Could not query export semaphore handle types");
+
+        if (std::find(export_types.begin(), export_types.end(),
+                      CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
+            == export_types.end())
+        {
+            log_info(
+                "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
+                "the supported export types\n");
+            return TEST_FAIL;
+        }
+
+        constexpr cl_semaphore_properties_khr sem_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
+            0
+        };
+        cl_semaphore_khr exportable_semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
+        test_error(errcode, "Could not create semaphore");
+
+        cl_bool is_exportable = CL_FALSE;
+        errcode = clGetSemaphoreInfoKHR(
+            exportable_semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
+            sizeof(is_exportable), &is_exportable, nullptr);
+        test_error(errcode, "Could not get semaphore info");
+        test_error(!is_exportable, "Semaphore is not exportable");
+
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        clEventWrapper wait_event;
+        errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &exportable_semaphore,
+                                             &semaphore_payload, 0, nullptr,
+                                             &wait_event);
+        test_error(errcode, "Failed to wait semaphore");
+
+        HANDLE semaphore_handle = nullptr;
+        errcode = clGetSemaphoreHandleForTypeKHR(
+            exportable_semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
+            sizeof(semaphore_handle), &semaphore_handle, nullptr);
+        test_error(errcode, "Could not get semaphore handle");
+
+        ID3D12Fence *exported_fence = nullptr;
+        errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(
+            semaphore_handle, IID_PPV_ARGS(&exported_fence));
+        test_error(errcode, "Could not open semaphore handle");
+
+        log_info("Calling fence->Signal()\n");
+        const HRESULT hr = exported_fence->Signal(semaphore_payload);
+        test_error(FAILED(hr), "Failed to signal D3D12 fence");
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(wait_event);
+
+        // Release resources
+        CloseHandle(semaphore_handle);
+        test_error(clReleaseSemaphoreKHR(exportable_semaphore),
+                   "Could not release semaphore");
+        exported_fence->Release();
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a wait followed by a signal in DirectX 12 using an exported
 // semaphore will complete successfully
 REGISTER_TEST(test_external_semaphores_export_dx_signal)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-    GET_PFN(device, clGetSemaphoreInfoKHR);
-    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
-
-    size_t export_types_size = 0;
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
-                        nullptr, &export_types_size);
-    test_error(errcode, "Could not query export semaphore handle types");
-    std::vector<cl_external_semaphore_handle_type_khr> export_types(
-        export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
-                        export_types_size, export_types.data(), nullptr);
-    test_error(errcode, "Could not query export semaphore handle types");
-
-    if (std::find(export_types.begin(), export_types.end(),
-                  CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
-        == export_types.end())
-    {
-        log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
-                 "the supported export types\n");
-        return TEST_FAIL;
-    }
-
-    constexpr cl_semaphore_properties_khr sem_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
-        0
-    };
-    cl_semaphore_khr semaphore =
-        clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
-    test_error(errcode, "Could not create semaphore");
-
-    cl_bool is_exportable = CL_FALSE;
-    errcode =
-        clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
-                              sizeof(is_exportable), &is_exportable, nullptr);
-    test_error(errcode, "Could not get semaphore info");
-    test_error(!is_exportable, "Semaphore is not exportable");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    clEventWrapper wait_event;
-    errcode = clEnqueueWaitSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &wait_event);
-    test_error(errcode, "Failed to wait semaphore");
-
-    HANDLE semaphore_handle = nullptr;
-    errcode = clGetSemaphoreHandleForTypeKHR(
-        semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
-        sizeof(semaphore_handle), &semaphore_handle, nullptr);
-    test_error(errcode, "Could not get semaphore handle");
-
-    ID3D12Fence *fence = nullptr;
-    errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(semaphore_handle,
-                                                         IID_PPV_ARGS(&fence));
-    test_error(errcode, "Could not open semaphore handle");
-
-    log_info("Calling fence->Signal()\n");
-    const HRESULT hr = fence->Signal(semaphore_payload);
-    test_error(FAILED(hr), "Failed to signal D3D12 fence");
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(wait_event);
-
-    // Release resources
-    CloseHandle(semaphore_handle);
-    test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
-    fence->Release();
-
-    return TEST_PASS;
+    return MakeAndRunTest<ExportDXSignal>(device, context, queue, num_elements);
 }
 
+struct ExportDXWait final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    int Run() override
+    {
+        size_t export_types_size = 0;
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
+                            0, nullptr, &export_types_size);
+        test_error(errcode, "Could not query export semaphore handle types");
+        std::vector<cl_external_semaphore_handle_type_khr> export_types(
+            export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
+        errcode =
+            clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
+                            export_types_size, export_types.data(), nullptr);
+        test_error(errcode, "Could not query export semaphore handle types");
+
+        if (std::find(export_types.begin(), export_types.end(),
+                      CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
+            == export_types.end())
+        {
+            log_info(
+                "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
+                "the supported export types\n");
+            return TEST_FAIL;
+        }
+
+        constexpr cl_semaphore_properties_khr sem_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
+            0
+        };
+        cl_semaphore_khr exportable_semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
+        test_error(errcode, "Could not create semaphore");
+
+        cl_bool is_exportable = CL_FALSE;
+        errcode = clGetSemaphoreInfoKHR(
+            exportable_semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
+            sizeof(is_exportable), &is_exportable, nullptr);
+        test_error(errcode, "Could not get semaphore info");
+        test_error(!is_exportable, "Semaphore is not exportable");
+
+        log_info("Calling clEnqueueSignalSemaphoresKHR\n");
+        constexpr cl_semaphore_payload_khr semaphore_payload = 1;
+        clEventWrapper signal_event;
+        errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &exportable_semaphore,
+                                               &semaphore_payload, 0, nullptr,
+                                               &signal_event);
+        test_error(errcode, "Failed to signal semaphore");
+
+        HANDLE semaphore_handle = nullptr;
+        errcode = clGetSemaphoreHandleForTypeKHR(
+            exportable_semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
+            sizeof(semaphore_handle), &semaphore_handle, nullptr);
+        test_error(errcode, "Could not get semaphore handle");
+
+        ID3D12Fence *exported_fence = nullptr;
+        errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(
+            semaphore_handle, IID_PPV_ARGS(&exported_fence));
+        test_error(errcode, "Could not open semaphore handle");
+
+        log_info("Calling dx_wrapper.get_d3d12_command_queue()->Wait()\n");
+        HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(exported_fence,
+                                                          semaphore_payload);
+        test_error(FAILED(hr), "Failed to wait on D3D12 fence");
+
+        log_info("Calling WaitForSingleObject\n");
+        if (exported_fence->GetCompletedValue() < semaphore_payload)
+        {
+            const HANDLE event =
+                CreateEventEx(nullptr, nullptr, false, EVENT_ALL_ACCESS);
+            hr = exported_fence->SetEventOnCompletion(semaphore_payload, event);
+            test_error(FAILED(hr), "Failed to set event on completion");
+            WaitForSingleObject(event, INFINITE);
+            CloseHandle(event);
+        }
+
+        errcode = clFinish(queue);
+        test_error(errcode, "Could not finish queue");
+
+        test_assert_event_complete(signal_event);
+
+        // Release resources
+        CloseHandle(semaphore_handle);
+        test_error(clReleaseSemaphoreKHR(exportable_semaphore),
+                   "Could not release semaphore");
+        exported_fence->Release();
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a signal in OpenCL followed by a wait in DirectX 12 using an
 // exported semaphore will complete successfully
 REGISTER_TEST(test_external_semaphores_export_dx_wait)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-    GET_PFN(device, clGetSemaphoreInfoKHR);
-    GET_PFN(device, clGetSemaphoreHandleForTypeKHR);
-
-    size_t export_types_size = 0;
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, 0,
-                        nullptr, &export_types_size);
-    test_error(errcode, "Could not query export semaphore handle types");
-    std::vector<cl_external_semaphore_handle_type_khr> export_types(
-        export_types_size / sizeof(cl_external_semaphore_handle_type_khr));
-    errcode =
-        clGetDeviceInfo(device, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR,
-                        export_types_size, export_types.data(), nullptr);
-    test_error(errcode, "Could not query export semaphore handle types");
-
-    if (std::find(export_types.begin(), export_types.end(),
-                  CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR)
-        == export_types.end())
-    {
-        log_info("Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between "
-                 "the supported export types\n");
-        return TEST_FAIL;
-    }
-
-    constexpr cl_semaphore_properties_khr sem_props[] = {
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
-        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-        static_cast<cl_semaphore_properties_khr>(
-            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
-        0
-    };
-    cl_semaphore_khr semaphore =
-        clCreateSemaphoreWithPropertiesKHR(context, sem_props, &errcode);
-    test_error(errcode, "Could not create semaphore");
-
-    cl_bool is_exportable = CL_FALSE;
-    errcode =
-        clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_EXPORTABLE_KHR,
-                              sizeof(is_exportable), &is_exportable, nullptr);
-    test_error(errcode, "Could not get semaphore info");
-    test_error(!is_exportable, "Semaphore is not exportable");
-
-    log_info("Calling clEnqueueSignalSemaphoresKHR\n");
-    constexpr cl_semaphore_payload_khr semaphore_payload = 1;
-    clEventWrapper signal_event;
-    errcode = clEnqueueSignalSemaphoresKHR(
-        queue, 1, &semaphore, &semaphore_payload, 0, nullptr, &signal_event);
-    test_error(errcode, "Failed to signal semaphore");
-
-    HANDLE semaphore_handle = nullptr;
-    errcode = clGetSemaphoreHandleForTypeKHR(
-        semaphore, device, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR,
-        sizeof(semaphore_handle), &semaphore_handle, nullptr);
-    test_error(errcode, "Could not get semaphore handle");
-
-    ID3D12Fence *fence = nullptr;
-    errcode = dx_wrapper.getDXDevice()->OpenSharedHandle(semaphore_handle,
-                                                         IID_PPV_ARGS(&fence));
-    test_error(errcode, "Could not open semaphore handle");
-
-    log_info("Calling dx_wrapper.get_d3d12_command_queue()->Wait()\n");
-    HRESULT hr = dx_wrapper.getDXCommandQueue()->Wait(fence, semaphore_payload);
-    test_error(FAILED(hr), "Failed to wait on D3D12 fence");
-
-    log_info("Calling WaitForSingleObject\n");
-    if (fence->GetCompletedValue() < semaphore_payload)
-    {
-        const HANDLE event =
-            CreateEventEx(nullptr, false, false, EVENT_ALL_ACCESS);
-        hr = fence->SetEventOnCompletion(semaphore_payload, event);
-        test_error(FAILED(hr), "Failed to set event on completion");
-        WaitForSingleObject(event, INFINITE);
-        CloseHandle(event);
-    }
-
-    errcode = clFinish(queue);
-    test_error(errcode, "Could not finish queue");
-
-    test_assert_event_complete(signal_event);
-
-    // Release resources
-    CloseHandle(semaphore_handle);
-    test_error(clReleaseSemaphoreKHR(semaphore), "Could not release semaphore");
-    fence->Release();
-
-    return TEST_PASS;
+    return MakeAndRunTest<ExportDXWait>(device, context, queue, num_elements);
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp
index 6c032c56..398eb2b1 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_negative_wait_signal.cpp
@@ -16,74 +16,52 @@
 
 #include "semaphore_dx_fence_base.h"
 
+struct DXFenceNegativeWait final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    int Run() override
+    {
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
+                                             nullptr, nullptr);
+        test_assert_error(
+            errcode == CL_INVALID_VALUE,
+            "Unexpected error code returned from clEnqueueWaitSemaphores");
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a wait without a semaphore payload list will return
 // CL_INVALID_VALUE
 REGISTER_TEST(test_external_semaphores_dx_fence_negative_wait)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueWaitSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    errcode = clEnqueueWaitSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
-                                         nullptr, nullptr);
-    test_assert_error(
-        errcode == CL_INVALID_VALUE,
-        "Unexpected error code returned from clEnqueueWaitSemaphores");
-
-    return TEST_PASS;
+    return MakeAndRunTest<DXFenceNegativeWait>(device, context, queue,
+                                               num_elements);
 }
 
+struct DXFenceNegativeSignal final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    int Run() override
+    {
+        log_info("Calling clEnqueueWaitSemaphoresKHR\n");
+        errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
+                                               nullptr, nullptr);
+        test_assert_error(
+            errcode == CL_INVALID_VALUE,
+            "Unexpected error code returned from clEnqueueSignalSemaphores");
+
+        return TEST_PASS;
+    }
+};
+
 // Confirm that a signal without a semaphore payload list will return
 // CL_INVALID_VALUE
 REGISTER_TEST(test_external_semaphores_dx_fence_negative_signal)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clEnqueueSignalSemaphoresKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    log_info("Calling clEnqueueWaitSemaphoresKHR\n");
-    errcode = clEnqueueSignalSemaphoresKHR(queue, 1, &semaphore, nullptr, 0,
-                                           nullptr, nullptr);
-    test_assert_error(
-        errcode == CL_INVALID_VALUE,
-        "Unexpected error code returned from clEnqueueSignalSemaphores");
-
-    return TEST_PASS;
+    return MakeAndRunTest<DXFenceNegativeSignal>(device, context, queue,
+                                                 num_elements);
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp
index 03aa2b15..87cb0caa 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore_dx_fence/test_external_semaphore_dx_fence_queries.cpp
@@ -16,54 +16,43 @@
 
 #include "semaphore_dx_fence_base.h"
 
+struct DXFenceQueryProperties final : DXFenceTestBase
+{
+    using DXFenceTestBase::DXFenceTestBase;
+
+    int Run() override
+    {
+        size_t properties_size_bytes = 0;
+        errcode = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
+                                        0, nullptr, &properties_size_bytes);
+        test_error(errcode, "Could not get semaphore info");
+        std::vector<cl_semaphore_properties_khr> semaphore_properties(
+            properties_size_bytes / sizeof(cl_semaphore_properties_khr));
+        errcode = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
+                                        properties_size_bytes,
+                                        semaphore_properties.data(), nullptr);
+        test_error(errcode, "Could not get semaphore info");
+
+        for (unsigned i = 0; i < semaphore_properties.size() - 1; i++)
+        {
+            if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR
+                && semaphore_properties[i + 1]
+                    == reinterpret_cast<cl_semaphore_properties_khr>(
+                        fence_handle))
+            {
+                return TEST_PASS;
+            }
+        }
+        log_error("Failed to find the dx fence handle type in the semaphore "
+                  "properties");
+        return TEST_FAIL;
+    }
+};
+
 // Confirm that the CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR property is in the
 // properties returned by clGetSemaphoreInfo
 REGISTER_TEST(test_external_semaphores_dx_fence_query_properties)
 {
-    int errcode = CL_SUCCESS;
-    const DirectXWrapper dx_wrapper;
-
-    REQUIRE_EXTENSION("cl_khr_external_semaphore");
-    REQUIRE_EXTENSION("cl_khr_external_semaphore_dx_fence");
-
-    // Obtain pointers to semaphore's API
-    GET_PFN(device, clCreateSemaphoreWithPropertiesKHR);
-    GET_PFN(device, clReleaseSemaphoreKHR);
-    GET_PFN(device, clGetSemaphoreInfoKHR);
-
-    test_error(!is_import_handle_available(device,
-                                           CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR),
-               "Could not find CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR between the "
-               "supported import types");
-
-    // Import D3D12 fence into OpenCL
-    const DirectXFenceWrapper fence(dx_wrapper.getDXDevice());
-    CLDXSemaphoreWrapper semaphore(device, context, dx_wrapper.getDXDevice());
-    test_error(semaphore.createSemaphoreFromFence(*fence),
-               "Could not create semaphore");
-
-    size_t properties_size_bytes = 0;
-    errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR, 0,
-                                    nullptr, &properties_size_bytes);
-    test_error(errcode, "Could not get semaphore info");
-    std::vector<cl_semaphore_properties_khr> semaphore_properties(
-        properties_size_bytes / sizeof(cl_semaphore_properties_khr));
-    errcode = clGetSemaphoreInfoKHR(*semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
-                                    properties_size_bytes,
-                                    semaphore_properties.data(), nullptr);
-    test_error(errcode, "Could not get semaphore info");
-
-    for (unsigned i = 0; i < semaphore_properties.size() - 1; i++)
-    {
-        if (semaphore_properties[i] == CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR
-            && semaphore_properties[i + 1]
-                == reinterpret_cast<cl_semaphore_properties_khr>(
-                    semaphore.getHandle()))
-        {
-            return TEST_PASS;
-        }
-    }
-    log_error(
-        "Failed to find the dx fence handle type in the semaphore properties");
-    return TEST_FAIL;
+    return MakeAndRunTest<DXFenceQueryProperties>(device, context, queue,
+                                                  num_elements);
 }
\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
index 682ada5f..6f4ac812 100644
--- a/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
@@ -9,6 +9,7 @@ set(${MODULE_NAME}_SOURCES
          test_semaphores_negative_create.cpp
          test_semaphores_cross_queue.cpp
          test_semaphores_queries.cpp
+         test_semaphores_payload.cpp
          semaphore_base.h
 )
 
diff --git a/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h b/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h
index 372bdd1e..828a5e46 100644
--- a/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h
+++ b/test_conformance/extensions/cl_khr_semaphore/semaphore_base.h
@@ -23,6 +23,7 @@
 #include "harness/deviceInfo.h"
 #include "harness/testHarness.h"
 #include "harness/typeWrappers.h"
+#include "harness/extensionHelpers.h"
 
 struct SemaphoreBase
 {
@@ -37,27 +38,15 @@ struct SemaphoreBase
         test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
 
         // If it is supported get the addresses of all the APIs here.
-        // clang-format off
-#define GET_EXTENSION_ADDRESS(FUNC)                                            \
-        FUNC = reinterpret_cast<FUNC##_fn>(                                    \
-            clGetExtensionFunctionAddressForPlatform(platform, #FUNC));        \
-        if (FUNC == nullptr)                                                   \
-        {                                                                      \
-            log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \
-                      " with " #FUNC "\n");                                    \
-            return TEST_FAIL;                                                  \
-        }
-        // clang-format on
+        GET_FUNCTION_EXTENSION_ADDRESS(device,
+                                       clCreateSemaphoreWithPropertiesKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueSignalSemaphoresKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clEnqueueWaitSemaphoresKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clReleaseSemaphoreKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreInfoKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clRetainSemaphoreKHR);
+        GET_FUNCTION_EXTENSION_ADDRESS(device, clGetSemaphoreHandleForTypeKHR);
 
-        GET_EXTENSION_ADDRESS(clCreateSemaphoreWithPropertiesKHR);
-        GET_EXTENSION_ADDRESS(clEnqueueSignalSemaphoresKHR);
-        GET_EXTENSION_ADDRESS(clEnqueueWaitSemaphoresKHR);
-        GET_EXTENSION_ADDRESS(clReleaseSemaphoreKHR);
-        GET_EXTENSION_ADDRESS(clGetSemaphoreInfoKHR);
-        GET_EXTENSION_ADDRESS(clRetainSemaphoreKHR);
-        GET_EXTENSION_ADDRESS(clGetSemaphoreHandleForTypeKHR);
-
-#undef GET_EXTENSION_ADDRESS
         return CL_SUCCESS;
     }
 
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
index ce146b41..f4eaf5d1 100644
--- a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
@@ -14,14 +14,8 @@
 // limitations under the License.
 //
 
-#include <thread>
-
 #include "semaphore_base.h"
 
-#include "semaphore_base.h"
-
-#define FLUSH_DELAY_S 5
-
 namespace {
 
 const char* source = "__kernel void empty() {}";
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores_payload.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_payload.cpp
new file mode 100644
index 00000000..94fb82e0
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_payload.cpp
@@ -0,0 +1,86 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "semaphore_base.h"
+
+namespace {
+
+struct PayloadSemaphore : public SemaphoreTestBase
+{
+    clSemaphoreWrapper sema_sec;
+
+    PayloadSemaphore(cl_device_id device, cl_context context,
+                     cl_command_queue queue, cl_int nelems)
+        : SemaphoreTestBase(device, context, queue, nelems), sema_sec(this)
+    {}
+
+    cl_int Run() override
+    {
+        cl_int err = CL_SUCCESS;
+
+        // Create semaphore
+        cl_semaphore_properties_khr sema_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            0
+        };
+        semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+        sema_sec =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+        {
+            cl_semaphore_payload_khr payload_list[] = { 1, 2 };
+            cl_semaphore_khr semaphores[2] = { semaphore, sema_sec };
+
+            // Signal semaphore
+            err = clEnqueueSignalSemaphoresKHR(
+                queue, 2, semaphores, payload_list, 0, nullptr, nullptr);
+            test_error(err, "Could not signal semaphore");
+        }
+
+        {
+            cl_semaphore_payload_khr payload_list[] = { 3, 4 };
+            cl_semaphore_khr semaphores[2] = { semaphore, sema_sec };
+
+            // Wait semaphore
+            err = clEnqueueWaitSemaphoresKHR(queue, 2, semaphores, payload_list,
+                                             0, nullptr, nullptr);
+            test_error(err, "Could not wait semaphore");
+        }
+
+        // Finish
+        err = clFinish(queue);
+        test_error(err, "Could not finish queue");
+
+        return CL_SUCCESS;
+    }
+};
+
+
+} // anonymous namespace
+
+// Confirm that a valid semaphore payload values list will be ignored if no
+// semaphores in the list of sema_objects require a payload
+REGISTER_TEST_VERSION(semaphores_payload, Version(1, 2))
+{
+    return MakeAndRunTest<PayloadSemaphore>(device, context, queue,
+                                            num_elements);
+}
diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp
index 6f1f51e7..77c6c2eb 100644
--- a/test_conformance/gl/test_images_write_common.cpp
+++ b/test_conformance/gl/test_images_write_common.cpp
@@ -336,9 +336,8 @@ int test_cl_image_write(cl_context context, cl_command_queue queue,
             get_explicit_type_name(*outType), suffix, convert);
 
     programPtr = kernelSource;
-    if (create_single_kernel_helper_with_build_options(
-            context, &program, &kernel, 1, (const char **)&programPtr,
-            "sample_test", ""))
+    if (create_single_kernel_helper(context, &program, &kernel, 1,
+                                    (const char **)&programPtr, "sample_test"))
     {
         return -1;
     }
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
index 8b82b9f9..e1fa5059 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
@@ -629,7 +629,7 @@ int image_from_small_buffer_negative(cl_device_id device, cl_context context,
 
                 clCreateImage(context, flag, &format, &image_desc, nullptr,
                               &err);
-                test_failure_error(err, CL_INVALID_MEM_OBJECT,
+                test_failure_error(err, CL_INVALID_IMAGE_SIZE,
                                    "Unexpected clCreateImage return");
 
                 err = clReleaseMemObject(buffer);
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index a66e6f7e..183edc74 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
@@ -859,7 +859,9 @@ double reference_add(double x, double y)
     __m128 vb = _mm_set_ss((float)b);
     va = _mm_add_ss(va, vb);
     _mm_store_ss((float *)&a, va);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
+    // RISC-V CPUs with default 'f' fp32 extension do not support any way to
+    // enable/disable FTZ mode, subnormals are always handled without flushing.
     // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
     // denorm's to zero. As such, the reference add with FTZ must be emulated in
     // sw.
@@ -876,7 +878,7 @@ double reference_add(double x, double y)
         } ub;
         ub.d = b;
         cl_uint mantA, mantB;
-        cl_ulong addendA, addendB, sum;
+        cl_ulong addendA, addendB;
         int expA = extractf(a, &mantA);
         int expB = extractf(b, &mantB);
         cl_uint signA = ua.u & 0x80000000U;
@@ -972,7 +974,7 @@ double reference_multiply(double x, double y)
     __m128 vb = _mm_set_ss((float)b);
     va = _mm_mul_ss(va, vb);
     _mm_store_ss((float *)&a, va);
-#elif defined(__PPC__)
+#elif defined(__PPC__) || defined(__riscv)
     // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes
     // denorm's to zero. As such, the reference multiply with FTZ must be
     // emulated in sw.
@@ -3351,7 +3353,7 @@ long double reference_cbrtl(long double x)
 
 long double reference_rintl(long double x)
 {
-#if defined(__PPC__)
+#if defined(__PPC__) || defined(__riscv)
     // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
     // mantissa can represent more than LDBL_MANT_DIG binary digits.
     x = rintl(x);
diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
index 6106e0ab..78118697 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
@@ -557,8 +557,9 @@ int TestNonUniformWorkGroup::prepareDevice () {
   if (_testRange & Range::BARRIERS)
     buildOptions += " -D TESTBARRIERS";
 
-  err = create_single_kernel_helper_with_build_options (_context, &_program, &_testKernel, 1,
-    &KERNEL_FUNCTION, "testKernel", buildOptions.c_str());
+  err = create_single_kernel_helper(_context, &_program, &_testKernel, 1,
+                                    &KERNEL_FUNCTION, "testKernel",
+                                    buildOptions.c_str());
   if (err)
   {
     log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__);
@@ -842,8 +843,9 @@ int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) {
   if (testRange & Range::BARRIERS)
     buildOptions += " -D TESTBARRIERS";
 
-  err = create_single_kernel_helper_with_build_options (_context, &program, &testKernel, 1,
-    &KERNEL_FUNCTION, "testKernel", buildOptions.c_str());
+  err = create_single_kernel_helper(_context, &program, &testKernel, 1,
+                                    &KERNEL_FUNCTION, "testKernel",
+                                    buildOptions.c_str());
   if (err)
   {
     log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__);
diff --git a/test_conformance/pipes/test_pipe_info.cpp b/test_conformance/pipes/test_pipe_info.cpp
index 5525a554..4e1c83d9 100644
--- a/test_conformance/pipes/test_pipe_info.cpp
+++ b/test_conformance/pipes/test_pipe_info.cpp
@@ -63,9 +63,9 @@ REGISTER_TEST(pipe_info)
         log_info( " CL_PIPE_MAX_PACKETS passed.\n" );
     }
 
-    err = create_single_kernel_helper_with_build_options(
-        context, &program, &kernel, 1, &pipe_kernel_code, "pipe_kernel",
-        "-cl-std=CL2.0 -cl-kernel-arg-info");
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &pipe_kernel_code, "pipe_kernel",
+                                      "-cl-std=CL2.0 -cl-kernel-arg-info");
     test_error_fail(err, "Error creating program");
 
     cl_kernel_arg_type_qualifier arg_type_qualifier = 0;
diff --git a/test_conformance/printf/test_printf.h b/test_conformance/printf/test_printf.h
index 993a6126..519b9dd9 100644
--- a/test_conformance/printf/test_printf.h
+++ b/test_conformance/printf/test_printf.h
@@ -70,14 +70,15 @@ struct printDataGenParameters
 {
     std::vector<std::string> genericFormats;
     const char* dataRepresentation;
-    const char* vectorFormatFlag;
-    const char* vectorFormatSpecifier;
-    const char* dataType;
-    const char* vectorSize;
-    const char* addrSpaceArgumentTypeQualifier;
-    const char* addrSpaceVariableTypeQualifier;
-    const char* addrSpaceParameter;
-    const char* addrSpacePAdd;
+    const char* vectorFormatFlag = nullptr;
+    const char* vectorFormatSpecifier = nullptr;
+    const char* dataType = nullptr;
+    const char* vectorSize = nullptr;
+    const char* addrSpaceArgumentTypeQualifier = nullptr;
+    const char* addrSpaceVariableTypeQualifier = nullptr;
+    const char* addrSpaceParameter = nullptr;
+    const char* addrSpacePAdd = nullptr;
+    bool allowFallbackTest = false;
 };
 
 // Reference results - filled out at run-time
@@ -111,6 +112,9 @@ struct testCase
                     char*,
                     const size_t);                       //function pointer for generating reference results
     Type dataType;                                       //the data type that will be printed during reference result generation (used for setting rounding mode)
+    bool (*fallbackTestFN)(const char*,
+                           const char*) =
+        nullptr; // function pointer to perform fallback test if required
 };
 
 extern const char* strType[];
diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp
index 83a21fbb..39b877ff 100644
--- a/test_conformance/printf/util_printf.cpp
+++ b/test_conformance/printf/util_printf.cpp
@@ -26,8 +26,11 @@ static void intRefBuilder(printDataGenParameters&, char*, const size_t);
 static void halfRefBuilder(printDataGenParameters&, char* rResult,
                            const size_t);
 static void floatRefBuilder(printDataGenParameters&, char* rResult, const size_t);
+static bool floatRefTest(const char* refResult, const char* analysisBuffer);
 static void doubleRefBuilder(printDataGenParameters&, char* rResult,
                              const size_t);
+static bool doubleRefTest(const char* refResult, const char* analysisBuffer);
+
 static void octalRefBuilder(printDataGenParameters&, char*, const size_t);
 static void unsignedRefBuilder(printDataGenParameters&, char*, const size_t);
 static void hexRefBuilder(printDataGenParameters&, char*, const size_t);
@@ -468,12 +471,12 @@ std::vector<printDataGenParameters> printFloatGenParameters = {
 
     // Double argument representing floating-point,in [-]xh.hhhhpAd style
 
-    { { "%.6a" }, "0.1f" },
+    { { "%.6a" }, "0.5f", 0, 0, 0, 0, 0, 0, 0, 0, true },
 
     //(Minimum)Ten-wide,Double argument representing floating-point,in
     // xh.hhhhpAd style,default(right)-justified
 
-    { { "%10.2a" }, "9990.235f" },
+    { { "%10.2a" }, "1.5f", 0, 0, 0, 0, 0, 0, 0, 0, true },
 
     //(Minimum)Ten-wide,two positions after the decimal,with
     // a blank space inserted before the value, default(right)-justified
@@ -502,8 +505,9 @@ testCase testCaseFloat = {
 
     floatRefBuilder,
 
-    kfloat
+    kfloat,
 
+    floatRefTest
 };
 
 //==============================================
@@ -673,12 +677,12 @@ std::vector<printDataGenParameters> printDoubleGenParameters = {
 
     // Double argument representing floating-point,in [-]xh.hhhhpAd style
 
-    { { "%.6a" }, "0.1" },
+    { { "%.6a" }, "0.5", 0, 0, 0, 0, 0, 0, 0, 0, true },
 
     //(Minimum)Ten-wide,Double argument representing floating-point,in
     // xh.hhhhpAd style,default(right)-justified
 
-    { { "%10.2a" }, "9990.235" },
+    { { "%10.2a" }, "1.5", 0, 0, 0, 0, 0, 0, 0, 0, true },
 };
 
 //---------------------------------------------------------
@@ -697,8 +701,9 @@ testCase testCaseDouble = {
 
     doubleRefBuilder,
 
-    kdouble
+    kdouble,
 
+    doubleRefTest
 };
 
 //==============================================
@@ -1032,6 +1037,9 @@ testCase testCaseChar = {
 
 std::vector<printDataGenParameters> printStringGenParameters = {
 
+    // empty format, no data representation
+    { {""} },
+
     // empty format
     { {""}, "\"foo\"" },
 
@@ -1094,6 +1102,8 @@ std::vector<std::string> correctBufferString = {
 
     "",
 
+    "",
+
     " foo",
 
     "f",
@@ -1752,7 +1762,15 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
         return !std::regex_match(analysisBuffer, nanRegex);
     }
 
-    return strcmp(analysisBuffer, pTestCase->_correctBuffer[testId].c_str());
+    size_t ret =
+        strcmp(analysisBuffer, pTestCase->_correctBuffer[testId].c_str());
+
+    if (ret != 0 && pTestCase->_genParameters[testId].allowFallbackTest
+        && pTestCase->fallbackTestFN)
+        if (pTestCase->fallbackTestFN(
+                analysisBuffer, pTestCase->_correctBuffer[testId].c_str()))
+            return 0;
+    return ret;
 }
 
 static void intRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
@@ -1776,6 +1794,13 @@ static void floatRefBuilder(printDataGenParameters& params, char* refResult, con
              strtof(params.dataRepresentation, NULL));
 }
 
+static bool floatRefTest(const char* refResult, const char* analysisBuffer)
+{
+    float test = strtof(analysisBuffer, NULL);
+    float expected = strtof(refResult, NULL);
+    return test == expected;
+}
+
 static void doubleRefBuilder(printDataGenParameters& params, char* refResult,
                              const size_t refSize)
 {
@@ -1783,6 +1808,13 @@ static void doubleRefBuilder(printDataGenParameters& params, char* refResult,
              strtod(params.dataRepresentation, NULL));
 }
 
+static bool doubleRefTest(const char* refResult, const char* analysisBuffer)
+{
+    double test = strtod(analysisBuffer, NULL);
+    double expected = strtod(refResult, NULL);
+    return test == expected;
+}
+
 static void octalRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
 {
     const unsigned long int data = strtoul(params.dataRepresentation, NULL, 10);
diff --git a/test_conformance/profiling/profiling_timebase.cpp b/test_conformance/profiling/profiling_timebase.cpp
index f26a9d89..23afcbb8 100644
--- a/test_conformance/profiling/profiling_timebase.cpp
+++ b/test_conformance/profiling/profiling_timebase.cpp
@@ -18,7 +18,7 @@
 
 const char *kernelCode = "__kernel void kernel_empty(){}";
 
-REGISTER_TEST(profiling_timebase)
+REGISTER_TEST_VERSION(profiling_timebase, Version(2, 1))
 {
     Version version = get_device_cl_version(device);
     cl_platform_id platform = getPlatformFromDevice(device);
diff --git a/test_conformance/vectors/structs.cpp b/test_conformance/vectors/structs.cpp
index c0757017..48a6473b 100644
--- a/test_conformance/vectors/structs.cpp
+++ b/test_conformance/vectors/structs.cpp
@@ -341,9 +341,9 @@ int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState,
     {
         if ((targetArr[i]) % minAlign != (cl_uint)0)
         {
-            vlog_error(
-                "Error %zu (of %zu).  Expected a multiple of %zx, got %x\n", i,
-                pClState->m_numThreads, minAlign, targetArr[i]);
+            vlog_error("Error in work-item %zu (of %zu).  Expected a multiple "
+                       "of 0x%zx, got 0x%x\n",
+                       i, pClState->m_numThreads, minAlign, targetArr[i]);
             return -1;
         }
     }
@@ -371,8 +371,9 @@ int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState,
     {
         if (targetArr[i] != targetSize)
         {
-            vlog_error("Error %zu (of %zu).  Expected %d, got %d\n", i,
-                       pClState->m_numThreads, targetSize, targetArr[i]);
+            vlog_error(
+                "Error in work-item %zu (of %zu).  Expected %d, got %d\n", i,
+                pClState->m_numThreads, targetSize, targetArr[i]);
             return -1;
         }
     }
@@ -390,10 +391,11 @@ int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState,
     {
         if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0)
         {
-            vlog_error(
-                "Error %zu (of %zu).  Expected %zu more than a multiple of "
-                "%zu, got %d \n",
-                i, pClState->m_numThreads, beforeSize, totSize, targetArr[i]);
+            vlog_error("Error in work-item %zu (of %zu).  Expected %zu more "
+                       "than a multiple of "
+                       "%zu, got %d \n",
+                       i, pClState->m_numThreads, beforeSize, totSize,
+                       targetArr[i]);
             return -1;
         }
     }
diff --git a/test_conformance/vulkan/shaders/buffer.comp b/test_conformance/vulkan/shaders/buffer.comp
index 3e4eae55..3d059ce5 100644
--- a/test_conformance/vulkan/shaders/buffer.comp
+++ b/test_conformance/vulkan/shaders/buffer.comp
@@ -1,28 +1,28 @@
-#version 450
-#extension GL_ARB_separate_shader_objects : enable
-#extension GL_EXT_shader_explicit_arithmetic_types_int8    : enable
-#extension GL_EXT_shader_explicit_arithmetic_types_int32   : enable
-
-#define MAX_BUFFERS 5
-
-layout(binding = 0) buffer Params
-{
-  uint32_t numBuffers;
-  uint32_t bufferSize;
-  uint32_t interBufferOffset;
-};
-layout(binding = 1) buffer Buffer
-{
-  uint8_t ptr[];
-} bufferPtrList[MAX_BUFFERS];
-layout(local_size_x = 128) in;
-void main() {
-    for (uint32_t bufIdx = 0; bufIdx < numBuffers; bufIdx++) {
-        uint32_t ptrIdx = gl_GlobalInvocationID.x;
-        uint32_t limit = bufferSize;
-        while (ptrIdx < limit) {
-            bufferPtrList[bufIdx].ptr[ptrIdx]++;
-            ptrIdx += (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
-        }
-    }
+#version 450
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8    : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int32   : enable
+
+layout(constant_id = 0) const uint MAX_BUFFERS = 5;
+
+layout(binding = 0) buffer Params
+{
+  uint32_t numBuffers;
+  uint32_t bufferSize;
+  uint32_t interBufferOffset;
+};
+layout(binding = 1) buffer Buffer
+{
+  uint8_t ptr[];
+} bufferPtrList[MAX_BUFFERS];
+layout(local_size_x = 128) in;
+void main() {
+    for (uint32_t bufIdx = 0; bufIdx < numBuffers; bufIdx++) {
+        uint32_t ptrIdx = gl_GlobalInvocationID.x;
+        uint32_t limit = bufferSize;
+        while (ptrIdx < limit) {
+            bufferPtrList[bufIdx].ptr[ptrIdx]++;
+            ptrIdx += (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
+        }
+    }
 }
\ No newline at end of file
diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
index bd19987b..0d09a748 100644
--- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp
+++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
@@ -26,6 +26,7 @@
 #endif
 
 #include <assert.h>
+#include <memory>
 #include <vector>
 #include <iostream>
 #include <string.h>
@@ -79,9 +80,9 @@ struct ConsistencyExternalBufferTest : public VulkanTestBase
 
         VulkanBufferList vkBufferList(1, *vkDevice, bufferSize,
                                       vkExternalMemoryHandleType);
-        VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(
+        std::unique_ptr<VulkanDeviceMemory> vkDeviceMem(new VulkanDeviceMemory(
             *vkDevice, vkBufferList[0], memoryTypeList[0],
-            vkExternalMemoryHandleType);
+            vkExternalMemoryHandleType));
 
         vkDeviceMem->bindBuffer(vkBufferList[0], 0);
 
diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
index e39641f1..9e9a7fcb 100644
--- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
@@ -128,12 +128,24 @@ int run_test_with_two_queue(
     vkDescriptorSetLayoutBindingList.addBinding(
         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
     vkDescriptorSetLayoutBindingList.addBinding(
-        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
+        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
     VulkanDescriptorSetLayout vkDescriptorSetLayout(
         vkDevice, vkDescriptorSetLayoutBindingList);
     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
-    VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
-                                            vkBufferShaderModule);
+
+    VkSpecializationMapEntry entry;
+    entry.constantID = 0;
+    entry.offset = 0;
+    entry.size = sizeof(uint32_t);
+
+    VkSpecializationInfo spec;
+    spec.mapEntryCount = 1;
+    spec.pMapEntries = &entry;
+    spec.dataSize = sizeof(uint32_t);
+    spec.pData = &numBuffers;
+
+    VulkanComputePipeline vkComputePipeline(
+        vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
 
     VulkanDescriptorPool vkDescriptorPool(vkDevice,
                                           vkDescriptorSetLayoutBindingList);
@@ -461,12 +473,24 @@ int run_test_with_one_queue(
     vkDescriptorSetLayoutBindingList.addBinding(
         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
     vkDescriptorSetLayoutBindingList.addBinding(
-        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
+        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
     VulkanDescriptorSetLayout vkDescriptorSetLayout(
         vkDevice, vkDescriptorSetLayoutBindingList);
     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
-    VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
-                                            vkBufferShaderModule);
+
+    VkSpecializationMapEntry entry;
+    entry.constantID = 0;
+    entry.offset = 0;
+    entry.size = sizeof(uint32_t);
+
+    VkSpecializationInfo spec;
+    spec.mapEntryCount = 1;
+    spec.pMapEntries = &entry;
+    spec.dataSize = sizeof(uint32_t);
+    spec.pData = &numBuffers;
+
+    VulkanComputePipeline vkComputePipeline(
+        vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
 
     VulkanDescriptorPool vkDescriptorPool(vkDevice,
                                           vkDescriptorSetLayoutBindingList);
@@ -764,12 +788,24 @@ int run_test_with_multi_import_same_ctx(
     vkDescriptorSetLayoutBindingList.addBinding(
         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
     vkDescriptorSetLayoutBindingList.addBinding(
-        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
+        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
     VulkanDescriptorSetLayout vkDescriptorSetLayout(
         vkDevice, vkDescriptorSetLayoutBindingList);
     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
-    VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
-                                            vkBufferShaderModule);
+
+    VkSpecializationMapEntry entry;
+    entry.constantID = 0;
+    entry.offset = 0;
+    entry.size = sizeof(uint32_t);
+
+    VkSpecializationInfo spec;
+    spec.mapEntryCount = 1;
+    spec.pMapEntries = &entry;
+    spec.dataSize = sizeof(uint32_t);
+    spec.pData = &numBuffers;
+
+    VulkanComputePipeline vkComputePipeline(
+        vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
 
     VulkanDescriptorPool vkDescriptorPool(vkDevice,
                                           vkDescriptorSetLayoutBindingList);
@@ -1103,12 +1139,24 @@ int run_test_with_multi_import_diff_ctx(
     vkDescriptorSetLayoutBindingList.addBinding(
         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
     vkDescriptorSetLayoutBindingList.addBinding(
-        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
+        1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers);
     VulkanDescriptorSetLayout vkDescriptorSetLayout(
         vkDevice, vkDescriptorSetLayoutBindingList);
     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
-    VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
-                                            vkBufferShaderModule);
+
+    VkSpecializationMapEntry entry;
+    entry.constantID = 0;
+    entry.offset = 0;
+    entry.size = sizeof(uint32_t);
+
+    VkSpecializationInfo spec;
+    spec.mapEntryCount = 1;
+    spec.pMapEntries = &entry;
+    spec.dataSize = sizeof(uint32_t);
+    spec.pData = &numBuffers;
+
+    VulkanComputePipeline vkComputePipeline(
+        vkDevice, vkPipelineLayout, vkBufferShaderModule, "main", &spec);
 
     VulkanDescriptorPool vkDescriptorPool(vkDevice,
                                           vkDescriptorSetLayoutBindingList);
@@ -1586,7 +1634,7 @@ struct BufferTestBase : public VulkanTestBase
 {
     BufferTestBase(cl_device_id device, cl_context context,
                    cl_command_queue queue, cl_int nelems)
-        : VulkanTestBase(device, context, queue, nelems)
+        : VulkanTestBase(device, context, queue, nelems, true)
     {}
 
     int test_buffer_common(bool use_fence)
diff --git a/test_conformance/vulkan/vulkan_test_base.h b/test_conformance/vulkan/vulkan_test_base.h
index 8c7b07cc..1e3f8e2f 100644
--- a/test_conformance/vulkan/vulkan_test_base.h
+++ b/test_conformance/vulkan/vulkan_test_base.h
@@ -37,11 +37,13 @@ inline void params_reset()
 struct VulkanTestBase
 {
     VulkanTestBase(cl_device_id device, cl_context context,
-                   cl_command_queue queue, cl_int nelems)
+                   cl_command_queue queue, cl_int nelems,
+                   bool useShaderInt8 = false)
         : device(device), context(context), num_elems(nelems)
     {
         vkDevice.reset(new VulkanDevice(
-            getAssociatedVulkanPhysicalDevice(device, useValidationLayers)));
+            getAssociatedVulkanPhysicalDevice(device, useValidationLayers),
+            getDefaultVulkanQueueFamilyToQueueCountMap(), useShaderInt8));
 
         cl_platform_id platform;
         cl_int error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,